Package Bio :: Package Data :: Module IUPACData
[hide private]
[frames | no frames]

Source Code for Module Bio.Data.IUPACData

  1  # Information about the IUPAC alphabets 
  2   
  3  protein_letters = "ACDEFGHIKLMNPQRSTVWY" 
  4  extended_protein_letters = "ACDEFGHIKLMNPQRSTVWYBXZJUO" 
  5  #   B = "Asx";  aspartic acid or asparagine (D or N) 
  6  #   X = "Xxx";  unknown or 'other' amino acid 
  7  #   Z = "Glx";  glutamic acid or glutamine (E or Q) 
  8  #   J = "Xle";  leucine or isoleucine (L or I, used in mass-spec) 
  9  #   U = "Sec";  selenocysteine 
 10  #   O = "Pyl";  pyrrolysine 
 11  ambiguous_dna_letters = "GATCRYWSMKHBVDN" 
 12  unambiguous_dna_letters = "GATC" 
 13  ambiguous_rna_letters = "GAUCRYWSMKHBVDN" 
 14  unambiguous_rna_letters = "GAUC" 
 15   
 16  #   B == 5-bromouridine 
 17  #   D == 5,6-dihydrouridine 
 18  #   S == thiouridine 
 19  #   W == wyosine 
 20  extended_dna_letters = "GATCBDSW" 
 21   
 22  # are there extended forms? 
 23  #extended_rna_letters = "GAUCBDSW" 
 24   
 25  ambiguous_dna_values = { 
 26      "A": "A", 
 27      "C": "C", 
 28      "G": "G", 
 29      "T": "T", 
 30      "M": "AC", 
 31      "R": "AG", 
 32      "W": "AT", 
 33      "S": "CG", 
 34      "Y": "CT", 
 35      "K": "GT", 
 36      "V": "ACG", 
 37      "H": "ACT", 
 38      "D": "AGT", 
 39      "B": "CGT", 
 40      "X": "GATC", 
 41      "N": "GATC", 
 42      } 
 43  ambiguous_rna_values = { 
 44      "A": "A", 
 45      "C": "C", 
 46      "G": "G", 
 47      "U": "U", 
 48      "M": "AC", 
 49      "R": "AG", 
 50      "W": "AU", 
 51      "S": "CG", 
 52      "Y": "CU", 
 53      "K": "GU", 
 54      "V": "ACG", 
 55      "H": "ACU", 
 56      "D": "AGU", 
 57      "B": "CGU", 
 58      "X": "GAUC", 
 59      "N": "GAUC", 
 60      } 
 61   
 62  ambiguous_dna_complement = { 
 63      "A": "T", 
 64      "C": "G", 
 65      "G": "C", 
 66      "T": "A", 
 67      "M": "K", 
 68      "R": "Y", 
 69      "W": "W", 
 70      "S": "S", 
 71      "Y": "R", 
 72      "K": "M", 
 73      "V": "B", 
 74      "H": "D", 
 75      "D": "H", 
 76      "B": "V", 
 77      "X": "X", 
 78      "N": "N", 
 79      } 
 80   
 81  ambiguous_rna_complement = { 
 82      "A": "U", 
 83      "C": "G", 
 84      "G": "C", 
 85      "U": "A", 
 86      "M": "K", 
 87      "R": "Y", 
 88      "W": "W", 
 89      "S": "S", 
 90      "Y": "R", 
 91      "K": "M", 
 92      "V": "B", 
 93      "H": "D", 
 94      "D": "H", 
 95      "B": "V", 
 96      "X": "X", 
 97      "N": "N", 
 98      } 
 99   
100   
101 -def _make_ranges(mydict):
102 d = {} 103 for key, value in mydict.iteritems(): 104 d[key] = (value, value) 105 return d
# From bioperl's SeqStats.pm
unambiguous_dna_weights = {
    "A": 347.,
    "C": 323.,
    "G": 363.,
    "T": 322.,
    }
# Same table in (low, high) form; both ends are the single known weight.
unambiguous_dna_weight_ranges = _make_ranges(unambiguous_dna_weights)

# A, C and G are derived from the DNA weights; U has its own value.
unambiguous_rna_weights = {
    "A": unambiguous_dna_weights["A"] + 16.,  # 16 for the oxygen
    "C": unambiguous_dna_weights["C"] + 16.,
    "G": unambiguous_dna_weights["G"] + 16.,
    "U": 340.,
    }
unambiguous_rna_weight_ranges = _make_ranges(unambiguous_rna_weights)
124 -def _make_ambiguous_ranges(mydict, weight_table):
125 range_d = {} 126 avg_d = {} 127 for letter, values in mydict.iteritems(): 128 #Following line is a quick hack to skip undefined weights for U and O 129 if len(values)==1 and values[0] not in weight_table : continue 130 weights = map(weight_table.get, values) 131 range_d[letter] = (min(weights), max(weights)) 132 total_w = 0.0 133 for w in weights: 134 total_w = total_w + w 135 avg_d[letter] = total_w / len(weights) 136 return range_d, avg_d
# (low, high) weight ranges and average weights for every nucleotide
# letter code, derived from the unambiguous weight tables.
ambiguous_dna_weight_ranges, avg_ambiguous_dna_weights = \
    _make_ambiguous_ranges(ambiguous_dna_values,
                           unambiguous_dna_weights)

ambiguous_rna_weight_ranges, avg_ambiguous_rna_weights = \
    _make_ambiguous_ranges(ambiguous_rna_values,
                           unambiguous_rna_weights)

# Weights of the standard amino acids.  No weights are recorded for
# "O" and "U" (see the commented-out entries).
protein_weights = {
    "A": 89.09,
    "C": 121.16,
    "D": 133.10,
    "E": 147.13,
    "F": 165.19,
    "G": 75.07,
    "H": 155.16,
    "I": 131.18,
    "K": 146.19,
    "L": 131.18,
    "M": 149.21,
    "N": 132.12,
    #"O": 0.0, # Needs to be recorded!
    "P": 115.13,
    "Q": 146.15,
    "R": 174.20,
    "S": 105.09,
    "T": 119.12,
    #"U": 168.05, # To be confirmed
    "V": 117.15,
    "W": 204.23,
    "Y": 181.19
    }

# Maps each extended protein letter to the string of letters it can
# stand for.  "O" and "U" map to themselves and have no entry in
# protein_weights, so _make_ambiguous_ranges skips them below.
extended_protein_values = {
    "A": "A",
    "B": "ND",
    "C": "C",
    "D": "D",
    "E": "E",
    "F": "F",
    "G": "G",
    "H": "H",
    "I": "I",
    "J": "IL",
    "K": "K",
    "L": "L",
    "M": "M",
    "N": "N",
    "O": "O",
    "P": "P",
    "Q": "Q",
    "R": "R",
    "S": "S",
    "T": "T",
    "U": "U",
    "V": "V",
    "W": "W",
    "X": "ACDEFGHIKLMNPQRSTVWY",
    #TODO - Include U and O in the possible values of X?
    #This could alter the extended_protein_weight_ranges ...
    "Y": "Y",
    "Z": "QE",
    }

protein_weight_ranges = _make_ranges(protein_weights)

extended_protein_weight_ranges, avg_extended_protein_weights = \
    _make_ambiguous_ranges(extended_protein_values,
                           protein_weights)