Package Bio :: Module SeqFeature
[hide private]
[frames] | no frames]

Source Code for Module Bio.SeqFeature

  1  # Copyright 2000-2003 Jeff Chang. 
  2  # Copyright 2001-2008 Brad Chapman. 
  3  # Copyright 2005-2010 by Peter Cock. 
  4  # Copyright 2006-2009 Michiel de Hoon. 
  5  # All rights reserved. 
  6  # This code is part of the Biopython distribution and governed by its 
  7  # license.  Please see the LICENSE file that should have been included 
  8  # as part of this package. 
  9  """Represent a Sequence Feature holding info about a part of a sequence. 
 10   
 11  This is heavily modeled after the Biocorba SeqFeature objects, and 
 12  may be pretty biased towards GenBank stuff since I'm writing it 
 13  for the GenBank parser output... 
 14   
 15  What's here: 
 16   
 17  Base class to hold a Feature. 
 18  ---------------------------- 
 19  classes: 
 20  o SeqFeature 
 21   
 22  Hold information about a Reference. 
 23  ---------------------------------- 
 24   
 25  This is an attempt to create a General class to hold Reference type 
 26  information. 
 27   
 28  classes: 
 29  o Reference 
 30   
 31  Specify locations of a feature on a Sequence. 
 32  --------------------------------------------- 
 33   
 34  This aims to handle, in Ewan's words, 'the dreaded fuzziness issue' in 
 35  much the same way as Biocorba. This has the advantages of allowing us 
 36  to handle fuzzy stuff in case anyone needs it, and also be compatible 
 37  with Biocorba. 
 38   
 39  classes: 
 40  o FeatureLocation - Specify the start and end location of a feature. 
 41   
 42  o ExactPosition - Specify the position as being exact. 
 43  o WithinPosition - Specify a position occurring within some range. 
 44  o BetweenPosition - Specify a position occurring between a range (OBSOLETE?). 
 45  o BeforePosition - Specify the position as being found before some base. 
 46  o AfterPosition - Specify the position as being found after some base. 
 47  o OneOfPosition - Specify a position where the location can be multiple positions. 
 48  """ 
 49   
 50  from Bio.Seq import MutableSeq, reverse_complement 
 51   
class SeqFeature(object):
    """Represent a Sequence Feature on an object.

    Attributes:
    o location - the location of the feature on the sequence (FeatureLocation)
    o type - the specified type of the feature (ie. CDS, exon, repeat...)
    o location_operator - a string specifying how this SeqFeature may
    be related to others. For example, in the example GenBank feature
    shown below, the location_operator would be "join"
    o strand - A value specifying on which strand (of a DNA sequence, for
    instance) the feature deals with. 1 indicates the plus strand, -1
    indicates the minus strand, 0 indicates both strands, and None indicates
    that strand doesn't apply (ie. for proteins) or is not known.
    o id - A string identifier for the feature.
    o ref - A reference to another sequence. This could be an accession
    number for some different sequence.
    o ref_db - A different database for the reference accession number.
    o qualifiers - A dictionary of qualifiers on the feature. These are
    analogous to the qualifiers from a GenBank feature table. The keys of
    the dictionary are qualifier names, the values are the qualifier
    values.
    o sub_features - Additional SeqFeatures which fall under this 'parent'
    feature. For instance, if we have something like:

    CDS join(1..10,30..40,50..60)

    Then the top level feature would be of type 'CDS' from 1 to 60 (actually 0
    to 60 in Python counting) with location_operator='join', and the three sub-
    features would also be of type 'CDS', and would be from 1 to 10, 30 to
    40 and 50 to 60, respectively (although actually using Python counting).

    To get the nucleotide sequence for this CDS, you would need to take the
    parent sequence and do seq[0:10]+seq[29:40]+seq[49:60] (Python counting).
    Things are more complicated with strands and fuzzy positions. To save you
    dealing with all these special cases, the SeqFeature provides an extract
    method to do this for you.
    """

    def __init__(self, location = None, type = '', location_operator = '',
                 strand = None, id = "<unknown id>",
                 qualifiers = None, sub_features = None,
                 ref = None, ref_db = None):
        """Initialize a SeqFeature on a Sequence.

        location can either be a FeatureLocation (with strand argument also
        given if required), or None.

        e.g. With no strand, on the forward strand, and on the reverse strand:

        >>> from Bio.SeqFeature import SeqFeature, FeatureLocation
        >>> f1 = SeqFeature(FeatureLocation(5,10), type="domain")
        >>> f2 = SeqFeature(FeatureLocation(7,110), strand=1, type="CDS")
        >>> f3 = SeqFeature(FeatureLocation(9,108), strand=-1, type="CDS")

        An invalid strand will trigger an exception:

        >>> f4 = SeqFeature(FeatureLocation(50,60), strand=2)
        Traceback (most recent call last):
           ...
        ValueError: Strand should be +1, -1, 0 or None, not 2

        For exact start/end positions, an integer can be used (as shown above)
        as shorthand for the ExactPosition object. For non-exact locations, the
        FeatureLocation must be specified via the appropriate position objects.

        Raises ValueError for an invalid strand, and TypeError if location is
        neither None nor a FeatureLocation.
        """
        if strand not in [-1, 0, 1, None]:
            raise ValueError("Strand should be +1, -1, 0 or None, not %s" \
                             % repr(strand))
        if location is not None and not isinstance(location, FeatureLocation):
            raise TypeError("FeatureLocation (or None) required for the location")
        self.location = location

        self.type = type
        self.location_operator = location_operator
        self.strand = strand
        self.id = id
        # Fresh containers per instance; never share a mutable default.
        if qualifiers is None:
            qualifiers = {}
        self.qualifiers = qualifiers
        if sub_features is None:
            sub_features = []
        self.sub_features = sub_features
        self.ref = ref
        self.ref_db = ref_db

    def __repr__(self):
        """A string representation of the record for debugging."""
        answer = "%s(%s" % (self.__class__.__name__, repr(self.location))
        if self.type:
            answer += ", type=%s" % repr(self.type)
        if self.location_operator:
            answer += ", location_operator=%s" % repr(self.location_operator)
        if self.strand:
            answer += ", strand=%s" % repr(self.strand)
        if self.id and self.id != "<unknown id>":
            answer += ", id=%s" % repr(self.id)
        if self.ref:
            answer += ", ref=%s" % repr(self.ref)
        if self.ref_db:
            answer += ", ref_db=%s" % repr(self.ref_db)
        answer += ")"
        return answer

    def __str__(self):
        """A readable summary of the feature intended to be printed to screen.
        """
        pieces = ["type: %s\n" % self.type,
                  "location: %s\n" % self.location]
        if self.id and self.id != "<unknown id>":
            pieces.append("id: %s\n" % self.id)
        if self.ref or self.ref_db:
            pieces.append("ref: %s:%s\n" % (self.ref, self.ref_db))
        pieces.append("strand: %s\n" % self.strand)
        pieces.append("qualifiers: \n")
        for qual_key in sorted(self.qualifiers):
            pieces.append(" Key: %s, Value: %s\n" % (qual_key,
                                                     self.qualifiers[qual_key]))
        if len(self.sub_features) != 0:
            pieces.append("Sub-Features\n")
            for sub_feature in self.sub_features:
                pieces.append("%s\n" % sub_feature)
        return "".join(pieces)

    def _shift(self, offset):
        """Returns a copy of the feature with its location shifted (PRIVATE).

        The annotation qualifiers are copied (shallow copy of the dictionary),
        and every sub-feature is shifted by the same offset.
        """
        return SeqFeature(location = self.location._shift(offset),
                          type = self.type,
                          location_operator = self.location_operator,
                          strand = self.strand,
                          id = self.id,
                          # dict(mapping) copies on both Python 2 and 3,
                          # unlike the Python 2 only iteritems() method.
                          qualifiers = dict(self.qualifiers),
                          sub_features = [f._shift(offset) for f in self.sub_features],
                          ref = self.ref,
                          ref_db = self.ref_db)

    def extract(self, parent_sequence):
        """Extract feature sequence from the supplied parent sequence.

        The parent_sequence can be a Seq like object or a string, and will
        generally return an object of the same type. The exception to this is
        a MutableSeq as the parent sequence will return a Seq object.

        This should cope with complex locations including complements, joins
        and fuzzy positions. Even mixed strand features should work! This
        also covers features on protein sequences (e.g. domains), although
        here reverse strand features are not permitted.

        >>> from Bio.Seq import Seq
        >>> from Bio.Alphabet import generic_protein
        >>> from Bio.SeqFeature import SeqFeature, FeatureLocation
        >>> seq = Seq("MKQHKAMIVALIVICITAVVAAL", generic_protein)
        >>> f = SeqFeature(FeatureLocation(8,15), type="domain")
        >>> f.extract(seq)
        Seq('VALIVIC', ProteinAlphabet())

        Note - currently only sub-features of type "join" are supported;
        any other location_operator on a feature with sub-features raises
        ValueError.
        """
        if isinstance(parent_sequence, MutableSeq):
            #This avoids complications with reverse complements
            #(the MutableSeq reverse complement acts in situ)
            parent_sequence = parent_sequence.toseq()
        if self.sub_features:
            if self.location_operator != "join":
                raise ValueError(self.location_operator)
            if self.strand == -1:
                #This is a special case given how the GenBank parser works.
                #Must avoid doing the reverse complement twice.
                parts = []
                for f_sub in self.sub_features:
                    assert f_sub.strand == -1
                    parts.append(parent_sequence[f_sub.location.nofuzzy_start:\
                                                 f_sub.location.nofuzzy_end])
            else:
                #This copes with mixed strand features:
                parts = [f_sub.extract(parent_sequence) \
                         for f_sub in self.sub_features]
            #We use addition rather than a join to avoid alphabet issues:
            f_seq = parts[0]
            for part in parts[1:]:
                f_seq += part
        else:
            f_seq = parent_sequence[self.location.nofuzzy_start:\
                                    self.location.nofuzzy_end]
        if self.strand == -1:
            #TODO - MutableSeq?
            try:
                f_seq = f_seq.reverse_complement()
            except AttributeError:
                # Plain strings have no method; use the module level function.
                assert isinstance(f_seq, str)
                f_seq = reverse_complement(f_seq)
        return f_seq

    def __nonzero__(self):
        """Returns True regardless of the length of the feature.

        This behaviour is for backwards compatibility, since until the
        __len__ method was added, a SeqFeature always evaluated as True.

        Note that in comparison, Seq objects, strings, lists, etc, will all
        evaluate to False if they have length zero.

        WARNING: The SeqFeature may in future evaluate to False when its
        length is zero (in order to better match normal python behaviour)!
        """
        return True

    # Python 3 looks up __bool__ (not __nonzero__); without this alias the
    # truth value would silently fall back to __len__.
    __bool__ = __nonzero__

    def __len__(self):
        """Returns the length of the region described by a feature.

        >>> from Bio.Seq import Seq
        >>> from Bio.Alphabet import generic_protein
        >>> from Bio.SeqFeature import SeqFeature, FeatureLocation
        >>> seq = Seq("MKQHKAMIVALIVICITAVVAAL", generic_protein)
        >>> f = SeqFeature(FeatureLocation(8,15), type="domain")
        >>> len(f)
        7
        >>> f.extract(seq)
        Seq('VALIVIC', ProteinAlphabet())
        >>> len(f.extract(seq))
        7

        For simple features without subfeatures this is the same as the region
        spanned (end position minus start position). However, for a feature
        defined by combining several subfeatures (e.g. a CDS as the join of
        several exons) the gaps are not counted (e.g. introns). This ensures
        that len(f) == len(f.extract(parent_seq)), and also makes sure things
        work properly with features wrapping the origin etc.
        """
        if self.sub_features:
            return sum(len(f) for f in self.sub_features)
        else:
            return len(self.location)

    def __iter__(self):
        """Iterate over the parent positions within the feature.

        The iteration order is strand aware, and can be thought of as moving
        along the feature using the parent sequence coordinates:

        >>> from Bio.SeqFeature import SeqFeature, FeatureLocation
        >>> f = SeqFeature(FeatureLocation(5,10), type="domain", strand=-1)
        >>> len(f)
        5
        >>> for i in f: print i
        9
        8
        7
        6
        5
        >>> list(f)
        [9, 8, 7, 6, 5]

        For a feature with sub-features, each sub-feature is iterated in its
        own strand aware order; on the reverse strand the sub-features are
        additionally visited last to first, so the positions are yielded in
        the same order as the letters returned by the extract method.
        """
        if self.sub_features:
            if self.strand == -1:
                ordered = self.sub_features[::-1]
            else:
                ordered = self.sub_features
            for f_sub in ordered:
                # Iterate the sub-feature itself (not its bare location) so
                # that its strand is honoured.
                for i in f_sub:
                    yield i
        elif self.strand == -1:
            for i in range(self.location.nofuzzy_end-1,
                           self.location.nofuzzy_start-1, -1):
                yield i
        else:
            for i in range(self.location.nofuzzy_start,
                           self.location.nofuzzy_end):
                yield i

    def __contains__(self, value):
        """Check if an integer position is within the feature.

        >>> from Bio.SeqFeature import SeqFeature, FeatureLocation
        >>> f = SeqFeature(FeatureLocation(5,10), type="domain", strand=-1)
        >>> len(f)
        5
        >>> [i for i in range(15) if i in f]
        [5, 6, 7, 8, 9]

        For example, to see which features include a SNP position, you could
        use this:

        >>> from Bio import SeqIO
        >>> record = SeqIO.read("GenBank/NC_000932.gb", "gb")
        >>> for f in record.features:
        ...     if 1750 in f:
        ...         print f.type, f.strand, f.location
        source 1 [0:154478]
        gene -1 [1716:4347]
        tRNA -1 [1716:4347]

        Note that for a feature defined as a join of several subfeatures (e.g.
        the union of several exons) the gaps are not checked (e.g. introns).
        In this example, the tRNA location is defined in the GenBank file as
        complement(join(1717..1751,4311..4347)), so that position 1760 falls
        in the gap:

        >>> for f in record.features:
        ...     if 1760 in f:
        ...         print f.type, f.strand, f.location
        source 1 [0:154478]
        gene -1 [1716:4347]

        Note that additional care may be required with fuzzy locations, for
        example just before a BeforePosition:

        >>> from Bio.SeqFeature import SeqFeature, FeatureLocation
        >>> from Bio.SeqFeature import BeforePosition
        >>> f = SeqFeature(FeatureLocation(BeforePosition(3),8), type="domain")
        >>> len(f)
        5
        >>> [i for i in range(10) if i in f]
        [3, 4, 5, 6, 7]

        Raises ValueError if value is not an integer.
        """
        if not isinstance(value, int):
            raise ValueError("Currently we only support checking for integer "
                             "positions being within a SeqFeature.")
        if self.sub_features:
            for f in self.sub_features:
                if value in f:
                    return True
            return False
        else:
            return value in self.location
377 378 # --- References 379 380 # TODO -- Will this hold PubMed and Medline information decently?
class Reference(object):
    """Represent a Generic Reference object.

    Attributes:
    o location - A list of Location objects specifying regions of
    the sequence that the references correspond to. If no locations are
    specified, the entire sequence is assumed.
    o authors - A big old string, or a list split by author, of authors
    for the reference.
    o consrtm - A string, empty by default; only shown by str() when set.
    o title - The title of the reference.
    o journal - Journal the reference was published in.
    o medline_id - A medline reference for the article.
    o pubmed_id - A pubmed reference for the article.
    o comment - A place to stick any comments about the reference.
    """

    def __init__(self):
        """Create an empty reference; callers fill in the attributes."""
        self.location = []
        for text_field in ("authors", "consrtm", "title", "journal",
                           "medline_id", "pubmed_id", "comment"):
            setattr(self, text_field, '')

    def __str__(self):
        """Output an informative string for debugging.
        """
        rows = ["location: %s\n" % single_location
                for single_location in self.location]
        rows.append("authors: %s\n" % self.authors)
        # The consortium line is the only optional one.
        if self.consrtm:
            rows.append("consrtm: %s\n" % self.consrtm)
        rows.append("title: %s\n" % self.title)
        rows.append("journal: %s\n" % self.journal)
        rows.append("medline id: %s\n" % self.medline_id)
        rows.append("pubmed id: %s\n" % self.pubmed_id)
        rows.append("comment: %s\n" % self.comment)
        return "".join(rows)

    def __repr__(self):
        """Short debugging representation showing only the title."""
        #TODO - Update this if __init__ later accepts values
        class_name = self.__class__.__name__
        return "%s(title=%s, ...)" % (class_name, repr(self.title))
426 427 # --- Handling feature locations 428
class FeatureLocation(object):
    """Specify the location of a feature along a sequence.

    This attempts to deal with fuzziness of position ends, but also
    make it easy to get the start and end in the 'normal' case (no
    fuzziness).

    You should access the start and end attributes with
    your_location.start and your_location.end. If the start and
    end are exact, this will return the positions, if not, we'll return
    the appropriate Fuzzy class with info about the position and fuzziness.

    Note that the start and end location numbering follow Python's scheme,
    thus a GenBank entry of 123..150 (one based counting) becomes a location
    of [122:150] (zero based counting).
    """

    def __init__(self, start, end):
        """Specify the start and end of a sequence feature.

        start and end arguments specify the values where the feature begins
        and ends. These can either be any of the *Position objects that
        inherit from AbstractPosition, or can just be integers specifying the
        position. In the case of integers, the values are assumed to be
        exact and are converted into ExactPosition arguments. This is meant
        to make it easy to deal with non-fuzzy ends.

        i.e. Short form:

        >>> from Bio.SeqFeature import FeatureLocation
        >>> loc = FeatureLocation(5,10)

        Explicit form:

        >>> from Bio.SeqFeature import FeatureLocation, ExactPosition
        >>> loc = FeatureLocation(ExactPosition(5),ExactPosition(10))

        Other fuzzy positions are used similarly,

        >>> from Bio.SeqFeature import FeatureLocation
        >>> from Bio.SeqFeature import BeforePosition, AfterPosition
        >>> loc2 = FeatureLocation(BeforePosition(5),AfterPosition(10))

        """
        if isinstance(start, AbstractPosition):
            self._start = start
        else:
            self._start = ExactPosition(start)

        if isinstance(end, AbstractPosition):
            self._end = end
        else:
            self._end = ExactPosition(end)

    def __str__(self):
        """Returns a representation of the location (with python counting).

        For the simple case this uses the python splicing syntax, [122:150]
        (zero based counting) which GenBank would call 123..150 (one based
        counting).
        """
        return "[%s:%s]" % (self._start, self._end)

    def __repr__(self):
        """A string representation of the location for debugging."""
        return "%s(%s,%s)" \
               % (self.__class__.__name__, repr(self.start), repr(self.end))

    def __nonzero__(self):
        """Returns True regardless of the length of the feature.

        This behaviour is for backwards compatibility, since until the
        __len__ method was added, a FeatureLocation always evaluated as True.

        Note that in comparison, Seq objects, strings, lists, etc, will all
        evaluate to False if they have length zero.

        WARNING: The FeatureLocation may in future evaluate to False when its
        length is zero (in order to better match normal python behaviour)!
        """
        return True

    # Python 3 looks up __bool__ (not __nonzero__); without this alias the
    # truth value would silently fall back to __len__.
    __bool__ = __nonzero__

    @property
    def start(self):
        """Start location (possibly a fuzzy position, read only)."""
        return self._start

    @property
    def end(self):
        """End location (possibly a fuzzy position, read only)."""
        return self._end

    @property
    def nofuzzy_start(self):
        """Start position (integer, approximated if fuzzy, read only).

        To get non-fuzzy attributes (ie. the position only) ask for
        'location.nofuzzy_start', 'location.nofuzzy_end'. These should return
        the largest range of the fuzzy position. So something like:
        (10.20)..(30.40) should return 10 for start, and 40 for end.
        """
        return self._start.position

    @property
    def nofuzzy_end(self):
        """End position (integer, approximated if fuzzy, read only).

        To get non-fuzzy attributes (ie. the position only) ask for
        'location.nofuzzy_start', 'location.nofuzzy_end'. These should return
        the largest range of the fuzzy position. So something like:
        (10.20)..(30.40) should return 10 for start, and 40 for end.
        """
        return self._end.position + self._end.extension

    def __len__(self):
        """Returns the length of the region described by the FeatureLocation.

        Note that extra care may be needed for fuzzy locations, e.g.

        >>> from Bio.SeqFeature import FeatureLocation
        >>> from Bio.SeqFeature import BeforePosition, AfterPosition
        >>> loc = FeatureLocation(BeforePosition(5),AfterPosition(10))
        >>> len(loc)
        5
        """
        # The widest span: start position up to end position plus extension.
        return self.nofuzzy_end - self.nofuzzy_start

    def __contains__(self, value):
        """Check if an integer position is within the FeatureLocation.

        Note that extra care may be needed for fuzzy locations, e.g.

        >>> from Bio.SeqFeature import FeatureLocation
        >>> from Bio.SeqFeature import BeforePosition, AfterPosition
        >>> loc = FeatureLocation(BeforePosition(5),AfterPosition(10))
        >>> len(loc)
        5
        >>> [i for i in range(15) if i in loc]
        [5, 6, 7, 8, 9]

        Raises ValueError if value is not an integer.
        """
        if not isinstance(value, int):
            raise ValueError("Currently we only support checking for integer "
                             "positions being within a FeatureLocation.")
        if value < self.nofuzzy_start or value >= self.nofuzzy_end:
            return False
        else:
            return True

    def __iter__(self):
        """Iterate over the parent positions within the FeatureLocation.

        >>> from Bio.SeqFeature import FeatureLocation
        >>> from Bio.SeqFeature import BeforePosition, AfterPosition
        >>> loc = FeatureLocation(BeforePosition(5),AfterPosition(10))
        >>> len(loc)
        5
        >>> for i in loc: print i
        5
        6
        7
        8
        9
        >>> list(loc)
        [5, 6, 7, 8, 9]
        >>> [i for i in range(15) if i in loc]
        [5, 6, 7, 8, 9]
        """
        for i in range(self.nofuzzy_start, self.nofuzzy_end):
            yield i

    def _shift(self, offset):
        """Returns a copy of the location shifted by the offset (PRIVATE)."""
        return FeatureLocation(start = self._start._shift(offset),
                               end = self._end._shift(offset))
601
class AbstractPosition(object):
    """Abstract base class representing a position.

    Attributes:
    o position - The (lower) coordinate of the position.
    o extension - A non-negative range by which the position may extend.

    All comparisons and the hash use the position attribute only; the
    extension is not considered at all.
    """

    def __init__(self, position, extension):
        """Store the position and its (non-negative) extension."""
        self.position = position
        assert extension >= 0, extension
        self.extension = extension

    def __repr__(self):
        """String representation of the location for debugging."""
        return "%s(%s,%s)" % (self.__class__.__name__, \
                              repr(self.position), repr(self.extension))

    def __hash__(self):
        """Simple position based hash."""
        #Note __hash__ must be implemented on Python 3.x if overriding __eq__
        return hash(self.position)

    def __eq__(self, other):
        """A simple equality for positions.

        This is very simple-minded and just compares the position attribute
        of the features; extensions are not considered at all. This could
        potentially be expanded to try to take advantage of extensions.
        """
        assert isinstance(other, AbstractPosition), \
               "We can only do comparisons between Biopython Position objects."
        return self.position == other.position

    def __ne__(self, other):
        """A simple non-equality for positions.

        This is very simple-minded and just compares the position attribute
        of the features; extensions are not considered at all. This could
        potentially be expanded to try to take advantage of extensions.
        """
        assert isinstance(other, AbstractPosition), \
               "We can only do comparisons between Biopython Position objects."
        return self.position != other.position

    def __le__(self, other):
        """A simple less than or equal for positions.

        This is very simple-minded and just compares the position attribute
        of the features; extensions are not considered at all. This could
        potentially be expanded to try to take advantage of extensions.
        """
        assert isinstance(other, AbstractPosition), \
               "We can only do comparisons between Biopython Position objects."
        return self.position <= other.position

    def __lt__(self, other):
        """A simple less than for positions.

        This is very simple-minded and just compares the position attribute
        of the features; extensions are not considered at all. This could
        potentially be expanded to try to take advantage of extensions.
        """
        assert isinstance(other, AbstractPosition), \
               "We can only do comparisons between Biopython Position objects."
        return self.position < other.position

    def __ge__(self, other):
        """A simple greater than or equal for positions.

        This is very simple-minded and just compares the position attribute
        of the features; extensions are not considered at all. This could
        potentially be expanded to try to take advantage of extensions.
        """
        assert isinstance(other, AbstractPosition), \
               "We can only do comparisons between Biopython Position objects."
        return self.position >= other.position

    def __gt__(self, other):
        """A simple greater than for positions.

        This is very simple-minded and just compares the position attribute
        of the features; extensions are not considered at all. This could
        potentially be expanded to try to take advantage of extensions.
        """
        assert isinstance(other, AbstractPosition), \
               "We can only do comparisons between Biopython Position objects."
        return self.position > other.position

    def _shift(self, offset):
        """Returns a copy of the position moved by the offset (PRIVATE)."""
        #We want this to maintain the subclass when called from a subclass
        return self.__class__(self.position + offset, self.extension)
689
class ExactPosition(AbstractPosition):
    """Specify the specific position of a boundary.

    o position - The position of the boundary.
    o extension - An optional argument which must be zero since we don't
    have an extension. The argument is provided so that the same number of
    arguments can be passed to all position types.

    In this case, there is no fuzziness associated with the position.
    """

    def __init__(self, position, extension = 0):
        """Store the position; a non-zero extension raises AttributeError."""
        if extension != 0:
            message = "Non-zero extension %s for exact position." % extension
            raise AttributeError(message)
        AbstractPosition.__init__(self, position, 0)

    def __repr__(self):
        """String representation of the ExactPosition location for debugging."""
        # An exact position never carries an extension, so it is not shown.
        assert self.extension == 0
        class_name = self.__class__.__name__
        return "%s(%s)" % (class_name, repr(self.position))

    def __str__(self):
        """Return just the position as a string."""
        text = str(self.position)
        return text
713
class UncertainPosition(ExactPosition):
    """Specify a specific position which is uncertain.

    This is used in UniProt, e.g. ?222 for uncertain position 222, or in the
    XML format explicitly marked as uncertain. Does not apply to GenBank/EMBL.

    All behaviour is inherited unchanged from ExactPosition.
    """
721
class UnknownPosition(AbstractPosition):
    """Specify a specific position which is unknown (has no position).

    This is used in UniProt, e.g. ? or in the XML as unknown.

    Both the position and extension attributes are None.
    """

    def __init__(self):
        """Create an unknown position; no arguments are taken."""
        self.position = None
        self.extension = None

    def __repr__(self):
        """String representation of the UnknownPosition location for debugging."""
        return "%s()" % self.__class__.__name__

    def _shift(self, offset):
        """Returns a new unknown position, ignoring the offset (PRIVATE).

        The inherited implementation would attempt None + offset and pass
        two arguments to this class's argument-free __init__, so it could
        never succeed; an unknown position stays unknown wherever it is moved.
        """
        return self.__class__()
735
class WithinPosition(AbstractPosition):
    """Specify the position of a boundary within some coordinates.

    Arguments:
    o position - The start position of the boundary
    o extension - The range to which the boundary can extend.

    This allows dealing with a position like ((1.4)..100). This
    indicates that the start of the sequence is somewhere between 1
    and 4. To represent that with this class we would set position as
    1 and extension as 3.
    """

    def __init__(self, position, extension = 0):
        """Store the position and extension via the base class."""
        AbstractPosition.__init__(self, position, extension)

    def __str__(self):
        """Return the range as '(low.high)'."""
        low = self.position
        high = self.position + self.extension
        return "(%s.%s)" % (low, high)
753
class BetweenPosition(AbstractPosition):
    """Specify the position of a boundary between two coordinates (OBSOLETE?).

    Arguments:
    o position - The start position of the boundary.
    o extension - The range to the other position of a boundary.

    This specifies a coordinate which is found between the two positions.
    So this allows us to deal with a position like ((1^2)..100). To
    represent that with this class we set position as 1 and the
    extension as 1.
    """

    def __init__(self, position, extension = 0):
        """Store the position and extension via the base class."""
        AbstractPosition.__init__(self, position, extension)

    def __str__(self):
        """Return the two coordinates as '(low^high)'."""
        low = self.position
        high = self.position + self.extension
        return "(%s^%s)" % (low, high)
771
class BeforePosition(AbstractPosition):
    """Specify a position where the actual location occurs before it.

    Arguments:
    o position - The upper boundary of where the location can occur.
    o extension - An optional argument which must be zero since we don't
    have an extension. The argument is provided so that the same number of
    arguments can be passed to all position types.

    This is used to specify positions like (<10..100) where the location
    occurs somewhere before position 10.
    """

    def __init__(self, position, extension = 0):
        """Store the position; a non-zero extension raises AttributeError."""
        if extension != 0:
            message = "Non-zero extension %s for exact position." % extension
            raise AttributeError(message)
        AbstractPosition.__init__(self, position, 0)

    def __repr__(self):
        """A string representation of the location for debugging."""
        # The extension is always zero here, so it is left out.
        assert self.extension == 0
        class_name = self.__class__.__name__
        return "%s(%s)" % (class_name, repr(self.position))

    def __str__(self):
        """Return the position prefixed with '<'."""
        text = "<%s" % self.position
        return text
797
class AfterPosition(AbstractPosition):
    """Specify a position where the actual location is found after it.

    Arguments:
    o position - The lower boundary of where the location can occur.
    o extension - An optional argument which must be zero since we don't
    have an extension. The argument is provided so that the same number of
    arguments can be passed to all position types.

    This is used to specify positions like (>10..100) where the location
    occurs somewhere after position 10.
    """

    def __init__(self, position, extension = 0):
        """Store the position; a non-zero extension raises AttributeError."""
        if extension != 0:
            message = "Non-zero extension %s for exact position." % extension
            raise AttributeError(message)
        AbstractPosition.__init__(self, position, 0)

    def __repr__(self):
        """A string representation of the location for debugging."""
        # The extension is always zero here, so it is left out.
        assert self.extension == 0
        class_name = self.__class__.__name__
        return "%s(%s)" % (class_name, repr(self.position))

    def __str__(self):
        """Return the position prefixed with '>'."""
        text = ">%s" % self.position
        return text
823
class OneOfPosition(AbstractPosition):
    """Specify a position where the location can be multiple positions.

    This models the GenBank 'one-of(1888,1901)' function, and tries
    to make this fit within the Biopython Position models. In our case
    the position of the "one-of" is set as the lowest choice, and the
    extension is the range to the highest choice.
    """

    def __init__(self, position_list):
        """Initialize with a set of possible positions.

        position_list is a list of AbstractPosition derived objects,
        specifying possible locations.

        Raises ValueError if position_list is empty, since there would be
        no position from which to derive the lowest and highest choice.
        """
        # unique attribute for this type of positions
        self.position_choices = position_list
        # collect the coordinate of every choice, checking types as we go
        coordinates = []
        for position_choice in self.position_choices:
            # One-element tuple so a tuple argument cannot break the %r.
            assert isinstance(position_choice, AbstractPosition), \
                   "Expected position objects, got %r" % (position_choice,)
            coordinates.append(position_choice.position)
        if not coordinates:
            raise ValueError("OneOfPosition requires at least one "
                             "position choice.")
        smallest = min(coordinates)
        largest = max(coordinates)
        # initialize with our definition of position and extension
        AbstractPosition.__init__(self, smallest, largest - smallest)

    def __repr__(self):
        """String representation of the OneOfPosition location for debugging."""
        return "%s(%s)" % (self.__class__.__name__, \
                           repr(self.position_choices))

    def __str__(self):
        """Return the choices in GenBank style, e.g. one-of(1888,1901)."""
        return "one-of(%s)" % ",".join([str(position)
                                        for position in self.position_choices])

    def _shift(self, offset):
        """Returns a copy with every choice moved by the offset (PRIVATE)."""
        return self.__class__([position_choice._shift(offset) \
                               for position_choice in self.position_choices])
872
class PositionGap(object):
    """Simple class to hold information about a gap between positions.
    """

    def __init__(self, gap_size):
        """Initialize with a position object containing the gap information.
        """
        self.gap_size = gap_size

    def __repr__(self):
        """A string representation of the position gap for debugging."""
        class_name = self.__class__.__name__
        return "%s(%s)" % (class_name, repr(self.gap_size))

    def __str__(self):
        """Return the gap as 'gap(size)'."""
        return "gap(%s)" % self.gap_size
888
def _test():
    """Run the Bio.SeqFeature module's doctests (PRIVATE).

    This will try and locate the unit tests directory, and run the doctests
    from there in order that the relative paths used in the examples work.
    If neither "../Tests" nor "Tests" exists, nothing is run.
    """
    import doctest
    import os
    for tests_dir in (os.path.join("..", "Tests"), os.path.join("Tests")):
        if os.path.isdir(tests_dir):
            break
    else:
        # No unit test directory found, so the relative paths in the
        # examples could not be resolved.
        return
    # Single-argument print(...) gives the same output on Python 2 and 3.
    print("Runing doctests...")
    cur_dir = os.path.abspath(os.curdir)
    os.chdir(tests_dir)
    try:
        doctest.testmod()
    finally:
        # Always restore the caller's working directory.
        os.chdir(cur_dir)
    print("Done")


if __name__ == "__main__":
    _test()