Package Bio :: Package AlignIO :: Module EmbossIO
[hide private]
[frames] | no frames]

Source Code for Module Bio.AlignIO.EmbossIO

  1  # Copyright 2008-2010 by Peter Cock.  All rights reserved. 
  2  # 
  3  # This code is part of the Biopython distribution and governed by its 
  4  # license.  Please see the LICENSE file that should have been included 
  5  # as part of this package. 
  6  """ 
  7  Bio.AlignIO support for the "emboss" alignment output from EMBOSS tools. 
  8   
  9  You are expected to use this module via the Bio.AlignIO functions (or the 
 10  Bio.SeqIO functions if you want to work directly with the gapped sequences). 
 11   
 12  This module contains a parser for the EMBOSS pairs/simple file format, for 
 13  example from the alignret, water and needle tools. 
 14  """ 
 15   
 16  from Bio.Seq import Seq 
 17  from Bio.SeqRecord import SeqRecord 
 18  from Bio.Align import MultipleSeqAlignment 
 19  from Interfaces import AlignmentIterator, SequentialAlignmentWriter 
 20   
21 -class EmbossWriter(SequentialAlignmentWriter):
22 """Emboss alignment writer (WORK IN PROGRESS). 23 24 Writes a simplfied version of the EMBOSS pairs/simple file format. 25 A lot of the information their tools record in their headers is not 26 available and is ommitted. 27 """ 28
29 - def write_header(self):
30 handle = self.handle 31 handle.write("########################################\n") 32 handle.write("# Program: Biopython\n") 33 try: 34 handle.write("# Report_file: %s\n" % handle.name) 35 except AttributeError: 36 pass 37 handle.write("########################################\n")
38 43
44 - def write_alignment(self, alignment):
45 """Use this to write (another) single alignment to an open file.""" 46 handle = self.handle 47 handle.write("#=======================================\n") 48 handle.write("#\n") 49 handle.write("# Aligned_sequences: %i\n" % len(alignment)) 50 for i, record in enumerate(alignment): 51 handle.write("# %i: %s\n" % (i+1, record.id)) 52 handle.write("#\n") 53 handle.write("# Length: %i\n" % alignment.get_alignment_length()) 54 handle.write("#\n") 55 handle.write("#=======================================\n") 56 handle.write("\n") 57 #... 58 assert False
59
60 -class EmbossIterator(AlignmentIterator):
61 """Emboss alignment iterator. 62 63 For reading the (pairwise) alignments from EMBOSS tools in what they 64 call the "pairs" and "simple" formats. 65 """ 66
67 - def next(self):
68 69 handle = self.handle 70 71 try: 72 #Header we saved from when we were parsing 73 #the previous alignment. 74 line = self._header 75 del self._header 76 except AttributeError: 77 line = handle.readline() 78 if not line: 79 raise StopIteration 80 81 while line.rstrip() != "#=======================================": 82 line = handle.readline() 83 if not line: 84 raise StopIteration 85 86 length_of_seqs = None 87 number_of_seqs = None 88 ids = [] 89 seqs = [] 90 91 92 while line[0] == "#": 93 #Read in the rest of this alignment header, 94 #try and discover the number of records expected 95 #and their length 96 parts = line[1:].split(":",1) 97 key = parts[0].lower().strip() 98 if key == "aligned_sequences": 99 number_of_seqs = int(parts[1].strip()) 100 assert len(ids) == 0 101 # Should now expect the record identifiers... 102 for i in range(number_of_seqs): 103 line = handle.readline() 104 parts = line[1:].strip().split(":",1) 105 assert i+1 == int(parts[0].strip()) 106 ids.append(parts[1].strip()) 107 assert len(ids) == number_of_seqs 108 if key == "length": 109 length_of_seqs = int(parts[1].strip()) 110 111 #And read in another line... 112 line = handle.readline() 113 114 if number_of_seqs is None: 115 raise ValueError("Number of sequences missing!") 116 if length_of_seqs is None: 117 raise ValueError("Length of sequences missing!") 118 119 if self.records_per_alignment is not None \ 120 and self.records_per_alignment != number_of_seqs: 121 raise ValueError("Found %i records in this alignment, told to expect %i" \ 122 % (number_of_seqs, self.records_per_alignment)) 123 124 seqs = ["" for id in ids] 125 seq_starts = [] 126 index = 0 127 128 #Parse the seqs 129 while line: 130 if len(line) > 21: 131 id_start = line[:21].strip().split(None, 1) 132 seq_end = line[21:].strip().split(None, 1) 133 if len(id_start) == 2 and len(seq_end) == 2: 134 #identifier, seq start position, seq, seq end position 135 #(an aligned seq is broken up into multiple lines) 136 id, start = id_start 137 seq, end = seq_end 138 if start==end: 139 #Special case, either a single letter is present, 140 #or no letters at all. 141 if seq.replace("-","") == "": 142 start = int(start) 143 end = int(end) 144 else: 145 start = int(start) - 1 146 end = int(end) 147 else: 148 assert seq.replace("-","") != "" 149 start = int(start)-1 #python counting 150 end = int(end) 151 152 #The identifier is truncated... 153 assert 0 <= index and index < number_of_seqs, \ 154 "Expected index %i in range [0,%i)" \ 155 % (index, number_of_seqs) 156 assert id==ids[index] or id == ids[index][:len(id)] 157 158 if len(seq_starts) == index: 159 #Record the start 160 seq_starts.append(start) 161 162 #Check the start... 163 if start == end: 164 assert seq.replace("-","") == "", line 165 else: 166 assert start - seq_starts[index] == len(seqs[index].replace("-","")), \ 167 "Found %i chars so far for sequence %i (%s, %s), line says start %i:\n%s" \ 168 % (len(seqs[index].replace("-","")), index, id, repr(seqs[index]), 169 start, line) 170 171 seqs[index] += seq 172 173 #Check the end ... 174 assert end == seq_starts[index] + len(seqs[index].replace("-","")), \ 175 "Found %i chars so far for sequence %i (%s, %s, start=%i), file says end %i:\n%s" \ 176 % (len(seqs[index].replace("-","")), index, id, repr(seqs[index]), 177 seq_starts[index], end, line) 178 179 index += 1 180 if index >= number_of_seqs: 181 index = 0 182 else: 183 #just a start value, this is just alignment annotation (?) 184 #print "Skipping: " + line.rstrip() 185 pass 186 elif line.strip() == "": 187 #Just a spacer? 188 pass 189 else: 190 print line 191 assert False 192 193 line = handle.readline() 194 if line.rstrip() == "#---------------------------------------" \ 195 or line.rstrip() == "#=======================================": 196 #End of alignment 197 self._header = line 198 break 199 200 assert index == 0 201 202 if self.records_per_alignment is not None \ 203 and self.records_per_alignment != len(ids): 204 raise ValueError("Found %i records in this alignment, told to expect %i" \ 205 % (len(ids), self.records_per_alignment)) 206 207 records = [] 208 for id, seq in zip(ids, seqs): 209 if len(seq) != length_of_seqs: 210 #EMBOSS 2.9.0 is known to use spaces instead of minus signs 211 #for leading gaps, and thus fails to parse. This old version 212 #is still used as of Dec 2008 behind the EBI SOAP webservice: 213 #http://www.ebi.ac.uk/Tools/webservices/wsdl/WSEmboss.wsdl 214 raise ValueError("Error parsing alignment - sequences of " 215 "different length? You could be using an " 216 "old version of EMBOSS.") 217 records.append(SeqRecord(Seq(seq, self.alphabet), \ 218 id=id, description=id)) 219 return MultipleSeqAlignment(records, self.alphabet)
220 221 222 if __name__ == "__main__": 223 print "Running a quick self-test" 224 225 #http://emboss.sourceforge.net/docs/themes/alnformats/align.simple 226 simple_example = \ 227 """######################################## 228 # Program: alignret 229 # Rundate: Wed Jan 16 17:16:13 2002 230 # Report_file: stdout 231 ######################################## 232 #======================================= 233 # 234 # Aligned_sequences: 4 235 # 1: IXI_234 236 # 2: IXI_235 237 # 3: IXI_236 238 # 4: IXI_237 239 # Matrix: EBLOSUM62 240 # Gap_penalty: 10.0 241 # Extend_penalty: 0.5 242 # 243 # Length: 131 244 # Identity: 95/131 (72.5%) 245 # Similarity: 127/131 (96.9%) 246 # Gaps: 25/131 (19.1%) 247 # Score: 100.0 248 # 249 # 250 #======================================= 251 252 IXI_234 1 TSPASIRPPAGPSSRPAMVSSRRTRPSPPGPRRPTGRPCCSAAPRRPQAT 50 253 IXI_235 1 TSPASIRPPAGPSSR---------RPSPPGPRRPTGRPCCSAAPRRPQAT 41 254 IXI_236 1 TSPASIRPPAGPSSRPAMVSSR--RPSPPPPRRPPGRPCCSAAPPRPQAT 48 255 IXI_237 1 TSPASLRPPAGPSSRPAMVSSRR-RPSPPGPRRPT----CSAAPRRPQAT 45 256 |||||:|||||||||::::::: |||||:||||:::::|||||:||||| 257 258 IXI_234 51 GGWKTCSGTCTTSTSTRHRGRSGWSARTTTAACLRASRKSMRAACSRSAG 100 259 IXI_235 42 GGWKTCSGTCTTSTSTRHRGRSGW----------RASRKSMRAACSRSAG 81 260 IXI_236 49 GGWKTCSGTCTTSTSTRHRGRSGWSARTTTAACLRASRKSMRAACSR--G 96 261 IXI_237 46 GGYKTCSGTCTTSTSTRHRGRSGYSARTTTAACLRASRKSMRAACSR--G 93 262 ||:||||||||||||||||||||:::::::::::||||||||||||| | 263 264 IXI_234 101 SRPNRFAPTLMSSCITSTTGPPAWAGDRSHE 131 265 IXI_235 82 SRPNRFAPTLMSSCITSTTGPPAWAGDRSHE 112 266 IXI_236 97 SRPPRFAPPLMSSCITSTTGPPPPAGDRSHE 127 267 IXI_237 94 SRPNRFAPTLMSSCLTSTTGPPAYAGDRSHE 124 268 |||:||||:|||||:|||||||::||||||| 269 270 271 #--------------------------------------- 272 #--------------------------------------- 273 274 """ 275 276 #http://emboss.sourceforge.net/docs/themes/alnformats/align.pair 277 pair_example = \ 278 """######################################## 279 # Program: water 280 # Rundate: Wed Jan 16 17:23:19 2002 281 # Report_file: stdout 282 ######################################## 283 #======================================= 284 # 285 # Aligned_sequences: 2 286 # 1: IXI_234 287 # 2: IXI_235 288 # Matrix: EBLOSUM62 289 # Gap_penalty: 10.0 290 # Extend_penalty: 0.5 291 # 292 # Length: 131 293 # Identity: 112/131 (85.5%) 294 # Similarity: 112/131 (85.5%) 295 # Gaps: 19/131 (14.5%) 296 # Score: 591.5 297 # 298 # 299 #======================================= 300 301 IXI_234 1 TSPASIRPPAGPSSRPAMVSSRRTRPSPPGPRRPTGRPCCSAAPRRPQAT 50 302 ||||||||||||||| |||||||||||||||||||||||||| 303 IXI_235 1 TSPASIRPPAGPSSR---------RPSPPGPRRPTGRPCCSAAPRRPQAT 41 304 305 IXI_234 51 GGWKTCSGTCTTSTSTRHRGRSGWSARTTTAACLRASRKSMRAACSRSAG 100 306 |||||||||||||||||||||||| |||||||||||||||| 307 IXI_235 42 GGWKTCSGTCTTSTSTRHRGRSGW----------RASRKSMRAACSRSAG 81 308 309 IXI_234 101 SRPNRFAPTLMSSCITSTTGPPAWAGDRSHE 131 310 ||||||||||||||||||||||||||||||| 311 IXI_235 82 SRPNRFAPTLMSSCITSTTGPPAWAGDRSHE 112 312 313 314 #--------------------------------------- 315 #--------------------------------------- 316 317 318 """ 319 320 pair_example2 = \ 321 """######################################## 322 # Program: needle 323 # Rundate: Sun 27 Apr 2007 17:20:35 324 # Commandline: needle 325 # [-asequence] Spo0F.faa 326 # [-bsequence] paired_r.faa 327 # -sformat2 pearson 328 # Align_format: srspair 329 # Report_file: ref_rec .needle 330 ######################################## 331 332 #======================================= 333 # 334 # Aligned_sequences: 2 335 # 1: ref_rec 336 # 2: gi|94968718|receiver 337 # Matrix: EBLOSUM62 338 # Gap_penalty: 10.0 339 # Extend_penalty: 0.5 340 # 341 # Length: 124 342 # Identity: 32/124 (25.8%) 343 # Similarity: 64/124 (51.6%) 344 # Gaps: 17/124 (13.7%) 345 # Score: 112.0 346 # 347 # 348 #======================================= 349 350 ref_rec 1 KILIVDD----QYGIRILLNEVFNKEGYQTFQAANGLQALDIVTKERPDL 46 351 :|:.|| :.|.|::|.: :.|.....:|.:|.||:.:..:..|.: 352 gi|94968718|r 1 -VLLADDHALVRRGFRLMLED--DPEIEIVAEAGDGAQAVKLAGELHPRV 47 353 354 ref_rec 47 VLLDMKIPGMDGIEILKRMKVIDENIRVIIMTAYGELDMIQESKELGALT 96 355 |::|..:|||.|::..|:::....:|.|:::|.:.|...::.:.|.||.. 356 gi|94968718|r 48 VVMDCAMPGMSGMDATKQIRTQWPDIAVLMLTMHSEDTWVRLALEAGANG 97 357 358 ref_rec 97 HFAK-PFDIDEIRDAV-------- 111 359 :..| ..|:|.|: || 360 gi|94968718|r 98 YILKSAIDLDLIQ-AVRRVANGET 120 361 362 363 #======================================= 364 # 365 # Aligned_sequences: 2 366 # 1: ref_rec 367 # 2: gi|94968761|receiver 368 # Matrix: EBLOSUM62 369 # Gap_penalty: 10.0 370 # Extend_penalty: 0.5 371 # 372 # Length: 119 373 # Identity: 34/119 (28.6%) 374 # Similarity: 58/119 (48.7%) 375 # Gaps: 9/119 ( 7.6%) 376 # Score: 154.0 377 # 378 # 379 #======================================= 380 381 ref_rec 1 KILIVDDQYGIRILLNEVFNKEGYQTFQAANGLQALDIVTKERPDLVLLD 50 382 ||||||:......|:..|...|::.....|.::||:|...:..||:|.| 383 gi|94968761|r 1 -ILIVDDEANTLASLSRAFRLAGHEATVCDNAVRALEIAKSKPFDLILSD 49 384 385 ref_rec 51 MKIPGMDGIEILKRMKVIDENIRVIIMTAYGELDMIQESKELGALTHFAK 100 386 :.:||.||:.:|:.:|.......|::|:....::|..::..||||....| 387 gi|94968761|r 50 VVMPGRDGLTLLEDLKTAGVQAPVVMMSGQAHIEMAVKATRLGALDFLEK 99 388 389 ref_rec 101 PFDIDEIRDAV-------- 111 390 |...|::...| 391 gi|94968761|r 100 PLSTDKLLLTVENALKLKR 118 392 393 394 #======================================= 395 # 396 # Aligned_sequences: 2 397 # 1: ref_rec 398 # 2: gi|94967506|receiver 399 # Matrix: EBLOSUM62 400 # Gap_penalty: 10.0 401 # Extend_penalty: 0.5 402 # 403 # Length: 120 404 # Identity: 29/120 (24.2%) 405 # Similarity: 53/120 (44.2%) 406 # Gaps: 9/120 ( 7.5%) 407 # Score: 121.0 408 # 409 # 410 #======================================= 411 412 ref_rec 1 -KILIVDDQYGIRILLNEVFNKEGYQTFQAANGLQALDIVTKERPDLVLL 49 413 .|::|||..|..:.:..||.:.|:..........|.:.:.....||.:: 414 gi|94967506|r 1 LHIVVVDDDPGTCVYIESVFAELGHTCKSFVRPEAAEEYILTHPVDLAIV 50 415 416 ref_rec 50 DMKIPGMDGIEILKRMKVIDENIRVIIMTAYGELDMIQESKELGALTHFA 99 417 |:.:....|:|:|:|.:|....:..:|:|....|:|...|...||:.:.. 418 gi|94967506|r 51 DVYLGSTTGVEVLRRCRVHRPKLYAVIITGQISLEMAARSIAEGAVDYIQ 100 419 420 ref_rec 100 KPFDIDEIRDAV-------- 111 421 ||.|||.:.:.. 422 gi|94967506|r 101 KPIDIDALLNIAERALEHKE 120 423 424 425 #======================================= 426 # 427 # Aligned_sequences: 2 428 # 1: ref_rec 429 # 2: gi|94970045|receiver 430 # Matrix: EBLOSUM62 431 # Gap_penalty: 10.0 432 # Extend_penalty: 0.5 433 # 434 # Length: 118 435 # Identity: 30/118 (25.4%) 436 # Similarity: 64/118 (54.2%) 437 # Gaps: 9/118 ( 7.6%) 438 # Score: 126.0 439 # 440 # 441 #======================================= 442 443 ref_rec 1 KILIVDDQYGIRILLNEVFNKEGYQTFQAANGLQALDIVTK--ERPDLVL 48 444 :|:|:|:..:|....:.....||:...|.:|.:||.:.:| ||.|::: 445 gi|94970045|r 1 -VLLVEDEEALRAAAGDFLETRGYKIMTARDGTEALSMASKFAERIDVLI 49 446 447 ref_rec 49 LDMKIPGMDGIEILKRMKVIDENIRVIIMTAYGELDMIQESKELGALTHF 98 448 .|:.:||:.|..:.:.:..|....:|:.|:.|.: :.:..:.|:.:.:.| 449 gi|94970045|r 50 TDLVMPGISGRVLAQELVKIHPETKVMYMSGYDD-ETVMVNGEIDSSSAF 98 450 451 ref_rec 99 -AKPFDID----EIRDAV 111 452 .|||.:| :||:.: 453 gi|94970045|r 99 LRKPFRMDALSAKIREVL 116 454 455 456 #======================================= 457 # 458 # Aligned_sequences: 2 459 # 1: ref_rec 460 # 2: gi|94970041|receiver 461 # Matrix: EBLOSUM62 462 # Gap_penalty: 10.0 463 # Extend_penalty: 0.5 464 # 465 # Length: 125 466 # Identity: 35/125 (28.0%) 467 # Similarity: 70/125 (56.0%) 468 # Gaps: 18/125 (14.4%) 469 # Score: 156.5 470 # 471 # 472 #======================================= 473 474 ref_rec 1 KILIVDDQYGIRILLNEVFNKEGYQTFQAANGLQALDIV--TKERPDLVL 48 475 .:|:|:|:.|:|.|:..:.:::||...:|.:|.:||:|| :.::.|::| 476 gi|94970041|r 1 TVLLVEDEEGVRKLVRGILSRQGYHVLEATSGEEALEIVRESTQKIDMLL 50 477 478 ref_rec 49 LDMKIPGMDGIEILKRMKVIDENIRVIIMTAYGELDMIQESKELGALTHF 98 479 .|:.:.||.|.|:.:|:::...:::||.|:.|.:..:::. |.||.. 480 gi|94970041|r 51 SDVVLVGMSGRELSERLRIQMPSLKVIYMSGYTDDAIVRH----GVLTES 96 481 482 ref_rec 99 A----KPFDIDEIRDAV-------- 111 483 | |||..|.:...| 484 gi|94970041|r 97 AEFLQKPFTSDSLLRKVRAVLQKRQ 121 485 486 487 #--------------------------------------- 488 #--------------------------------------- 489 490 """ 491 492 pair_example3 = """######################################## 493 # Program: needle 494 # Rundate: Mon 14 Jul 2008 11:45:42 495 # Commandline: needle 496 # [-asequence] asis:TGTGGTTAGGTTTGGTTTTATTGGGGGCTTGGTTTGGGCCCACCCCAAATAGGGAGTGGGGGTATGACCTCAGATAGACGAGCTTATTTTAGGGCGGCGACTATAATTATTTCGTTTCCTACAAGGATTAAAGTTTTTTCTTTTACTGTGGGAGGGGGTTTGGTATTAAGAAACGCTAGTCCGGATGTGGCTCTCCATGATACTTATTGTGTAGTAGCTCATTTTCATTATGTTCTTCGAATGGGAGCAGTCATTGGTATTTTTTTGGTTTTTTTTTGAAATTTTTAGGTTATTTAGACCATTTTTTTTTGTTTCGCTAATTAGAATTTTATTAGCCTTTGGTTTTTTTTTATTTTTTGGGGTTAAGACAAGGTGTCGTTGAATTAGTTTAGCAAAATACTGCTTAAGGTAGGCTATAGGATCTACCTTTTATCTTTCTAATCTTTTGTTTTAGTATAATTGGTCTTCGATTCAACAATTTTTAGTCTTCAGTCTTTTTTTTTATTTTGAAAAGGTTTTAACACTCTTGGTTTTGGAGGCTTTGGCTTTCTTCTTACTCTTAGGAGGATGGGCGCTAGAAAGAGTTTTAAGAGGGTGTGAAAGGGGGTTAATAGC 497 # [-bsequence] asis:TTATTAATCTTATGGTTTTGCCGTAAAATTTCTTTCTTTATTTTTTATTGTTAGGATTTTGTTGATTTTATTTTTCTCAAGAATTTTTAGGTCAATTAGACCGGCTTATTTTTTTGTCAGTGTTTAAAGTTTTATTAATTTTTGGGGGGGGGGGGAGACGGGGTGTTATCTGAATTAGTTTTTGGGAGTCTCTAGACATCTCATGGGTTGGCCGGGGGCCTGCCGTCTATAGTTCTTATTCCTTTTAAGGGAGTAAGAATTTCGATTCAGCAACTTTAGTTCACAGTCTTTTTTTTTATTAAGAAAGGTTT 498 # -filter 499 # Align_format: srspair 500 # Report_file: stdout 501 ######################################## 502 503 #======================================= 504 # 505 # Aligned_sequences: 2 506 # 1: asis 507 # 2: asis 508 # Matrix: EDNAFULL 509 # Gap_penalty: 10.0 510 # Extend_penalty: 0.5 511 # 512 # Length: 667 513 # Identity: 210/667 (31.5%) 514 # Similarity: 210/667 (31.5%) 515 # Gaps: 408/667 (61.2%) 516 # Score: 561.0 517 # 518 # 519 #======================================= 520 521 asis 1 TGTGGTTAGGTTTGGTTTTATTGGGGGCTTGGTTTGGGCCCACCCCAAAT 50 522 523 asis 0 -------------------------------------------------- 0 524 525 asis 51 AGGGAGTGGGGGTATGACCTCAGATAGACGAGCTTATTTTAGGGCGGCGA 100 526 527 asis 0 -------------------------------------------------- 0 528 529 asis 101 CTATAATTATTTCGTTTCCTACAAGGATTAAAGTTTTTTCTTTTACTGTG 150 530 531 asis 0 -------------------------------------------------- 0 532 533 asis 151 GGAGGGGGTTTGGTATTAAGAAACGCTAGTCCGGATGTGGCTCTCCATGA 200 534 .|||||| 535 asis 1 ------------TTATTAA------------------------------- 7 536 537 asis 201 TACTTATTGT------GTAGTAGCTCATTTTCATTATGTTCTTCGAATGG 244 538 .|||||.|| |||..|..|| ||||.||||.||.| ||.| 539 asis 8 -TCTTATGGTTTTGCCGTAAAATTTC--TTTCTTTATTTTTT----ATTG 50 540 541 asis 245 GAGCAGTCATTGGTATTTTTTTGGTTTTTTTTT------GAAATTTTTAG 288 542 ||.|.|||||.|||.||||.|||| | ||||||||| 543 asis 51 ---------TTAGGATTTTGTTGATTTTATTTTTCTCAAG-AATTTTTAG 90 544 545 asis 289 GTTATTTAGACC-----ATTTTTTTTT--GTTTCGCTAATTAGAATTTTA 331 546 ||.|.||||||| ||||||||.| ||.| |||.|.||||| 547 asis 91 GTCAATTAGACCGGCTTATTTTTTTGTCAGTGT------TTAAAGTTTTA 134 548 549 asis 332 TTAGCCTTTGGTTTTTTTTTATTTTT----TGGGGTTAAGACAAGGTGTC 377 550 ||| |||||| .||||...||||..|||||. 551 asis 135 TTA-----------------ATTTTTGGGGGGGGGGGGAGACGGGGTGTT 167 552 553 asis 378 GT-TGAATTAGTTTAGCAAAATACTGCTTAAGGTAGGCTATA-------- 418 554 .| ||||||||||| || ||.||.||.|| 555 asis 168 ATCTGAATTAGTTT-------------TT--GGGAGTCTCTAGACATCTC 202 556 557 asis 419 -------------GGATCTACCTTTTATCTTTCTAAT--CTTTT----GT 449 558 ||..||.||.|.|||..||||.|| ||||| | 559 asis 203 ATGGGTTGGCCGGGGGCCTGCCGTCTATAGTTCTTATTCCTTTTAAGGG- 251 560 561 asis 450 TTTAGT-ATAATTGGTCTTCGATTCAACAATTTTTAGTCTTCAGTCTTTT 498 562 ||| |.||| |||||||||.||| .||||||...||||||||| 563 asis 252 ---AGTAAGAAT-----TTCGATTCAGCAA-CTTTAGTTCACAGTCTTTT 292 564 565 asis 499 TTTTTATTTTGAAAAGGTTTTAACACTCTTGGTTTTGGAGGCTTTGGCTT 548 566 ||||||||..| |||||||| 567 asis 293 TTTTTATTAAG-AAAGGTTT------------------------------ 311 568 569 asis 549 TCTTCTTACTCTTAGGAGGATGGGCGCTAGAAAGAGTTTTAAGAGGGTGT 598 570 571 asis 311 -------------------------------------------------- 311 572 573 asis 599 GAAAGGGGGTTAATAGC 615 574 575 asis 311 ----------------- 311 576 577 578 #--------------------------------------- 579 #---------------------------------------""" 580 581 from StringIO import StringIO 582 583 alignments = list(EmbossIterator(StringIO(pair_example))) 584 assert len(alignments) == 1 585 assert len(alignments[0]) == 2 586 assert [r.id for r in alignments[0]] \ 587 == ["IXI_234", "IXI_235"] 588 589 alignments = list(EmbossIterator(StringIO(simple_example))) 590 assert len(alignments) == 1 591 assert len(alignments[0]) == 4 592 assert [r.id for r in alignments[0]] \ 593 == ["IXI_234", "IXI_235", "IXI_236", "IXI_237"] 594 595 alignments = list(EmbossIterator(StringIO(pair_example + simple_example))) 596 assert len(alignments) == 2 597 assert len(alignments[0]) == 2 598 assert len(alignments[1]) == 4 599 assert [r.id for r in alignments[0]] \ 600 == ["IXI_234", "IXI_235"] 601 assert [r.id for r in alignments[1]] \ 602 == ["IXI_234", "IXI_235", "IXI_236", "IXI_237"] 603 604 alignments = list(EmbossIterator(StringIO(pair_example2))) 605 assert len(alignments) == 5 606 assert len(alignments[0]) == 2 607 assert [r.id for r in alignments[0]] \ 608 == ["ref_rec", "gi|94968718|receiver"] 609 assert [r.id for r in alignments[4]] \ 610 == ["ref_rec", "gi|94970041|receiver"] 611 612 613 alignments = list(EmbossIterator(StringIO(pair_example3))) 614 assert len(alignments) == 1 615 assert len(alignments[0]) == 2 616 assert [r.id for r in alignments[0]] \ 617 == ["asis","asis"] 618 619 print "Done" 620