1
2
3
4
5
6 """
7 Large file parsing of Genepop files
8
9 The standard parser loads the whole file into memory. This parser
10 provides an iterator over data.
11
12 Classes:
13 LargeRecord Holds GenePop data.
14
15 Functions:
16 read Parses a GenePop record (file) into a Record object.
17
18 """
19
20 from copy import deepcopy
21
22
24 indiv_name, marker_line = line.split(',')
25 markers = marker_line.replace('\t', ' ').split(' ')
26 markers = [marker for marker in markers if marker!='']
27 if len(markers[0]) in [2, 4]:
28 marker_len = 2
29 else:
30 marker_len = 3
31 try:
32 allele_list = [(int(marker[0:marker_len]),
33 int(marker[marker_len:]))
34 for marker in markers]
35 except ValueError:
36 allele_list = [(int(marker[0:marker_len]),)
37 for marker in markers]
38 return indiv_name, allele_list, marker_len
39
65
66
68 """Holds information from a GenePop record.
69
70 Members:
71 marker_len The marker length (2 or 3 digit code per allele).
72
73 comment_line Comment line.
74
75 loci_list List of loci names.
76
77 data_generator Iterates over population data.
78
79 The generator will only work once. If you want to read a handle
80 twice you have to re-open it!
81
82 Each item produced by data_generator is either () - an empty tuple -
83 marking a new population, or an individual. An individual is
84 something like ('Ind1', [(1,1), (3,None), (200,201)]).
85 In the case above the individual is called Ind1 and
86 has three diploid loci. For the second locus, one of the alleles
87 is unknown.
88
89
90 """
92 self.handle = handle
93 self.marker_len = 0
94 self.comment_line = ""
95 self.loci_list = []
96 self.populations = []
97 self.data_generator = None
98 self.stack = []
99
101 for handle in [self.stack, self.handle]:
102 for line in handle:
103 line = line.rstrip()
104 if line.upper()=='POP':
105 yield ()
106 else:
107 indiv_name, allele_list, marker_len = get_indiv(line)
108 clean_list = []
109 for locus in allele_list:
110 mk_real = []
111 for al in locus:
112 if al==0:
113 mk_real.append(None)
114 else:
115 mk_real.append(al)
116 clean_list.append(tuple(mk_real))
117 yield indiv_name, clean_list
118 raise StopIteration()
119