1
2
3
4
5
"""
This module provides code to work with GenePop.

See http://wbiomed.curtin.edu.au/genepop/ ; the format is documented
here: http://wbiomed.curtin.edu.au/genepop/help_input.html .

Classes:
Record           Holds GenePop data.

Functions:
read             Parses a GenePop record (file) into a Record object.


Obsolete classes:
RecordParser     Parses a GenePop record (file) into a Record object.

_Scanner         Scans a GenePop record.
_RecordConsumer  Consumes GenePop data to a Record object.

Partially inspired by the MedLine code.

"""
28 from copy import deepcopy
29
30
def get_indiv(line):
    """Parse one individual line of a GenePop file.

    The line has the form ``name, genotype genotype ...`` where the
    genotypes are separated by spaces and/or tabs.

    Parameters:
    line - The individual line, with exactly one comma separating the
           name from the genotypes.

    Returns a tuple (indiv_name, allele_list, marker_len):
    indiv_name  - Text before the comma, returned as-is (not stripped).
    allele_list - One tuple of ints per marker: (a1, a2) for diploid
                  data, (a1,) when the second allele cannot be parsed
                  (haploid data). Missing alleles are still 0 here.
    marker_len  - Number of digits per allele (2 or 3), decided from
                  the width of the first genotype.

    Raises ValueError if the line does not contain exactly one comma or
    a genotype is not numeric, and IndexError if there are no genotypes.
    """
    indiv_name, marker_line = line.split(',')
    # split() with no argument splits on any run of whitespace and never
    # yields empty strings, so mixed tabs/spaces need no special casing.
    markers = marker_line.split()
    # Widths 2 (haploid) and 4 (diploid) mean 2-digit alleles; anything
    # else is treated as 3-digit alleles.
    if len(markers[0]) in (2, 4):
        marker_len = 2
    else:
        marker_len = 3
    try:
        allele_list = [(int(marker[:marker_len]), int(marker[marker_len:]))
                       for marker in markers]
    except ValueError:
        # No parsable second allele (e.g. nothing after the first
        # allele): treat the data as haploid.
        allele_list = [(int(marker[:marker_len]),) for marker in markers]
    return indiv_name, allele_list, marker_len
47
def read(handle):
    """Parse a handle containing a GenePop file.

    handle is a file-like object (any iterator over lines) that contains
    a GenePop record.

    Returns a Record whose comment_line, loci_list, populations,
    pop_list and marker_len are filled in. Alleles coded as 0 in the
    file (missing data) are stored as None.

    Raises ValueError if no "POP" line is found, and StopIteration if
    the handle has fewer than two lines.
    """
    record = Record()
    # next(handle) instead of handle.next(): the builtin works on both
    # Python 2.6+ and Python 3 iterators.
    record.comment_line = next(handle).rstrip()

    # The second line holds one locus name, or several names separated
    # by commas and/or spaces. Commas are turned into separators (not
    # deleted) so that "L1,L2" is not fused into a single name.
    sample_loci_line = next(handle).rstrip()
    record.loci_list.extend(sample_loci_line.replace(',', ' ').split())

    # Further loci may follow, one per line, up to the first POP marker.
    for line in handle:
        line = line.rstrip()
        if not line:
            continue  # a blank line is not a locus name
        if line.upper() == 'POP':
            break
        record.loci_list.append(line)
    else:
        raise ValueError('No population data found, file probably not GenePop related')

    # Everything after the first POP is individuals, with each further
    # POP line opening a new population.
    record.populations.append([])
    for line in handle:
        line = line.rstrip()
        if not line:
            continue  # tolerate blank (e.g. trailing) lines
        if line.upper() == 'POP':
            record.populations.append([])
        else:
            indiv_name, allele_list, record.marker_len = get_indiv(line)
            record.populations[-1].append((indiv_name, allele_list))

    n_loci = len(record.loci_list)
    for pop in record.populations:
        # The population is named after its last individual; pop[-1]
        # requires every population to have at least one individual.
        record.pop_list.append(pop[-1][0])
        for indiv in pop:
            alleles = indiv[1]
            # Replace the 0 "missing" code by None, in place, for the
            # first n_loci markers of the individual.
            for mk_i in range(n_loci):
                alleles[mk_i] = tuple((None if al == 0 else al)
                                      for al in alleles[mk_i])
    return record
90
91
class Record(object):
    """Holds information from a GenePop record.

    Members:
    marker_len         The marker length (2 or 3 digit code per allele).

    comment_line       Comment line.

    loci_list          List of loci names.

    pop_list           List of population names.

    populations        List of population data.

    In most genepop files, the population name is not trustable.
    It is strongly recommended that populations are referred by index.

    populations has one element per population. Each element is itself
    a list of individuals, each individual is a pair composed by individual
    name and a list of alleles (2 per marker or 1 for haploids): Example
    [
        [
            ('Ind1', [(1,2),    (3,3), (200,201)]),
            ('Ind2', [(2,None), (3,3), (None,None)]),
        ],
        [
            ('Other1', [(1,1),  (4,3), (200,200)]),
        ]
    ]

    """
    # NOTE(review): the class and method header lines were absent from the
    # text this block was rebuilt from. Apart from __init__, the method
    # names follow Biopython's Bio.PopGen.GenePop.Record -- confirm against
    # callers.

    def __init__(self):
        """Create an empty record."""
        self.marker_len = 0
        self.comment_line = ""
        self.loci_list = []
        self.pop_list = []
        self.populations = []

    def __str__(self):
        """Returns (reconstructs) a GenePop textual representation.

        Loci are written one per line; a None allele is written as 0 and
        every allele is left-padded with zeros to marker_len digits.
        """
        rep = [self.comment_line + '\n']
        rep.append('\n'.join(self.loci_list) + '\n')
        for pop in self.populations:
            rep.append('Pop\n')
            for name, markers in pop:
                rep.append(name)
                rep.append(',')
                for marker in markers:
                    rep.append(' ')
                    for al in marker:
                        if al is None:
                            al = 0
                        rep.append(str(al).zfill(self.marker_len))
                rep.append('\n')
        return "".join(rep)

    def split_in_pops(self, pop_names):
        """Splits a GP record in a dictionary with 1 pop per entry.

        Given a record with n pops and m loci returns a dictionary
        of records (key pop_name) where each item is a record
        with a single pop and m loci.

        Parameters:
        pop_names - Population names, indexed like self.populations.

        The returned records hold deep copies of the loci list and of
        the population data. Raises IndexError if pop_names has fewer
        entries than there are populations.
        """
        gp_pops = {}
        for i, pop in enumerate(self.populations):
            gp_pop = Record()
            gp_pop.marker_len = self.marker_len
            gp_pop.comment_line = self.comment_line
            gp_pop.loci_list = deepcopy(self.loci_list)
            gp_pop.populations = [deepcopy(pop)]
            gp_pops[pop_names[i]] = gp_pop
        return gp_pops

    def split_in_loci(self, gp=None):
        """Splits a GP record in a dictionary with 1 locus per entry.

        Given a record with n pops and m loci returns a dictionary
        of records (key locus name) where each item is a record
        with a single locus and n pops.

        Parameters:
        gp - Unused. NOTE(review): accepted (and optional) only so that
             callers written against the upstream Biopython signature,
             which passes an extra argument, keep working -- confirm.
        """
        gp_loci = {}
        for i, locus in enumerate(self.loci_list):
            gp_pop = Record()
            gp_pop.marker_len = self.marker_len
            gp_pop.comment_line = self.comment_line
            gp_pop.loci_list = [locus]
            # Keep every population, but only the i-th marker of each
            # individual.
            gp_pop.populations = [
                [(indiv[0], [indiv[1][i]]) for indiv in pop]
                for pop in self.populations
            ]
            gp_loci[locus] = gp_pop
        return gp_loci

    def remove_population(self, pos):
        """Removes a population (by position).

        When pop_list has one name per population, the name at the same
        position is removed too so both lists stay aligned; otherwise
        pop_list is left untouched.
        """
        names_aligned = len(self.pop_list) == len(self.populations)
        del self.populations[pos]
        if names_aligned:
            del self.pop_list[pos]

    def remove_locus_by_position(self, pos):
        """Removes a locus by position.

        Deletes the locus name and the marker at that position from
        every individual of every population.
        """
        del self.loci_list[pos]
        for pop in self.populations:
            for indiv in pop:
                del indiv[1][pos]
210
218
219
220