1
2
3
4
5
6 """
7 This class provides code to parse BIG GenePop files.
8
9 The difference between this class and the standard Bio.PopGen.GenePop.Record
10 class is that this one does not read the whole file to memory.
It provides an iterator interface, slower but consuming much less memory.
12 Should be used with big files (Thousands of markers and individuals).
13
14 See http://wbiomed.curtin.edu.au/genepop/ , the format is documented
15 here: http://wbiomed.curtin.edu.au/genepop/help_input.html .
16
17 Classes:
18 FileRecord Holds GenePop data.
19
20 Functions:
21
22
23 """
24 from copy import deepcopy
25 from Bio.PopGen.GenePop import get_indiv
26
def read(fname):
    """Open a GenePop file for incremental (iterator-style) parsing.

    fname is the name of a file that contains a GenePop record.

    Returns a FileRecord bound to that file.
    """
    return FileRecord(fname)
34
35
class FileRecord(object):
    """Holds information from a GenePop record, read incrementally.

    Only the header (comment line and loci names) is kept in memory; the
    individuals are read from the file one at a time on request.

    Members:
    comment_line    Comment line (first line of the file).

    loci_list       List of loci names.

    fname           Name of the file being read.

    current_pop     Index (0 based) of the population being read.

    current_ind     Number of individuals already read in that population.

    Functions:
    get_individual  Returns the next individual of the current population.

    skip_population Skips the current population.

    skip_population skips the individuals of the current population, returns
    True if there are more populations.

    get_individual returns the next individual of the current population,
    True when the population ended but another one follows, or False at
    the end of the file.
    Each individual is a pair composed by individual
    name and a list of alleles (2 per marker or 1 for haploid data).
    Examples
    ('Ind1', [(1,2), (3,3), (200,201)])
    ('Ind2', [(2,None), (3,3), (None,None)])
    ('Other1', [(1,1), (4,3), (200,200)])
    """

    def __init__(self, fname):
        """Store the file name and parse the header of the file.

        fname - name of a file containing a GenePop record.

        Raises ValueError (from start_read) if no population is found.
        """
        self.comment_line = ""
        self.loci_list = []
        self.fname = fname
        self.start_read()

    def __str__(self):
        """Returns (reconstructs) a GenePop textual representation.

        This might take a lot of memory.
        Marker length will be 3.

        The file is re-read from the start; the read position that was
        current before the call is restored afterwards.
        """
        marker_len = 3
        rep = [self.comment_line + '\n']
        rep.append('\n'.join(self.loci_list) + '\n')
        # Remember where the caller was, so iteration can resume there.
        current_pop = self.current_pop
        current_ind = self.current_ind
        # Rewind: reopen the file and position just after the first "Pop".
        self.seek_position(0, 0)
        try:
            rep.append('Pop\n')
            while True:
                res = self.get_individual()
                if res is True:
                    # Population boundary.
                    rep.append('Pop\n')
                elif res is False:
                    # End of file.
                    break
                else:
                    name, markers = res
                    rep.append(name)
                    rep.append(',')
                    for marker in markers:
                        rep.append(' ')
                        for al in marker:
                            # Missing alleles (None) are written as zero.
                            if al is None:
                                al = '0'
                            rep.append(str(al).zfill(marker_len))
                    rep.append('\n')
        finally:
            # Restore the caller's position even if parsing failed.
            self.seek_position(current_pop, current_ind)
        return "".join(rep)

    def start_read(self):
        """Starts parsing a file containing a GenePop file.

        Reads the comment line and the loci names, and leaves the file
        positioned on the first individual of the first population.

        Raises ValueError if the file has no "Pop" line (this includes
        empty and truncated files); the file handle is closed first.
        """
        self._handle = open(self.fname)
        # Builtin next() works on Python 2.6+ and 3; the default avoids
        # leaking StopIteration to the caller on a truncated file.
        comment = next(self._handle, None)
        first_loci_line = next(self._handle, None)
        if comment is None or first_loci_line is None:
            self._handle.close()
            raise ValueError('No population data found, file probably not GenePop related')
        self.comment_line = comment.rstrip()

        # Loci may come one per line or several on a single line separated
        # by spaces and/or commas. Commas are turned into spaces (rather
        # than deleted) so that "a,b" is not merged into "ab".
        all_loci = first_loci_line.rstrip().replace(',', ' ').split()
        self.loci_list.extend(all_loci)
        for line in self._handle:
            line = line.rstrip()
            if line.upper() == 'POP':
                break
            self.loci_list.append(line)
        else:
            # Reached end of file without finding a population marker.
            self._handle.close()
            raise ValueError('No population data found, file probably not GenePop related')

        self.current_pop = 0
        self.current_ind = 0

    def skip_header(self):
        """Skips the Header. To be done after a re-open."""
        self.current_pop = 0
        self.current_ind = 0
        for line in self._handle:
            if line.rstrip().upper() == "POP":
                return

    def seek_position(self, pop, indiv):
        """Seeks a certain position in the file.

        pop   - pop position (0 is first)
        indiv - individual in pop

        The file is closed, reopened and read sequentially up to the
        requested position.
        """
        self._handle.close()
        self._handle = open(self.fname)
        self.skip_header()
        while pop > 0:
            self.skip_population()
            pop -= 1
        while indiv > 0:
            self.get_individual()
            indiv -= 1

    def skip_population(self):
        """Skips the current population.

        Returns True if there is another population, False at end of file.
        """
        for line in self._handle:
            if line.rstrip().upper() == 'POP':
                self.current_pop += 1
                self.current_ind = 0
                return True
        return False

    def get_individual(self):
        """Gets the next individual.

        Returns individual information if there are more individuals
        in the current population.
        Returns True if there are no more individuals in the current
        population, but there are more populations. Next read will
        be of the following pop.
        Returns False if at end of file.
        """
        line = next(self._handle, None)
        if line is None:
            return False
        line = line.rstrip()
        if line.upper() == 'POP':
            self.current_pop += 1
            self.current_ind = 0
            return True
        self.current_ind += 1
        # get_indiv yields three values; the third one is not used here.
        indiv_name, allele_list, ignore = get_indiv(line)
        return (indiv_name, allele_list)

    def remove_population(self, pos, fw):
        """Removes a population (by position).

        pos - position
        fw  - A file handle (write enabled) to write the new record

        NOTE: currently a no-op; nothing is written to fw.
        """
        pass

    def remove_locus_by_position(self, pos, fw):
        """Removes a locus by position.

        pos - position
        fw  - A file handle (write enabled) to write the new record

        NOTE: currently a no-op; nothing is written to fw.
        """

    def remove_locus_by_name(self, name, fw):
        """Removes a locus by name.

        name - name
        fw   - A file handle (write enabled) to write the new record

        Only the first locus matching name is considered; if no locus
        matches, nothing happens.
        """
        for i, locus in enumerate(self.loci_list):
            if locus == name:
                self.remove_locus_by_position(i, fw)
                return
222
223
224