Trees | Indices | Help |
---|
|
1 # Copyright 2004 by James Casbon. All rights reserved. 2 # This code is part of the Biopython distribution and governed by its 3 # license. Please see the LICENSE file that should have been included 4 # as part of this package. 5 6 """ 7 Code to deal with COMPASS output, a program for profile/profile comparison. 8 9 Compass is described in: 10 11 Sadreyev R, Grishin N. COMPASS: a tool for comparison of multiple protein 12 alignments with assessment of statistical significance. J Mol Biol. 2003 Feb 13 7;326(1):317-36. 14 15 Tested with COMPASS 1.24. 16 17 Functions: 18 read Reads a COMPASS file containing one COMPASS record 19 parse Iterates over records in a COMPASS file. 20 21 Classes: 22 Record One result of a COMPASS file 23 24 OBSOLETE CLASSES: 25 26 _Scanner Scan compass results 27 _Consumer Consume scanner events 28 RecordParser Parse one compass record 29 Iterator Iterate through a number of compass records 30 """ 31 import re 32 33 3436 record = None 37 try: 38 line = handle.next() 39 record = Record() 40 __read_names(record, line) 41 line = handle.next() 42 __read_threshold(record, line) 43 line = handle.next() 44 __read_lengths(record, line) 45 line = handle.next() 46 __read_profilewidth(record, line) 47 line = handle.next() 48 __read_scores(record, line) 49 except StopIteration: 50 if not record: 51 raise ValueError("No record found in handle") 52 else: 53 raise ValueError("Unexpected end of stream.") 54 for line in handle: 55 if is_blank_line(line): 56 continue 57 __read_query_alignment(record, line) 58 try: 59 line = handle.next() 60 __read_positive_alignment(record, line) 61 line = handle.next() 62 __read_hit_alignment(record, line) 63 except StopIteration: 64 raise ValueError("Unexpected end of stream.") 65 return record6668 record = None 69 try: 70 line = handle.next() 71 except StopIteration: 72 return 73 while True: 74 try: 75 record = Record() 76 __read_names(record, line) 77 line = handle.next() 78 __read_threshold(record, line) 79 line = handle.next() 80 __read_lengths(record, line) 81 line = handle.next() 82 __read_profilewidth(record, line) 83 line = handle.next() 84 __read_scores(record, line) 85 except StopIteration: 86 raise ValueError("Unexpected end of stream.") 87 for line in handle: 88 if not line.strip(): 89 continue 90 if "Ali1:" in line: 91 yield record 92 break 93 __read_query_alignment(record, line) 94 try: 95 line = handle.next() 96 __read_positive_alignment(record, line) 97 line = handle.next() 98 __read_hit_alignment(record, line) 99 except StopIteration: 100 raise ValueError("Unexpected end of stream.") 101 else: 102 yield record 103 break104106 """ 107 Hold information from one compass hit. 108 Ali1 one is the query, Ali2 the hit. 109 """ 110141 142 # Everything below is private 143 144 __regex = {"names": re.compile("Ali1:\s+(\S+)\s+Ali2:\s+(\S+)\s+"), 145 "threshold": re.compile("Threshold of effective gap content in columns: (\S+)"), 146 "lengths": re.compile("length1=(\S+)\s+filtered_length1=(\S+)\s+length2=(\S+)\s+filtered_length2=(\S+)"), 147 "profilewidth": re.compile("Nseqs1=(\S+)\s+Neff1=(\S+)\s+Nseqs2=(\S+)\s+Neff2=(\S+)"), 148 "scores": re.compile("Smith-Waterman score = (\S+)\s+Evalue = (\S+)"), 149 "start": re.compile("(\d+)"), 150 "align": re.compile("^.{15}(\S+)"), 151 "positive_alignment": re.compile("^.{15}(.+)"), 152 } 153112 self.query='' 113 self.hit='' 114 self.gap_threshold=0 115 self.query_length=0 116 self.query_filtered_length=0 117 self.query_nseqs=0 118 self.query_neffseqs=0 119 self.hit_length=0 120 self.hit_filtered_length=0 121 self.hit_nseqs=0 122 self.hit_neffseqs=0 123 self.sw_score=0 124 self.evalue=-1 125 self.query_start=-1 126 self.hit_start=-1 127 self.query_aln='' 128 self.hit_aln='' 129 self.positives=''130 131133 """Return the length of the query covered in alignment""" 134 s = self.query_aln.replace("=", "") 135 return len(s)136155 """ 156 Ali1: 60456.blo.gz.aln Ali2: allscop//14984.blo.gz.aln 157 ------query----- -------hit------------- 158 """ 159 if not "Ali1:" in line: 160 raise ValueError("Line does not contain 'Ali1:':\n%s" % line) 161 m = __regex["names"].search(line) 162 record.query = m.group(1) 163 record.hit = m.group(2)164166 if not line.startswith("Threshold"): 167 raise ValueError("Line does not start with 'Threshold':\n%s" % line) 168 m = __regex["threshold"].search(line) 169 record.gap_threshold = float(m.group(1))170172 if not line.startswith("length1="): 173 raise ValueError("Line does not start with 'length1=':\n%s" % line) 174 m = __regex["lengths"].search(line) 175 record.query_length = int(m.group(1)) 176 record.query_filtered_length = float(m.group(2)) 177 record.hit_length = int(m.group(3)) 178 record.hit_filtered_length = float(m.group(4))179181 if not "Nseqs1" in line: 182 raise ValueError("Line does not contain 'Nseqs1':\n%s" % line) 183 m = __regex["profilewidth"].search(line) 184 record.query_nseqs = int(m.group(1)) 185 record.query_neffseqs = float(m.group(2)) 186 record.hit_nseqs = int(m.group(3)) 187 record.hit_neffseqs = float(m.group(4))188190 if not line.startswith("Smith-Waterman"): 191 raise ValueError("Line does not start with 'Smith-Waterman':\n%s" % line) 192 m = __regex["scores"].search(line) 193 if m: 194 record.sw_score = int(m.group(1)) 195 record.evalue = float(m.group(2)) 196 else: 197 record.sw_score = 0 198 record.evalue = -1.0199201 m = __regex["start"].search(line) 202 if m: 203 record.query_start = int(m.group(1)) 204 m = __regex["align"].match(line) 205 assert m!=None, "invalid match" 206 record.query_aln += m.group(1)207209 m = __regex["positive_alignment"].match(line) 210 assert m!=None, "invalid match" 211 record.positives += m.group(1)212214 m = __regex["start"].search(line) 215 if m: 216 record.hit_start = int(m.group(1)) 217 m = __regex["align"].match(line) 218 assert m!=None, "invalid match" 219 record.hit_aln += m.group(1)220 221 # Everything below is obsolete 222 223 from Bio import File 224 from Bio.ParserSupport import * 225 226228 """Reads compass output and generate events (OBSOLETE)""" 229305231 """Feed in COMPASS ouput""" 232 233 if isinstance(handle, File.UndoHandle): 234 pass 235 else: 236 handle = File.UndoHandle(handle) 237 238 239 assert isinstance(handle, File.UndoHandle), \ 240 "handle must be an UndoHandle" 241 if handle.peekline(): 242 self._scan_record(handle, consumer)243 244246 self._scan_names(handle, consumer) 247 self._scan_threshold(handle, consumer) 248 self._scan_lengths(handle,consumer) 249 self._scan_profilewidth(handle, consumer) 250 self._scan_scores(handle,consumer) 251 self._scan_alignment(handle,consumer)252254 """ 255 Ali1: 60456.blo.gz.aln Ali2: allscop//14984.blo.gz.aln 256 """ 257 read_and_call(handle, consumer.names, contains="Ali1:")258260 """ 261 Threshold of effective gap content in columns: 0.5 262 """ 263 read_and_call(handle, consumer.threshold, start="Threshold")264266 """ 267 length1=388 filtered_length1=386 length2=145 filtered_length2=137 268 """ 269 read_and_call(handle, consumer.lengths, start="length1=")270272 """ 273 Nseqs1=399 Neff1=12.972 Nseqs2=1 Neff2=6.099 274 """ 275 read_and_call(handle, consumer.profilewidth, contains="Nseqs1")276278 """ 279 Smith-Waterman score = 37 Evalue = 5.75e+02 280 """ 281 read_and_call(handle, consumer.scores, start="Smith-Waterman")282284 """ 285 QUERY 2 LSDRLELVSASEIRKLFDIAAGMKDVISLGIGEPDFDTPQHIKEYAKEALDKGLTHYGPN 286 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 287 QUERY 2 LSDRLELVSASEIRKLFDIAAGMKDVISLGIGEPDFDTPQHIKEYAKEALDKGLTHYGPN 288 289 290 QUERY IGLLELREAIAEKLKKQNGIEADPKTEIMVLLGANQAFLMGLSAFLKDGEEVLIPTPAFV 291 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 292 QUERY IGLLELREAIAEKLKKQNGIEADPKTEIMVLLGANQAFLMGLSAFLKDGEEVLIPTPAFV 293 294 """ 295 while 1: 296 line = handle.readline() 297 if not line: 298 break 299 if is_blank_line(line): 300 continue 301 else: 302 consumer.query_alignment(line) 303 read_and_call(handle, consumer.positive_alignment) 304 read_and_call(handle, consumer.hit_alignment)307 # all regular expressions used -- compile only once 308 _re_names = re.compile("Ali1:\s+(\S+)\s+Ali2:\s+(\S+)\s+") 309 _re_threshold = \ 310 re.compile("Threshold of effective gap content in columns: (\S+)") 311 _re_lengths = \ 312 re.compile("length1=(\S+)\s+filtered_length1=(\S+)\s+length2=(\S+)" 313 + "\s+filtered_length2=(\S+)") 314 _re_profilewidth = \ 315 re.compile("Nseqs1=(\S+)\s+Neff1=(\S+)\s+Nseqs2=(\S+)\s+Neff2=(\S+)") 316 _re_scores = re.compile("Smith-Waterman score = (\S+)\s+Evalue = (\S+)") 317 _re_start = re.compile("(\d+)") 318 _re_align = re.compile("^.{15}(\S+)") 319 _re_positive_alignment = re.compile("^.{15}(.+)") 320381322 self.data = None323325 """ 326 Ali1: 60456.blo.gz.aln Ali2: allscop//14984.blo.gz.aln 327 ------query----- -------hit------------- 328 """ 329 self.data = Record() 330 m = self.__class__._re_names.search(line) 331 self.data.query = m.group(1) 332 self.data.hit = m.group(2)333 337339 m = self.__class__._re_lengths.search(line) 340 self.data.query_length = int(m.group(1)) 341 self.data.query_filtered_length = float(m.group(2)) 342 self.data.hit_length = int(m.group(3)) 343 self.data.hit_filtered_length = float(m.group(4))344346 m = self.__class__._re_profilewidth.search(line) 347 self.data.query_nseqs = int(m.group(1)) 348 self.data.query_neffseqs = float(m.group(2)) 349 self.data.hit_nseqs = int(m.group(3)) 350 self.data.hit_neffseqs = float(m.group(4))351353 m = self.__class__._re_scores.search(line) 354 if m: 355 self.data.sw_score = int(m.group(1)) 356 self.data.evalue = float(m.group(2)) 357 else: 358 self.data.sw_score = 0 359 self.data.evalue = -1.0360362 m = self.__class__._re_start.search(line) 363 if m: 364 self.data.query_start = int(m.group(1)) 365 m = self.__class__._re_align.match(line) 366 assert m!=None, "invalid match" 367 self.data.query_aln = self.data.query_aln + m.group(1)368370 m = self.__class__._re_positive_alignment.match(line) 371 assert m!=None, "invalid match" 372 self.data.positives = self.data.positives + m.group(1)373383 """Parses compass results into a Record object (OBSOLETE). 384 """ 388 389397391 if isinstance(handle, File.UndoHandle): 392 uhandle = handle 393 else: 394 uhandle = File.UndoHandle(handle) 395 self._scanner.feed(uhandle, self._consumer) 396 return self._consumer.data399 """Iterate through a file of compass results (OBSOLETE).""" 403422405 lines = [] 406 while 1: 407 line = self._uhandle.readline() 408 if not line: 409 break 410 if line[0:4] == "Ali1" and lines: 411 self._uhandle.saveline(line) 412 break 413 414 lines.append(line) 415 416 417 if not lines: 418 return None 419 420 data = ''.join(lines) 421 return self._parser.parse(File.StringHandle(data))
Trees | Indices | Help |
---|
Generated by Epydoc 3.0.1 on Wed Jul 14 01:50:40 2010 | http://epydoc.sourceforge.net |