Package Bio :: Package SeqIO :: Module SwissIO
[hide private]
[frames] | no frames]

Source Code for Module Bio.SeqIO.SwissIO

  1  # Copyright 2006-2010 by Peter Cock and Michiel de Hoon. 
  2  # All rights reserved. 
  3  # 
  4  # This code is part of the Biopython distribution and governed by its 
  5  # license.  Please see the LICENSE file that should have been included 
  6  # as part of this package. 
  7   
  8  """Bio.SeqIO support for the "swiss" (aka SwissProt/UniProt) file format. 
  9   
 10  You are expected to use this module via the Bio.SeqIO functions. 
 11  See also the Bio.SwissProt module which offers more than just accessing 
 12  the sequences as SeqRecord objects.""" 
 13   
 14  from Bio import Seq 
 15  from Bio import SeqRecord 
 16  from Bio import Alphabet 
 17  from Bio import SeqFeature 
 18  from Bio import SwissProt 
 19       
 20  #This is a generator function! 
21 -def SwissIterator(handle):
22 """Breaks up a Swiss-Prot/UniProt file into SeqRecord objects. 23 24 Every section from the ID line to the terminating // becomes 25 a single SeqRecord with associated annotation and features. 26 27 This parser is for the flat file "swiss" format as used by: 28 * Swiss-Prot aka SwissProt 29 * TrEMBL 30 * UniProtKB aka UniProt Knowledgebase 31 32 It does NOT read their new XML file format. 33 http://www.expasy.org/sprot/ 34 35 For consistency with BioPerl and EMBOSS we call this the "swiss" 36 format. 37 """ 38 swiss_records = SwissProt.parse(handle) 39 for swiss_record in swiss_records: 40 # Convert the SwissProt record to a SeqRecord 41 seq = Seq.Seq(swiss_record.sequence, Alphabet.generic_protein) 42 record = SeqRecord.SeqRecord(seq, 43 id=swiss_record.accessions[0], 44 name=swiss_record.entry_name, 45 description=swiss_record.description, 46 ) 47 record.description = swiss_record.description 48 for cross_reference in swiss_record.cross_references: 49 if len(cross_reference) < 2: 50 continue 51 database, accession = cross_reference[:2] 52 dbxref = "%s:%s" % (database, accession) 53 if not dbxref in record.dbxrefs: 54 record.dbxrefs.append(dbxref) 55 annotations = record.annotations 56 annotations['accessions'] = swiss_record.accessions 57 annotations['date'] = swiss_record.created[0] 58 annotations['date_last_sequence_update'] = swiss_record.sequence_update[0] 59 if swiss_record.annotation_update: 60 annotations['date_last_annotation_update'] = swiss_record.annotation_update[0] 61 if swiss_record.gene_name: 62 annotations['gene_name'] = swiss_record.gene_name 63 annotations['organism'] = swiss_record.organism.rstrip(".") 64 annotations['taxonomy'] = swiss_record.organism_classification 65 annotations['ncbi_taxid'] = swiss_record.taxonomy_id 66 if swiss_record.host_organism: 67 annotations['organism_host'] = [word.rstrip(".") \ 68 for word \ 69 in swiss_record.host_organism] 70 if swiss_record.comments: 71 annotations['comment'] = "\n".join(swiss_record.comments) 72 if swiss_record.references: 73 annotations['references'] = [] 74 for reference in swiss_record.references: 75 feature = SeqFeature.Reference() 76 feature.comment = " ".join(["%s=%s;" % (key, value) \ 77 for key, value \ 78 in reference.comments]) 79 for key, value in reference.references: 80 if key == 'PubMed': 81 feature.pubmed_id = value 82 elif key == 'MEDLINE': 83 feature.medline_id = value 84 elif key == 'DOI': 85 pass 86 elif key == 'AGRICOLA': 87 pass 88 else: 89 raise ValueError(\ 90 "Unknown key %s found in references" % key) 91 feature.authors = reference.authors 92 feature.title = reference.title 93 feature.journal = reference.location 94 annotations['references'].append(feature) 95 if swiss_record.keywords: 96 record.annotations['keywords'] = swiss_record.keywords 97 yield record
98 99 if __name__ == "__main__": 100 print "Quick self test..." 101 102 example_filename = "../../Tests/SwissProt/sp008" 103 104 import os 105 if not os.path.isfile(example_filename): 106 print "Missing test file %s" % example_filename 107 else: 108 #Try parsing it! 109 handle = open(example_filename) 110 records = SwissIterator(handle) 111 for record in records: 112 print record.name 113 print record.id 114 print record.annotations['keywords'] 115 print repr(record.annotations['organism']) 116 print record.seq.tostring()[:20] + "..." 117 handle.close() 118