1 # Standard Content and Dispatch handlers for the Bioformat IO system
2 # This is a Python module.
3 """This module is DEPRECATED.
5 Andrew Dalke is no longer maintaining Martel or Bio.Mindy, and these modules
6 and associate ones like Bio.StdHandler are now deprecated. They are no longer
7 used in any of the current Biopython parsers, and are likely to be removed
12 warnings.warn("Martel and those parts of Biopython depending on it" \
13 +" directly (such as Bio.Mindy and Bio.StdHandler) are now" \
14 +" deprecated, and will be removed in a future release of"\
15 +" Biopython. If you want to continue to use this code,"\
16 +" please get in contact with the Biopython developers via"\
17 +" the mailing lists to avoid its permanent removal from"\
21 from xml.sax import handler
22 from Martel import Parser, Dispatch
23 from Bio import Std, Decode
25 ###################################
27 # Helper functions to make functions
def add_int_handler(klass, tag, attrname):
    """Add ``start_<tag>``/``end_<tag>`` methods that store an int.

    The generated ``start`` method calls ``self.save_characters()``; the
    generated ``end`` method converts ``self.get_characters()`` with ``int``
    and stores the result on the instance attribute named ``attrname``.

    Raises AssertionError if ``klass`` already has either method.
    """
    assert not hasattr(klass, "start_" + tag), "existing method exists"
    assert not hasattr(klass, "end_" + tag), "existing method exists"

    # Closures are used instead of exec'ing a source template: ``attrname``
    # is captured directly, so no code has to be built from strings.
    def start(self, tag, attrs):
        self.save_characters()

    def end(self, tag):
        setattr(self, attrname, int(self.get_characters()))

    setattr(klass, "start_" + tag, start)
    setattr(klass, "end_" + tag, end)
def add_text_handler(klass, tag, attrname):
    """Add ``start_<tag>``/``end_<tag>`` methods that store element text.

    The generated ``start`` method calls ``self.save_characters()``; the
    generated ``end`` method stores ``self.get_characters()`` on the
    instance attribute named ``attrname`` (each call overwrites the last).

    Raises AssertionError if ``klass`` already has either method.
    """
    assert not hasattr(klass, "start_" + tag), "existing method exists"
    assert not hasattr(klass, "end_" + tag), "existing method exists"

    # Closures capture ``attrname``; no source template / exec is needed.
    def start(self, tag, attrs):
        self.save_characters()

    def end(self, tag):
        setattr(self, attrname, self.get_characters())

    setattr(klass, "start_" + tag, start)
    setattr(klass, "end_" + tag, end)
def add_text_dict_handler(klass, tag, attrname, key):
    """Add ``start_<tag>``/``end_<tag>`` methods that fill a dict entry.

    The generated ``end`` method stores ``self.get_characters()`` under
    ``key`` in the mapping held by the instance attribute ``attrname``.
    That mapping must already exist when ``end`` runs.

    Raises AssertionError if ``klass`` already has either method.
    """
    assert not hasattr(klass, "start_" + tag), "existing method exists"
    assert not hasattr(klass, "end_" + tag), "existing method exists"

    # Closures capture ``attrname`` and ``key``; no exec'd template needed.
    def start(self, tag, attrs):
        self.save_characters()

    def end(self, tag):
        getattr(self, attrname)[key] = self.get_characters()

    setattr(klass, "start_" + tag, start)
    setattr(klass, "end_" + tag, end)
def add_text_decode_handler(klass, tag, attrname):
    """Add ``start_<tag>``/``end_<tag>`` methods that store decoded text.

    The generated ``start`` method remembers the element's optional
    ``bioformat:decode`` attribute.  The generated ``end`` method fetches
    the saved characters, passes them through
    ``Decode.make_decoder(<decode>)`` when a decode name was given, and
    stores the result on the instance attribute ``attrname``.

    Raises AssertionError if ``klass`` already has either method.
    """
    assert not hasattr(klass, "start_" + tag), "existing method exists"
    assert not hasattr(klass, "end_" + tag), "existing method exists"

    # Computed up front because the generated methods take their own
    # ``tag`` parameter, which shadows this function's ``tag``.
    decode_attr = "_decode_" + tag

    def start(self, tag, attrs):
        self.save_characters()
        setattr(self, decode_attr, attrs.get("bioformat:decode", None))

    def end(self, tag):
        # Fetch the text first, then decode it, then store the decoded
        # value.  The earlier template decoded an unassigned local and then
        # stored the raw text, so decoding could never take effect.
        text = self.get_characters()
        decode = getattr(self, decode_attr)
        if decode is not None:
            text = Decode.make_decoder(decode)(text)
        setattr(self, attrname, text)

    setattr(klass, "start_" + tag, start)
    setattr(klass, "end_" + tag, end)
def add_first_text_handler(klass, tag, attrname):
    """Add ``start_<tag>``/``end_<tag>`` methods that keep the first text.

    Like ``add_text_handler`` but the text is only captured while the
    instance attribute ``attrname`` is still ``None``; later occurrences of
    the element leave the stored value untouched.  The attribute must be
    initialised (to ``None``) before the handlers run.

    Raises AssertionError if ``klass`` already has either method.
    """
    assert not hasattr(klass, "start_" + tag), "existing method exists"
    assert not hasattr(klass, "end_" + tag), "existing method exists"

    # NOTE(review): the ``is None`` guards were not visible in the damaged
    # source; they are reconstructed from the function name and from the
    # three ``attrname`` substitutions the old template performed.
    def start(self, tag, attrs):
        if getattr(self, attrname) is None:
            self.save_characters()

    def end(self, tag):
        if getattr(self, attrname) is None:
            setattr(self, attrname, self.get_characters())

    setattr(klass, "start_" + tag, start)
    setattr(klass, "end_" + tag, end)
def add_text_block_handler(klass, tag, joinattr, defaultjoin, attrname):
    """Add handlers that collect ``<tag>`` texts inside a ``<tag>_block``.

    Four methods are attached to ``klass``:

    * ``start_<tag>_block`` builds a join function with
      ``Decode.make_decoder`` from the block's ``joinattr`` attribute
      (falling back to ``defaultjoin``) and starts an empty line list;
    * ``start_<tag>`` / ``end_<tag>`` append each element's text to it;
    * ``end_<tag>_block`` applies the join function to the collected lines
      and stores the result on the instance attribute ``attrname``.

    Raises AssertionError if ``klass`` already has any of the four methods.
    """
    assert not hasattr(klass, "start_" + tag), "existing method exists"
    assert not hasattr(klass, "end_" + tag), "existing method exists"
    assert not hasattr(klass, "start_" + tag + "_block"), \
        "existing method exists"
    assert not hasattr(klass, "end_" + tag + "_block"), \
        "existing method exists"

    # Per-tag private attribute names, computed here because the generated
    # methods have their own ``tag`` parameter that shadows the outer one.
    join_func_attr = "_%s_join_func" % tag
    lines_attr = "_%s_lines" % tag

    def start_block(self, tag, attrs):
        decoder = Decode.make_decoder(attrs.get(joinattr, defaultjoin))
        setattr(self, join_func_attr, decoder)
        setattr(self, lines_attr, [])

    def end_block(self, tag):
        join_func = getattr(self, join_func_attr)
        setattr(self, attrname, join_func(getattr(self, lines_attr)))

    def start(self, tag, attrs):
        self.save_characters()

    def end(self, tag):
        getattr(self, lines_attr).append(self.get_characters())

    setattr(klass, "start_" + tag, start)
    setattr(klass, "end_" + tag, end)
    setattr(klass, "start_" + tag + "_block", start_block)
    setattr(klass, "end_" + tag + "_block", end_block)
def add_value_handler(klass, tag, attrname):
    """Add ``start_<tag>``/``end_<tag>`` methods that fill a named value.

    The generated ``start`` method records the element's required ``name``
    attribute and optional ``bioformat:decode`` attribute.  The generated
    ``end`` method takes the element text, decodes it with
    ``Decode.make_decoder`` when a decode name was given, and stores it in
    the mapping held by instance attribute ``attrname`` under that name.

    Raises AssertionError if ``klass`` already has either method.
    """
    assert not hasattr(klass, "start_" + tag), "existing method exists"
    assert not hasattr(klass, "end_" + tag), "existing method exists"

    # Per-tag private attribute names (the inner ``tag`` parameter shadows
    # the outer one, so they are built here).
    name_attr = "_%s_name" % tag
    decode_attr = "_%s_decode" % tag

    def start(self, tag, attrs):
        setattr(self, name_attr, attrs["name"])
        setattr(self, decode_attr, attrs.get("bioformat:decode", None))
        self.save_characters()

    def end(self, tag):
        value = self.get_characters()
        decode = getattr(self, decode_attr)
        if decode is not None:
            value = Decode.make_decoder(decode)(value)
        getattr(self, attrname)[getattr(self, name_attr)] = value

    setattr(klass, "start_" + tag, start)
    setattr(klass, "end_" + tag, end)
147 #################################
class ConvertHandler(handler.ContentHandler):
    """Used to read records and produce output.

    SAX events that occur inside a ``record_tag`` element (including the
    element's own start/end) are forwarded to ``record_builder``.  When the
    record element ends, ``record_builder.document`` is handed to
    ``writer.write``.  Character data outside a record is discarded.
    """

    def __init__(self, record_builder, writer, record_tag = "record"):
        handler.ContentHandler.__init__(self)
        self.record_builder = record_builder
        # Must be kept: endElement() writes each finished record to it.
        self.writer = writer
        self.record_tag = record_tag

    def startDocument(self):
        self.inside_record = 0
        # ``characters`` is rebound per instance so that no per-event test
        # of ``inside_record`` is needed for character data.
        self.characters = self.ignore_characters

    def startElement(self, tag, attrs):
        if self.inside_record:
            self.record_builder.startElement(tag, attrs)
        elif tag == self.record_tag:
            self.record_builder.startDocument()
            self.inside_record = 1
            self.characters = self.record_builder.characters
            self.record_builder.startElement(tag, attrs)

    def endElement(self, tag):
        if self.inside_record:
            self.record_builder.endElement(tag)
            if tag == self.record_tag:
                self.record_builder.endDocument()
                self.writer.write(self.record_builder.document)
                self.inside_record = 0
                self.characters = self.ignore_characters

    def ignore_characters(self, s):
        """Discard character data seen outside of a record."""
        pass
class ConvertDispatchHandler(Dispatch.Dispatcher):
    """Used to read records and produce output through a Dispatcher.

    The record's end handler is bound to ``write_record``, which passes
    ``record_builder.document`` to ``writer.write``.
    """

    def __init__(self, record_builder, writer, record_tag = "record"):
        # Registered before the base initialiser runs, as in the original
        # statement order.
        setattr(self, "end_" + record_tag, self.write_record)
        Dispatch.Dispatcher.__init__(self,
                                     remap = {record_tag: "bioformat:"}
                                     )
        self.acquire(record_builder)
        self.record_builder = record_builder
        # Must be kept: write_record() sends each finished record to it.
        self.writer = writer
        self.record_tag = record_tag

    def write_record(self, tag):
        """Write the builder's current document to the writer."""
        self.writer.write(self.record_builder.document)
class RecognizeHandler(handler.ContentHandler, handler.ErrorHandler):
    """Content/error handler that notes whether parsing hit a real error.

    NOTE(review): most of this class was missing from the damaged source.
    The ``recognized``/``exc`` attributes, the ``error`` alias and the
    ``"record"`` tag test are reconstructed and must be confirmed against
    the project history before relying on them.
    """

    def __init__(self):
        self.recognized = 1
        self.exc = None

    def fatalError(self, exc):
        # An "incomplete" parse is tolerated; any other error is recorded.
        if isinstance(exc, Parser.ParserIncompleteException):
            pass
        else:
            self.recognized = 0
            self.exc = exc
    error = fatalError

    def endElement(self, tag):
        # Raising here stops the parse as soon as one record has finished.
        if tag == "record":
            raise Parser.ParserException("we finished a record!")
class Handle_dbid(Dispatch.Callback):
    """Report each ``dbid`` element as ``callback(text, attrs)``."""

    def start_dbid(self, tag, attrs):
        # Remembered for end_dbid(), which passes them to the callback.
        self.attrs = attrs
        self.save_characters()

    def end_dbid(self, tag):
        text = self.get_characters()
        self.callback(text, self.attrs)
class Handle_description(Dispatch.Callback):
    """Collect ``description`` texts in a block and report them joined.

    The block's optional ``join`` attribute names the decoder used to join
    the collected lines; without it ``Decode.join_fixspaces`` is used.
    """

    def start_description_block(self, tag, attrs):
        j = attrs.get("join", None)
        # Without the branch the second assignment would always win and
        # make_typechecked_decoder would be called with None.
        if j is None:
            self.join_fctn = Decode.join_fixspaces
        else:
            self.join_fctn = Decode.make_typechecked_decoder(j, list, str)
        self.descriptions = []

    def start_description(self, tag, attrs):
        self.save_characters()

    def end_description(self, tag):
        x = self.get_characters()
        self.descriptions.append(x)

    def end_description_block(self, tag):
        self.callback(self.join_fctn(self.descriptions))
245 #### There can be multiple dbxref_dbids in a dbxref
246 # DR EMBL; X64411; CAA45756.1; -.
247 # <dbxref><..dbname style="swiss">EMBL</..dbname>
248 # <dbid type="primary">X64411</dbid>
249 # <dbid type="accession">CAA45756.1</dbid>
252 # DR P35156, YPUI_BACSU, F;
253 # <dbxref><dbid type="primary" dbname="sprot">P35156</dbid>
254 # <dbid type="accession" dbname="sprot">YPUI_BACSU</dbid>
def _fixup_sp_pattern(exp):
    """Turn a SwissProt DR-line Martel expression into a compiled regex.

    Only the dbxref dbname and dbid groups are kept; they are renamed to
    ``dbname``, ``primary_dbid`` and ``secondary_dbid`` so the result can
    be queried with ``match.group(...)``.

    Returns ``(pattern, dbstyle, primary_type, secondary_type)`` where the
    last three values are read from the groups' ``style``/``type`` attrs.
    """
    # Imported locally: neither name is bound by the module-level imports.
    import re
    import Martel

    exp = Martel.select_names(
        exp, (Std.dbxref_dbname.tag, Std.dbxref_dbid.tag))

    groups = exp._find_groups(Std.dbxref_dbname.tag)
    assert len(groups) == 1, "expected exactly one dbname group"
    dbname_group = groups[0]
    dbstyle = dbname_group.attrs["style"]
    dbname_group.name = "dbname"
    # NOTE(review): attrs are cleared on the assumption that str(exp) would
    # otherwise embed them in the group name and yield an invalid regex;
    # this statement was not visible in the damaged source -- confirm.
    dbname_group.attrs = {}

    groups = exp._find_groups(Std.dbxref_dbid.tag)
    assert len(groups) == 2, "expected exactly two dbid groups"
    groups[0].name = "primary_dbid"
    primary_type = groups[0].attrs["type"]
    groups[0].attrs = {}
    groups[1].name = "secondary_dbid"
    secondary_type = groups[1].attrs["type"]
    groups[1].attrs = {}

    # Anchor at the end so the whole DR line must match.
    pattern = str(exp) + "$"
    pat = re.compile(pattern)
    return pat, dbstyle, primary_type, secondary_type
281 # Turns out these 'fast' versions speed up the dbxref code by about
284 # DR PIR; S08427; S08427.
# Lazily built (pattern, dbstyle, primary_type, secondary_type) tuple.
_fast_dbxref_sp_general_data = None
def _fast_dbxref_sp_general(s):
    """Parse a general SwissProt DR line into two dbxref tuples.

    Returns a pair of ``(dbstyle, dbname, idtype, dbid, negate)`` tuples,
    one for the primary id and one for the secondary id; ``negate`` is 0.

    Raises AssertionError if ``s`` does not match the expected pattern.
    """
    global _fast_dbxref_sp_general_data
    if _fast_dbxref_sp_general_data is None:
        # Built on first use so importing this module stays cheap.
        from Bio.expressions.swissprot import sprot38
        _fast_dbxref_sp_general_data = _fixup_sp_pattern(
            sprot38.real_DR_general)

    pat, dbstyle, primary_type, secondary_type = _fast_dbxref_sp_general_data

    m = pat.match(s)
    assert m is not None, "Ill-formated sp-general dxbref: %r" % s
    return (
        (dbstyle, m.group("dbname"), primary_type,
         m.group("primary_dbid"), 0),
        (dbstyle, m.group("dbname"), secondary_type,
         m.group("secondary_dbid"), 0)
        )
304 # DR PFAM; PF01018; GTP1_OBG; 1.
305 # DR PROSITE; PS00905; GTP1_OBG; 1.
# Lazily built (pattern, dbstyle, primary_type, secondary_type) tuple.
_fast_dbxref_sp_prosite_data = None
def _fast_dbxref_sp_prosite(s):
    """Parse a PROSITE/PFAM-style SwissProt DR line into two dbxref tuples.

    Returns a pair of ``(dbstyle, dbname, idtype, dbid, negate)`` tuples,
    one for the primary id and one for the secondary id; ``negate`` is 0.

    Raises AssertionError if ``s`` does not match the expected pattern.
    """
    global _fast_dbxref_sp_prosite_data

    if _fast_dbxref_sp_prosite_data is None:
        # Built on first use so importing this module stays cheap.
        from Bio.expressions.swissprot import sprot38
        _fast_dbxref_sp_prosite_data = _fixup_sp_pattern(
            sprot38.real_DR_prosite)

    pat, dbstyle, primary_type, secondary_type = _fast_dbxref_sp_prosite_data

    m = pat.match(s)
    assert m is not None, "Ill-formated sp-prosite dxbref: %r" % s
    return (
        (dbstyle, m.group("dbname"), primary_type,
         m.group("primary_dbid"), 0),
        (dbstyle, m.group("dbname"), secondary_type,
         m.group("secondary_dbid"), 0)
        )
327 # DR EMBL; M36407; AAA33110.1; -.
# Lazily built (pattern, dbstyle, primary_type, secondary_type) tuple.
_fast_dbxref_sp_embl_data = None
def _fast_dbxref_sp_embl(s):
    """Parse an EMBL-style SwissProt DR line into two dbxref tuples.

    Returns a pair of ``(dbstyle, dbname, idtype, dbid, negate)`` tuples,
    one for the primary id and one for the secondary id; ``negate`` is 0.

    Raises AssertionError if ``s`` does not match the expected pattern.
    """
    global _fast_dbxref_sp_embl_data

    if _fast_dbxref_sp_embl_data is None:
        # Built on first use so importing this module stays cheap.
        from Bio.expressions.swissprot import sprot38
        _fast_dbxref_sp_embl_data = _fixup_sp_pattern(
            sprot38.real_DR_embl)

    pat, dbstyle, primary_type, secondary_type = _fast_dbxref_sp_embl_data

    m = pat.match(s)
    assert m is not None, "Ill-formated sp-embl dxbref: %r" % s
    return (
        (dbstyle, m.group("dbname"), primary_type,
         m.group("primary_dbid"), 0),
        (dbstyle, m.group("dbname"), secondary_type,
         m.group("secondary_dbid"), 0)
        )
# Maps the ``style`` attribute of a fast_dbxref element to the function
# that parses that style of DR line.
_fast_dbxref_parser_table = {
    "sp-general": _fast_dbxref_sp_general,
    "sp-prosite": _fast_dbxref_sp_prosite,
    "sp-embl": _fast_dbxref_sp_embl,
    }
class Handle_dbxref(Dispatch.Callback):
    """Report database cross references.

    For every id inside a ``dbxref`` element the callback receives
    ``(dbname_style, dbname, idtype, dbid, negate)``.  A ``fast_dbxref``
    element is parsed in one step by the function selected from
    ``_fast_dbxref_parser_table``; while one is open the per-element
    ("slow") reporting is switched off.
    """

    def __init__(self, callback):
        Dispatch.Callback.__init__(self, callback)
        self.supported_features.append("fast-sp-dbxref")
        self.slow_callback = self.callback

    def start_dbxref(self, tag, attrs):
        # Fresh state for each dbxref; all four are read by later handlers.
        self.negate = 0
        self.dbname = None
        self.dbids = []
        self.info = []

    def start_dbxref_dbname(self, tag, attrs):
        assert self.dbname is None, "cannot set the dbname twice"
        self.dbname_style = attrs.get("style", "unknown")
        self.save_characters()

    def end_dbxref_dbname(self, tag):
        self.dbname = self.get_characters()

    def start_dbxref_dbid(self, tag, attrs):
        d = attrs.get("dbname", None)
        if d is None:
            # No inline dbname: use the one from the dbxref_dbname element.
            assert self.dbname is not None, "must set the dbname"
            self.info.append( (self.dbname_style, self.dbname,
                               attrs.get("type", "primary")) )
        else:
            self.info.append( ("bioformat", d,
                               attrs.get("type", "primary")) )
        self.save_characters()

    def end_dbxref_dbid(self, tag):
        self.dbids.append( self.get_characters())

    def start_dbxref_negate(self, tag, attrs):
        self.negate = 1

    def end_dbxref(self, tag):
        cb = self.slow_callback
        if cb is None:
            # Inside a fast_dbxref: end_fast_dbxref() does the reporting.
            return
        negate = self.negate
        for ( (dbname_style, dbname, idtype), dbid) in zip(self.info,
                                                           self.dbids):
            cb(dbname_style, dbname, idtype, dbid, negate)

    def start_fast_dbxref(self, tag, attrs):
        style = attrs["style"]
        self._fast_parser = _fast_dbxref_parser_table[style]
        self.save_characters()
        self.slow_callback = None

    def end_fast_dbxref(self, tag):
        for info in self._fast_parser(self.get_characters()):
            self.callback(*info)
        self.slow_callback = self.callback
class Handle_sequence(Dispatch.Callback):
    """Collect ``sequence`` texts in a block and report the joined result.

    The callback receives one tuple ``(alphabet, seq, gapchar, stopchar)``.
    The alphabet is the block's own ``alphabet`` attribute, else the one
    set by an earlier ``alphabet`` element, else ``"unknown"``.
    """

    global_alphabet = None

    def start_(self, tag, attrs):
        self.global_alphabet = None

    def start_sequence_block(self, tag, attrs):
        self.local_alphabet = attrs.get("alphabet", None)
        self.gapchar = attrs.get("gapchar", None)
        self.stopchar = attrs.get("stopchar", None)
        j = attrs.get("join", None)
        if j is not None:
            self.join_func = Decode.make_typechecked_decoder(j, list, str)
        else:
            self.join_func = None
        self.sequences = []

    def end_sequence_block(self, tag):
        f = self.join_func
        if f is not None:
            # Call the join function itself; the instance has no ``f``
            # attribute.
            seq = f(self.sequences)
        else:
            # Default join: concatenate and drop embedded spaces.
            seq = "".join(self.sequences).replace(" ", "")
        alphabet = self.local_alphabet or self.global_alphabet or "unknown"
        self.callback( (alphabet, seq, self.gapchar, self.stopchar) )

    def start_alphabet(self, tag, attrs):
        self.global_alphabet = attrs["alphabet"]

    def start_sequence(self, tag, attrs):
        self.save_characters()

    def end_sequence(self, tag):
        self.sequences.append(self.get_characters())
class Feature:
    """Plain record for one parsed feature.

    Attributes: ``name``, ``description``, ``location`` and ``qualifiers``
    (a sized collection; only its length is used by ``__str__``).
    """

    def __init__(self, name, description, location, qualifiers):
        self.name = name
        self.description = description
        self.location = location
        self.qualifiers = qualifiers

    def __str__(self):
        return "Feature %r %r %s num_qualifiers = %d" % \
               (self.name, self.description, self.location,
                len(self.qualifiers))
class Handle_feature_location(Dispatch.Callback):
    """Report the location of each feature.

    The callback receives ``(location_style, (start, end))`` when explicit
    start/end values were seen, otherwise
    ``(location_style, (joined_text, None))``.
    """

    def __init__(self, callback, settings = None):
        Dispatch.Callback.__init__(self, callback)
        # A caller-supplied dict is kept as-is (it may be shared and
        # updated by the owner); otherwise use a fresh one per instance
        # rather than one mutable default shared by all instances.
        if settings is None:
            settings = {}
        self.settings = settings

    def start_feature(self, tag, attrs):
        # ``.get`` keeps this usable when no default style was configured.
        self.location_style = attrs.get("location-style",
                                        self.settings.get("location-style"))
        j = attrs.get("join-feature", None)
        if j is None:
            self.text_join_func = "".join
        else:
            self.text_join_func = Decode.make_typechecked_decoder(j, list, str)

        self.location_start = None
        self.location_end = None
        self.text_lines = []

    def end_feature(self, tag):
        if self.location_start or self.location_end:
            if self.text_lines:
                raise TypeError("Cannot have both location text and start/end")
            self.callback(self.location_style,
                          (self.location_start, self.location_end))
        else:
            self.callback(self.location_style,
                          (self.text_join_func(self.text_lines), None))

    def start_feature_location(self, tag, attrs):
        self.save_characters()

    def end_feature_location(self, tag):
        self.text_lines.append(self.get_characters())

add_text_handler(Handle_feature_location, "feature_location_start",
                 "location_start")
add_text_handler(Handle_feature_location, "feature_location_end",
                 "location_end")
491 ##################################
class Handle_feature_qualifier(Dispatch.Callback):
    """Report each feature qualifier as ``callback(name, description)``.

    Description lines are joined with the decoder named by the element's
    ``join-qualifier`` attribute, or else with
    ``settings["qualifier_join_func"]``.
    """

    def __init__(self, callback, settings):
        self.settings = settings
        Dispatch.Callback.__init__(self, callback)

    def start_feature_qualifier(self, tag, attrs):
        # Reset so a qualifier without a name element reports None rather
        # than the previous qualifier's name.
        self.name = None
        self.description = []
        qj = attrs.get("join-qualifier", None)
        if qj is None:
            self.join = self.settings["qualifier_join_func"]
        else:
            self.join = Decode.make_typechecked_decoder(qj, list, str)

    def end_feature_qualifier(self, tag):
        self.callback(self.name, self.join(self.description))

    def start_feature_qualifier_description(self, tag, attrs):
        self.save_characters()

    def end_feature_qualifier_description(self, tag):
        self.description.append(self.get_characters())

add_text_handler(Handle_feature_qualifier, "feature_qualifier_name", "name")
class Handle_features(Dispatch.Callback):
    """Collect the features of a ``feature_block`` and report them.

    At the end of the block the callback receives the list of ``Feature``
    objects built from each feature's name, joined description, location
    and qualifiers.
    """

    def __init__(self, callback):
        Dispatch.Callback.__init__(self, callback)
        # One dict shared with both sub-handlers, so block-level defaults
        # set in start_feature_block() are visible to them.
        self.settings = {}

        self.acquire(Handle_feature_location(self.add_location, self.settings))

        self.acquire(Handle_feature_qualifier(self.add_feature_qualifier,
                                              self.settings))

    def start_feature_block(self, tag, attrs):
        jf = attrs.get("join-description", None)
        if jf is None:
            self.join_feature_description = Decode.join_fixspaces
        else:
            self.join_feature_description = Decode.make_typechecked_decoder(
                jf, list, str)

        self.settings["location-style"] = attrs.get("location-style", None)

        jq = attrs.get("join-qualifier", None)
        if jq is None:
            self.settings["qualifier_join_func"] = Decode.join_fixspaces
        else:
            self.settings["qualifier_join_func"] = \
                          Decode.make_typechecked_decoder(jq, list, str)
        self.features = []

    def end_feature_block(self, tag):
        self.callback(self.features)

    def start_feature(self, tag, attrs):
        # Fresh per-feature state, filled in by the handlers below.
        self.name = None
        self.description = []
        self.location = None
        self.qualifiers = []

    def start_feature_description(self, tag, attrs):
        self.save_characters()

    def end_feature_description(self, tag):
        self.description.append(self.get_characters())

    def end_feature(self, tag):
        self.features.append(Feature(
            self.name,
            self.join_feature_description(self.description),
            self.location,
            self.qualifiers))

    def add_feature_qualifier(self, name, description):
        self.qualifiers.append((name, description))

    def add_location(self, style, location_info):
        self.location = (style, location_info)

add_text_handler(Handle_features, "feature_name", "name")
578 ############## Search handlers
class Handle_hsp_seqalign(Dispatch.Callback):
    """Accumulate the aligned query/homology/subject text of one HSP.

    The sequence lines of every ``hsp_seqalign`` inside an ``hsp`` are
    concatenated; names and start locations keep their first value, end
    locations keep their last.
    """

    def start_hsp(self, tag, attrs):
        self.query_name = None     # "Query"
        self.subject_name = None   # "Sbjct"

        self.query_seq = ""        # the actual text of the sequence
        self.homology_seq = ""
        self.subject_seq = ""

        self.query_start_loc = None
        self.query_end_loc = None

        self.subject_start_loc = None
        self.subject_end_loc = None

    def end_hsp(self, tag):
        # NOTE(review): this statement was missing from the damaged source.
        # Passing the handler itself (which carries all collected fields)
        # is a reconstruction -- confirm against the callback's consumer.
        self.callback(self)

    def start_hsp_seqalign(self, tag, attrs):
        self.sub_leader = None

    def start_hsp_seqalign_query_seq(self, tag, attrs):
        self.save_characters()

    def end_hsp_seqalign_query_seq(self, tag):
        s = self.get_characters()
        self.query_seq += s
        # Needed to cut the matching slice out of the homology line.
        self.sub_query_seq_len = len(s)

    def start_hsp_seqalign_homology_seq(self, tag, attrs):
        self.save_characters()

    def end_hsp_seqalign_homology_seq(self, tag):
        # The homology line has no leader of its own, so the slice is
        # taken using the query line's leader width and sequence length.
        query_leader = self.leader_size
        query_seq_len = self.sub_query_seq_len
        line = self.get_characters()
        s = line[query_leader:query_leader+query_seq_len]
        assert len(s) == query_seq_len, (len(s), query_seq_len, line)
        self.homology_seq += s

    def start_hsp_seqalign_subject_seq(self, tag, attrs):
        self.save_characters()

    def end_hsp_seqalign_subject_seq(self, tag):
        self.subject_seq += self.get_characters()

    def start_hsp_seqalign_query_leader(self, tag, attrs):
        self.save_characters()

    def end_hsp_seqalign_query_leader(self, tag):
        self.leader_size = len(self.get_characters())

add_first_text_handler(Handle_hsp_seqalign, "hsp_seqalign_query_name",
                       "query_name")

add_first_text_handler(Handle_hsp_seqalign, "hsp_seqalign_subject_name",
                       "subject_name")

add_first_text_handler(Handle_hsp_seqalign, "hsp_seqalign_query_start",
                       "query_start_loc")
add_text_handler(Handle_hsp_seqalign, "hsp_seqalign_query_end",
                 "query_end_loc")

add_first_text_handler(Handle_hsp_seqalign, "hsp_seqalign_subject_start",
                       "subject_start_loc")
add_text_handler(Handle_hsp_seqalign, "hsp_seqalign_subject_end",
                 "subject_end_loc")
647 #############################
class Handle_hsp(Dispatch.Callback):
    """Report each HSP as ``callback(values, seq_info, strands, frames)``.

    ``values`` maps ``hsp_value`` names to their (optionally decoded) text,
    ``seq_info`` is whatever the acquired ``Handle_hsp_seqalign`` reported,
    and ``strands``/``frames`` are keyed by the elements' ``which`` attr.
    """

    def __init__(self, callback):
        Dispatch.Callback.__init__(self, callback)
        self.acquire(Handle_hsp_seqalign(self.add_hsp_seqs))

    def start_hsp(self, tag, attrs):
        self.hsp_values = {}      # expect, p, identities, ...
        self.strands = {}
        self.frames = {}

    def end_hsp(self, tag):
        self.callback(self.hsp_values,
                      self.hsp_info,
                      self.strands, self.frames,
                      )

    def start_hsp_strand(self, tag, attrs):
        self.strands[attrs["which"]] = attrs["strand"]

    def start_hsp_frame(self, tag, attrs):
        self.getting_frame = attrs["which"]
        self.save_characters()

    def end_hsp_frame(self, tag):
        self.frames[self.getting_frame] = self.get_characters()
        self.getting_frame = None

    def add_hsp_seqs(self, hsp_info):
        self.hsp_info = hsp_info

    def start_hsp_value(self, tag, attrs):
        self.value_convert = attrs.get("bioformat:decode", None)
        self.value_name = attrs["name"]
        self.save_characters()

    def end_hsp_value(self, tag):
        s = self.get_characters()
        # Decide on the decode name, not the value name: testing
        # ``value_name`` made every value go through make_decoder, even
        # when no decoder was requested (value_convert is None).
        if self.value_convert is not None:
            if self.value_convert == "float":
                s = float(s)
            else:
                s = Decode.make_decoder(self.value_convert)(s)
        self.hsp_values[self.value_name] = s
693 #############################
class Handle_search_table(Dispatch.Callback):
    """Report a search table as a list of ``(description, values)`` pairs.

    ``values`` maps each ``search_table_value`` name to its text, decoded
    when the element carries a ``bioformat:decode`` attribute.
    """

    def start_search_table_value(self, tag, attrs):
        self.value_name = attrs["name"]
        self.value_decode = attrs.get("bioformat:decode", None)
        self.save_characters()

    def end_search_table_value(self, tag):
        s = self.get_characters()
        if self.value_decode is not None:
            x = self.value_decode
            # NOTE(review): the int/float shortcuts were not visible in
            # the damaged source; they are reconstructed by analogy with
            # the "float" shortcut in Handle_hsp.end_hsp_value -- confirm.
            if x == "int":
                s = int(s)
            elif x == "float":
                s = float(s)
            else:
                s = Decode.make_decoder(x)(s)
        self.values[self.value_name] = s

    def start_search_table(self, tag, attrs):
        self.data = []

    def end_search_table(self, tag):
        self.callback(self.data)

    def start_search_table_entry(self, tag, attrs):
        self.description = None
        self.values = {}

    def end_search_table_entry(self, tag):
        self.data.append( (self.description, self.values) )
        self.description = self.values = None

add_text_handler(Handle_search_table, "search_table_description",
                 "description")
730 #############################
class Handle_search_header(Dispatch.Callback):
    """Report the search header as a single dict passed to the callback.

    The dict is filled by the generated text handlers registered below and
    gets the joined query description under ``"query_description"``.
    """

    def start_(self, tag, attrs):
        self.dict = {}
        self.query_description = None

    def end_search_header(self, tag):
        d = self.dict
        d["query_description"] = self.query_description
        self.callback(d)

add_text_block_handler(Handle_search_header, "query_description",
                       "join-query", "join|fixspaces", "query_description")

# NOTE(review): the keys "appname" and "dbname" were cut off in the damaged
# source; they are reconstructed by analogy with the visible keys -- confirm.
add_text_dict_handler(Handle_search_header, "application_name",
                      "dict", "appname")
add_text_dict_handler(Handle_search_header, "application_version",
                      "dict", "appversion")
add_text_dict_handler(Handle_search_header, "database_name",
                      "dict", "dbname")
add_text_dict_handler(Handle_search_header, "database_num_sequences",
                      "dict", "db_num_sequences")
add_text_dict_handler(Handle_search_header, "database_num_letters",
                      "dict", "db_num_letters")
add_text_dict_handler(Handle_search_header, "query_size",
                      "dict", "query_size")
759 #############################
class Handle_search_info(Dispatch.Callback):
    """Report search parameters and statistics as two dicts.

    Both dicts are filled by the generated value handlers registered below
    and passed to the callback as ``(parameters, statistics)``.
    """

    def start_(self, tag, attrs):
        self.parameters = {}
        self.statistics = {}

    # NOTE(review): this method's header was missing from the damaged
    # source; ``end_`` is assumed as the counterpart of ``start_``.
    def end_(self, tag):
        self.callback(self.parameters, self.statistics)

add_value_handler(Handle_search_info, "search_parameter", "parameters")
add_value_handler(Handle_search_info, "search_statistic", "statistics")