1 # This is a Python module.
2 """This module is DEPRECATED.
4 Andrew Dalke is no longer maintaining Martel or Bio.Mindy, and these modules
and associated ones like Bio.Std are now deprecated.  They are no longer
6 used in any of the current Biopython parsers, and are likely to be removed
11 warnings.warn("Martel and those parts of Biopython depending on it" \
12 +" directly (such as Bio.Mindy and Bio.Std) are now" \
13 +" deprecated, and will be removed in a future release of"\
14 +" Biopython. If you want to continue to use this code,"\
15 +" please get in contact with the Biopython developers via"\
16 +" the mailing lists to avoid its permanent removal from"\
19 # Standard Bioformats definitions
24 namespace = "bioformat"
26 XMLNS = "http://biopython.org/bioformat"
28 def _set_if_given(attrs, field, d, valid = None, convert = None):
29 value = attrs.get(field)
32 if value not in valid:
33 raise TypeError("%s (%r) must be one of %s" % \
34 (field, value, valid))
38 d[field] = convert(value)
40 def _complain_if_given(attrs, name):
41 if attrs.has_key(name) and attrs[name] is not None:
42 raise NotImplementedError("Don't yet handle %r" % (name,))
44 def _must_have(expr, f):
46 if tag not in expr.group_names():
48 "group %r not present in the expression but is required" % \
51 def _must_have_set(expr, sets):
52 names = expr.group_names()
61 raise TypeError("missing required tags (need %s) in expression" %
62 [f.tag for f in sets[0]])
63 lines = ["missing required tags in expression; must have one set from:"]
65 lines.append( str( [t.tag for f in set] ) )
69 def _must_not_have(expr, f):
71 if tag in expr.group_names():
73 "group %r present in the expression but is not allowed" % \
77 # pre- Python 2.2 functions didn't allow attributes
83 except AttributeError:
def _check_name(f, text):
    """Sanity-check that function *f* is registered under its own name.

    The tag *text* is expected to equal ``NS + <function name>``.  The tag
    "record" carries no namespace prefix, so it is exempted.
    """
    if text == "record":  # XXX FIXME
        return
    # __name__ exists on functions in every Python version (func_name is
    # Python 2 only).  The message reports the value actually compared.
    expected = NS + f.__name__
    assert expected == text, (expected, text)
92 def _check_attrs(attrs, names):
93 for name in attrs.keys():
95 raise TypeError("attr %r is not allowed here (valid terms: %s)" % \
99 if not d.has_key(name):
108 # Convert the functions into callable objects
110 def __init__(self, func):
112 def __call__(self, *args, **kwargs):
113 return self._func( *args, **kwargs)
117 x = globals()[f.func_name] = StdTerm(f)
120 ################ identifier, description, and cross-references
def record(expr, attrs = {}):
    """Wrap *expr* in the top-level ``record`` group.

    The only permitted attribute is "format".  The group always carries
    the ``xmlns:bioformat`` declaration set to XMLNS.
    """
    checked = _check_attrs(attrs, ("format",))
    group_attrs = {"xmlns:bioformat": XMLNS}
    _set_if_given(checked, "format", group_attrs)
    return Group("record", expr, group_attrs)  # XXX FIXME
_settag(record, "record")  # XXX AND FIXME
def dbid(expr, attrs = {}):
    """Wrap *expr* in the ``dbid`` group of the bioformat namespace.

    Permitted attributes: "type" (one of "primary", "accession",
    "secondary"), "style" and "dbname".  Invalid values raise TypeError.
    """
    attrs = _check_attrs(attrs, ("type", "style", "dbname"))
    # The attribute dictionary must exist before _set_if_given fills it.
    d = {}
    _set_if_given(attrs, "type", d, ("primary", "accession", "secondary"))
    _set_if_given(attrs, "dbname", d)
    # NOTE(review): "style" is accepted above but neither stored nor
    # rejected here -- confirm whether that is intentional.
    return Group(NS + "dbid", expr, d)
_settag(dbid, NS + "dbid")
def description_block(expr, attrs = {}):
    """Wrap *expr* in the ``description_block`` group.

    *expr* must contain a ``description`` group.  The optional "join"
    attribute must be one of "english", "concat", "space" or "newline".
    """
    attrs = _check_attrs(attrs, ("join",))
    _must_have(expr, description)
    # The attribute dictionary must exist before _set_if_given fills it.
    d = {}
    _set_if_given(attrs, "join", d, ("english", "concat", "space", "newline"))
    return Group(NS + "description_block", expr, d)
_settag(description_block, NS + "description_block")
def description(expr, attrs = {}):
    """Wrap *expr* in the ``description`` group; no attributes are allowed."""
    # Called for validation only: the group is built without attributes.
    _check_attrs(attrs, ())
    tag = NS + "description"
    return Group(tag, expr)
_settag(description, NS + "description")
def description_line(expr, attrs = {}):
    """Wrap *expr* as a ``description`` nested in a ``description_block``.

    NOTE(review): *attrs* is forwarded to ``description`` (which permits
    no attribute names), not to ``description_block`` -- confirm intent.
    """
    inner = description(expr, attrs)
    return description_block(inner)
def fast_dbxref(expr, attrs = {}):
    """Wrap *expr* in the ``fast_dbxref`` group.

    The optional "style" attribute must be one of "sp-general",
    "sp-prosite" or "sp-embl"; any other value raises TypeError.
    """
    attrs = _check_attrs(attrs, ("style",))
    # The attribute dictionary must exist before _set_if_given fills it.
    d = {}
    _set_if_given(attrs, "style", d, ("sp-general", "sp-prosite", "sp-embl"))
    return Group(NS + "fast_dbxref", expr, d)
def dbxref(expr, attrs = {}):
    """Wrap *expr* in the ``dbxref`` group.

    *expr* must contain a ``dbxref_dbid`` group.  "style" is a recognised
    attribute name, but supplying a value raises NotImplementedError.
    """
    attrs = _check_attrs(attrs, ("style",))
    _must_have(expr, dbxref_dbid)
    # Stays empty: the only recognised attribute is not handled yet.
    d = {}
    _complain_if_given(attrs, "style")
    return Group(NS + "dbxref", expr, d)
_settag(dbxref, NS + "dbxref")
def dbxref_dbname(expr, attrs = {}):
    """Wrap *expr* in the ``dbxref_dbname`` group.

    The optional "style" attribute is copied to the group unchanged.
    """
    attrs = _check_attrs(attrs, ("style",))
    # The attribute dictionary must exist before _set_if_given fills it.
    d = {}
    _set_if_given(attrs, "style", d)
    return Group(NS + "dbxref_dbname", expr, d)
_settag(dbxref_dbname, NS + "dbxref_dbname")
def dbxref_dbid(expr, attrs = {}):
    """Wrap *expr* in the ``dbxref_dbid`` group.

    Permitted attributes:
      "dbname" -- copied unchanged.
      "type"   -- one of "primary", "accession", "secondary".
      "style"  -- recognised, but a value raises NotImplementedError.
      "negate" -- 0 or 1, stored as its string form.
    """
    attrs = _check_attrs(attrs, ("dbname", "type", "style", "negate"))
    # The attribute dictionary must exist before _set_if_given fills it.
    d = {}
    _set_if_given(attrs, "dbname", d)
    _set_if_given(attrs, "type", d, ("primary", "accession", "secondary"))
    _complain_if_given(attrs, "style")
    _set_if_given(attrs, "negate", d, (0, 1), str)

    return Group(NS + "dbxref_dbid", expr, d)
_settag(dbxref_dbid, NS + "dbxref_dbid")
def dbxref_negate(expr, attrs = {}):
    """Wrap *expr* in the ``dbxref_negate`` group; no attributes are allowed."""
    # Called for validation only: the group is built without attributes.
    _check_attrs(attrs, ())
    tag = NS + "dbxref_negate"
    return Group(tag, expr)
_settag(dbxref_negate, NS + "dbxref_negate")
190 ##################### sequences
192 def _check_gapchar(s):
193 if not ( ord(" ") <= ord(s) <= 126 ):
194 raise TypeError("%r not allowed as a gap character" % (s,))
197 # What about three letter codes?
def sequence_block(expr, attrs = {}):
    """Wrap *expr* in the ``sequence_block`` group.

    *expr* must contain a ``sequence`` group.  Permitted attributes:
      "alphabet"      -- one of the alphabet names listed below.
      "gapchar"       -- validated by _check_gapchar.
      "remove_spaces" -- 0 or 1, stored as its string form.
    """
    attrs = _check_attrs(attrs, ("alphabet", "gapchar", "remove_spaces"))
    _must_have(expr, sequence)
    # The attribute dictionary must exist before _set_if_given fills it.
    d = {}
    _set_if_given(attrs, "alphabet", d,
                  ("iupac-protein", "iupac-dna", "iupac-rna",
                   "iupac-ambiguous-protein",
                   "iupac-ambiguous-dna",
                   "iupac-ambiguous-rna",
                   "protein", "dna", "rna", "unknown"))
    _set_if_given(attrs, "gapchar", d, convert = _check_gapchar)
    _set_if_given(attrs, "remove_spaces", d, (0, 1), str)
    return Group(NS + "sequence_block", expr, d)
_settag(sequence_block, NS + "sequence_block")
def sequence(expr, attrs = {}):
    """Wrap *expr* in the ``sequence`` group; no attributes are allowed."""
    # Called for validation only: the group is built without attributes.
    _check_attrs(attrs, ())
    tag = NS + "sequence"
    return Group(tag, expr)
_settag(sequence, NS + "sequence")
def alphabet(expr, attrs = {}):
    """Wrap *expr* in the ``alphabet`` group.

    The optional "alphabet" attribute must be one of the names listed
    below; any other value raises TypeError.
    """
    attrs = _check_attrs(attrs, ("alphabet",))
    # The attribute dictionary must exist before _set_if_given fills it.
    d = {}
    _set_if_given(attrs, "alphabet", d,
                  ("iupac-protein", "iupac-dna", "iupac-rna",
                   "iupac-ambiguous-protein",
                   "iupac-ambiguous-dna",
                   "iupac-ambiguous-rna",
                   "protein", "dna", "rna", "nucleotide", "unknown"))
    return Group(NS + "alphabet", expr, d)
_settag(alphabet, NS + "alphabet")
232 ############################## features
237 # 1-25 #domain signal sequence #status predicted #label SIG\
238 # 26-737 #product procollagen-lysine 5-dioxygenase 2 #status
239 # predicted #label MAT\
240 # 63,209,297,365,522,
241 # 725 #binding_site carbohydrate (Asn) (covalent) #status
244 # The whole thing is a 'feature_block'
247 # 26-737 #product procollagen-lysine 5-dioxygenase 2 #status
248 # predicted #label MAT\
250 # One 'feature_name' is "binding_site".
# An example of the feature_location_block and feature_location, which I
# will abbreviate as 'flb' and 'fl', is:
254 # <flb> <fl>63,209,297,365,522,</fl>
255 # <fl>725</fl> #binding_site carbohydrate ...
257 # PIR doesn't have a 'feature_description'
260 # fq = feature_qualifier
# fqb = feature_qualifier_block
262 # fqn = feature_qualifier_name
263 # fqd = feature_qualifier_description
266 # 26-737 #product procollagen-lysine 5-dioxygenase 2 #status
267 # predicted #label MAT\
269 # can be represented as (the rather tedious)
271 # 26-737 <fqb><fq>#<fqn>product</fqn> <fqd>procollagen-\
272 # lysine 5-dioxygenase 2</fqd></fq> #<fq><fqn>status</fqn>
273 # <fqd>predicted</fqd> #<fq><fqn>label\
274 # </fqn> <fqd>MAT</fqd></fq>\</fqb>
277 # 'style' determines the namespace for the feature name
def feature_block(expr, attrs = {}):
    """Wrap *expr* in the ``feature_block`` group.

    *expr* must contain a ``feature`` group.  The optional "style" and
    "location-style" attributes are copied to the group unchanged.
    """
    attrs = _check_attrs(attrs, ("style", "location-style"))
    # The attribute dictionary must exist before _set_if_given fills it.
    d = {}
    _set_if_given(attrs, "style", d)
    _set_if_given(attrs, "location-style", d)
    _must_have(expr, feature)
    return Group(NS + "feature_block", expr, d)
_settag(feature_block, NS + "feature_block")
def feature(expr, attrs = {}):
    """Wrap *expr* in the ``feature`` group.

    *expr* must contain a ``feature_name`` group and either a
    ``feature_location`` group or both ``feature_location_start`` and
    ``feature_location_end``.  The optional "location-style" attribute is
    copied to the group unchanged.
    """
    attrs = _check_attrs(attrs, ("location-style",))
    # The attribute dictionary must exist before _set_if_given fills it.
    d = {}
    _set_if_given(attrs, "location-style", d)
    _must_have(expr, feature_name)
    _must_have_set(expr, [[feature_location],
                          [feature_location_start, feature_location_end]])
    return Group(NS + "feature", expr, d)
_settag(feature, NS + "feature")
def feature_name(expr, attrs = {}):
    """Wrap *expr* in the ``feature_name`` group; no attributes are allowed."""
    # Called for validation only: the group is built without attributes.
    _check_attrs(attrs, ())
    tag = NS + "feature_name"
    return Group(tag, expr)
_settag(feature_name, NS + "feature_name")
def feature_location(expr, attrs = {}):
    """Wrap *expr* in the ``feature_location`` group; no attributes are allowed."""
    # Called for validation only: the group is built without attributes.
    _check_attrs(attrs, ())
    tag = NS + "feature_location"
    return Group(tag, expr)
_settag(feature_location, NS + "feature_location")
def feature_location_start(expr, attrs = {}):
    """Wrap *expr* in the ``feature_location_start`` group; no attributes allowed."""
    # Called for validation only: the group is built without attributes.
    _check_attrs(attrs, ())
    tag = NS + "feature_location_start"
    return Group(tag, expr)
_settag(feature_location_start, NS + "feature_location_start")
def feature_location_end(expr, attrs = {}):
    """Wrap *expr* in the ``feature_location_end`` group; no attributes allowed."""
    # Called for validation only: the group is built without attributes.
    _check_attrs(attrs, ())
    tag = NS + "feature_location_end"
    return Group(tag, expr)
_settag(feature_location_end, NS + "feature_location_end")
def feature_description(expr, attrs = {}):
    """Wrap *expr* in the ``feature_description`` group; no attributes allowed."""
    # Called for validation only: the group is built without attributes.
    _check_attrs(attrs, ())
    tag = NS + "feature_description"
    return Group(tag, expr)
_settag(feature_description, NS + "feature_description")
323 ##def feature_qualifier_block(expr, attrs = {}):
324 ## attrs = _check_attrs(attrs, ())
325 ## _must_have(expr, feature_qualifier)
326 ## return Group(NS + "feature_qualifier_block", expr)
327 ##_settag(feature_qualifier_block, NS + "feature_qualifier_block")
def feature_qualifier(expr, attrs = {}):
    """Wrap *expr* in the ``feature_qualifier`` group.

    No attributes are allowed, and *expr* must contain a
    ``feature_qualifier_name`` group.
    """
    # Called for validation only: the group is built without attributes.
    _check_attrs(attrs, ())
    _must_have(expr, feature_qualifier_name)
    tag = NS + "feature_qualifier"
    return Group(tag, expr)
_settag(feature_qualifier, NS + "feature_qualifier")
def feature_qualifier_name(expr, attrs = {}):
    """Wrap *expr* in the ``feature_qualifier_name`` group; no attributes allowed."""
    # Called for validation only: the group is built without attributes.
    _check_attrs(attrs, ())
    tag = NS + "feature_qualifier_name"
    return Group(tag, expr)
_settag(feature_qualifier_name, NS + "feature_qualifier_name")
def feature_qualifier_description(expr, attrs = {}):
    """Wrap *expr* in the ``feature_qualifier_description`` group.

    No attributes are allowed.
    """
    # Called for validation only: the group is built without attributes.
    _check_attrs(attrs, ())
    tag = NS + "feature_qualifier_description"
    return Group(tag, expr)
_settag(feature_qualifier_description, NS + "feature_qualifier_description")
346 ############ For homology searches
def application_name(expr, attrs = {}):
    """Wrap *expr* in a ``bioformat:application_name`` group.

    The only permitted attribute is "app"; the dictionary returned by
    _check_attrs is passed to Group as the group's attributes.
    """
    checked = _check_attrs(attrs, ("app",))
    return Group("bioformat:application_name", expr, checked)
353 # "2.0.11", "2.0a19MP-WashU"
def application_version(expr, attrs = {}):
    """Wrap *expr* in a ``bioformat:application_version`` group.

    No attribute names are permitted; the dictionary returned by
    _check_attrs is passed to Group as the group's attributes.
    """
    checked = _check_attrs(attrs, ())
    return Group("bioformat:application_version", expr, checked)
def search_header(expr, attrs = {}):
    """Wrap *expr* in a ``bioformat:search_header`` group.

    No attribute names are permitted; the dictionary returned by
    _check_attrs is passed to Group as the group's attributes.
    """
    checked = _check_attrs(attrs, ())
    return Group("bioformat:search_header", expr, checked)
def search_table(expr, attrs = {}):
    """Wrap *expr* in a ``bioformat:search_table`` group.

    No attribute names are permitted; the dictionary returned by
    _check_attrs is passed to Group as the group's attributes.
    """
    checked = _check_attrs(attrs, ())
    return Group("bioformat:search_table", expr, checked)
def search_table_description(expr, attrs = {}):
    """Wrap *expr* in a ``bioformat:search_table_description`` group.

    The only permitted attribute is "bioformat:decode".  The group's
    attributes start with that key set to "strip"; _set_if_given then
    applies the caller's value, if any.
    """
    checked = _check_attrs(attrs, ("bioformat:decode",))
    group_attrs = {"bioformat:decode": "strip"}
    _set_if_given(checked, "bioformat:decode", group_attrs)
    return Group("bioformat:search_table_description", expr, group_attrs)
def search_table_value(expr, attrs = {}):
    """Wrap *expr* in a ``bioformat:search_table_value`` group.

    Permitted attributes are "name" and "bioformat:decode"; the dictionary
    returned by _check_attrs is passed to Group as the group's attributes.
    """
    checked = _check_attrs(attrs, ("name", "bioformat:decode"))
    return Group("bioformat:search_table_value", expr, checked)
def search_table_entry(expr, attrs = {}):
    """Wrap *expr* in a ``bioformat:search_table_entry`` group.

    No attribute names are permitted; the dictionary returned by
    _check_attrs is passed to Group as the group's attributes.
    """
    checked = _check_attrs(attrs, ())
    return Group("bioformat:search_table_entry", expr, checked)
def query_description_block(expr, attrs = {}):
    """Wrap *expr* in a ``bioformat:query_description_block`` group.

    The only permitted attribute is "join-query".  The group's attributes
    start with that key set to "join|fixspaces"; _set_if_given then
    applies the caller's value, if any.
    """
    checked = _check_attrs(attrs, ("join-query",))
    group_attrs = {"join-query": "join|fixspaces"}
    _set_if_given(checked, "join-query", group_attrs)
    return Group("bioformat:query_description_block", expr, group_attrs)
def query_description(expr, attrs = {}):
    """Wrap *expr* in a ``bioformat:query_description`` group.

    The only permitted attribute is "bioformat:decode", copied to the
    group unchanged when supplied.
    """
    # The trailing comma matters: without it the argument is a plain
    # string, not a one-element tuple of allowed names.
    attrs = _check_attrs(attrs, ("bioformat:decode",))
    # The attribute dictionary must exist before _set_if_given fills it.
    d = {}
    _set_if_given(attrs, "bioformat:decode", d)
    return Group("bioformat:query_description", expr, d)
def query_size(expr, attrs = {}):
    """Wrap *expr* in a ``bioformat:query_size`` group; no attributes allowed."""
    # Called for validation only: the group is built without attributes.
    _check_attrs(attrs, ())
    return Group("bioformat:query_size", expr)
def database_name(expr, attrs = {}):
    """Wrap *expr* in a ``bioformat:database_name`` group.

    No attribute names are permitted; the dictionary returned by
    _check_attrs is passed to Group as the group's attributes.
    """
    checked = _check_attrs(attrs, ())
    return Group("bioformat:database_name", expr, checked)
def database_num_sequences(expr, attrs = {}):
    """Wrap *expr* in a ``bioformat:database_num_sequences`` group.

    The only permitted attribute is "bioformat:decode"; the dictionary
    returned by _check_attrs is passed to Group as the group's attributes.
    """
    checked = _check_attrs(attrs, ("bioformat:decode",))
    return Group("bioformat:database_num_sequences", expr, checked)
def database_num_letters(expr, attrs = {}):
    """Wrap *expr* in a ``bioformat:database_num_letters`` group.

    The only permitted attribute is "bioformat:decode"; the dictionary
    returned by _check_attrs is passed to Group as the group's attributes.
    """
    checked = _check_attrs(attrs, ("bioformat:decode",))
    return Group("bioformat:database_num_letters", expr, checked)
def hit(expr, attrs = {}):
    """Wrap *expr* in a ``bioformat:hit`` group.

    The only permitted attribute is "join-description".  The group's
    attributes start with that key set to "join|fixspaces"; _set_if_given
    then applies the caller's value, if any.
    """
    checked = _check_attrs(attrs, ("join-description",))
    group_attrs = {"join-description": "join|fixspaces"}
    _set_if_given(checked, "join-description", group_attrs)
    return Group("bioformat:hit", expr, group_attrs)
def hit_length(expr, attrs = {}):
    """Wrap *expr* in a ``bioformat:hit_length`` group.

    No attribute names are permitted; the dictionary returned by
    _check_attrs is passed to Group as the group's attributes.
    """
    checked = _check_attrs(attrs, ())
    return Group("bioformat:hit_length", expr, checked)
def hit_description(expr, attrs = {}):
    """Wrap *expr* in a ``bioformat:hit_description`` group.

    The only permitted attribute is "bioformat:decode", copied to the
    group unchanged when supplied.
    """
    # The trailing comma matters: without it the argument is a plain
    # string, not a one-element tuple of allowed names.
    attrs = _check_attrs(attrs, ("bioformat:decode",))
    # The attribute dictionary must exist before _set_if_given fills it.
    d = {}
    _set_if_given(attrs, "bioformat:decode", d)
    return Group("bioformat:hit_description", expr, d)
def hsp(expr, attrs = {}):
    """Wrap *expr* in a ``bioformat:hsp`` group.

    No attribute names are permitted; the dictionary returned by
    _check_attrs is passed to Group as the group's attributes.
    """
    checked = _check_attrs(attrs, ())
    return Group("bioformat:hsp", expr, checked)
def hsp_value(expr, attrs = {}):
    """Wrap *expr* in a ``bioformat:hsp_value`` group.

    Permitted attributes are "name" and "bioformat:decode"; the dictionary
    returned by _check_attrs is passed to Group as the group's attributes.
    """
    checked = _check_attrs(attrs, ("name", "bioformat:decode"))
    return Group("bioformat:hsp_value", expr, checked)
def hsp_frame(expr, attrs = {}):
    """Wrap *expr* in a ``bioformat:hsp_frame`` group.

    The optional "which" attribute must be one of "query", "homology" or
    "subject"; any other value raises TypeError.
    """
    attrs = _check_attrs(attrs, ("which",))
    # The attribute dictionary must exist before _set_if_given fills it.
    d = {}
    _set_if_given(attrs, "which", d, valid = ("query", "homology", "subject"))
    return Group("bioformat:hsp_frame", expr, d)
def hsp_strand(expr, attrs = {}):
    """Wrap *expr* in a ``bioformat:hsp_strand`` group.

    Permitted attributes:
      "which"  -- one of "query", "homology", "subject".
      "strand" -- one of "+1", "0", "-1", "".
    Any other value raises TypeError.
    """
    attrs = _check_attrs(attrs, ("strand", "which"))
    # The attribute dictionary must exist before _set_if_given fills it.
    d = {}
    _set_if_given(attrs, "which", d, valid = ("query", "homology", "subject"))
    _set_if_given(attrs, "strand", d, valid = ("+1", "0", "-1", ""))
    return Group("bioformat:hsp_strand", expr, d)
def hsp_seqalign_query_seq(expr, attrs = {}):
    """Wrap *expr* in a ``bioformat:hsp_seqalign_query_seq`` group.

    No attribute names are permitted; the dictionary returned by
    _check_attrs is passed to Group as the group's attributes.
    """
    checked = _check_attrs(attrs, ())
    return Group("bioformat:hsp_seqalign_query_seq", expr, checked)
def hsp_seqalign_homology_seq(expr, attrs = {}):
    """Wrap *expr* in a ``bioformat:hsp_seqalign_homology_seq`` group.

    No attribute names are permitted; the dictionary returned by
    _check_attrs is passed to Group as the group's attributes.
    """
    checked = _check_attrs(attrs, ())
    return Group("bioformat:hsp_seqalign_homology_seq", expr, checked)
def hsp_seqalign_subject_seq(expr, attrs = {}):
    """Wrap *expr* in a ``bioformat:hsp_seqalign_subject_seq`` group.

    No attribute names are permitted; the dictionary returned by
    _check_attrs is passed to Group as the group's attributes.
    """
    checked = _check_attrs(attrs, ())
    return Group("bioformat:hsp_seqalign_subject_seq", expr, checked)
def hsp_seqalign_query_leader(expr, attrs = {}):
    """Wrap *expr* in a ``bioformat:hsp_seqalign_query_leader`` group.

    No attribute names are permitted; the dictionary returned by
    _check_attrs is passed to Group as the group's attributes.
    """
    checked = _check_attrs(attrs, ())
    return Group("bioformat:hsp_seqalign_query_leader", expr, checked)
def hsp_seqalign_query_name(expr, attrs = {}):
    """Wrap *expr* in a ``bioformat:hsp_seqalign_query_name`` group.

    No attribute names are permitted; the dictionary returned by
    _check_attrs is passed to Group as the group's attributes.
    """
    checked = _check_attrs(attrs, ())
    return Group("bioformat:hsp_seqalign_query_name", expr, checked)
def hsp_seqalign_subject_name(expr, attrs = {}):
    """Wrap *expr* in a ``bioformat:hsp_seqalign_subject_name`` group.

    No attribute names are permitted; the dictionary returned by
    _check_attrs is passed to Group as the group's attributes.
    """
    checked = _check_attrs(attrs, ())
    return Group("bioformat:hsp_seqalign_subject_name", expr, checked)
def hsp_seqalign(expr, attrs = {}):
    """Wrap *expr* in a ``bioformat:hsp_seqalign`` group.

    No attribute names are permitted; the dictionary returned by
    _check_attrs is passed to Group as the group's attributes.
    """
    checked = _check_attrs(attrs, ())
    return Group("bioformat:hsp_seqalign", expr, checked)
def hsp_seqalign_query_start(expr, attrs = {}):
    """Wrap *expr* in a ``bioformat:hsp_seqalign_query_start`` group.

    No attribute names are permitted; the dictionary returned by
    _check_attrs is passed to Group as the group's attributes.
    """
    checked = _check_attrs(attrs, ())
    return Group("bioformat:hsp_seqalign_query_start", expr, checked)
def hsp_seqalign_query_end(expr, attrs = {}):
    """Wrap *expr* in a ``bioformat:hsp_seqalign_query_end`` group.

    No attribute names are permitted; the dictionary returned by
    _check_attrs is passed to Group as the group's attributes.
    """
    checked = _check_attrs(attrs, ())
    return Group("bioformat:hsp_seqalign_query_end", expr, checked)
def hsp_seqalign_subject_start(expr, attrs = {}):
    """Wrap *expr* in a ``bioformat:hsp_seqalign_subject_start`` group.

    No attribute names are permitted; the dictionary returned by
    _check_attrs is passed to Group as the group's attributes.
    """
    checked = _check_attrs(attrs, ())
    return Group("bioformat:hsp_seqalign_subject_start", expr, checked)
def hsp_seqalign_subject_end(expr, attrs = {}):
    """Wrap *expr* in a ``bioformat:hsp_seqalign_subject_end`` group.

    No attribute names are permitted; the dictionary returned by
    _check_attrs is passed to Group as the group's attributes.
    """
    checked = _check_attrs(attrs, ())
    return Group("bioformat:hsp_seqalign_subject_end", expr, checked)
def search_parameter(expr, attrs = {}):
    """Wrap *expr* in a ``bioformat:search_parameter`` group.

    The optional "name" and "bioformat:decode" attributes are copied to
    the group unchanged when supplied.
    """
    attrs = _check_attrs(attrs, ("name", "bioformat:decode"))
    # The attribute dictionary must exist before _set_if_given fills it.
    d = {}
    _set_if_given(attrs, "name", d)
    _set_if_given(attrs, "bioformat:decode", d)
    return Group("bioformat:search_parameter", expr, d)
def search_statistic(expr, attrs = {}):
    """Wrap *expr* in a ``bioformat:search_statistic`` group.

    The optional "name" and "bioformat:decode" attributes are copied to
    the group unchanged when supplied.
    """
    attrs = _check_attrs(attrs, ("name", "bioformat:decode"))
    # The attribute dictionary must exist before _set_if_given fills it.
    d = {}
    _set_if_given(attrs, "name", d)
    _set_if_given(attrs, "bioformat:decode", d)
    return Group("bioformat:search_statistic", expr, d)