1 # Copyright 2002 by Andrew Dalke.
3 # This code is part of the Biopython distribution and governed by its
4 # license. Please see the LICENSE file that should have been included
5 # as part of this package.
6 """Decode elements from a Std/Martel parsed XML stream (OBSOLETE).
8 Andrew Dalke is no longer maintaining Martel or Bio.Mindy, and these modules
9 (and therefore Bio.Decode) have been deprecated. They are no longer used in
10 any of the current Biopython parsers, and are likely to be removed in a
14 warnings.warn("Martel and those parts of Biopython depending on it" \
15 +" directly (such as Bio.Mindy and Bio.Decode) are now" \
16 +" deprecated, and will be removed in a future release of"\
17 +" Biopython. If you want to continue to use this code,"\
18 +" please get in contact with the Biopython developers via"\
19 +" the mailing lists to avoid its permanent removal from"\
24 from Bio.Parsers.spark import GenericScanner, GenericParser
28 for i in range(len(s)):
36 raise ValueError("invalid \\x escape")
42 # \octals don't do a length assertion check
47 return "".join(result)
49 def join_english(fields):
53 for field in fields[1:]:
54 if s[-1:] == "-" and s[-3:-2] == "-":
57 if s.find(" ") == -1 and field.find(" ") == -1:
61 return (" ".join(s.split())).strip()
76 if s[:1] == c and s[-1:] == c:
81 # s.split breaks down to a list of words
82 # " ".join puts them together
83 # strip removes leading and trailing spaces
84 return " ".join(s.split()).strip()
def join_fixspaces(lines):
    """Join a sequence of strings into one whitespace-normalized string.

    The strings in *lines* are first glued together with single spaces.
    The result is then split on whitespace and re-joined, so every run of
    whitespace collapses to one space and there is no leading or trailing
    whitespace in the returned string.
    """
    merged = " ".join(lines)
    words = merged.split()
    collapsed = " ".join(words)
    return collapsed.strip()
90 table = string.maketrans(frm, to)
91 return s.translate(table)
94 """converts to int if the number is small, long if it's large"""
101 "chomp": (chomp, str, str),
102 "chompchomp": (chompchomp, str, str),
103 "chop": (lambda s: s[:-1], str, str),
104 "chopchop": (lambda s: s[1:-1], str, str),
105 "fixspaces": (fixspaces, str, str),
106 "lchomp": (lchomp, str, str),
107 "lchop": (lambda s: s[1:], str, str),
108 "lower": (lambda s: s.lower(), str, str),
109 "lstrip": (lambda s: s.lstrip(), str, str),
110 "replace": (lambda s, old, new: s.replace(old, new), str, str),
111 "rstrip": (lambda s: s.rstrip(), str, str),
112 "str": (str, str, str),
113 "strip": (lambda s: s.strip(), str, str),
114 "tr": (tr, str, str),
115 "unescape.c": (unescape_C, str, str),
116 "unescape.doublequote": (lambda s: s.replace('""', '"'), str, str),
117 "unescape.singlequote": (lambda s: s.replace("''", "'"), str, str),
118 "upper": (lambda s: s.upper(), str, str),
121 "join": (lambda lst, s = " ": s.join(lst), list, str),
122 "join.english": (join_english, list, str),
125 "int": (safe_int, [float, str, int], int),
126 "int.comma": (lambda s: safe_int(s.replace(",", "")),
127 [float, str, int], int),
128 "hex": (hex, str, int),
129 "oct": (oct, str, int),
130 "add": ((lambda i, j: i+j), int, int),
133 "float": (float, (float, str, int), float),
# Normalize every decode_functions entry so that its 2nd and 3rd terms
# (the accepted input types and the produced output types) are tuples.
# A list becomes a tuple with the same members; a tuple is kept as it is;
# anything else is wrapped in a 1-element tuple.
for k, v in decode_functions.items():
    f, in_types, out_types = v

    # Input type specification.
    if isinstance(in_types, list):
        in_types = tuple(in_types)
    elif not isinstance(in_types, tuple):
        in_types = (in_types,)

    # Output type specification, handled the same way.
    if isinstance(out_types, list):
        out_types = tuple(out_types)
    elif not isinstance(out_types, tuple):
        out_types = (out_types,)

    # Only existing keys are rebound, so the table's size never changes
    # while it is being walked.
    decode_functions[k] = (f, in_types, out_types)
155 def __init__(self, type):
157 def __cmp__(self, other):
158 return cmp(self.type, other)
160 return "Token(%r)" % (self.type,)
162 class ValueToken(Token):
163 def __init__(self, type, val):
164 Token.__init__(self, type)
166 def __cmp__(self, other):
167 return cmp(self.type, other)
169 return "%s(%r)" % (self.__class__.__name__, self.val)
class Integer(ValueToken):
    """Value-carrying token whose type tag is "integer"."""

    def __init__(self, val):
        # The type tag is fixed for this class; only the value varies.
        ValueToken.__init__(self, type="integer", val=val)
class Float(ValueToken):
    """Value-carrying token whose type tag is "float"."""

    def __init__(self, val):
        # The type tag is fixed for this class; only the value varies.
        ValueToken.__init__(self, type="float", val=val)
class String(ValueToken):
    """Value-carrying token whose type tag is "string"."""

    def __init__(self, val):
        # The type tag is fixed for this class; only the value varies.
        ValueToken.__init__(self, type="string", val=val)
class FunctionName(ValueToken):
    """Value-carrying token whose type tag is "functionname"."""

    def __init__(self, val):
        # The type tag is fixed for this class; only the value varies.
        ValueToken.__init__(self, type="functionname", val=val)
189 class DecodeScanner(GenericScanner):
191 GenericScanner.__init__(self)
193 def tokenize(self, input):
195 GenericScanner.tokenize(self, input)
198 def t_functionname(self, input):
200 self.rv.append(FunctionName(input))
202 def t_pipe(self, input):
204 self.rv.append(Token("pipe"))
206 def t_open_paren(self, input):
208 self.rv.append(Token("open_paren"))
210 def t_close_paren(self, input):
212 self.rv.append(Token("close_paren"))
214 def t_comma(self, input):
216 self.rv.append(Token("comma"))
218 def t_whitespace(self, input):
222 def t_string(self, input):
223 r""" "([^"\\]+|\\.)*"|'([^'\\]+|\\.)*' """
228 self.rv.append(String(s))
230 def t_float(self, input):
231 r""" [+-]?((\d+(\.\d*)?)|\.\d+)([eE][+-]?[0-9]+)? """
232 # See if this is an integer
234 self.rv.append(Integer(safe_int(input)))
236 self.rv.append(Float(float(input)))
239 def __init__(self, name, args = ()):
248 return "%s(x, %s)" % (self.name, s)
251 class DecodeParser(GenericParser):
252 def __init__(self, start = "expression"):
253 GenericParser.__init__(self, start)
256 def p_expression(self, args):
259 expression ::= term pipe expression
263 return [args[0]] + args[2]
265 def p_term(self, args):
267 term ::= functionname
268 term ::= functionname open_paren args close_paren
271 return Function(args[0].val)
272 return Function(args[0].val, tuple([x.val for x in args[2]]))
274 def p_args(self, args):
277 args ::= arg comma args
281 return [args[0]] + args[2]
283 def p_arg(self, args):
292 scanner = DecodeScanner()
293 return scanner.tokenize(input)
296 parser = DecodeParser()
297 return parser.parse(tokens)
302 def __init__(self, f, args):
305 def __call__(self, x):
306 return self.f(x, *self.args)
308 class FunctionCallChain:
309 def __init__(self, inner_f, f, args):
310 self.inner_f = inner_f
313 def __call__(self, x):
314 return self.f(self.inner_f(x), *self.args)
316 #### I don't think this is the right way to do things
318 ## def __init__(self, f, call_types, return_types):
320 ## self.call_types = call_types
321 ## self.return_types = return_types
322 ## def __call__(self, x):
323 ## if self.call_types is not None:
324 ## for T in self.call_types:
325 ## if isinstance(x, T):
329 ## "Call value %s of type %s, expecting one of %s" %
330 ## (x, type(x).__name__,
331 ## [T.name for T in self.call_types]))
334 ## if not self.return_types:
337 ## for T in self.return_types:
338 ## if isinstance(y, T):
340 ## raise TypeError("Return value %s of type %s, expecting one of %s" %
341 ## (y, type(y).__name__,
342 ## [T.name for T in self.return_types]))
346 return _decoder_cache[s]
350 functions = parse(scan(s))
353 fc = decode_functions[f.name][0]
356 fc = FunctionCall(fc, args)
357 for f in functions[1:]:
358 fc = FunctionCallChain(fc, decode_functions[f.name][0], f.args)
359 _decoder_cache[s] = fc
362 def _verify_subtypes(subset, total, old_name, new_name):
365 raise TypeError("%s can produce a %r value not accepted by %s" %
366 (old_name, x.__name__, new_name))
368 _typechecked_decoder_cache = {}
369 def make_typechecked_decoder(s, input_types = None, output_types = None):
370 cache_lookup = (s, input_types, output_types)
372 return _typechecked_decoder_cache[cache_lookup]
375 if input_types is not None and not isinstance(input_types, type( () )):
376 input_types = (input_types,)
377 if output_types is not None and not isinstance(output_types, type( () )):
378 output_types = (output_types,)
380 functions = parse(scan(s))
382 # Make sure the input type(s) are allowed
384 fc, in_types, out_types = decode_functions[f.name]
385 if input_types is not None:
386 for x in input_types:
387 if x not in in_types:
389 "the input type includes %r which isn't supported by %s" %
390 (x.__name__, f.name))
394 input_types = out_types
395 args = functions[0].args
397 fc = FunctionCall(fc, args)
399 for f in functions[1:]:
400 transform_func, in_types, out_types = decode_functions[f.name]
401 _verify_subtypes(input_types, in_types, old_name, f.name)
403 input_types = out_types
404 fc = FunctionCallChain(fc, transform_func, f.args)
406 if output_types is not None:
407 _verify_subtypes(input_types, output_types, old_name, "the output")
408 _typechecked_decoder_cache[cache_lookup] = fc
413 assert make_decoder("chop")("Andrew") == "Andre"
414 assert make_decoder("int")("9") == 9
415 assert make_decoder('join(" ")')(["Andrew", "Dalke"]) == \
417 assert make_decoder('chomp("|")')("|test|") == "|test"
418 assert make_decoder('chomp("|")')("|test") == "|test"
419 assert make_decoder('chomp("A")|chop')("BA") == ""
420 assert make_decoder('chomp("A")|chop')("AB") == "A"
421 assert make_decoder('chop|chomp("A")')("AB") == ""
422 assert make_decoder('chop|chomp("A")')("BA") == "B"
423 assert make_decoder('add(5)')(2) == 7
424 assert make_decoder('add(-2)')(5) == 3
426 if __name__ == "__main__":