+++ /dev/null
-# Copyright 2002 by Andrew Dalke.
-# All rights reserved.
-# This code is part of the Biopython distribution and governed by its
-# license. Please see the LICENSE file that should have been included
-# as part of this package.
-"""Decode elements from a Std/Martel parsed XML stream (OBSOLETE).
-
-Andrew Dalke is no longer maintaining Martel or Bio.Mindy, and these modules
-(and therefore Bio.Decode) have been deprecated. They are no longer used in
-any of the current Biopython parsers, and are likely to be removed in a
-future release."""
-
-import warnings
-warnings.warn("Martel and those parts of Biopython depending on it" \
- +" directly (such as Bio.Mindy and Bio.Decode) are now" \
- +" deprecated, and will be removed in a future release of"\
- +" Biopython. If you want to continue to use this code,"\
- +" please get in contact with the Biopython developers via"\
- +" the mailing lists to avoid its permanent removal from"\
- +" Biopython.", \
- DeprecationWarning)
-
-import string
-from Bio.Parsers.spark import GenericScanner, GenericParser
-
-def unescape_C(s):
- result = []
- for i in range(len(s)):
- if s[i] != "\\":
- result.append(s[i])
- continue
- c = s[i+1:i+2]
- if c == "x":
- x = s[i+2:i+4]
- if len(x) != 2:
- raise ValueError("invalid \\x escape")
- i = int(x, 16)
- result.append(chr(i))
- continue
- if c in "01234567":
- x = s[i+1:i+4]
- # \octals don't do a length assertion check
- i = int(x, 8)
- result.append(chr(i))
- continue
- result.append(c)
- return "".join(result)
-
-def join_english(fields):
- if not fields:
- return ""
- s = fields[0]
- for field in fields[1:]:
- if s[-1:] == "-" and s[-3:-2] == "-":
- s = s + field
- continue
- if s.find(" ") == -1 and field.find(" ") == -1:
- s = s + field
- continue
- s = s + " " + field
- return (" ".join(s.split())).strip()
-
-
-
-def chomp(s, c):
- if s[-1:] == c:
- return s[:-1]
- return s
-
-def lchomp(s, c):
- if s[:1] == c:
- return s[1:]
- return s
-
-def chompchomp(s, c):
- if s[:1] == c and s[-1:] == c:
- return s[1:-1]
- return s
-
-def fixspaces(s):
- # s.split breaks down to a list of words
- # " ".join puts them together
- # strip removes leading and trailing spaces
- return " ".join(s.split()).strip()
-
-def join_fixspaces(lines):
- return " ".join((" ".join(lines)).split()).strip()
-
-def tr(s, frm, to):
- table = string.maketrans(frm, to)
- return s.translate(table)
-
-def safe_int(s):
- """converts to int if the number is small, long if it's large"""
- try:
- return int(s)
- except ValueError:
- return long(s)
-
-decode_functions = {
- "chomp": (chomp, str, str),
- "chompchomp": (chompchomp, str, str),
- "chop": (lambda s: s[:-1], str, str),
- "chopchop": (lambda s: s[1:-1], str, str),
- "fixspaces": (fixspaces, str, str),
- "lchomp": (lchomp, str, str),
- "lchop": (lambda s: s[1:], str, str),
- "lower": (lambda s: s.lower(), str, str),
- "lstrip": (lambda s: s.lstrip(), str, str),
- "replace": (lambda s, old, new: s.replace(old, new), str, str),
- "rstrip": (lambda s: s.rstrip(), str, str),
- "str": (str, str, str),
- "strip": (lambda s: s.strip(), str, str),
- "tr": (tr, str, str),
- "unescape.c": (unescape_C, str, str),
- "unescape.doublequote": (lambda s: s.replace('""', '"'), str, str),
- "unescape.singlequote": (lambda s: s.replace("''", "'"), str, str),
- "upper": (lambda s: s.upper(), str, str),
-
- # List operations
- "join": (lambda lst, s = " ": s.join(lst), list, str),
- "join.english": (join_english, list, str),
-
- # Integer operations
- "int": (safe_int, [float, str, int], int),
- "int.comma": (lambda s: safe_int(s.replace(",", "")),
- [float, str, int], int),
- "hex": (hex, str, int),
- "oct": (oct, str, int),
- "add": ((lambda i, j: i+j), int, int),
-
- # Float operations
- "float": (float, (float, str, int), float),
-
- }
-
-def _fixup_defs():
- # Normalize so the 2nd and 3rd terms are tuples
- for k, v in decode_functions.items():
- f, in_types, out_types = v
- if isinstance(in_types, type([])):
- in_types = tuple(in_types)
- elif not isinstance(in_types, type( () )):
- in_types = (in_types,)
-
- if isinstance(out_types, type([])):
- out_types = tuple(out_types)
- elif not isinstance(out_types, type( () )):
- out_types = (out_types,)
-
- decode_functions[k] = (f, in_types, out_types)
-_fixup_defs()
-
-class Token:
- def __init__(self, type):
- self.type = type
- def __cmp__(self, other):
- return cmp(self.type, other)
- def __repr__(self):
- return "Token(%r)" % (self.type,)
-
-class ValueToken(Token):
- def __init__(self, type, val):
- Token.__init__(self, type)
- self.val = val
- def __cmp__(self, other):
- return cmp(self.type, other)
- def __repr__(self):
- return "%s(%r)" % (self.__class__.__name__, self.val)
- def __str__(self):
- return str(self.val)
-
-class Integer(ValueToken):
- def __init__(self, val):
- ValueToken.__init__(self, "integer", val)
-
-class Float(ValueToken):
- def __init__(self, val):
- ValueToken.__init__(self, "float", val)
-
-class String(ValueToken):
- def __init__(self, val):
- ValueToken.__init__(self, "string", val)
-
-class FunctionName(ValueToken):
- def __init__(self, val):
- ValueToken.__init__(self, "functionname", val)
-
-class DecodeScanner(GenericScanner):
- def __init__(self):
- GenericScanner.__init__(self)
-
- def tokenize(self, input):
- self.rv = []
- GenericScanner.tokenize(self, input)
- return self.rv
-
- def t_functionname(self, input):
- r" \w+(\.\w+)*"
- self.rv.append(FunctionName(input))
-
- def t_pipe(self, input):
- r" \| "
- self.rv.append(Token("pipe"))
-
- def t_open_paren(self, input):
- r" \( "
- self.rv.append(Token("open_paren"))
-
- def t_close_paren(self, input):
- r" \) "
- self.rv.append(Token("close_paren"))
-
- def t_comma(self, input):
- r" , "
- self.rv.append(Token("comma"))
-
- def t_whitespace(self, input):
- r" \s+ "
- pass
-
- def t_string(self, input):
- r""" "([^"\\]+|\\.)*"|'([^'\\]+|\\.)*' """
- # "' # emacs cruft
- s = input[1:-1]
- s = unescape_C(s)
-
- self.rv.append(String(s))
-
- def t_float(self, input):
- r""" [+-]?((\d+(\.\d*)?)|\.\d+)([eE][+-]?[0-9]+)? """
- # See if this is an integer
- try:
- self.rv.append(Integer(safe_int(input)))
- except ValueError:
- self.rv.append(Float(float(input)))
-
-class Function:
- def __init__(self, name, args = ()):
- self.name = name
- self.args = args
- def __str__(self):
- args = self.args
- if not args:
- s = ""
- else:
- s = str(args)[1:-1]
- return "%s(x, %s)" % (self.name, s)
- __repr__ = __str__
-
-class DecodeParser(GenericParser):
- def __init__(self, start = "expression"):
- GenericParser.__init__(self, start)
- self.begin_pos = 0
-
- def p_expression(self, args):
- """
- expression ::= term
- expression ::= term pipe expression
- """
- if len(args) == 1:
- return [args[0]]
- return [args[0]] + args[2]
-
- def p_term(self, args):
- """
- term ::= functionname
- term ::= functionname open_paren args close_paren
- """
- if len(args) == 1:
- return Function(args[0].val)
- return Function(args[0].val, tuple([x.val for x in args[2]]))
-
- def p_args(self, args):
- """
- args ::= arg
- args ::= arg comma args
- """
- if len(args) == 1:
- return [args[0]]
- return [args[0]] + args[2]
-
- def p_arg(self, args):
- """
- arg ::= string
- arg ::= integer
- arg ::= float
- """
- return args[0]
-
-def scan(input):
- scanner = DecodeScanner()
- return scanner.tokenize(input)
-
-def parse(tokens):
- parser = DecodeParser()
- return parser.parse(tokens)
-
-_decoder_cache = {}
-
-class FunctionCall:
- def __init__(self, f, args):
- self.f = f
- self.args = args
- def __call__(self, x):
- return self.f(x, *self.args)
-
-class FunctionCallChain:
- def __init__(self, inner_f, f, args):
- self.inner_f = inner_f
- self.f = f
- self.args = args
- def __call__(self, x):
- return self.f(self.inner_f(x), *self.args)
-
-#### I don't think this is the right way to do things
-##class CheckTypes:
-## def __init__(self, f, call_types, return_types):
-## self.f = f
-## self.call_types = call_types
-## self.return_types = return_types
-## def __call__(self, x):
-## if self.call_types is not None:
-## for T in self.call_types:
-## if isinstance(x, T):
-## break
-## else:
-## raise TypeError(
-## "Call value %s of type %s, expecting one of %s" %
-## (x, type(x).__name__,
-## [T.name for T in self.call_types]))
-## y = self.f(x)
-
-## if not self.return_types:
-## return y
-
-## for T in self.return_types:
-## if isinstance(y, T):
-## return y
-## raise TypeError("Return value %s of type %s, expecting one of %s" %
-## (y, type(y).__name__,
-## [T.name for T in self.return_types]))
-
-def make_decoder(s):
- try:
- return _decoder_cache[s]
- except KeyError:
- pass
-
- functions = parse(scan(s))
-
- f = functions[0]
- fc = decode_functions[f.name][0]
- args = f.args
- if args:
- fc = FunctionCall(fc, args)
- for f in functions[1:]:
- fc = FunctionCallChain(fc, decode_functions[f.name][0], f.args)
- _decoder_cache[s] = fc
- return fc
-
-def _verify_subtypes(subset, total, old_name, new_name):
- for x in subset:
- if x not in total:
- raise TypeError("%s can produce a %r value not accepted by %s" %
- (old_name, x.__name__, new_name))
-
-_typechecked_decoder_cache = {}
-def make_typechecked_decoder(s, input_types = None, output_types = None):
- cache_lookup = (s, input_types, output_types)
- try:
- return _typechecked_decoder_cache[cache_lookup]
- except KeyError:
- pass
- if input_types is not None and not isinstance(input_types, type( () )):
- input_types = (input_types,)
- if output_types is not None and not isinstance(output_types, type( () )):
- output_types = (output_types,)
-
- functions = parse(scan(s))
-
- # Make sure the input type(s) are allowed
- f = functions[0]
- fc, in_types, out_types = decode_functions[f.name]
- if input_types is not None:
- for x in input_types:
- if x not in in_types:
- raise TypeError(
- "the input type includes %r which isn't supported by %s" %
- (x.__name__, f.name))
-
- # Do the composition
- old_name = f.name
- input_types = out_types
- args = functions[0].args
- if args:
- fc = FunctionCall(fc, args)
-
- for f in functions[1:]:
- transform_func, in_types, out_types = decode_functions[f.name]
- _verify_subtypes(input_types, in_types, old_name, f.name)
- old_name = f.name
- input_types = out_types
- fc = FunctionCallChain(fc, transform_func, f.args)
-
- if output_types is not None:
- _verify_subtypes(input_types, output_types, old_name, "the output")
- _typechecked_decoder_cache[cache_lookup] = fc
- return fc
-
-
-def test():
- assert make_decoder("chop")("Andrew") == "Andre"
- assert make_decoder("int")("9") == 9
- assert make_decoder('join(" ")')(["Andrew", "Dalke"]) == \
- "Andrew Dalke"
- assert make_decoder('chomp("|")')("|test|") == "|test"
- assert make_decoder('chomp("|")')("|test") == "|test"
- assert make_decoder('chomp("A")|chop')("BA") == ""
- assert make_decoder('chomp("A")|chop')("AB") == "A"
- assert make_decoder('chop|chomp("A")')("AB") == ""
- assert make_decoder('chop|chomp("A")')("BA") == "B"
- assert make_decoder('add(5)')(2) == 7
- assert make_decoder('add(-2)')(5) == 3
-
-if __name__ == "__main__":
- test()