binaries/src/disembl/biopython-1.50/Bio/Decode.py

   1 # Copyright 2002 by Andrew Dalke.
   2 # All rights reserved.
   3 # This code is part of the Biopython distribution and governed by its
   4 # license.  Please see the LICENSE file that should have been included
   5 # as part of this package.
   6 """Decode elements from a Std/Martel parsed XML stream (OBSOLETE).
   7
   8 Andrew Dalke is no longer maintaining Martel or Bio.Mindy, and these modules
   9 (and therefore Bio.Decode) have been deprecated.  They are no longer used in
  10 any of the current Biopython parsers, and are likely to be removed in a
  11 future release."""
  12
  13 import warnings
  14 warnings.warn("Martel and those parts of Biopython depending on it" \
  15               +" directly (such as Bio.Mindy and Bio.Decode) are now" \
  16               +" deprecated, and will be removed in a future release of"\
  17               +" Biopython.  If you want to continue to use this code,"\
  18               +" please get in contact with the Biopython developers via"\
  19               +" the mailing lists to avoid its permanent removal from"\
  20               +" Biopython.", \
  21               DeprecationWarning)
  22
  23 import string
  24 from Bio.Parsers.spark import GenericScanner, GenericParser
  25
  26 def unescape_C(s):
  27     result = []
  28     for i in range(len(s)):
  29         if s[i] != "\\":
  30             result.append(s[i])
  31             continue
  32         c = s[i+1:i+2]
  33         if c == "x":
  34             x = s[i+2:i+4]
  35             if len(x) != 2:
  36                 raise ValueError("invalid \\x escape")
  37             i = int(x, 16)
  38             result.append(chr(i))
  39             continue
  40         if c in "01234567":
  41             x = s[i+1:i+4]
  42             # \octals don't do a length assertion check
  43             i = int(x, 8)
  44             result.append(chr(i))
  45             continue
  46         result.append(c)
  47     return "".join(result)
  48
  49 def join_english(fields):
  50     if not fields:
  51         return ""
  52     s = fields[0]
  53     for field in fields[1:]:
  54         if s[-1:] == "-" and s[-3:-2] == "-":
  55             s = s + field
  56             continue
  57         if s.find(" ") == -1 and field.find(" ") == -1:
  58             s = s + field
  59             continue
  60         s = s + " " + field
  61     return (" ".join(s.split())).strip()
  62
  63
  64
  65 def chomp(s, c):
  66     if s[-1:] == c:
  67         return s[:-1]
  68     return s
  69
  70 def lchomp(s, c):
  71     if s[:1] == c:
  72         return s[1:]
  73     return s
  74
  75 def chompchomp(s, c):
  76     if s[:1] == c and s[-1:] == c:
  77         return s[1:-1]
  78     return s
  79
  80 def fixspaces(s):
  81     # s.split breaks down to a list of words
  82     # " ".join puts them together
  83     # strip removes leading and trailing spaces
  84     return " ".join(s.split()).strip()
  85
  86 def join_fixspaces(lines):
  87     return " ".join((" ".join(lines)).split()).strip()
  88
  89 def tr(s, frm, to):
  90     table = string.maketrans(frm, to)
  91     return s.translate(table)
  92
  93 def safe_int(s):
  94     """converts to int if the number is small, long if it's large"""
  95     try:
  96         return int(s)
  97     except ValueError:
  98         return long(s)
  99
# Registry of the operations that can be named in a decode expression.
#
# Each key is the name as written in the expression.  Each value is a
# 3-tuple:
#     (callable, accepted input type(s), produced output type(s))
# The callable receives the value being decoded as its first argument,
# followed by any literal arguments given in the expression.  The type
# entries may be a single type, a list or a tuple here; _fixup_defs()
# rewrites them all to tuples right after this table is built.
decode_functions = {
    "chomp": (chomp, str, str),
    "chompchomp": (chompchomp, str, str),
    "chop": (lambda s: s[:-1], str, str),
    "chopchop": (lambda s: s[1:-1], str, str),
    "fixspaces": (fixspaces, str, str),
    "lchomp": (lchomp, str, str),
    "lchop": (lambda s: s[1:], str, str),
    "lower": (lambda s: s.lower(), str, str),
    "lstrip": (lambda s: s.lstrip(), str, str),
    "replace": (lambda s, old, new: s.replace(old, new), str, str),
    "rstrip": (lambda s: s.rstrip(), str, str),
    "str": (str, str, str),
    "strip": (lambda s: s.strip(), str, str),
    "tr": (tr, str, str),
    "unescape.c": (unescape_C, str, str),
    "unescape.doublequote": (lambda s: s.replace('""', '"'), str, str),
    "unescape.singlequote": (lambda s: s.replace("''", "'"), str, str),
    "upper": (lambda s: s.upper(), str, str),

    # List operations
    "join": (lambda lst, s = " ": s.join(lst), list, str),
    "join.english": (join_english, list, str),

    # Integer operations
    "int": (safe_int, [float, str, int], int),
    # NOTE(review): s.replace needs a string, yet float and int are also
    # listed as accepted inputs here -- confirm the intended input types.
    "int.comma": (lambda s: safe_int(s.replace(",", "")),
                  [float, str, int], int),
    # NOTE(review): the builtins hex() and oct() take an integer and return
    # a string, the reverse of the (str -> int) types declared for them --
    # confirm whether the callables or the declared types are wrong.
    "hex": (hex, str, int),
    "oct": (oct, str, int),
    "add": ((lambda i, j: i+j), int, int),

    # Float operations
    "float": (float, (float, str, int), float),

    }
 136
 137 def _fixup_defs():
 138     # Normalize so the 2nd and 3rd terms are tuples
 139     for k, v in decode_functions.items():
 140         f, in_types, out_types = v
 141         if isinstance(in_types, type([])):
 142             in_types = tuple(in_types)
 143         elif not isinstance(in_types, type( () )):
 144             in_types = (in_types,)
 145
 146         if isinstance(out_types, type([])):
 147             out_types = tuple(out_types)
 148         elif not isinstance(out_types, type( () )):
 149             out_types = (out_types,)
 150
 151         decode_functions[k] = (f, in_types, out_types)
 152 _fixup_defs()
 153
 154 class Token:
 155     def __init__(self, type):
 156         self.type = type
 157     def __cmp__(self, other):
 158         return cmp(self.type, other)
 159     def __repr__(self):
 160         return "Token(%r)" % (self.type,)
 161
 162 class ValueToken(Token):
 163     def __init__(self, type, val):
 164         Token.__init__(self, type)
 165         self.val = val
 166     def __cmp__(self, other):
 167         return cmp(self.type, other)
 168     def __repr__(self):
 169         return "%s(%r)" % (self.__class__.__name__, self.val)
 170     def __str__(self):
 171         return str(self.val)
 172
class Integer(ValueToken):
    """ValueToken whose type name is "integer"; val holds the number."""
    def __init__(self, val):
        ValueToken.__init__(self, "integer", val)
 176
class Float(ValueToken):
    """ValueToken whose type name is "float"; val holds the number."""
    def __init__(self, val):
        ValueToken.__init__(self, "float", val)
 180
class String(ValueToken):
    """ValueToken whose type name is "string"; val holds the text."""
    def __init__(self, val):
        ValueToken.__init__(self, "string", val)
 184
class FunctionName(ValueToken):
    """ValueToken whose type name is "functionname"; val holds the name."""
    def __init__(self, val):
        ValueToken.__init__(self, "functionname", val)
 188
class DecodeScanner(GenericScanner):
    """Tokenizer for decode expressions such as 'chop|chomp("A")'.

    Each t_* method handles one kind of token and appends the matching
    Token / ValueToken object to self.rv; tokenize() returns that list.

    NOTE(review): the docstring of every t_* method is a regular
    expression, presumably read by GenericScanner (Bio.Parsers.spark) to
    build the lexer.  Treat those docstrings as runtime data and do not
    reword them -- confirm against spark before changing any.
    """
    def __init__(self):
        GenericScanner.__init__(self)

    def tokenize(self, input):
        # Start a fresh result list for this call; the t_* callbacks fill it
        # while the base class scans the input.
        self.rv = []
        GenericScanner.tokenize(self, input)
        return self.rv

    def t_functionname(self, input):
        r" \w+(\.\w+)*"
        # A word, optionally with dotted parts (e.g. "unescape.c").
        self.rv.append(FunctionName(input))

    def t_pipe(self, input):
        r" \| "
        self.rv.append(Token("pipe"))

    def t_open_paren(self, input):
        r" \( "
        self.rv.append(Token("open_paren"))

    def t_close_paren(self, input):
        r" \) "
        self.rv.append(Token("close_paren"))

    def t_comma(self, input):
        r" , "
        self.rv.append(Token("comma"))

    def t_whitespace(self, input):
        r" \s+ "
        # Whitespace produces no token.
        pass

    def t_string(self, input):
        r""" "([^"\\]+|\\.)*"|'([^'\\]+|\\.)*' """
        # "'  # emacs cruft
        # Drop the surrounding quote characters, then expand backslash
        # escapes in what is left.
        s = input[1:-1]
        s = unescape_C(s)

        self.rv.append(String(s))

    def t_float(self, input):
        r""" [+-]?((\d+(\.\d*)?)|\.\d+)([eE][+-]?[0-9]+)? """
        # See if this is an integer
        # The pattern covers every numeric literal; text that safe_int
        # rejects with ValueError is stored as a Float instead.
        try:
            self.rv.append(Integer(safe_int(input)))
        except ValueError:
            self.rv.append(Float(float(input)))
 237
 238 class Function:
 239     def __init__(self, name, args = ()):
 240         self.name = name
 241         self.args = args
 242     def __str__(self):
 243         args = self.args
 244         if not args:
 245             s = ""
 246         else:
 247             s = str(args)[1:-1]
 248         return "%s(x, %s)" % (self.name, s)
 249     __repr__ = __str__
 250
class DecodeParser(GenericParser):
    """Parser that turns the scanner's token list into Function objects.

    NOTE(review): the docstring of every p_* method is a set of grammar
    productions, presumably read by GenericParser (Bio.Parsers.spark).
    Treat those docstrings as runtime data and do not reword them --
    confirm against spark before changing any.
    """
    def __init__(self, start = "expression"):
        GenericParser.__init__(self, start)
        # NOTE(review): not referenced anywhere else in this file; its
        # purpose cannot be determined from here.
        self.begin_pos = 0

    def p_expression(self, args):
        """
        expression ::= term
        expression ::= term pipe expression
        """
        # Flatten the right-recursive rule into one list of terms, in the
        # order they were written.
        if len(args) == 1:
            return [args[0]]
        return [args[0]] + args[2]

    def p_term(self, args):
        """
        term ::= functionname
        term ::= functionname open_paren args close_paren
        """
        # args[0] is the function-name token; when present, args[2] is the
        # list built by p_args, whose tokens are reduced to their values.
        if len(args) == 1:
            return Function(args[0].val)
        return Function(args[0].val, tuple([x.val for x in args[2]]))

    def p_args(self, args):
        """
        args ::= arg
        args ::= arg comma args
        """
        # Same flattening as p_expression, skipping the comma at args[1].
        if len(args) == 1:
            return [args[0]]
        return [args[0]] + args[2]

    def p_arg(self, args):
        """
        arg ::= string
        arg ::= integer
        arg ::= float
        """
        # Pass the literal token through unchanged.
        return args[0]
 290
 291 def scan(input):
 292     scanner = DecodeScanner()
 293     return scanner.tokenize(input)
 294
 295 def parse(tokens):
 296     parser = DecodeParser()
 297     return parser.parse(tokens)
 298
# Maps a decode expression string to the callable make_decoder() built
# for it, so each expression is scanned and parsed only once.
_decoder_cache = {}
 300
 301 class FunctionCall:
 302     def __init__(self, f, args):
 303         self.f = f
 304         self.args = args
 305     def __call__(self, x):
 306         return self.f(x, *self.args)
 307
 308 class FunctionCallChain:
 309     def __init__(self, inner_f, f, args):
 310         self.inner_f = inner_f
 311         self.f = f
 312         self.args = args
 313     def __call__(self, x):
 314         return self.f(self.inner_f(x), *self.args)
 315
 316 #### I don't think this is the right way to do things
 317 ##class CheckTypes:
 318 ##    def __init__(self, f, call_types, return_types):
 319 ##        self.f = f
 320 ##        self.call_types = call_types
 321 ##        self.return_types = return_types
 322 ##    def __call__(self, x):
 323 ##        if self.call_types is not None:
 324 ##            for T in self.call_types:
 325 ##                if isinstance(x, T):
 326 ##                    break
 327 ##            else:
 328 ##                raise TypeError(
 329 ##                    "Call value %s of type %s, expecting one of %s" %
 330 ##                    (x, type(x).__name__,
 331 ##                     [T.name for T in self.call_types]))
 332 ##        y = self.f(x)
 333
 334 ##        if not self.return_types:
 335 ##            return y
 336
 337 ##        for T in self.return_types:
 338 ##            if isinstance(y, T):
 339 ##                return y
 340 ##        raise TypeError("Return value %s of type %s, expecting one of %s" %
 341 ##                        (y, type(y).__name__,
 342 ##                         [T.name for T in self.return_types]))
 343
 344 def make_decoder(s):
 345     try:
 346         return _decoder_cache[s]
 347     except KeyError:
 348         pass
 349
 350     functions = parse(scan(s))
 351
 352     f = functions[0]
 353     fc = decode_functions[f.name][0]
 354     args = f.args
 355     if args:
 356         fc = FunctionCall(fc, args)
 357     for f in functions[1:]:
 358         fc = FunctionCallChain(fc, decode_functions[f.name][0], f.args)
 359     _decoder_cache[s] = fc
 360     return fc
 361
 362 def _verify_subtypes(subset, total, old_name, new_name):
 363     for x in subset:
 364         if x not in total:
 365             raise TypeError("%s can produce a %r value not accepted by %s" %
 366                             (old_name, x.__name__, new_name))
 367
 368 _typechecked_decoder_cache = {}
 369 def make_typechecked_decoder(s, input_types = None, output_types = None):
 370     cache_lookup = (s, input_types, output_types)
 371     try:
 372         return _typechecked_decoder_cache[cache_lookup]
 373     except KeyError:
 374         pass
 375     if input_types is not None and not isinstance(input_types, type( () )):
 376         input_types = (input_types,)
 377     if output_types is not None and not isinstance(output_types, type( () )):
 378         output_types = (output_types,)
 379
 380     functions = parse(scan(s))
 381
 382     # Make sure the input type(s) are allowed
 383     f = functions[0]
 384     fc, in_types, out_types = decode_functions[f.name]
 385     if input_types is not None:
 386         for x in input_types:
 387             if x not in in_types:
 388                 raise TypeError(
 389                     "the input type includes %r which isn't supported by %s" %
 390                     (x.__name__, f.name))
 391
 392     # Do the composition
 393     old_name = f.name
 394     input_types = out_types
 395     args = functions[0].args
 396     if args:
 397         fc = FunctionCall(fc, args)
 398
 399     for f in functions[1:]:
 400         transform_func, in_types, out_types = decode_functions[f.name]
 401         _verify_subtypes(input_types, in_types, old_name, f.name)
 402         old_name = f.name
 403         input_types = out_types
 404         fc = FunctionCallChain(fc, transform_func, f.args)
 405
 406     if output_types is not None:
 407         _verify_subtypes(input_types, output_types, old_name, "the output")
 408     _typechecked_decoder_cache[cache_lookup] = fc
 409     return fc
 410
 411
 412 def test():
 413     assert make_decoder("chop")("Andrew") == "Andre"
 414     assert make_decoder("int")("9") == 9
 415     assert make_decoder('join(" ")')(["Andrew", "Dalke"]) == \
 416                                           "Andrew Dalke"
 417     assert make_decoder('chomp("|")')("|test|") == "|test"
 418     assert make_decoder('chomp("|")')("|test") == "|test"
 419     assert make_decoder('chomp("A")|chop')("BA") == ""
 420     assert make_decoder('chomp("A")|chop')("AB") == "A"
 421     assert make_decoder('chop|chomp("A")')("AB") == ""
 422     assert make_decoder('chop|chomp("A")')("BA") == "B"
 423     assert make_decoder('add(5)')(2) == 7
 424     assert make_decoder('add(-2)')(5) == 3
 425
 426 if __name__ == "__main__":
 427     test()