website/archive/binaries/mac/src/disembl/biopython-1.50/Bio/FilteredReader.py

   1 # Copyright 2001 by Katharine Lindner.  All rights reserved.\r
   2 # This code is part of the Biopython distribution and governed by its\r
   3 # license.  Please see the LICENSE file that should have been included\r
   4 # as part of this package.\r
   5 \r
   6 """Code for more fancy file handles (OBSOLETE).\r
   7 \r
   8 Classes:\r
   9 FilteredReader is a decorator for File that allows the user to filter the output\r
  10 on a line by line basis.\r
  11 \r
  12 The FilteredReader module reads a file and applies a sequence of filters to the input.\r
  13 The constructor sets a default filter chain, but the user can select another chain by setting\r
  14 the filter_chain attribute of the reader.\r
  15 \r
  16 handle = open( "filename" )\r
  17 filtered_reader = Bio.FilteredReader.FilteredReader( handle )\r
  18 filtered_reader.filter_chain = [ remove_asterisks, replace_dot_with_dash ]\r
  19 filtered_reader.read()\r
  20 \r
  21 All filters in the chain must provide the same interface with a line of text as the single\r
  22 input parameter and altered text as the return value.\r
  23 \r
  24 This module is now considered to be obsolete, and is likely to be deprecated\r
  25 in a future release of Biopython, and later removed.\r
  26 """\r
  27 \r
  28 \r
  29 def dump_saved( name, text, j ):\r
  30     """Used for debugging."""\r
  31     dump_file = open( name + '%d' % j, "w" )\r
  32     k = 0\r
  33     for i in range ( 0, len( text ), 80 ):\r
  34         dump_file.write(  '%s\n' % text[ i : i + 80 ] )\r
  35     dump_file.close()\r
  36 \r
  37 def remove_leading_whitespace( line ):\r
  38     return line.lstrip()\r
  39 \r
  40 \r
  41 def remove_empty_line( line ):\r
  42     stripped_line = line.strip()\r
  43     if( stripped_line ):\r
  44         return line[ : ]\r
  45     else:\r
  46         return ''\r
  47 \r
  48 def remove_useless_dot( line ):\r
  49     before = line\r
  50     while( 1 ):\r
  51         after = before.replace( "\t.\t", "\t\t" )\r
  52         if( len( before ) == len( after ) ):\r
  53             break\r
  54         before = after\r
  55     if( after.endswith( '.' ) ):\r
  56         after = after[ :-1 ]\r
  57     return after\r
  58 \r
  59 def fix_punctuation( line ):\r
  60     line = line.replace( "'", '' )\r
  61     line = line.replace( '"', '' )\r
  62     line = line.replace( ';', '\t' )\r
  63     line = line.replace( 'entryname', 'id' )\r
  64 #    line = line.lower( )\r
  65     if( line ):\r
  66         return line[ : ]\r
  67     else:\r
  68         return ''\r
  69 \r
  70 \r
  71 \r
  72 class FilteredReader:\r
  73     def __init__(self, handle ):\r
  74         self._handle = handle\r
  75         self._start_line = ''\r
  76         self._debug_count = 0\r
  77         self.filter_chain = [ remove_empty_line, remove_useless_dot, fix_punctuation ]\r
  78 \r
  79     def __getattr__(self, attr):\r
  80         return getattr(self._handle, attr)\r
  81 \r
  82 \r
  83 \r
  84     def close(self, *args, **keywds ):\r
  85         return self._handle.close( *args, **keywds)\r
  86 \r
  87     def read( self, *args, **keywds ):\r
  88         line = ''\r
  89         len_expected = self._get_len_expected( args, keywds )\r
  90         if( len_expected ):\r
  91             filtered_text = self.read_block( len_expected )\r
  92         else:\r
  93             filtered_text = self.read_to_end()\r
  94         return filtered_text\r
  95 \r
  96     def read_block( self, len_expected ):\r
  97 \r
  98         len_filtered = 0\r
  99         len_adjusted -= len( self._start_line )\r
 100         filtered_text = ''\r
 101         while( len_filtered < len_expected ):\r
 102 \r
 103             text_read = self._handle.read( len_adjusted )\r
 104             full_text = self._start_line + text_read\r
 105             lines = full_text.splitlines( 1 )\r
 106             if( text_read == '' ):\r
 107                 filtered_text = filtered_text + self.filter( lines )\r
 108                 break\r
 109             else:\r
 110                 all_but_last_line = lines[ :-1 ]\r
 111                 self._start_line = lines[ -1 ]\r
 112                 filtered_text = filtered_text + self.filter( all_but_last_line )\r
 113             len_filtered_text = len( filtered_text )\r
 114             len_adjusted = len_adjusted - len_filtered_text\r
 115         return filtered_text[ : ]\r
 116 \r
 117     def read_to_end( self ):\r
 118         filtered_text = ''\r
 119         text_read = self._handle.read()\r
 120         full_text = self._start_line + text_read\r
 121         lines = full_text.splitlines( 1 )\r
 122         filtered_text += self.filter( lines[:] )\r
 123         return filtered_text[ : ]\r
 124 \r
 125     def _get_len_expected( self, args, keywds ):\r
 126 \r
 127         if( len( args) > 0 ):\r
 128             len_expected = args[ 0 ]\r
 129             if( len_expected < 0 ):\r
 130                 len_expected = None\r
 131         elif 'size' in keywds:\r
 132             len_expected = keywds['size']\r
 133         else:\r
 134             len_expected = None\r
 135         return len_expected\r
 136 \r
 137     def filter( self, lines  ):\r
 138         filter_chain = self.filter_chain\r
 139         filtered_text = ''\r
 140         for line in lines:\r
 141             for filter in filter_chain:\r
 142                 line = filter( *( line, ) )\r
 143             filtered_text += line\r
 144 \r
 145         return filtered_text\r
 146 \r
 147 def has_trailing_linefeed( line ):\r
 148     if( line.endswith( chr( 13 ) ) or \\r
 149         line.endswith( chr( 10 ) ) ):\r
 150         return 1\r
 151     else:\r
 152         return 0\r