1 # Copyright 2001 by Katharine Lindner. All rights reserved.
\r
2 # This code is part of the Biopython distribution and governed by its
\r
3 # license. Please see the LICENSE file that should have been included
\r
4 # as part of this package.
\r
6 """Code for more fancy file handles (OBSOLETE).
\r
9 Filtered is a decorator for File that allows the user to filter the output
\r
10 on a line by line basis.
\r
12 The FilteredReader module reads a file and applies a sequence of filters to the input.
\r
13 The constructor sets a default filter chain, but the user can select another filter by setting
\r
14 Bio.FilteredReader.filter_chain.
\r
16 handle = open( "filename" )
\r
17 filtered_reader = Bio.FilteredReader( handle )
\r
18 filtered_reader.filter_chain = [ remove_asterisks, replace_dot_with_dash ]
\r
19 filtered_reader.read()
\r
21 All filters in the chain must provide the same interface with a line of text as the single
\r
22 input parameter and altered text as the return value.
\r
24 This module is now considered to be obsolete, and is likely to be deprecated
\r
25 in a future release of Biopython, and later removed.
\r
def dump_saved(name, text, j):
    """Used for debugging: dump text to a numbered file, 80 characters per line.

    The output file is called ``name`` followed by the decimal value of ``j``
    and is overwritten if it already exists.

    Args:
        name: Path prefix of the dump file.
        text: The text to dump.
        j: Integer appended to ``name`` to form the file name.
    """
    # The context manager guarantees the handle is flushed and closed even
    # if a write fails part-way through.
    with open(name + '%d' % j, "w") as dump_file:
        for i in range(0, len(text), 80):
            dump_file.write('%s\n' % text[i:i + 80])
\r
def remove_leading_whitespace(line):
    """Filter: return ``line`` with any whitespace at its start removed."""
    trimmed = line.lstrip()
    return trimmed
\r
def remove_empty_line(line):
    """Filter: drop lines that contain nothing but whitespace.

    Args:
        line: One line of text, normally including its line ending.

    Returns:
        ``line`` unchanged when it has any non-whitespace content, otherwise
        the empty string so the line vanishes from the filtered output.
    """
    # NOTE(review): the branch bodies were not visible in the reviewed
    # source; the return values follow the filter contract (a line of text
    # in, altered text out) and the function name -- confirm.
    stripped_line = line.strip()
    if stripped_line:
        return line
    return ''
\r
def remove_useless_dot(line):
    """Filter: blank out fields that consist of a lone dot.

    Every TAB-dot-TAB sequence becomes TAB-TAB, and a dot that is the very
    last character of the text is removed. A dot followed by a line ending
    is not the last character and is therefore left alone.

    Args:
        line: One line of text.

    Returns:
        The altered text.
    """
    # NOTE(review): the loop scaffolding and the final return were not
    # visible in the reviewed source; they are reconstructed from the
    # before/after length comparison -- confirm.
    before = line
    while True:
        # str.replace does not match overlapping occurrences, so adjacent
        # dot fields (TAB . TAB . TAB) need more than one pass.
        after = before.replace("\t.\t", "\t\t")
        if len(before) == len(after):
            # Nothing was replaced on this pass: the text is stable.
            break
        before = after
    if after.endswith('.'):
        after = after[:-1]
    return after
\r
def fix_punctuation(line):
    """Filter: normalise punctuation in a line of text.

    Single and double quote characters are removed, every semicolon becomes
    a tab, and the word ``entryname`` is replaced by ``id``.

    Args:
        line: One line of text.

    Returns:
        The altered text (possibly empty).
    """
    line = line.replace("'", '')
    line = line.replace('"', '')
    line = line.replace(';', '\t')
    line = line.replace('entryname', 'id')
    # Lower-casing was commented out in the original and stays disabled, so
    # the case of the text is preserved.
    # Returning the text is required by the filter contract: the chain
    # concatenates whatever each filter hands back.
    return line
\r
class FilteredReader:
    """Wrap a text handle and pass every line read through a filter chain.

    ``filter_chain`` is a list of callables. Each one receives a single line
    of text (line ending included) and returns the altered text; the filters
    are applied in list order. Attributes that are not defined on this class
    are looked up on the wrapped handle.
    """

    def __init__(self, handle):
        """Wrap ``handle`` and install the default filter chain."""
        self._handle = handle
        # Tail of the previous block read that may still be an incomplete
        # line; it is prepended to the next text read from the handle.
        self._start_line = ''
        self._debug_count = 0
        self.filter_chain = [remove_empty_line, remove_useless_dot, fix_punctuation]

    def __getattr__(self, attr):
        """Forward attributes not found on the reader to the wrapped handle."""
        # __getattr__ only runs after normal lookup has failed. If _handle
        # itself is missing (instance created without __init__), looking it
        # up again would recurse without end, so fail cleanly instead.
        if attr == '_handle':
            raise AttributeError(attr)
        return getattr(self._handle, attr)

    def close(self, *args, **keywds):
        """Close the wrapped handle, passing all arguments through."""
        return self._handle.close(*args, **keywds)

    def read(self, *args, **keywds):
        """Read from the handle and return the filtered text.

        Accepts the size either as the first positional argument or as the
        ``size`` keyword. With no size, ``None`` or a negative size,
        everything up to the end of the input is read and filtered.
        Otherwise see ``read_block``.
        """
        len_expected = self._get_len_expected(args, keywds)
        if len_expected is None:
            return self.read_to_end()
        return self.read_block(len_expected)

    def read_block(self, len_expected):
        """Return at least ``len_expected`` characters of filtered text.

        Only whole lines are filtered and returned, so the result may be
        longer than requested; it is shorter only when the input runs out.
        The last line read is always held back in ``_start_line`` because
        it may be incomplete; it is emitted by a later read.

        Args:
            len_expected: Minimum number of filtered characters wanted.
                ``None`` or a negative value means read to the end.

        Returns:
            The filtered text as a single string.
        """
        if len_expected is None or len_expected < 0:
            return self.read_to_end()

        pieces = []
        len_filtered = 0
        while len_filtered < len_expected:
            # Ask only for what is still missing; the request is always at
            # least one character, so an empty result really means EOF.
            text_read = self._handle.read(len_expected - len_filtered)
            full_text = self._start_line + text_read
            lines = full_text.splitlines(True)
            if text_read == '':
                # End of input: the held-back text is now a complete final
                # line. Clear it so it cannot be emitted a second time.
                self._start_line = ''
                pieces.append(self.filter(lines))
                break
            # text_read is non-empty here, so lines has at least one entry.
            self._start_line = lines[-1]
            piece = self.filter(lines[:-1])
            pieces.append(piece)
            len_filtered += len(piece)
        return ''.join(pieces)

    def read_to_end(self):
        """Read everything left on the handle and return it filtered."""
        text_read = self._handle.read()
        full_text = self._start_line + text_read
        # The held-back text has been consumed; forget it so that a later
        # read does not return it again.
        self._start_line = ''
        return self.filter(full_text.splitlines(True))

    def _get_len_expected(self, args, keywds):
        """Extract the requested read size from ``read``'s arguments.

        Returns:
            The size as given (first positional argument, else the ``size``
            keyword), or ``None`` when it is absent, ``None`` or negative,
            all of which mean "read to the end".
        """
        if len(args) > 0:
            len_expected = args[0]
        else:
            len_expected = keywds.get('size')
        if len_expected is None or len_expected < 0:
            return None
        return len_expected

    def filter(self, lines):
        """Apply every filter in ``filter_chain`` to each line, in order.

        Args:
            lines: Iterable of lines of text.

        Returns:
            The filtered lines concatenated into one string.
        """
        filter_chain = self.filter_chain
        filtered_lines = []
        for line in lines:
            for line_filter in filter_chain:
                line = line_filter(line)
            filtered_lines.append(line)
        return ''.join(filtered_lines)
\r
147 def has_trailing_linefeed( line ):
\r
148 if( line.endswith( chr( 13 ) ) or \
\r
149 line.endswith( chr( 10 ) ) ):
\r