1 # Copyright 1999 by Jeffrey Chang. All rights reserved.
2 # This code is part of the Biopython distribution and governed by its
3 # license. Please see the LICENSE file that should have been included
4 # as part of this package.
6 """Code for more fancy file handles.
10 UndoHandle File object decorator with support for undo-like operations.
11 StringHandle Wraps a file object around a string.
12 SGMLHandle File object that automatically strips SGML tags from data.
14 SGMLStripper Object that strips SGML. This is now considered OBSOLETE, and
15 is likely to be deprecated in a future release of Biopython,
24 """A Python handle that adds functionality for saving lines.
26 Saves lines in a LIFO fashion.
29 saveline Save a line to be returned next time.
30 peekline Peek at the next line without consuming it.
33 def __init__(self, handle):
41 next = self.readline()
46 def readlines(self, *args, **keywds):
47 lines = self._saved + self._handle.readlines(*args,**keywds)
51 def readline(self, *args, **keywds):
53 line = self._saved.pop(0)
55 line = self._handle.readline(*args,**keywds)
58 def read(self, size=-1):
60 saved = "".join(self._saved)
64 while size > 0 and self._saved:
65 if len(self._saved[0]) <= size:
66 size = size - len(self._saved[0])
67 saved = saved + self._saved.pop(0)
69 saved = saved + self._saved[0][:size]
70 self._saved[0] = self._saved[0][size:]
72 return saved + self._handle.read(size)
74 def saveline(self, line):
76 self._saved = [line] + self._saved
82 line = self._handle.readline()
87 lengths = map(len, self._saved)
88 sum = reduce(lambda x, y: x+y, lengths, 0)
89 return self._handle.tell() - sum
91 def seek(self, *args):
93 self._handle.seek(*args)
def __getattr__(self, attr):
    """Fall back to the wrapped handle for attributes not found on self."""
    # Python only calls __getattr__ after normal attribute lookup has
    # failed, so anything defined on this object takes precedence over
    # the wrapped handle.
    wrapped = self._handle
    return getattr(wrapped, attr)
101 def __exit__(self, type, value, traceback):
105 # I could make this faster by using cStringIO.
106 # However, cStringIO (in v1.52) does not implement the
108 StringHandle = StringIO.StringIO
113 """A Python handle that automatically strips SGML tags from data (OBSOLETE).
115 This module is now considered to be obsolete, and is likely to be
116 deprecated in a future release of Biopython, and later removed.
def __init__(self, handle):
    """SGMLHandle(handle)

    handle is a file handle to SGML-formatted data.

    """
    # The stripper instance is kept for the lifetime of this handle and is
    # used by read, readline and readlines to clean the data they return.
    stripper = SGMLStripper()
    self._handle = handle
    self._stripper = stripper
def read(self, *args, **keywds):
    """S.read(...) -> string

    Read from the wrapped handle, forwarding all arguments unchanged, and
    return the data after passing it through self._stripper.

    """
    raw_text = self._handle.read(*args, **keywds)
    stripped_text = self._stripper.strip(raw_text)
    return stripped_text
def readline(self, *args, **keywds):
    """S.readline(...) -> string

    Read one line from the wrapped handle, forwarding all arguments
    unchanged, and return it after passing it through self._stripper.

    """
    raw_line = self._handle.readline(*args, **keywds)
    stripped_line = self._stripper.strip(raw_line)
    return stripped_line
def readlines(self, *args, **keywds):
    """S.readlines(...) -> list of strings

    Read lines from the wrapped handle, forwarding all arguments
    unchanged, and return them with each line passed through
    self._stripper.

    """
    lines = self._handle.readlines(*args, **keywds)
    # Strip each line itself.  Handing the builtin ``str`` type to strip()
    # instead of the line never strips anything and fails as soon as
    # strip() tries to index its argument.
    lines = [self._stripper.strip(line) for line in lines]
    return lines
def __getattr__(self, attr):
    """Fall back to the wrapped handle for attributes not found on self."""
    # Only reached when normal attribute lookup fails, so the read,
    # readline and readlines methods defined on this object still win
    # over the wrapped handle's own versions.
    wrapped = self._handle
    return getattr(wrapped, attr)
146 class MyParser(sgmllib.SGMLParser):
148 sgmllib.SGMLParser.__init__(self)
def handle_data(self, data):
    """Append data to the text accumulated so far in self.data."""
    # sgmllib calls handle_data for the character data it parses; markup
    # goes to other callbacks, so only the text ends up in self.data.
    collected = self.data
    self.data = collected + data
154 self._parser = SGMLStripper.MyParser()
156 def strip(self, str):
157 """S.strip(str) -> string
159 Strip the SGML tags from str.
162 if not str: # empty string, don't do anything.
164 # I need to make sure that I don't return an empty string if
165 # the buffer is not empty. This can happen if there's a newline
166 # character embedded within a tag. Thus, I'll first check to
167 # see if the last character is a newline. If it is, and it's stripped
168 # away, I'll add it back.
169 is_newline = str[-1] in ['\n', '\r']
171 self._parser.data = '' # clear the parser's data (don't reset)
172 self._parser.feed(str)
173 if self._parser.data:
174 str = self._parser.data