2 * Jalview - A Sequence Alignment Editor and Viewer
\r
3 * Copyright (C) 2005 AM Waterhouse, J Procter, G Barton, M Clamp, S Searle
\r
5 * This program is free software; you can redistribute it and/or
\r
6 * modify it under the terms of the GNU General Public License
\r
7 * as published by the Free Software Foundation; either version 2
\r
8 * of the License, or (at your option) any later version.
\r
10 * This program is distributed in the hope that it will be useful,
\r
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
\r
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
\r
13 * GNU General Public License for more details.
\r
15 * You should have received a copy of the GNU General Public License
\r
16 * along with this program; if not, write to the Free Software
\r
17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
\r
24 import jalview.datamodel.*;
\r
25 import jalview.util.*;
\r
27 public class PfamFile
\r
/**
 * Creates a PFAM reader from a single string argument.
 * NOTE(review): whether inStr holds the alignment text itself or names a
 * source is not evident from this signature - confirm against the superclass.
 */
35 public PfamFile(String inStr)
\r
/**
 * Creates a PFAM reader by handing both arguments straight to the
 * superclass constructor.
 *
 * @param inFile forwarded unchanged to the superclass constructor
 * @param type forwarded unchanged to the superclass constructor
 */
40 public PfamFile(String inFile, String type)
\r
43 super(inFile, type);
\r
/**
 * Initialisation hook for this reader.
 * NOTE(review): presumably prepares the reader's state before parse() runs -
 * confirm against the superclass contract.
 */
46 public void initData()
\r
51 public void parse() throws IOException
\r
56 Hashtable seqhash = new Hashtable();
\r
57 Vector headers = new Vector();
\r
59 while ( (line = nextLine()) != null)
\r
61 if (line.indexOf(" ") != 0)
\r
63 if (line.indexOf("#") != 0)
\r
65 StringTokenizer str = new StringTokenizer(line, " ");
\r
68 if (str.hasMoreTokens())
\r
70 id = str.nextToken();
\r
72 StringBuffer tempseq;
\r
74 if (seqhash.containsKey(id))
\r
76 tempseq = (StringBuffer) seqhash.get(id);
\r
80 tempseq = new StringBuffer();
\r
81 seqhash.put(id, tempseq);
\r
84 if (! (headers.contains(id)))
\r
86 headers.addElement(id);
\r
89 tempseq.append(str.nextToken());
\r
95 this.noSeqs = headers.size();
\r
99 throw new IOException("No sequences found (PFAM input)");
\r
102 for (i = 0; i < headers.size(); i++)
\r
104 if (seqhash.get(headers.elementAt(i)) != null)
\r
106 if (maxLength < seqhash.get(headers.elementAt(i)).toString()
\r
109 maxLength = seqhash.get(headers.elementAt(i)).toString()
\r
114 Sequence newSeq = parseId(headers.elementAt(i).toString());
\r
115 newSeq.setSequence( seqhash.get(headers.elementAt(i).toString()).toString());
\r
116 seqs.addElement(newSeq);
\r
118 if (!isValidProteinSequence(newSeq.getSequence()))
\r
120 throw new IOException(AppletFormatAdapter.INVALID_CHARACTERS
\r
121 +" : "+ newSeq.getName()
\r
122 +" : "+invalidCharacter);
\r
127 System.err.println("PFAM File reader: Can't find sequence for " +
\r
128 headers.elementAt(i));
\r
/**
 * Formats the given sequences as text, one line per sequence: the id as
 * produced by printId followed by a space, padded to a common width, then
 * the residues and a newline.
 *
 * Both loops stop at the end of the array or at the first null element,
 * whichever comes first.
 *
 * @param s sequences to write; entries after the first null are ignored
 * @return the accumulated text
 */
133 public String print(SequenceI[] s)
\r
135 StringBuffer out = new StringBuffer("");
\r
// First pass: measure the longest sequence (max) and the widest printed id (maxid).
142 while ( (i < s.length) && (s[i] != null))
\r
144 String tmp = printId(s[i]);
\r
// NOTE(review): max is only ever assigned in the statements visible here - confirm it is still used.
146 if (s[i].getSequence().length() > max)
\r
148 max = s[i].getSequence().length();
\r
151 if (tmp.length() > maxid)
\r
153 maxid = tmp.length();
\r
// Second pass: write one "<id> <residues>" line per sequence.
166 while ( (j < s.length) && (s[j] != null))
\r
// "%-<maxid>s" pads the id plus a trailing space to maxid columns; presumably
// left-justified as in printf - confirm against the Format class.
168 out.append(new Format("%-" + maxid + "s").form( printId(s[j])+" "));
\r
170 out.append(s[j].getSequence() + "\n");
\r
176 return out.toString();
\r
179 public String print()
\r
181 return print(getSeqsAsArray());
\r