2 * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
3 * Copyright (C) $$Year-Rel$$ The Jalview Authors
5 * This file is part of Jalview.
7 * Jalview is free software: you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation, either version 3
10 * of the License, or (at your option) any later version.
12 * Jalview is distributed in the hope that it will be useful, but
13 * WITHOUT ANY WARRANTY; without even the implied warranty
14 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15 * PURPOSE. See the GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
19 * The Jalview Authors are detailed in the 'AUTHORS' file.
23 import jalview.datamodel.Sequence;
24 import jalview.datamodel.SequenceI;
25 import jalview.util.Format;
26 import jalview.util.MessageManager;
28 import java.io.IOException;
29 import java.util.Hashtable;
30 import java.util.StringTokenizer;
31 import java.util.Vector;
// PfamFile: an AlignFile subclass whose parse() assembles sequences from
// space-delimited "identifier sequence" lines and whose print(...) methods
// write one padded "identifier sequence" line per sequence.
33 public class PfamFile extends AlignFile
// Constructor taking two strings (inFile, type); declared to throw IOException.
// NOTE(review): presumably forwards both arguments to AlignFile - confirm.
40 public PfamFile(String inFile, String type) throws IOException
// Constructor taking an existing FileParse source; declared to throw
// IOException.
// NOTE(review): presumably forwards the source to AlignFile - confirm.
45 public PfamFile(FileParse source) throws IOException
// Initialisation hook.
// NOTE(review): presumably delegates to AlignFile.initData() - confirm.
50 public void initData()
// Reads the input one line at a time through nextLine() and builds one
// sequence per identifier.
//
// A text buffer is kept per identifier in seqhash, and headers lists each
// identifier once, in the order it was first seen. After the read loop the
// buffers are turned into Sequence objects and appended to seqs.
//
// Declared to throw IOException; the method itself throws one carrying the
// message looked up under "exception.pfam_no_sequences_found".
55 public void parse() throws IOException
// seqhash: identifier -> StringBuffer holding the text gathered so far.
60 Hashtable seqhash = new Hashtable();
// headers: identifiers in first-seen order (duplicates are not added).
61 Vector headers = new Vector();
// Loop until nextLine() returns null.
63 while ((line = nextLine()) != null)
// Skip lines whose first character is a space ...
65 if (line.indexOf(" ") != 0)
// ... and lines whose first character is '#'.
67 if (line.indexOf("#") != 0)
69 // TODO: verify pfam format requires spaces and not tab characters -
70 // if not upgrade to use stevesoft regex and look for whitespace.
// Only the space character is a delimiter here, so a tab stays inside a token.
71 StringTokenizer str = new StringTokenizer(line, " ");
// NOTE(review): presumably id is set from the first token inside this
// check - confirm where id is assigned.
74 if (str.hasMoreTokens())
// Reuse the buffer already stored for this identifier, if there is one ...
80 if (seqhash.containsKey(id))
82 tempseq = (StringBuffer) seqhash.get(id);
// ... otherwise start an empty buffer and register it under the identifier.
86 tempseq = new StringBuffer();
87 seqhash.put(id, tempseq);
// Record the identifier only the first time it is seen. Vector.contains is
// a linear scan, so this check costs O(number of identifiers) per line.
90 if (!(headers.contains(id)))
92 headers.addElement(id);
// Append just the next token; any further tokens on the line are not read.
94 if (str.hasMoreTokens())
96 tempseq.append(str.nextToken());
// The sequence count is the number of distinct identifiers collected.
103 this.noSeqs = headers.size();
// NOTE(review): presumably thrown only when no identifiers were collected -
// confirm the guarding condition.
107 throw new IOException(
108 MessageManager.getString("exception.pfam_no_sequences_found"));
// Walk the identifiers in first-seen order.
111 for (i = 0; i < headers.size(); i++)
// Only identifiers that have a stored buffer produce a sequence.
113 if (seqhash.get(headers.elementAt(i)) != null)
// Raise maxLength to this buffer's text length.
// NOTE(review): presumably the comparison is against the text's length() -
// confirm the rest of the condition.
115 if (maxLength < seqhash.get(headers.elementAt(i)).toString()
118 maxLength = seqhash.get(headers.elementAt(i)).toString().length();
// Build the Sequence from the identifier text via parseId(), give it the
// text stored for that identifier, then append it to seqs.
121 Sequence newSeq = parseId(headers.elementAt(i).toString());
122 newSeq.setSequence(seqhash.get(headers.elementAt(i).toString())
124 seqs.addElement(newSeq);
// Otherwise report the identifier on System.err.
128 System.err.println("PFAM File reader: Can't find sequence for "
129 + headers.elementAt(i));
// Formats the given sequences as text, one line per sequence: the identifier
// column first, then the residues.
//
// s: sequences to write; both loops stop at the end of the array or at the
// first null entry.
// Returns the accumulated text as a String.
134 public String print(SequenceI[] s)
136 StringBuffer out = new StringBuffer("");
// First pass: measure the longest sequence (max) and the longest printed
// identifier (maxid).
143 while ((i < s.length) && (s[i] != null))
145 String tmp = printId(s[i]);
147 if (s[i].getSequence().length > max)
149 max = s[i].getSequence().length;
152 if (tmp.length() > maxid)
154 maxid = tmp.length();
// Second pass: emit the identifier column followed by the residues.
167 while ((j < s.length) && (s[j] != null))
// The identifier plus one trailing space is formatted with "%-<maxid>s".
// NOTE(review): presumably jalview.util.Format applies printf-style
// left-justified padding to width maxid - confirm.
169 out.append(new Format("%-" + maxid + "s").form(printId(s[j]) + " "));
171 out.append(s[j].getSequenceAsString());
178 return out.toString();
// Formats this object's own sequences by passing getSeqsAsArray() to
// print(SequenceI[]).
181 public String print()
183 return print(getSeqsAsArray());