/*
 * Jalview - A Sequence Alignment Editor and Viewer (Version 2.8)
 * Copyright (C) 2012 J Procter, AM Waterhouse, LM Lui, J Engelhardt, G Barton, M Clamp, S Searle
 *
 * This file is part of Jalview.
 *
 * Jalview is free software: you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version.
 *
 * Jalview is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty
 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
 * PURPOSE. See the GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along with Jalview. If not, see <http://www.gnu.org/licenses/>.
 */
24 import jalview.datamodel.*;
25 import jalview.util.*;
/**
 * Alignment file handler for PFAM-style text, built on AlignFile.
 * parse() reads "id residues" lines into Sequence objects and
 * print(SequenceI[]) writes each sequence as an id column followed by its
 * residues.
 */
27 public class PfamFile extends AlignFile
/**
 * Creates a PfamFile from an input location and a type string.
 * NOTE(review): the constructor body is not visible here; presumably it
 * delegates to the matching AlignFile constructor - confirm.
 *
 * @param inFile the input to read
 * @param type how inFile should be interpreted (semantics defined outside this class - confirm)
 * @throws IOException declared by the signature
 */
34 public PfamFile(String inFile, String type) throws IOException
/**
 * Creates a PfamFile on top of an existing FileParse source.
 * NOTE(review): the constructor body is not visible here; presumably it
 * delegates to the matching AlignFile constructor - confirm.
 *
 * @param source the already-opened input
 * @throws IOException declared by the signature
 */
39 public PfamFile(FileParse source) throws IOException
/**
 * Initialises parser state.
 * NOTE(review): the method body is not visible here; presumably it resets
 * the state inherited from AlignFile - confirm.
 */
44 public void initData()
49 public void parse() throws IOException
54 Hashtable seqhash = new Hashtable();
55 Vector headers = new Vector();
57 while ((line = nextLine()) != null)
59 if (line.indexOf(" ") != 0)
61 if (line.indexOf("#") != 0)
63 // TODO: verify pfam format requires spaces and not tab characters -
64 // if not upgrade to use stevesoft regex and look for whitespace.
65 StringTokenizer str = new StringTokenizer(line, " ");
68 if (str.hasMoreTokens())
74 if (seqhash.containsKey(id))
76 tempseq = (StringBuffer) seqhash.get(id);
80 tempseq = new StringBuffer();
81 seqhash.put(id, tempseq);
84 if (!(headers.contains(id)))
86 headers.addElement(id);
88 if (str.hasMoreTokens())
90 tempseq.append(str.nextToken());
97 this.noSeqs = headers.size();
101 throw new IOException("No sequences found (PFAM input)");
104 for (i = 0; i < headers.size(); i++)
106 if (seqhash.get(headers.elementAt(i)) != null)
108 if (maxLength < seqhash.get(headers.elementAt(i)).toString()
111 maxLength = seqhash.get(headers.elementAt(i)).toString().length();
114 Sequence newSeq = parseId(headers.elementAt(i).toString());
115 newSeq.setSequence(seqhash.get(headers.elementAt(i).toString())
117 seqs.addElement(newSeq);
121 System.err.println("PFAM File reader: Can't find sequence for "
122 + headers.elementAt(i));
// Formats the given sequences as text and returns the result as one String.
// Each sequence contributes its printId() text in a left-hand column
// followed by its residues from getSequenceAsString().
//
// NOTE(review): several statements of this method are not visible here (the
// declarations and increments of i and j, the declarations of max and maxid,
// and whatever sits between the two loops and after the residues are
// appended). In particular no line separator is appended in the visible
// code - confirm against the complete file before changing anything.
127 public String print(SequenceI[] s)
129 StringBuffer out = new StringBuffer("");
// First pass: walk the array up to its end or the first null entry and
// record the longest residue array (max) and the longest id text (maxid).
136 while ((i < s.length) && (s[i] != null))
138 String tmp = printId(s[i]);
140 if (s[i].getSequence().length > max)
142 max = s[i].getSequence().length;
145 if (tmp.length() > maxid)
147 maxid = tmp.length();
// Second pass over the same range: emit the id column, then the residues.
160 while ((j < s.length) && (s[j] != null))
// A single space is appended to the id before formatting with "%-<maxid>s".
// Presumably Format follows printf conventions, so '-' left-justifies and
// pads the id column to maxid characters - confirm against jalview.util.Format.
162 out.append(new Format("%-" + maxid + "s").form(printId(s[j]) + " "));
164 out.append(s[j].getSequenceAsString());
171 return out.toString();
174 public String print()
176 return print(getSeqsAsArray());