2 * Jalview - A Sequence Alignment Editor and Viewer (Version 2.5)
3 * Copyright (C) 2010 J Procter, AM Waterhouse, G Barton, M Clamp, S Searle
5 * This file is part of Jalview.
7 * Jalview is free software: you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version.
11 * Jalview is distributed in the hope that it will be useful, but
12 * WITHOUT ANY WARRANTY; without even the implied warranty
13 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
14 * PURPOSE. See the GNU General Public License for more details.
16 * You should have received a copy of the GNU General Public License along with Jalview. If not, see <http://www.gnu.org/licenses/>.
23 import jalview.datamodel.*;
24 import jalview.util.*;
/**
 * Alignment file handler for PFAM-formatted sequence data, built on
 * AlignFile. It reads sequences in parse() and writes them back out as text
 * in print().
 */
26 public class PfamFile extends AlignFile
/**
 * Creates a PfamFile for the given input.
 *
 * NOTE(review): the constructor body is not visible in this excerpt;
 * presumably both arguments are handed on to the AlignFile constructor -
 * confirm.
 *
 * @param inFile the input to read (presumably a file name or similar
 *          locator; verify against AlignFile)
 * @param type presumably the kind of source that inFile names - TODO confirm
 * @throws IOException declared by the signature; the code that raises it is
 *           not visible here
 */
33 public PfamFile(String inFile, String type) throws IOException
/**
 * Creates a PfamFile from an existing FileParse source.
 *
 * NOTE(review): the constructor body is not visible in this excerpt;
 * presumably the source is handed on to the AlignFile constructor - confirm.
 *
 * @param source the FileParse object to read from
 * @throws IOException declared by the signature; the code that raises it is
 *           not visible here
 */
38 public PfamFile(FileParse source) throws IOException
/**
 * Initialisation hook for this file object.
 *
 * NOTE(review): the method body is not visible in this excerpt; presumably
 * it sets up the state inherited from AlignFile before parsing - confirm.
 */
43 public void initData()
/**
 * Parses PFAM-style alignment text into sequences.
 *
 * Lines are pulled from nextLine() until it returns null. Each line that is
 * processed is split on spaces; sequence text is accumulated per id in a
 * Hashtable, and each id is recorded once, in first-seen order, in a Vector.
 * Afterwards a Sequence is built for every recorded id that has sequence
 * text and is added to seqs.
 *
 * NOTE(review): several statements of this method (local declarations, the
 * assignment of id, the guard around the throw, the tails of two wrapped
 * expressions) are not visible in this excerpt; the comments below describe
 * only the lines that are visible.
 *
 * @throws IOException with the message "No sequences found (PFAM input)";
 *           presumably raised when no ids were collected - confirm against
 *           the guarding condition
 */
48 public void parse() throws IOException
// id -> StringBuffer holding the sequence text gathered so far for that id
53 Hashtable seqhash = new Hashtable();
// ids, each stored once, in the order they were first encountered
54 Vector headers = new Vector();
// keep reading until nextLine() signals the end of input with null
56 while ((line = nextLine()) != null)
// true when the line does not begin with a space
58 if (line.indexOf(" ") != 0)
// true when the line does not begin with '#'
60 if (line.indexOf("#") != 0)
62 // TODO: verify pfam format requires spaces and not tab characters -
63 // if not upgrade to use stevesoft regex and look for whitespace.
// split on the space character only; tabs are not treated as delimiters
64 StringTokenizer str = new StringTokenizer(line, " ");
// presumably the first token is taken as the id here - the assignment
// itself is not visible in this excerpt
67 if (str.hasMoreTokens())
// reuse the buffer already registered for this id, if there is one
73 if (seqhash.containsKey(id))
75 tempseq = (StringBuffer) seqhash.get(id);
// otherwise start a fresh buffer and register it under the id
79 tempseq = new StringBuffer();
80 seqhash.put(id, tempseq);
// remember the id only the first time it is seen
83 if (!(headers.contains(id)))
85 headers.addElement(id);
// append the next token on the line to this id's accumulated text
87 if (str.hasMoreTokens())
89 tempseq.append(str.nextToken());
// number of distinct ids collected
96 this.noSeqs = headers.size();
100 throw new IOException("No sequences found (PFAM input)");
// build one Sequence per recorded id, in first-seen order
103 for (i = 0; i < headers.size(); i++)
105 if (seqhash.get(headers.elementAt(i)) != null)
// track the longest accumulated sequence text in maxLength
107 if (maxLength < seqhash.get(headers.elementAt(i)).toString()
110 maxLength = seqhash.get(headers.elementAt(i)).toString().length();
// parseId turns the header string into a Sequence, which is then given
// the accumulated text and added to seqs
113 Sequence newSeq = parseId(headers.elementAt(i).toString());
114 newSeq.setSequence(seqhash.get(headers.elementAt(i).toString())
116 seqs.addElement(newSeq);
// reports on stderr an id for which no sequence text was stored
120 System.err.println("PFAM File reader: Can't find sequence for "
121 + headers.elementAt(i));
/**
 * Renders the given sequences as text, one line per sequence.
 *
 * Each output line is the result of printId for the sequence plus a single
 * space, passed through a Format built from {@code "%-" + maxid + "s"},
 * followed by the sequence string and a newline. Both loops stop at the end
 * of the array or at the first null element, whichever comes first.
 *
 * NOTE(review): the declarations and initial values of i, j, max and maxid,
 * the loop increments, and any adjustment of maxid between the two loops
 * are not visible in this excerpt.
 *
 * @param s sequences to write; scanning stops at the first null entry
 * @return the concatenated text for all sequences written
 */
126 public String print(SequenceI[] s)
128 StringBuffer out = new StringBuffer("");
// first pass: measure the sequences and their printed ids
135 while ((i < s.length) && (s[i] != null))
137 String tmp = printId(s[i]);
// max tracks the largest length of the array returned by getSequence()
139 if (s[i].getSequence().length > max)
141 max = s[i].getSequence().length;
// maxid tracks the longest printed id
144 if (tmp.length() > maxid)
146 maxid = tmp.length();
// second pass: emit one line per sequence
159 while ((j < s.length) && (s[j] != null))
// id column; presumably Format left-justifies to maxid characters in the
// printf manner - verify against jalview.util.Format
161 out.append(new Format("%-" + maxid + "s").form(printId(s[j]) + " "));
163 out.append(s[j].getSequenceAsString() + "\n");
169 return out.toString();
/**
 * Renders this object's own sequences by passing the array returned by
 * getSeqsAsArray() to print(SequenceI[]).
 *
 * @return the text produced by print(SequenceI[]) for that array
 */
172 public String print()
174 return print(getSeqsAsArray());