2 * Jalview - A Sequence Alignment Editor and Viewer (Development Version 2.4.1)
3 * Copyright (C) 2009 AM Waterhouse, J Procter, G Barton, M Clamp, S Searle
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License
7 * as published by the Free Software Foundation; either version 2
8 * of the License, or (at your option) any later version.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
24 import jalview.datamodel.*;
25 import jalview.util.*;
/**
 * Alignment file handler for PFAM-style input: {@code parse()} reads sequences
 * from the input and the {@code print} methods write sequences back out as text.
 * Builds on {@code AlignFile}.
 */
27 public class PfamFile extends AlignFile
/**
 * Creates a PfamFile from an input specification and a type string.
 *
 * NOTE(review): the constructor body is not visible in this view; presumably
 * it hands both arguments to the AlignFile constructor - confirm.
 *
 * @param inFile the input to read (exact meaning depends on {@code type} - TODO confirm against AlignFile)
 * @param type how {@code inFile} is to be interpreted - TODO confirm accepted values
 * @throws IOException declared on the signature
 */
34 public PfamFile(String inFile, String type) throws IOException
/**
 * Creates a PfamFile that reads from an existing FileParse source.
 *
 * NOTE(review): the constructor body is not visible in this view; presumably
 * it passes {@code source} to the AlignFile constructor - confirm.
 *
 * @param source the already-opened input to read from
 * @throws IOException declared on the signature
 */
39 public PfamFile(FileParse source) throws IOException
/**
 * Initialises this reader's data before parsing.
 *
 * NOTE(review): the method body is not visible in this view; presumably it
 * delegates to the AlignFile implementation - confirm.
 */
44 public void initData()
/**
 * Reads the input line by line and builds one Sequence per distinct id.
 *
 * Lines that start with a space or with '#' are ignored. Every other line is
 * split on the space character; residues found on the line are appended to the
 * buffer kept for that line's id, so an id occurring on several lines ends up
 * with its pieces concatenated. Ids are processed in the order in which they
 * were first seen.
 *
 * When the loop over ids finishes, {@code noSeqs} holds the number of distinct
 * ids, {@code maxLength} has been raised to the longest sequence length found,
 * and each built sequence has been added to {@code seqs}.
 *
 * NOTE(review): this view omits some lines of the method (braces, a few
 * declarations, the assignment of {@code id}, the condition guarding the
 * throw, and continuation lines). The comments below describe only the
 * statements that are visible.
 *
 * @throws IOException with the message "No sequences found (PFAM input)";
 *         presumably raised when no ids were collected - confirm against the
 *         guarding condition, which is not visible here
 */
49 public void parse() throws IOException
// seqhash maps an id to the StringBuffer collecting its residues;
// headers remembers each id once, in first-seen order.
54 Hashtable seqhash = new Hashtable();
55 Vector headers = new Vector();
// Keep reading until nextLine() returns null.
57 while ((line = nextLine()) != null)
// Ignore lines whose first character is a space.
59 if (line.indexOf(" ") != 0)
// Ignore lines whose first character is '#'.
61 if (line.indexOf("#") != 0)
63 // TODO: verify pfam format requires spaces and not tab characters - if not upgrade to use stevesoft regex and look for whitespace.
// Only the space character acts as a delimiter here, so a tab-separated
// line would come back as a single token.
64 StringTokenizer str = new StringTokenizer(line, " ");
// Lines without any token (e.g. empty lines) fall through untouched.
67 if (str.hasMoreTokens())
// Reuse the buffer of an id that has been seen before ...
73 if (seqhash.containsKey(id))
75 tempseq = (StringBuffer) seqhash.get(id);
// ... otherwise register a fresh, empty buffer for it.
79 tempseq = new StringBuffer();
80 seqhash.put(id, tempseq);
// Record the id only on its first appearance so the order is stable.
83 if (!(headers.contains(id)))
85 headers.addElement(id);
// Only the next token is appended; a line carrying nothing after the id
// leaves the buffer unchanged.
87 if (str.hasMoreTokens())
89 tempseq.append(str.nextToken());
// The sequence count is the number of distinct ids collected.
96 this.noSeqs = headers.size();
100 throw new IOException("No sequences found (PFAM input)");
// Turn every collected id into a Sequence, in first-seen order.
103 for (i = 0; i < headers.size(); i++)
105 if (seqhash.get(headers.elementAt(i)) != null)
// Track the longest sequence length in maxLength.
107 if (maxLength < seqhash.get(headers.elementAt(i)).toString()
110 maxLength = seqhash.get(headers.elementAt(i)).toString().length();
// parseId builds the Sequence from the id text; the accumulated residues
// are then set on it and the sequence is added to seqs.
113 Sequence newSeq = parseId(headers.elementAt(i).toString());
114 newSeq.setSequence(seqhash.get(headers.elementAt(i).toString())
116 seqs.addElement(newSeq);
// An id with no stored buffer is reported on stderr rather than added.
120 System.err.println("PFAM File reader: Can't find sequence for "
121 + headers.elementAt(i));
/**
 * Renders the given sequences as text, one line per sequence: the id from
 * {@code printId} followed by a space, passed through a {@code "%-<maxid>s"}
 * Format pattern, then the sequence string and a newline.
 *
 * Both loops stop at the end of the array or at the first null entry,
 * whichever comes first.
 *
 * NOTE(review): this view omits some lines of the method (braces, the
 * declarations and increments of the counters, and whatever sits between the
 * two loops). The comments below describe only the visible statements.
 *
 * @param s the sequences to write; entries after the first null are not visited
 * @return the formatted text
 */
126 public String print(SequenceI[] s)
128 StringBuffer out = new StringBuffer("");
// First pass: measure the longest sequence (max) and the widest id (maxid).
135 while ((i < s.length) && (s[i] != null))
137 String tmp = printId(s[i]);
// NOTE(review): no statement reading max is visible in this view.
139 if (s[i].getSequence().length > max)
141 max = s[i].getSequence().length;
144 if (tmp.length() > maxid)
146 maxid = tmp.length();
// Second pass: emit one line per sequence.
159 while ((j < s.length) && (s[j] != null))
// The id plus a trailing space is formatted with "%-<maxid>s"; presumably
// printf-style left-justified padding to maxid columns - confirm against
// jalview.util.Format.
161 out.append(new Format("%-" + maxid + "s").form(printId(s[j]) + " "));
163 out.append(s[j].getSequenceAsString() + "\n");
169 return out.toString();
/**
 * Formats the sequences returned by {@code getSeqsAsArray()} by delegating to
 * {@code print(SequenceI[])}.
 *
 * @return the formatted text
 */
172 public String print()
174 return print(getSeqsAsArray());