2 * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
3 * Copyright (C) $$Year-Rel$$ The Jalview Authors
5 * This file is part of Jalview.
7 * Jalview is free software: you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation, either version 3
10 * of the License, or (at your option) any later version.
12 * Jalview is distributed in the hope that it will be useful, but
13 * WITHOUT ANY WARRANTY; without even the implied warranty
14 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15 * PURPOSE. See the GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
19 * The Jalview Authors are detailed in the 'AUTHORS' file.
23 import jalview.datamodel.Sequence;
24 import jalview.datamodel.SequenceI;
25 import jalview.util.Format;
26 import jalview.util.MessageManager;
28 import java.io.IOException;
29 import java.util.ArrayList;
30 import java.util.HashMap;
// Alignment file reader and writer built on AlignFile. parse() reads lines of
// the form: id, then a space or tab, then sequence text, and accumulates the
// text per id. print() writes one line per sequence with the id padded to a
// common width.
// NOTE(review): the PFAM format name is taken from the class name and the
// message strings below - confirm against the format documentation.
32 public class PfamFile extends AlignFile
// Constructor taking an input location and a type string.
// NOTE(review): presumably forwards both arguments to AlignFile - confirm.
39 public PfamFile(String inFile, String type) throws IOException
// Constructor taking an already prepared FileParse source.
// NOTE(review): presumably forwards the source to AlignFile - confirm.
44 public PfamFile(FileParse source) throws IOException
// NOTE(review): presumably delegates to the AlignFile initialisation - confirm.
50 public void initData()
// Reads the input line by line via nextLine() and builds one Sequence per
// distinct id. Each data line is split at the first space (or first tab) into
// an id and a sequence fragment; fragments that share an id are appended in
// reading order. Declared to throw IOException; one is raised explicitly with
// the exception.pfam_no_sequences_found message.
56 public void parse() throws IOException
// id -> sequence text accumulated so far for that id
61 HashMap<String, StringBuffer> seqhash = new HashMap<String, StringBuffer>();
// ids, later iterated by index to build the Sequence objects and to set noSeqs
62 ArrayList<String> headers = new ArrayList<String>();
// selects tab instead of space as the id separator
// NOTE(review): presumably switched on after the first line without a space - confirm
63 boolean useTabs = false;
// main read loop: nextLine() returns null at end of input
65 while ((line = nextLine()) != null)
// line begins with a hash character (#)
// NOTE(review): presumably such lines are skipped as comments - confirm
67 if (line.indexOf("#") == 0)
72 // locate first space or (if already checked), tab
// tab lookup used when splitting on tabs
75 spces = line.indexOf("\t");
// default lookup: first space on the line
79 spces = line.indexOf(" ");
80 // check to see if we ought to split on tabs instead.
// no space found and tabs not tried yet: fall back to the first tab
81 if (!useTabs && spces == -1)
84 spces = line.indexOf("\t");
89 // no sequence data to split on
// everything before the separator is the sequence id
92 String id = line.substring(0, spces);
// reuse the buffer for an id seen before, otherwise register a fresh one
95 if (seqhash.containsKey(id))
97 tempseq = seqhash.get(id);
101 tempseq = new StringBuffer();
102 seqhash.put(id, tempseq);
// NOTE(review): presumably records the id in headers on first sight - confirm.
// ArrayList.contains is a linear scan, so this check costs O(ids) per line.
105 if (!(headers.contains(id)))
// append only when some text follows the separator; the fragment is trimmed
109 if (spces + 1 < line.length())
111 tempseq.append(line.substring(spces + 1).trim());
// number of sequences is the number of ids collected in headers
115 this.noSeqs = headers.size();
// NOTE(review): presumably raised only when no ids were collected - confirm
119 throw new IOException(
120 MessageManager.getString("exception.pfam_no_sequences_found"));
// second pass: turn each collected id and its text into a Sequence
123 for (i = 0; i < headers.size(); i++)
125 if (seqhash.get(headers.get(i)) != null)
// keep maxLength at the longest accumulated sequence text seen so far
127 if (maxLength < seqhash.get(headers.get(i)).toString().length())
129 maxLength = seqhash.get(headers.get(i)).toString().length();
// parseId() builds the Sequence from the id text; its sequence is then set
// from the accumulated buffer and the result is added to seqs
132 Sequence newSeq = parseId(headers.get(i).toString());
133 newSeq.setSequence(seqhash.get(headers.get(i).toString())
135 seqs.addElement(newSeq);
// diagnostic written to standard error
// NOTE(review): presumably the branch taken when the id has no buffer - confirm
139 System.err.println("PFAM File reader: Can't find sequence for "
// Renders the given sequences as text: for each entry, the printed id plus a
// space formatted to a common width, followed by the sequence string.
// Both loops stop at the end of the array or at the first null entry.
// Returns the accumulated text.
145 public String print(SequenceI[] s)
147 StringBuffer out = new StringBuffer("");
// first pass: find the longest sequence (max) and the longest printed id (maxid)
154 while ((i < s.length) && (s[i] != null))
156 String tmp = printId(s[i]);
// NOTE(review): no later use of max is visible in this method - confirm it is needed
158 if (s[i].getSequence().length > max)
160 max = s[i].getSequence().length;
163 if (tmp.length() > maxid)
165 maxid = tmp.length();
// second pass: emit the id column and the sequence for every entry
178 while ((j < s.length) && (s[j] != null))
// NOTE(review): the pattern looks printf-style, left-justified to maxid - confirm
// against jalview.util.Format. A new Format is built and printId is called again
// on every iteration.
180 out.append(new Format("%-" + maxid + "s").form(printId(s[j]) + " "));
182 out.append(s[j].getSequenceAsString());
189 return out.toString();
// Convenience overload: renders the sequences returned by getSeqsAsArray().
193 public String print()
195 return print(getSeqsAsArray());