2 * Jalview - A Sequence Alignment Editor and Viewer (Version 2.7)
3 * Copyright (C) 2011 J Procter, AM Waterhouse, J Engelhardt, LM Lui, G Barton, M Clamp, S Searle
5 * This file is part of Jalview.
7 * Jalview is free software: you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version.
11 * Jalview is distributed in the hope that it will be useful, but
12 * WITHOUT ANY WARRANTY; without even the implied warranty
13 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
14 * PURPOSE. See the GNU General Public License for more details.
16 * You should have received a copy of the GNU General Public License along with Jalview. If not, see <http://www.gnu.org/licenses/>.
23 import javax.xml.parsers.ParserConfigurationException;
25 import org.xml.sax.SAXException;
27 import fr.orsay.lri.varna.exceptions.ExceptionFileFormatOrSyntax;
28 import fr.orsay.lri.varna.exceptions.ExceptionLoadingFailed;
29 import fr.orsay.lri.varna.exceptions.ExceptionPermissionDenied;
30 import fr.orsay.lri.varna.exceptions.ExceptionUnmatchedClosingParentheses;
32 import jalview.datamodel.*;
33 import jalview.util.*;
35 public class PfamFile extends AlignFile
// Constructor taking an input name and a type string; the body is not
// visible in this excerpt (presumably it forwards both to AlignFile - confirm).
// NOTE(review): the throws clause lists XML-parser and VARNA exceptions in
// addition to IOException. Nothing visible in this class raises them, so they
// are presumably propagated from the superclass constructor - confirm they
// are still required before narrowing.
42 public PfamFile(String inFile, String type) throws IOException, ExceptionFileFormatOrSyntax, ParserConfigurationException, SAXException, ExceptionPermissionDenied, ExceptionLoadingFailed, InterruptedException, ExceptionUnmatchedClosingParentheses
// Constructor taking an already-opened FileParse source; the body is not
// visible in this excerpt (presumably it forwards source to AlignFile - confirm).
// NOTE(review): same wide throws clause as the (String, String) constructor;
// the XML-parser and VARNA exceptions are presumably inherited from the
// superclass constructor - confirm.
47 public PfamFile(FileParse source) throws IOException, ExceptionFileFormatOrSyntax, ParserConfigurationException, SAXException, ExceptionPermissionDenied, ExceptionLoadingFailed, InterruptedException, ExceptionUnmatchedClosingParentheses
// Initialisation hook; the body is not visible in this excerpt
// (presumably it delegates to AlignFile.initData() - confirm).
52 public void initData()
/**
 * Reads the input line by line via nextLine(), accumulates the residues
 * for each sequence id, and then turns every collected id into a Sequence
 * that is appended to seqs.
 *
 * Lines whose first character is a space or a hash sign appear to be
 * ignored (the enclosing braces are not visible in this excerpt, so the
 * exact nesting should be confirmed). Remaining lines are split on single
 * space characters only. Text for the same id seen on several lines is
 * concatenated, and ids keep the order in which they were first seen.
 *
 * Side effects visible here: sets noSeqs, may raise maxLength, and adds
 * to seqs.
 *
 * @throws IOException with the message shown below; the guard condition is
 *         not visible in this excerpt (presumably raised when no ids were
 *         collected - confirm). nextLine() may presumably also throw it.
 */
57 public void parse() throws IOException
// seqhash: id -> StringBuffer holding the residues gathered so far.
62 Hashtable seqhash = new Hashtable();
// headers: ids in first-seen order; each id is stored once.
63 Vector headers = new Vector();
// Consume input until nextLine() returns null.
65 while ((line = nextLine()) != null)
// True when the line does not begin with a space.
67 if (line.indexOf(" ") != 0)
// True when the line does not begin with a hash sign.
69 if (line.indexOf("#") != 0)
71 // TODO: verify pfam format requires spaces and not tab characters -
72 // if not upgrade to use stevesoft regex and look for whitespace.
// Only the space character is a delimiter; tabs stay inside tokens.
73 StringTokenizer str = new StringTokenizer(line, " ");
// NOTE(review): the assignment of id is not visible in this excerpt;
// presumably it is the first token of the line - confirm.
76 if (str.hasMoreTokens())
// Reuse the buffer of an id already seen so later lines extend it...
82 if (seqhash.containsKey(id))
84 tempseq = (StringBuffer) seqhash.get(id);
// ...otherwise start a fresh buffer and register it under this id.
88 tempseq = new StringBuffer();
89 seqhash.put(id, tempseq);
// Record the id once so output order follows first appearance.
92 if (!(headers.contains(id)))
94 headers.addElement(id);
// Append the next token, if the line has one, to the buffer for this id.
96 if (str.hasMoreTokens())
98 tempseq.append(str.nextToken());
// Number of distinct ids collected.
105 this.noSeqs = headers.size();
// NOTE(review): the condition guarding this throw is not visible here.
109 throw new IOException("No sequences found (PFAM input)");
// Second phase: build one Sequence per collected id, in header order.
112 for (i = 0; i < headers.size(); i++)
114 if (seqhash.get(headers.elementAt(i)) != null)
// Track the longest accumulated sequence text in maxLength.
116 if (maxLength < seqhash.get(headers.elementAt(i)).toString()
119 maxLength = seqhash.get(headers.elementAt(i)).toString().length();
// parseId (defined elsewhere) builds the Sequence from the id text; the
// residues then come from the buffer stored for that id.
122 Sequence newSeq = parseId(headers.elementAt(i).toString());
123 newSeq.setSequence(seqhash.get(headers.elementAt(i).toString())
125 seqs.addElement(newSeq);
// Diagnostic on System.err for an id with no stored buffer (presumably the
// else branch of the null check above - confirm).
129 System.err.println("PFAM File reader: Can't find sequence for "
130 + headers.elementAt(i));
/**
 * Formats the given sequences as text and returns the result.
 *
 * Works in two passes over s. The first measures the longest id (as
 * produced by printId) and the longest sequence. The second appends, for
 * each sequence, its id padded to a common width followed by its residues.
 * Both passes stop at the end of the array or at the first null element.
 *
 * Several lines of this method are not visible in this excerpt (counter
 * setup, any adjustment of maxid between the passes, line terminators),
 * so the exact layout should be confirmed against the full source.
 *
 * @param s sequences to format; elements after the first null are ignored
 * @return the accumulated text
 */
135 public String print(SequenceI[] s)
137 StringBuffer out = new StringBuffer("");
// Pass 1: find the widest id and the longest sequence.
144 while ((i < s.length) && (s[i] != null))
146 String tmp = printId(s[i]);
// max follows the longest residue array; its later use is not visible here.
148 if (s[i].getSequence().length > max)
150 max = s[i].getSequence().length;
// maxid follows the longest printed id and sets the id column width below.
153 if (tmp.length() > maxid)
155 maxid = tmp.length();
// Pass 2: emit each id and its residues.
168 while ((j < s.length) && (s[j] != null))
// Format is jalview.util.Format; the pattern is presumably printf-style, so
// the id plus one trailing space is left-justified in a field of maxid
// characters - confirm against Format.
170 out.append(new Format("%-" + maxid + "s").form(printId(s[j]) + " "));
172 out.append(s[j].getSequenceAsString());
179 return out.toString();
/**
 * Formats the sequences held by this object by passing getSeqsAsArray()
 * to print(SequenceI[]).
 *
 * @return the formatted text
 */
182 public String print()
184 return print(getSeqsAsArray());