/*
 * Jalview - A Sequence Alignment Editor and Viewer
 * Copyright (C) 2005 AM Waterhouse, J Procter, G Barton, M Clamp, S Searle
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
 */
package jalview.io;

import java.io.*;
import java.util.*;

import jalview.datamodel.*;
import jalview.util.*;

/**
 * Reads and writes alignments in a simple PFAM-style text layout: each data
 * line holds a sequence id followed by a block of residues, separated by
 * spaces. An id may carry a <code>/start-end</code> suffix giving the
 * sequence's numbering.
 *
 * NOTE(review): <code>nextLine()</code>, <code>noSeqs</code>,
 * <code>maxLength</code>, <code>seqs</code>,
 * <code>isValidProteinSequence()</code> and <code>getSeqsAsArray()</code> are
 * not declared in this class; they are presumably inherited from
 * {@link AlignFile} - confirm there.
 */
public class PfamFile
    extends AlignFile
{
  /** Sequence names (ids with any parsed range suffix removed), in file order. */
  Vector ids;

  /**
   * Creates an empty reader/writer.
   */
  public PfamFile()
  {
  }

  /**
   * Creates a PfamFile from the given string; all handling is delegated to
   * the superclass constructor.
   *
   * @param inStr passed unchanged to the superclass
   */
  public PfamFile(String inStr)
  {
    super(inStr);
  }

  /**
   * Creates a PfamFile from the given source; all handling is delegated to
   * the superclass constructor.
   *
   * @param inFile passed unchanged to the superclass
   * @param type passed unchanged to the superclass
   * @throws IOException if the superclass constructor fails
   */
  public PfamFile(String inFile, String type)
      throws IOException
  {
    super(inFile, type);
  }

  /**
   * Resets parser state: runs the superclass initialisation and starts a
   * fresh, empty id list.
   */
  public void initData()
  {
    super.initData();
    ids = new Vector();
  }

  /**
   * Parses the input line by line into sequences.
   *
   * Lines that begin with a space or with '#' are ignored, as are lines that
   * do not contain both an id and a sequence token. Residue blocks that share
   * an id are concatenated in the order they appear. An id of the form
   * <code>name/start-end</code> with numeric bounds is split into the
   * sequence name and its start/end positions; otherwise the sequence is
   * numbered from 1 to its length.
   *
   * @throws IOException if no sequences are found, or if a sequence is
   *           rejected by <code>isValidProteinSequence</code>
   */
  public void parse()
      throws IOException
  {
    Hashtable seqhash = new Hashtable();
    // Ids in first-seen order; the Hashtable alone would lose file order.
    Vector headers = new Vector();
    String line;

    while ( (line = nextLine()) != null)
    {
      if ( (line.indexOf(" ") == 0) || (line.indexOf("#") == 0))
      {
        continue;
      }

      StringTokenizer str = new StringTokenizer(line, " ");

      // A usable record needs an id AND a residue block. Blank lines and
      // single-token lines are skipped instead of letting nextToken() throw
      // an unchecked NoSuchElementException.
      if (str.countTokens() < 2)
      {
        continue;
      }

      String id = str.nextToken();
      StringBuffer tempseq = (StringBuffer) seqhash.get(id);

      if (tempseq == null)
      {
        // seqhash and headers are always updated together, so a single
        // lookup decides whether this id is new.
        tempseq = new StringBuffer();
        seqhash.put(id, tempseq);
        headers.addElement(id);
      }

      tempseq.append(str.nextToken());
    }

    this.noSeqs = headers.size();

    if (noSeqs < 1)
    {
      throw new IOException("No sequences found (PFAM input)");
    }

    for (int i = 0; i < headers.size(); i++)
    {
      String head = headers.elementAt(i).toString();
      Object stored = seqhash.get(head);

      if (stored == null)
      {
        // Defensive only: every header is registered together with a buffer.
        System.err.println("PFAM File reader: Can't find sequence for " +
                           head);
        continue;
      }

      String residues = stored.toString();

      if (maxLength < residues.length())
      {
        maxLength = residues.length();
      }

      // Defaults used when the id has no (valid) "/start-end" suffix.
      String name = head;
      int start = 1;
      int end = residues.length();

      if (head.indexOf("/") > 0)
      {
        StringTokenizer st = new StringTokenizer(head, "/");

        if (st.countTokens() == 2)
        {
          // The part before '/' becomes the name even if the range is bad.
          name = st.nextToken();

          int[] range = parseRange(st.nextToken());

          if (range != null)
          {
            start = range[0];
            end = range[1];
          }
        }
      }

      ids.addElement(name);

      // Use the name derived above rather than reading ids back by index, so
      // the result does not depend on ids having been empty on entry.
      Sequence newSeq = new Sequence(name, residues, start, end);
      seqs.addElement(newSeq);

      if (!isValidProteinSequence(newSeq.getSequence()))
      {
        throw new IOException(
            "Not a valid protein sequence - (PFAM input)");
      }
    }
  }

  /**
   * Parses a <code>start-end</code> range.
   *
   * @param text the text following '/' in a sequence id
   * @return a two element array {start, end}, or null when the text is not
   *         two integers separated by '-'
   */
  private static int[] parseRange(String text)
  {
    StringTokenizer st = new StringTokenizer(text, "-");

    if (st.countTokens() != 2)
    {
      return null;
    }

    try
    {
      int start = Integer.parseInt(st.nextToken());
      int end = Integer.parseInt(st.nextToken());

      return new int[]
          {
          start, end};
    }
    catch (NumberFormatException ignored)
    {
      // Non-numeric bounds are handled like any other malformed range: the
      // caller falls back to numbering the sequence from 1 to its length.
      return null;
    }
  }

  /**
   * Formats the given sequences as PFAM-style text, one sequence per line:
   * the <code>name/start-end</code> label padded to the widest label (and at
   * least 15 characters), a space, then the sequence. Output stops at the
   * first null array entry and ends with an empty line.
   *
   * NOTE(review): padding relies on the "%-Ns" pattern of
   * <code>jalview.util.Format</code>, presumably left-justified as in printf
   * - confirm against that class.
   *
   * @param s sequences to write; entries after the first null are ignored
   * @return the formatted alignment text
   */
  public static String print(SequenceI[] s)
  {
    StringBuffer out = new StringBuffer("");

    // First pass: find the widest label so all sequences start in one column.
    int maxid = 0;

    for (int i = 0; (i < s.length) && (s[i] != null); i++)
    {
      int width = idLabel(s[i]).length();

      if (width > maxid)
      {
        maxid = width;
      }
    }

    if (maxid < 15)
    {
      maxid = 15;
    }

    // Second pass: emit one padded label plus sequence per line.
    for (int j = 0; (j < s.length) && (s[j] != null); j++)
    {
      out.append(new Format("%-" + maxid + "s").form(idLabel(s[j])) + " ");
      out.append(s[j].getSequence() + "\n");
    }

    out.append("\n");

    return out.toString();
  }

  /**
   * Builds the <code>name/start-end</code> label written for a sequence.
   *
   * @param seq the sequence to label
   * @return the label text
   */
  private static String idLabel(SequenceI seq)
  {
    return seq.getName() + "/" + seq.getStart() + "-" + seq.getEnd();
  }

  /**
   * Formats the sequences returned by <code>getSeqsAsArray()</code> using
   * {@link #print(SequenceI[])}.
   *
   * @return the formatted alignment text
   */
  public String print()
  {
    return print(getSeqsAsArray());
  }
}