X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=src%2Fjalview%2Fio%2FPfamFile.java;h=f4d61bc43e03d5da32856a642d1e479bd168e028;hb=0e2054d29bc49351f000d478659dc3c4371b251c;hp=a6c7a4f3e4edbe66718502f825a7ab6adaf27a40;hpb=efc31b4a8d5cee63555586804a2b79c06bdb5a14;p=jalview.git diff --git a/src/jalview/io/PfamFile.java b/src/jalview/io/PfamFile.java index a6c7a4f..f4d61bc 100755 --- a/src/jalview/io/PfamFile.java +++ b/src/jalview/io/PfamFile.java @@ -1,237 +1,179 @@ -/* - * Jalview - A Sequence Alignment Editor and Viewer - * Copyright (C) 2005 AM Waterhouse, J Procter, G Barton, M Clamp, S Searle - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version 2 - * of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA - */ -package jalview.io; - -import java.io.*; -import java.util.*; - -import jalview.datamodel.*; -import jalview.util.*; - -public class PfamFile - extends AlignFile -{ - Vector ids; - - public PfamFile() - { - } - - public PfamFile(String inStr) - { - super(inStr); - } - - public PfamFile(String inFile, String type) - throws IOException - { - super(inFile, type); - } - - public void initData() - { - super.initData(); - ids = new Vector(); - } - - public void parse() - throws IOException - { - int i = 0; - String line; - - Hashtable seqhash = new Hashtable(); - Vector headers = new Vector(); - - while ( (line = nextLine()) != null) - { - if (line.indexOf(" ") != 0) - { - if (line.indexOf("#") != 0) - { - StringTokenizer str = new StringTokenizer(line, " "); - String id = ""; - - if (str.hasMoreTokens()) - { - id = str.nextToken(); - - StringBuffer tempseq; - - if (seqhash.containsKey(id)) - { - tempseq = (StringBuffer) seqhash.get(id); - } - else - { - tempseq = new StringBuffer(); - seqhash.put(id, tempseq); - } - - if (! (headers.contains(id))) - { - headers.addElement(id); - } - - tempseq.append(str.nextToken()); - } - } - } - } - - this.noSeqs = headers.size(); - - if (noSeqs < 1) - { - throw new IOException("No sequences found (PFAM input)"); - } - - for (i = 0; i < headers.size(); i++) - { - if (seqhash.get(headers.elementAt(i)) != null) - { - if (maxLength < seqhash.get(headers.elementAt(i)).toString() - .length()) - { - maxLength = seqhash.get(headers.elementAt(i)).toString() - .length(); - } - - String head = headers.elementAt(i).toString(); - int start = 1; - int end = seqhash.get(headers.elementAt(i)).toString().length(); - - if (head.indexOf("/") > 0) - { - StringTokenizer st = new StringTokenizer(head, "/"); - - if (st.countTokens() == 2) - { - ids.addElement(st.nextToken()); - - String tmp = st.nextToken(); - st = new StringTokenizer(tmp, "-"); - - if (st.countTokens() == 2) - { - start = Integer.valueOf(st.nextToken()).intValue(); - end = Integer.valueOf(st.nextToken()).intValue(); - } - else - { - start = -1; - end = -1; - } - } - else - { - ids.addElement(headers.elementAt(i)); - } - } - else - { - ids.addElement(headers.elementAt(i)); - } - - Sequence newSeq = null; - - if ( (start != -1) && (end != -1)) - { - newSeq = new Sequence(ids.elementAt(i).toString(), - seqhash.get(headers.elementAt(i).toString()) - .toString(), start, end); - seqs.addElement(newSeq); - } - else - { - newSeq = new Sequence(ids.elementAt(i).toString(), - seqhash.get(headers.elementAt(i).toString()) - .toString(), 1, - seqhash.get(headers.elementAt(i).toString()) - .toString().length()); - seqs.addElement(newSeq); - } - - if (!isValidProteinSequence(newSeq.getSequence())) - { - throw new IOException( - "Not a valid protein sequence - (PFAM input)"); - } - } - else - { - System.err.println("PFAM File reader: Can't find sequence for " + - headers.elementAt(i)); - } - } - } - - public static String print(SequenceI[] s) - { - StringBuffer out = new StringBuffer(""); - - int max = 0; - int maxid = 0; - - int i = 0; - - while ( (i < s.length) && (s[i] != null)) - { - String tmp = s[i].getName() + "/" + s[i].getStart() + "-" + - s[i].getEnd(); - - if (s[i].getSequence().length() > max) - { - max = s[i].getSequence().length(); - } - - if (tmp.length() > maxid) - { - maxid = tmp.length(); - } - - i++; - } - - if (maxid < 15) - { - maxid = 15; - } - - int j = 0; - - while ( (j < s.length) && (s[j] != null)) - { - out.append(new Format("%-" + maxid + "s").form(s[j].getName() + - "/" + s[j].getStart() + "-" + s[j].getEnd()) + " "); - - out.append(s[j].getSequence() + "\n"); - j++; - } - - out.append("\n"); - - return out.toString(); - } - - public String print() - { - return print(getSeqsAsArray()); - } -} +/* + * Jalview - A Sequence Alignment Editor and Viewer (Version 2.8.1) + * Copyright (C) 2014 The Jalview Authors + * + * This file is part of Jalview. + * + * Jalview is free software: you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. + * + * Jalview is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty + * of MERCHANTABILITY or FITNESS FOR A PARTICULAR + * PURPOSE. See the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along with Jalview. If not, see . + * The Jalview Authors are detailed in the 'AUTHORS' file. + */ +package jalview.io; + +import java.io.*; +import java.util.*; + + +import jalview.datamodel.*; +import jalview.util.*; + +public class PfamFile extends AlignFile +{ + + public PfamFile() + { + } + + public PfamFile(String inFile, String type) throws IOException + { + super(inFile, type); + } + + public PfamFile(FileParse source) throws IOException + { + super(source); + } + + public void initData() + { + super.initData(); + } + + public void parse() throws IOException + { + int i = 0; + String line; + + Hashtable seqhash = new Hashtable(); + Vector headers = new Vector(); + + while ((line = nextLine()) != null) + { + if (line.indexOf(" ") != 0) + { + if (line.indexOf("#") != 0) + { + // TODO: verify pfam format requires spaces and not tab characters - + // if not upgrade to use stevesoft regex and look for whitespace. + StringTokenizer str = new StringTokenizer(line, " "); + String id = ""; + + if (str.hasMoreTokens()) + { + id = str.nextToken(); + + StringBuffer tempseq; + + if (seqhash.containsKey(id)) + { + tempseq = (StringBuffer) seqhash.get(id); + } + else + { + tempseq = new StringBuffer(); + seqhash.put(id, tempseq); + } + + if (!(headers.contains(id))) + { + headers.addElement(id); + } + if (str.hasMoreTokens()) + { + tempseq.append(str.nextToken()); + } + } + } + } + } + + this.noSeqs = headers.size(); + + if (noSeqs < 1) + { + throw new IOException("No sequences found (PFAM input)"); + } + + for (i = 0; i < headers.size(); i++) + { + if (seqhash.get(headers.elementAt(i)) != null) + { + if (maxLength < seqhash.get(headers.elementAt(i)).toString() + .length()) + { + maxLength = seqhash.get(headers.elementAt(i)).toString().length(); + } + + Sequence newSeq = parseId(headers.elementAt(i).toString()); + newSeq.setSequence(seqhash.get(headers.elementAt(i).toString()) + .toString()); + seqs.addElement(newSeq); + } + else + { + System.err.println("PFAM File reader: Can't find sequence for " + + headers.elementAt(i)); + } + } + } + + public String print(SequenceI[] s) + { + StringBuffer out = new StringBuffer(""); + + int max = 0; + int maxid = 0; + + int i = 0; + + while ((i < s.length) && (s[i] != null)) + { + String tmp = printId(s[i]); + + if (s[i].getSequence().length > max) + { + max = s[i].getSequence().length; + } + + if (tmp.length() > maxid) + { + maxid = tmp.length(); + } + + i++; + } + + if (maxid < 15) + { + maxid = 15; + } + + int j = 0; + + while ((j < s.length) && (s[j] != null)) + { + out.append(new Format("%-" + maxid + "s").form(printId(s[j]) + " ")); + + out.append(s[j].getSequenceAsString()); + out.append(newline); + j++; + } + + out.append(newline); + + return out.toString(); + } + + public String print() + { + return print(getSeqsAsArray()); + } +}