/*
 * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
 * Copyright (C) $$Year-Rel$$ The Jalview Authors
 * 
 * This file is part of Jalview.
 * 
 * Jalview is free software: you can redistribute it and/or
 * modify it under the terms of the GNU General Public License 
 * as published by the Free Software Foundation, either version 3
 * of the License, or (at your option) any later version.
 *  
 * Jalview is distributed in the hope that it will be useful, but 
 * WITHOUT ANY WARRANTY; without even the implied warranty 
 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR 
 * PURPOSE.  See the GNU General Public License for more details.
 * 
 * You should have received a copy of the GNU General Public License
 * along with Jalview.  If not, see <http://www.gnu.org/licenses/>.
 * The Jalview Authors are detailed in the 'AUTHORS' file.
 */
package jalview.io;

import jalview.datamodel.AlignmentAnnotation;
import jalview.datamodel.Sequence;
import jalview.datamodel.SequenceI;
import jalview.util.Format;

import java.io.IOException;
import java.util.Hashtable;
import java.util.StringTokenizer;
import java.util.Vector;
import java.util.regex.Pattern;

/**
 * Reader and writer for CLUSTAL-style alignment files.
 * 
 * Reading accumulates, per sequence id, the residue fragments found on each
 * non-indented line, and collects indented lines made only of bracket/dot/dash
 * characters as secondary structure strings. Writing produces the sequences in
 * blocks of {@value #CHUNK_LENGTH} columns under a "CLUSTAL" header line.
 */
public class ClustalFile extends AlignFile
{
  /**
   * An indented line consisting solely of '-', '.', '(', ')', '[' and ']'.
   * Compiled once instead of on every input line.
   */
  private static final Pattern STRUCTURE_LINE = Pattern
          .compile("\\s+(-|\\.|\\(|\\[|\\]|\\))+");

  /** Number of alignment columns written per output block. */
  private static final int CHUNK_LENGTH = 60;

  /** Minimum width of the id column before the separating space is added. */
  private static final int MIN_ID_WIDTH = 15;

  public ClustalFile()
  {
  }

  public ClustalFile(String inFile, String type) throws IOException
  {
    super(inFile, type);
  }

  public ClustalFile(FileParse source) throws IOException
  {
    super(source);
  }

  @Override
  public void initData()
  {
    super.initData();
  }

  /**
   * Reads every line from the source, then adds one sequence per distinct id
   * (in order of first appearance) to {@code seqs} and, where their length
   * equals {@code maxLength}, secondary structure rows to {@code annotations}.
   * 
   * An IOException raised while reading is reported on stderr and reading
   * stops; whatever was gathered up to that point is still processed.
   * 
   * @throws IOException
   *           declared for callers; read errors from {@code nextLine()} are
   *           caught and logged here rather than propagated
   */
  public void parse() throws IOException
  {
    // set once a "CLUSTAL" header or a blank line has been seen; sequence
    // lines before that point are ignored
    boolean flag = false;
    // true between a blank line and the next sequence line, i.e. while we
    // are above the sequences of a block
    boolean top = false;
    StringBuilder pssecstr = new StringBuilder();
    StringBuilder consstr = new StringBuilder();
    Vector<String> headers = new Vector<String>();
    Hashtable<String, StringBuilder> seqhash = new Hashtable<String, StringBuilder>();
    String line;

    try
    {
      while ((line = nextLine()) != null)
      {
        if (line.length() == 0)
        {
          top = true;
        }

        if (line.startsWith(" "))
        {
          // indented line: only of interest if it is a structure string
          if (STRUCTURE_LINE.matcher(line).matches())
          {
            if (top)
            {
              pssecstr.append(line.trim());
            }
            else
            {
              consstr.append(line.trim());
            }
          }
          continue;
        }

        StringTokenizer str = new StringTokenizer(line, " ");
        if (!str.hasMoreTokens())
        {
          // a line with no tokens also enables sequence reading
          flag = true;
          continue;
        }

        String id = str.nextToken();
        if (id.equalsIgnoreCase("CLUSTAL"))
        {
          flag = true;
        }
        else if (flag)
        {
          StringBuilder tempseq = seqhash.get(id);
          if (tempseq == null)
          {
            // headers and seqhash are only ever filled together, so a missing
            // hash entry is exactly the "new id" case
            tempseq = new StringBuilder();
            seqhash.put(id, tempseq);
            headers.addElement(id);
          }

          if (str.hasMoreTokens())
          {
            tempseq.append(str.nextToken());
          }
          top = false;
        }
      }
    } catch (IOException e)
    {
      System.err.println("Exception parsing clustal file " + e);
      e.printStackTrace();
    }

    if (!flag)
    {
      return;
    }

    this.noSeqs = headers.size();

    // Add sequences in order of first appearance
    for (String header : headers)
    {
      StringBuilder residues = seqhash.get(header);
      if (residues == null)
      {
        System.err.println("Clustal File Reader: Can't find sequence for "
                + header);
        continue;
      }

      String seqString = residues.toString();
      if (maxLength < seqString.length())
      {
        maxLength = seqString.length();
      }

      Sequence newSeq = parseId(header);
      newSeq.setSequence(seqString);
      seqs.addElement(newSeq);
    }

    AlignmentAnnotation lastssa = null;
    if (pssecstr.length() == maxLength)
    {
      // raw Vector kept as in the original call: the parameter type of
      // StockholmFile.parseAnnotationRow is not visible from this file
      Vector ss = new Vector();
      lastssa = StockholmFile.parseAnnotationRow(ss, "secondary structure",
              pssecstr.toString());
      lastssa.label = "Secondary Structure";
      annotations.addElement(lastssa);
    }
    if (consstr.length() == maxLength)
    {
      Vector ss = new Vector();
      AlignmentAnnotation ssa = StockholmFile.parseAnnotationRow(ss,
              "secondary structure", consstr.toString());
      ssa.label = "Consensus Secondary Structure";
      // skip the consensus row when it merely repeats the row added above
      // (comparing with '-' treated as '.')
      if (lastssa == null
              || !lastssa.getRNAStruc().equals(
                      ssa.getRNAStruc().replace('-', '.')))
      {
        annotations.addElement(ssa);
      }
    }
  }

  /**
   * Writes the sequences held by this object in CLUSTAL format.
   * 
   * @return the formatted alignment
   */
  public String print()
  {
    // TODO: locaRNA style aln output
    return print(getSeqsAsArray());
  }

  /**
   * Writes the given sequences in CLUSTAL format: a "CLUSTAL" header followed
   * by a blank line, then blocks of at most {@value #CHUNK_LENGTH} columns,
   * each row being the left-justified id followed by that slice of the
   * sequence, with a blank line after every block.
   * 
   * @param s
   *          sequences to write; output stops at the first null element
   * @return the formatted alignment
   */
  public String print(SequenceI[] s)
  {
    StringBuilder out = new StringBuilder("CLUSTAL" + newline + newline);

    int max = 0;
    int maxid = 0;
    int count = 0;

    // measure the widest id and longest sequence among the leading non-null
    // entries
    while ((count < s.length) && (s[count] != null))
    {
      max = Math.max(max, s[count].getSequence().length);
      maxid = Math.max(maxid, printId(s[count]).length());
      count++;
    }

    maxid = Math.max(maxid, MIN_ID_WIDTH) + 1;
    String idFormat = "%-" + maxid + "s";

    // Ceiling division: the previous "(max / len) + 1" emitted an extra block
    // of id-only rows whenever max was an exact multiple of the chunk length.
    int nochunks = (max + CHUNK_LENGTH - 1) / CHUNK_LENGTH;

    for (int chunk = 0; chunk < nochunks; chunk++)
    {
      int start = chunk * CHUNK_LENGTH;
      int end = start + CHUNK_LENGTH;

      for (int j = 0; j < count; j++)
      {
        out.append(new Format(idFormat).form(printId(s[j]) + " "));

        int seqLength = s[j].getSequence().length;
        if (start < seqLength)
        {
          if (end < seqLength)
          {
            out.append(s[j].getSequenceAsString(start, end));
          }
          else
          {
            // final, possibly short, slice of this sequence
            out.append(s[j].getSequenceAsString().substring(start));
          }
        }

        out.append(newline);
      }

      out.append(newline);
    }

    return out.toString();
  }
}