X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=src%2Fjalview%2Fio%2FClustalFile.java;h=c5581867c48d1934c4c4b8cfaede90e62de08147;hb=refs%2Fheads%2Fpatch%2FJAL-1236_customClustalHeader;hp=d62d185d9afda45c67c1e5efa4b0f419490de4fa;hpb=3a993bbe274824870c78bd7695c42fa93908cb30;p=jalview.git diff --git a/src/jalview/io/ClustalFile.java b/src/jalview/io/ClustalFile.java index d62d185..c558186 100755 --- a/src/jalview/io/ClustalFile.java +++ b/src/jalview/io/ClustalFile.java @@ -1,75 +1,103 @@ /* - * Jalview - A Sequence Alignment Editor and Viewer - * Copyright (C) 2007 AM Waterhouse, J Procter, G Barton, M Clamp, S Searle - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version 2 + * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$) + * Copyright (C) $$Year-Rel$$ The Jalview Authors + * + * This file is part of Jalview. + * + * Jalview is free software: you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, either version 3 * of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * + * + * Jalview is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty + * of MERCHANTABILITY or FITNESS FOR A PARTICULAR + * PURPOSE. See the GNU General Public License for more details. + * * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA + * along with Jalview. If not, see . + * The Jalview Authors are detailed in the 'AUTHORS' file. */ package jalview.io; -import java.io.*; -import java.util.*; +import jalview.datamodel.AlignmentAnnotation; +import jalview.datamodel.Sequence; +import jalview.datamodel.SequenceI; +import jalview.util.Format; -import jalview.datamodel.*; -import jalview.util.*; +import java.io.IOException; +import java.util.HashMap; +import java.util.Map; +import java.util.StringTokenizer; +import java.util.Vector; -public class ClustalFile - extends AlignFile +public class ClustalFile extends AlignFile { public ClustalFile() { + _initHeader(); } - public ClustalFile(String inFile, String type) - throws IOException + public ClustalFile(String inFile, DataSourceType sourceType) + throws IOException { - super(inFile, type); + super(inFile, sourceType); + _initHeader(); } + public ClustalFile(FileParse source) throws IOException { super(source); + _initHeader(); + } + + private void _initHeader() + { + try { + clustalHeader = jalview.bin.Cache.getDefault("CLUSTAL_HEADER","CLUSTAL"); + } catch (Error e) {}; + } + + @Override public void initData() { super.initData(); } - public void parse() - throws IOException + @Override + public void parse() throws IOException { int i = 0; boolean flag = false; - - Vector headers = new Vector(); - Hashtable seqhash = new Hashtable(); + boolean top = false; + StringBuffer pssecstr = new StringBuffer(); + StringBuffer consstr = new StringBuffer(); + Vector headers = new Vector<>(); + Map seqhash = new HashMap<>(); StringBuffer tempseq; String line, id; StringTokenizer str; try { - while ( (line = nextLine()) != null) + while ((line = nextLine()) != null) { - if (line.indexOf(" ") != 0) + if (line.length() == 0) + { + top = true; + } + boolean isConservation = line.startsWith(SPACE) + || line.startsWith(TAB); + if (!isConservation) { - str = new StringTokenizer(line, " "); + str = new StringTokenizer(line); if (str.hasMoreTokens()) { id = str.nextToken(); - + // TODO: JAL-1236 other tokens may be indicative of a header for Clustal format if (id.equalsIgnoreCase("CLUSTAL")) { flag = true; @@ -80,7 +108,7 @@ public class ClustalFile { if (seqhash.containsKey(id)) { - tempseq = (StringBuffer) seqhash.get(id); + tempseq = seqhash.get(id); } else { @@ -88,7 +116,7 @@ public class ClustalFile seqhash.put(id, tempseq); } - if (! (headers.contains(id))) + if (!(headers.contains(id))) { headers.addElement(id); } @@ -97,6 +125,7 @@ public class ClustalFile { tempseq.append(str.nextToken()); } + top = false; } } } @@ -105,9 +134,22 @@ public class ClustalFile flag = true; } } + else + { + if (line.matches("\\s+(-|\\.|\\(|\\[|\\]|\\))+")) + { + if (top) + { + pssecstr.append(line.trim()); + } + else + { + consstr.append(line.trim()); + } + } + } } - } - catch (IOException e) + } catch (IOException e) { System.err.println("Exception parsing clustal file " + e); e.printStackTrace(); @@ -117,56 +159,74 @@ public class ClustalFile { this.noSeqs = headers.size(); - //Add sequences to the hash + // Add sequences to the hash for (i = 0; i < headers.size(); i++) { if (seqhash.get(headers.elementAt(i)) != null) { if (maxLength < seqhash.get(headers.elementAt(i)).toString() - .length()) + .length()) { maxLength = seqhash.get(headers.elementAt(i)).toString() - .length(); + .length(); } Sequence newSeq = parseId(headers.elementAt(i).toString()); - newSeq.setSequence(seqhash.get(headers.elementAt(i).toString()). - toString()); + newSeq.setSequence( + seqhash.get(headers.elementAt(i).toString()).toString()); seqs.addElement(newSeq); } else { - System.err.println( - "Clustal File Reader: Can't find sequence for " + - headers.elementAt(i)); + System.err.println("Clustal File Reader: Can't find sequence for " + + headers.elementAt(i)); + } + } + AlignmentAnnotation lastssa = null; + if (pssecstr.length() == maxLength) + { + Vector ss = new Vector<>(); + AlignmentAnnotation ssa = lastssa = StockholmFile + .parseAnnotationRow(ss, "secondary structure", + pssecstr.toString()); + ssa.label = "Secondary Structure"; + annotations.addElement(ssa); + } + if (consstr.length() == maxLength) + { + Vector ss = new Vector<>(); + AlignmentAnnotation ssa = StockholmFile.parseAnnotationRow(ss, + "secondary structure", consstr.toString()); + ssa.label = "Consensus Secondary Structure"; + if (lastssa == null || !lastssa.getRNAStruc() + .equals(ssa.getRNAStruc().replace('-', '.'))) + { + annotations.addElement(ssa); } } } } - - public String print() - { - return print(getSeqsAsArray()); - } - - public String print(SequenceI[] s) + /** + * clustal header - customise if needed + */ + public String clustalHeader = "CLUSTAL"; + + @Override + public String print(SequenceI[] s, boolean jvsuffix) { - StringBuffer out = new StringBuffer("CLUSTAL\n\n"); + StringBuffer out = new StringBuffer(clustalHeader + newline + newline); int max = 0; int maxid = 0; int i = 0; - while ( (i < s.length) && (s[i] != null)) + while ((i < s.length) && (s[i] != null)) { - String tmp = printId(s[i]); + String tmp = printId(s[i], jvsuffix); - if (s[i].getSequence().length > max) - { - max = s[i].getSequence().length; - } + max = Math.max(max, s[i].getLength()); if (tmp.length() > maxid) { @@ -184,37 +244,38 @@ public class ClustalFile maxid++; int len = 60; - int nochunks = (max / len) + 1; + int nochunks = (max / len) + (max % len > 0 ? 1 : 0); for (i = 0; i < nochunks; i++) { int j = 0; - while ( (j < s.length) && (s[j] != null)) + while ((j < s.length) && (s[j] != null)) { - out.append(new Format("%-" + maxid + "s").form(printId(s[j]) + " ")); + out.append(new Format("%-" + maxid + "s") + .form(printId(s[j], jvsuffix) + " ")); - int start = i * len; - int end = start + len; + int chunkStart = i * len; + int chunkEnd = chunkStart + len; - if ( (end < s[j].getSequence().length) && - (start < s[j].getSequence().length)) + int length = s[j].getLength(); + if ((chunkEnd < length) && (chunkStart < length)) { - out.append(s[j].getSequenceAsString(start, end)); + out.append(s[j].getSequenceAsString(chunkStart, chunkEnd)); } else { - if (start < s[j].getSequence().length) + if (chunkStart < length) { - out.append(s[j].getSequenceAsString().substring(start)); + out.append(s[j].getSequenceAsString().substring(chunkStart)); } } - out.append("\n"); + out.append(newline); j++; } - out.append("\n"); + out.append(newline); } return out.toString();