-/*
- * Created on 17-May-2005
- * Slurped into VamsasClient object set on 12th Jan 2006
- *
- * TODO To change the template for this generated file go to
- * Window - Preferences - Java - Code Style - Code Templates
- */
-package uk.ac.vamsas.objects.utils;
-
-import java.io.BufferedReader;
-import java.io.BufferedWriter;
-import java.io.IOException;
-import java.io.InputStream;
-import java.io.InputStreamReader;
-import java.io.OutputStream;
-import java.io.OutputStreamWriter;
-import java.util.Hashtable;
-import java.util.Vector;
-import java.util.regex.Pattern;
-
-import uk.ac.vamsas.objects.core.*;
-
-/**
- * @author jimp
- *
- * TODO To change the template for this generated type comment go to
- * Window - Preferences - Java - Code Style - Code Templates
- */
-public class SeqSet {
-
- public static void write_Fasta(OutputStream os, SequenceType[] seqs) throws IOException {
- write_Fasta(os, seqs, 80);
- }
-
- public static void write_Fasta(OutputStream os, SequenceType[] seqs, boolean width80) throws IOException {
- write_Fasta(os, seqs, (width80) ? 80 : 0);
- }
-
- public static void write_Fasta(OutputStream os, SequenceType[] seqs, int width) throws IOException {
- int i, nseq = seqs.length;
- BufferedWriter fasta_out = new BufferedWriter(new OutputStreamWriter(os));
- System.err.println("NOT FULLY IMPLEMENTED!"); // TODO: Finish adapting this method
- for (i=0; i<nseq; i++) {
- fasta_out.write(">"+seqs[i].getName()+"\n");
- if (width<=0) {
- fasta_out.write(seqs[i].getSequence()+"\n");
- } else {
- // TODO: adapt to SymbolDictionary labelwidths
- String tempseq = seqs[i].getSequence();
- int j=0, k=tempseq.length();
- while (j<k) {
- int d = k-j;
- if (d>=width) {
- fasta_out.write(tempseq, j, width);
- } else {
- fasta_out.write(tempseq, j, d);
- }
- fasta_out.write("\n");
- j+=width;
- }
- }
- }
- fasta_out.flush();
- }
- /**
- * TODO: introduce a dictionary parameter for qualified sequence symbols
- * Reads a sequence set from a stream - will only read prescribed amino acid
- * symbols.
- * @param os
- * @return
- * @throws IOException
- */
- public static Sequence[] read_SeqFasta(InputStream os) throws IOException {
- Vector seqs = new Vector();
- int nseq = 0;
- BufferedReader infasta = new BufferedReader(new InputStreamReader(os));
- System.err.println("NOT FULLY IMPLEMENTED!"); // TODO: Finish adapting this method
- // TODO: decide on return type - SequenceType is a partly complete vamsas Vobject - either for a dataset or alignment sequence
- // so could go in either!
- String line;
- Sequence seq = null;
- Pattern aaMatch = Pattern.compile("[ARNDCQEGHILKMFPSTUWYV]", Pattern.CASE_INSENSITIVE);
- String sname = "", seqstr=null;
- do {
- line = infasta.readLine();
- if (line==null || line.startsWith(">")) {
- if (seqstr!=null)
- seqs.add((Object) Seq.newSequence(sname.substring(1), seqstr, SymbolDictionary.STANDARD_AA, 0,0));
- sname = line; // remove >
- seqstr="";
- } else {
- String subseq = Pattern.compile("//s+").matcher(line).replaceAll("");
- seqstr += subseq;
- }
- } while (line!=null);
- nseq = seqs.size();
- if (nseq>0) {
- // TODO:POSS: should really return a sequence if there's only one in the file.
- Sequence[] seqset = new Sequence[nseq];
- for (int i=0; i<nseq; i++) {
- seqset[i] = (Sequence) seqs.elementAt(i);
- }
- return seqset;
- }
-
- return null;
- }
-
- public static Hashtable uniquify(SequenceType[] sequences) {
- System.err.println("NOT FULLY IMPLEMENTED!"); // TODO: Finish adapting this method
- // TODO: do we need this with vamsas sequences ?
- // Generate a safely named sequence set and a hash to recover the sequence names
- Hashtable map = new Hashtable();
- for (int i = 0; i < sequences.length; i++) {
- String safename = new String("Sequence" + i);
- map.put(safename, sequences[i].getName());
- sequences[i].setName(safename);
- }
- return map;
- }
-
- public static boolean deuniquify(Hashtable map, SequenceType[] sequences) {
- System.err.println("NOT FULLY IMPLEMENTED!"); // TODO: Finish adapting this method
- // TODO: do we need this with vamsas sequences ?
- // recover unsafe sequence names for a sequence set
- boolean allfound = true;
- for (int i = 0; i < sequences.length; i++) {
- if (map.containsKey(sequences[i].getName())) {
- String unsafename = (String) map.get(sequences[i].getName());
- sequences[i].setName(unsafename);
- } else {
- allfound = false;
- }
- }
- return allfound;
- }
-
-}
+/*\r
+ * This file is part of the Vamsas Client version 0.1. \r
+ * Copyright 2009 by Jim Procter, Iain Milne, Pierre Marguerite, \r
+ * Andrew Waterhouse and Dominik Lindner.\r
+ * \r
+ * Earlier versions have also been incorporated into Jalview version 2.4 \r
+ * since 2008, and TOPALi version 2 since 2007.\r
+ * \r
+ * The Vamsas Client is free software: you can redistribute it and/or modify\r
+ * it under the terms of the GNU Lesser General Public License as published by\r
+ * the Free Software Foundation, either version 3 of the License, or\r
+ * (at your option) any later version.\r
+ * \r
+ * The Vamsas Client is distributed in the hope that it will be useful,\r
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of\r
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the\r
+ * GNU Lesser General Public License for more details.\r
+ * \r
+ * You should have received a copy of the GNU Lesser General Public License\r
+ * along with the Vamsas Client. If not, see <http://www.gnu.org/licenses/>.\r
+ */\r
+package uk.ac.vamsas.objects.utils;\r
+\r
+import java.io.BufferedReader;\r
+import java.io.BufferedWriter;\r
+import java.io.IOException;\r
+import java.io.InputStream;\r
+import java.io.InputStreamReader;\r
+import java.io.OutputStream;\r
+import java.io.OutputStreamWriter;\r
+import java.util.Hashtable;\r
+import java.util.Vector;\r
+import java.util.regex.Pattern;\r
+\r
+import uk.ac.vamsas.objects.core.*;\r
+\r
+/**\r
+ * Static helpers for writing and reading sets of sequences in FASTA format,\r
+ * and for temporarily replacing sequence names with generated safe names.\r
+ * \r
+ * @author jimp\r
+ */\r
+public class SeqSet {\r
+\r
+ /**\r
+ * Writes the given sequences in FASTA format by delegating to the\r
+ * int-width overload of write_Fasta with a width of 80.\r
+ * \r
+ * @param os stream the FASTA text is written to\r
+ * @param seqs sequences to write\r
+ * @throws IOException propagated from the delegate\r
+ */\r
+ public static void write_Fasta(OutputStream os, SequenceType[] seqs)\r
+ throws IOException {\r
+ write_Fasta(os, seqs, 80);\r
+ }\r
+\r
+ /**\r
+  * Writes the given sequences in FASTA format, either wrapped or unwrapped.\r
+  *\r
+  * @param os stream the FASTA text is written to\r
+  * @param seqs sequences to write\r
+  * @param width80 true to request a line width of 80, false to request a\r
+  *          width of 0 from the int-width overload\r
+  * @throws IOException propagated from the int-width overload\r
+  */\r
+ public static void write_Fasta(OutputStream os, SequenceType[] seqs,\r
+     boolean width80) throws IOException {\r
+   int width = 0;\r
+   if (width80) {\r
+     width = 80;\r
+   }\r
+   write_Fasta(os, seqs, width);\r
+ }\r
+\r
+ /**\r
+  * Writes each sequence as a FASTA record: a header line made of '&gt;'\r
+  * followed by the sequence name, then the sequence string.\r
+  *\r
+  * @param os stream the FASTA text is written to; it is flushed but not\r
+  *          closed\r
+  * @param seqs sequences to write, in array order\r
+  * @param width maximum number of characters per sequence line; when zero or\r
+  *          negative the whole sequence is written on one line\r
+  * @throws IOException if writing to the stream fails\r
+  */\r
+ public static void write_Fasta(OutputStream os, SequenceType[] seqs, int width)\r
+     throws IOException {\r
+   // Read the length first so a null array fails before anything is emitted.\r
+   final int total = seqs.length;\r
+   BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(os));\r
+   System.err.println("NOT FULLY IMPLEMENTED!"); // TODO: Finish adapting this\r
+   // method\r
+   for (int s = 0; s < total; s++) {\r
+     SequenceType entry = seqs[s];\r
+     writer.write(">" + entry.getName() + "\n");\r
+     if (width > 0) {\r
+       // TODO: adapt to SymbolDictionary labelwidths\r
+       String residues = entry.getSequence();\r
+       int length = residues.length();\r
+       for (int start = 0; start < length; start += width) {\r
+         // The final chunk may be shorter than the requested width.\r
+         writer.write(residues, start, Math.min(width, length - start));\r
+         writer.write("\n");\r
+       }\r
+     } else {\r
+       writer.write(entry.getSequence() + "\n");\r
+     }\r
+   }\r
+   writer.flush();\r
+ }\r
+\r
+ /**\r
+  * Reads a set of sequences from a FASTA formatted stream.\r
+  * <p>\r
+  * A line starting with '&gt;' begins a new record and the text after the\r
+  * '&gt;' becomes the sequence name. The lines that follow, up to the next\r
+  * header or the end of the stream, have all whitespace removed and are\r
+  * joined to form the sequence string. Each record is built with\r
+  * Seq.newSequence using SymbolDictionary.STANDARD_AA; this method itself\r
+  * does not filter or validate the symbols.\r
+  * <p>\r
+  * Lines that appear before the first header belong to no record and are\r
+  * skipped.\r
+  * <p>\r
+  * TODO: introduce a dictionary parameter for qualified sequence symbols\r
+  *\r
+  * @param os stream to read FASTA text from; it is not closed here\r
+  * @return the sequences in the order they were read, or null if the stream\r
+  *         contained no records\r
+  * @throws IOException if reading from the stream fails\r
+  */\r
+ public static Sequence[] read_SeqFasta(InputStream os) throws IOException {\r
+   Vector seqs = new Vector();\r
+   BufferedReader infasta = new BufferedReader(new InputStreamReader(os));\r
+   System.err.println("NOT FULLY IMPLEMENTED!"); // TODO: Finish adapting this\r
+   // method\r
+   // TODO: decide on return type - SequenceType is a partly complete vamsas\r
+   // Vobject - either for a dataset or alignment sequence\r
+   // so could go in either!\r
+   // Compiled once, outside the loop. The earlier pattern "//s+" matched a\r
+   // literal "//" followed by 's' characters instead of whitespace.\r
+   Pattern whitespace = Pattern.compile("\\s+");\r
+   String sname = null; // current header line, still carrying its '>'\r
+   StringBuffer seqstr = null; // residues gathered for the current record\r
+   String line;\r
+   do {\r
+     line = infasta.readLine();\r
+     if (line == null || line.startsWith(">")) {\r
+       // A new header or the end of the stream completes the open record.\r
+       if (sname != null) {\r
+         seqs.add(Seq.newSequence(sname.substring(1), seqstr.toString(),\r
+             SymbolDictionary.STANDARD_AA, 0, 0));\r
+       }\r
+       sname = line;\r
+       seqstr = new StringBuffer();\r
+     } else if (sname != null) {\r
+       seqstr.append(whitespace.matcher(line).replaceAll(""));\r
+     }\r
+     // Otherwise no header has been seen yet, so the line is dropped.\r
+   } while (line != null);\r
+   if (seqs.isEmpty()) {\r
+     return null;\r
+   }\r
+   // TODO:POSS: should really return a sequence if there's only one in the\r
+   // file.\r
+   return (Sequence[]) seqs.toArray(new Sequence[seqs.size()]);\r
+ }\r
+\r
+ /**\r
+  * Renames every sequence to a generated name of the form "Sequence" plus\r
+  * its array index, and records how to undo the renaming.\r
+  *\r
+  * @param sequences sequences to rename in place\r
+  * @return map from each generated name to the name the sequence had before\r
+  */\r
+ public static Hashtable uniquify(SequenceType[] sequences) {\r
+   System.err.println("NOT FULLY IMPLEMENTED!"); // TODO: Finish adapting this\r
+   // method\r
+   // TODO: do we need this with vamsas sequences ?\r
+   Hashtable recovery = new Hashtable();\r
+   int idx = 0;\r
+   while (idx < sequences.length) {\r
+     SequenceType current = sequences[idx];\r
+     String generated = "Sequence" + idx;\r
+     // Record the old name before it is overwritten.\r
+     recovery.put(generated, current.getName());\r
+     current.setName(generated);\r
+     idx++;\r
+   }\r
+   return recovery;\r
+ }\r
+\r
+ /**\r
+  * Renames each sequence whose current name is a key in the map to the\r
+  * value stored under that key.\r
+  *\r
+  * @param map lookup from a sequence's current name to the name to restore\r
+  * @param sequences sequences to rename in place\r
+  * @return true if every sequence's current name was found in the map,\r
+  *         false if at least one was missing (such sequences keep their name)\r
+  */\r
+ public static boolean deuniquify(Hashtable map, SequenceType[] sequences) {\r
+   System.err.println("NOT FULLY IMPLEMENTED!"); // TODO: Finish adapting this\r
+   // method\r
+   // TODO: do we need this with vamsas sequences ?\r
+   int missing = 0;\r
+   for (int idx = 0; idx < sequences.length; idx++) {\r
+     SequenceType target = sequences[idx];\r
+     // Hashtable cannot hold null values, so null means the key is absent.\r
+     Object restored = map.get(target.getName());\r
+     if (restored == null) {\r
+       missing++;\r
+       continue;\r
+     }\r
+     target.setName((String) restored);\r
+   }\r
+   return missing == 0;\r
+ }\r
+\r
+}\r