2 * This file is part of the Vamsas Client version 0.2.
\r
3 * Copyright 2010 by Jim Procter, Iain Milne, Pierre Marguerite,
\r
4 * Andrew Waterhouse and Dominik Lindner.
\r
6 * Earlier versions have also been incorporated into Jalview version 2.4
\r
7 * since 2008, and TOPALi version 2 since 2007.
\r
9 * The Vamsas Client is free software: you can redistribute it and/or modify
\r
10 * it under the terms of the GNU Lesser General Public License as published by
\r
11 * the Free Software Foundation, either version 3 of the License, or
\r
12 * (at your option) any later version.
\r
14 * The Vamsas Client is distributed in the hope that it will be useful,
\r
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
\r
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
\r
17 * GNU Lesser General Public License for more details.
\r
19 * You should have received a copy of the GNU Lesser General Public License
\r
20 * along with the Vamsas Client. If not, see <http://www.gnu.org/licenses/>.
\r
22 package uk.ac.vamsas.objects.utils;
\r
24 import java.io.BufferedReader;
\r
25 import java.io.BufferedWriter;
\r
26 import java.io.IOException;
\r
27 import java.io.InputStream;
\r
28 import java.io.InputStreamReader;
\r
29 import java.io.OutputStream;
\r
30 import java.io.OutputStreamWriter;
\r
31 import java.util.Hashtable;
\r
32 import java.util.Vector;
\r
33 import java.util.regex.Pattern;
\r
35 import uk.ac.vamsas.objects.core.*;
\r
40 * TODO To change the template for this generated type comment go to
\r
41 * Window - Preferences - Java - Code Style - Code Templates
\r
43 public class SeqSet {
\r
/**
 * Convenience overload: writes the given sequences to the stream by
 * delegating to the three-argument write_Fasta with a width argument of 80.
 *
 * @param os   stream handed unchanged to the delegated call
 * @param seqs sequences handed unchanged to the delegated call
 * @throws IOException propagated from the delegated call
 */
45 public static void write_Fasta(OutputStream os, SequenceType[] seqs)
\r
46 throws IOException {
\r
47 write_Fasta(os, seqs, 80);
\r
/**
 * Convenience overload: delegates to the three-argument write_Fasta, passing
 * a width of 80 when width80 is true and 0 otherwise.
 *
 * @param os      stream handed unchanged to the delegated call
 * @param seqs    sequences handed unchanged to the delegated call
 * @param width80 true selects a width argument of 80, false selects 0
 *                (presumably 0 means "do not wrap sequence lines" - TODO
 *                confirm against the int overload)
 * @throws IOException propagated from the delegated call
 */
50 public static void write_Fasta(OutputStream os, SequenceType[] seqs,
\r
51 boolean width80) throws IOException {
\r
52 write_Fasta(os, seqs, (width80) ? 80 : 0);
\r
/**
 * Writes each sequence to the stream as a ">" + name header line followed by
 * its sequence text. Output goes through a BufferedWriter wrapped around an
 * OutputStreamWriter that is created without an explicit charset. Every call
 * also prints a "NOT FULLY IMPLEMENTED!" notice to System.err.
 *
 * @param os    destination stream
 * @param seqs  sequences to write; getName() and getSequence() are read from
 *              each element
 * @param width used as the length argument when pieces of the sequence text
 *              are written; presumably the maximum number of symbols per
 *              output line - TODO confirm
 * @throws IOException if writing to the underlying stream fails
 */
55 public static void write_Fasta(OutputStream os, SequenceType[] seqs, int width)
\r
56 throws IOException {
\r
57 int i, nseq = seqs.length;
\r
// No charset is given, so the platform default encoding is used for output.
58 BufferedWriter fasta_out = new BufferedWriter(new OutputStreamWriter(os));
\r
59 System.err.println("NOT FULLY IMPLEMENTED!"); // TODO: Finish adapting this
\r
61 for (i = 0; i < nseq; i++) {
\r
// Header line: '>' immediately followed by the sequence name.
62 fasta_out.write(">" + seqs[i].getName() + "\n");
\r
// Whole sequence text on a single line.
64 fasta_out.write(seqs[i].getSequence() + "\n");
\r
66 // TODO: adapt to SymbolDictionary labelwidths
\r
67 String tempseq = seqs[i].getSequence();
\r
// j is the current offset into tempseq, k is its total length.
68 int j = 0, k = tempseq.length();
\r
// write(String, off, len): emits 'width' characters starting at offset j.
72 fasta_out.write(tempseq, j, width);
\r
// Same call with length d instead of width (presumably the shorter final
// piece of the sequence - TODO confirm).
74 fasta_out.write(tempseq, j, d);
\r
76 fasta_out.write("\n");
\r
85 * TODO: introduce a dictionary parameter for qualified sequence symbols. Reads
\r
86 * a sequence set from a stream - will only read prescribed amino acid
\r
91 * @throws IOException
\r
93 public static Sequence[] read_SeqFasta(InputStream os) throws IOException {
\r
// Untyped accumulator for the Sequence objects created while reading.
94 Vector seqs = new Vector();
\r
// No charset is given, so input is decoded with the platform default encoding.
96 BufferedReader infasta = new BufferedReader(new InputStreamReader(os));
\r
97 System.err.println("NOT FULLY IMPLEMENTED!"); // TODO: Finish adapting this
\r
99 // TODO: decide on return type - SequenceType is a partly complete vamsas
\r
100 // Vobject - either for a dataset or alignment sequence
\r
101 // so could go in either!
\r
// NOTE(review): neither seq nor aaMatch is referenced by the statements
// below - confirm whether amino-acid symbol filtering is still intended.
103 Sequence seq = null;
\r
104 Pattern aaMatch = Pattern.compile("[ARNDCQEGHILKMFPSTUWYV]",
\r
105 Pattern.CASE_INSENSITIVE);
\r
// sname holds the most recent header line; seqstr stays null until a
// sequence has been started.
106 String sname = "", seqstr = null;
\r
108 line = infasta.readLine();
\r
// A null line (end of stream) or a '>' header line ends the record that was
// being collected.
109 if (line == null || line.startsWith(">")) {
\r
// Store the finished record; substring(1) drops the first character of the
// saved header line (the '>').
110 if (seqstr != null)
\r
111 seqs.add((Object) Seq.newSequence(sname.substring(1), seqstr,
\r
112 SymbolDictionary.STANDARD_AA, 0, 0));
\r
113 sname = line; // keeps the whole header line; the '>' is only dropped by substring(1) above
\r
// NOTE(review): the regex "//s+" matches two literal '/' characters followed
// by one or more 's' characters, not whitespace; "\\s+" was probably
// intended - confirm. The pattern is also recompiled for every line read.
116 String subseq = Pattern.compile("//s+").matcher(line).replaceAll("");
\r
119 } while (line != null);
\r
120 nseq = seqs.size();
\r
122 // TODO:POSS: should really return a sequence if there's only one in the
\r
// Copy the collected objects into a typed array, casting each element.
124 Sequence[] seqset = new Sequence[nseq];
\r
125 for (int i = 0; i < nseq; i++) {
\r
126 seqset[i] = (Sequence) seqs.elementAt(i);
\r
/**
 * Renames every sequence in the array, in place, to "Sequence" followed by
 * its array index, and records each generated name together with the name it
 * replaced in a Hashtable (key: generated name, value: previous name). Every
 * call also prints a "NOT FULLY IMPLEMENTED!" notice to System.err.
 *
 * @param sequences sequences to rename; each element's name is overwritten
 * @return a Hashtable; presumably the generated-name to previous-name map
 *         built here - TODO confirm
 */
134 public static Hashtable uniquify(SequenceType[] sequences) {
\r
135 System.err.println("NOT FULLY IMPLEMENTED!"); // TODO: Finish adapting this
\r
137 // TODO: do we need this with vamsas sequences ?
\r
138 // Generate a safely named sequence set and a hash to recover the sequence
\r
140 Hashtable map = new Hashtable();
\r
141 for (int i = 0; i < sequences.length; i++) {
\r
// The array index makes each generated name distinct within this call.
142 String safename = new String("Sequence" + i);
\r
// Hashtable does not accept null values: a sequence whose getName() returns
// null would make this put throw NullPointerException.
143 map.put(safename, sequences[i].getName());
\r
144 sequences[i].setName(safename);
\r
149 public static boolean deuniquify(Hashtable map, SequenceType[] sequences) {
\r
150 System.err.println("NOT FULLY IMPLEMENTED!"); // TODO: Finish adapting this
\r
152 // TODO: do we need this with vamsas sequences ?
\r
153 // recover unsafe sequence names for a sequence set
\r
154 boolean allfound = true;
\r
155 for (int i = 0; i < sequences.length; i++) {
\r
156 if (map.containsKey(sequences[i].getName())) {
\r
157 String unsafename = (String) map.get(sequences[i].getName());
\r
158 sequences[i].setName(unsafename);
\r