/*
 * Created on 17-May-2005
 *
 * TODO To change the template for this generated file go to
 * Window - Preferences - Java - Code Style - Code Templates
 */
7 package org.vamsas.objects.utils;
9 import java.io.BufferedReader;
10 import java.io.BufferedWriter;
11 import java.io.IOException;
12 import java.io.InputStream;
13 import java.io.InputStreamReader;
14 import java.io.OutputStream;
15 import java.io.OutputStreamWriter;
16 import java.util.Hashtable;
17 import java.util.Vector;
18 import java.util.regex.Pattern;
20 import vamsas.objects.simple.Sequence;
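/**
 * Static helpers for writing and reading arrays of {@link Sequence} in
 * FASTA-style text, and for temporarily replacing sequence ids with
 * generated names.
 *
 * TODO To change the template for this generated type comment go to
 * Window - Preferences - Java - Code Style - Code Templates
 */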
28 public class SeqSet extends vamsas.objects.simple.SequenceSet {
30 public static void write_Fasta(OutputStream os, Sequence[] seqs) throws IOException {
31 write_Fasta(os, seqs, 80);
34 public static void write_Fasta(OutputStream os, Sequence[] seqs, boolean width80) throws IOException {
35 write_Fasta(os, seqs, (width80) ? 80 : 0);
38 public static void write_Fasta(OutputStream os, Sequence[] seqs, int width) throws IOException {
39 int i, nseq = seqs.length;
40 BufferedWriter fasta_out = new BufferedWriter(new OutputStreamWriter(os));
41 for (i=0; i<nseq; i++) {
42 fasta_out.write(">"+seqs[i].getId()+"\n");
44 fasta_out.write(seqs[i].getSeq()+"\n");
46 String tempseq = seqs[i].getSeq();
47 int j=0, k=tempseq.length();
51 fasta_out.write(tempseq, j, width);
53 fasta_out.write(tempseq, j, d);
55 fasta_out.write("\n");
63 * TODO: introduce a dictionary parameter for qualified sequence symbols
64 * Reads a sequence set from a stream - will only read prescribed amino acid
70 public static Sequence[] read_SeqFasta(InputStream os) throws IOException {
71 Vector seqs = new Vector();
73 BufferedReader infasta = new BufferedReader(new InputStreamReader(os));
75 /// TODO: decide on where this routine should live... current best guess is vamsas.objects.io
78 Pattern aaMatch = Pattern.compile("[ARNDCQEGHILKMFPSTUWYV]", Pattern.CASE_INSENSITIVE);
79 String sname = "", seqstr=null;
81 line = infasta.readLine();
82 if (line==null || line.startsWith(">")) {
84 seqs.add((Object) new Sequence(sname.substring(1), seqstr));
85 sname = line; // remove >
88 String subseq = Pattern.compile("//s+").matcher(line).replaceAll("");
94 // TODO:POSS: should really return a sequence if there's only one in the file.
95 Sequence[] seqset = new Sequence[nseq];
96 for (int i=0; i<nseq; i++) {
97 seqset[i] = (Sequence) seqs.elementAt(i);
105 public static Hashtable uniquify(Sequence[] sequences) {
106 // Generate a safely named sequence set and a hash to recover the sequence names
107 Hashtable map = new Hashtable();
108 for (int i = 0; i < sequences.length; i++) {
109 String safename = new String("Sequence" + i);
110 map.put(safename, sequences[i].getId());
111 sequences[i].setId(safename);
116 public static boolean deuniquify(Hashtable map, Sequence[] sequences) {
117 // recover unsafe sequence names for a sequence set
118 boolean allfound = true;
119 for (int i = 0; i < sequences.length; i++) {
120 if (map.containsKey(sequences[i].getId())) {
121 String unsafename = (String) map.get(sequences[i].getId());
122 sequences[i].setId(unsafename);