/*
 * This file is part of the Vamsas Client version 0.2.
 * Copyright 2010 by Jim Procter, Iain Milne, Pierre Marguerite,
 * Andrew Waterhouse and Dominik Lindner.
 *
 * Earlier versions have also been incorporated into Jalview version 2.4
 * since 2008, and TOPALi version 2 since 2007.
 *
 * The Vamsas Client is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * The Vamsas Client is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with the Vamsas Client. If not, see .
 */
package uk.ac.vamsas.objects.utils;

import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.util.Hashtable;
import java.util.Vector;
import java.util.regex.Pattern;

import uk.ac.vamsas.objects.core.*;

/**
 * Static helpers for moving sets of sequences in and out of FASTA text and
 * for temporarily replacing sequence names with generated placeholder names.
 *
 * Every method prints a "NOT FULLY IMPLEMENTED!" notice to System.err when
 * called; the TODO comments inside each method list what is still open.
 *
 * @author jimp
 */
public class SeqSet {

  /** Matches any run of whitespace; used to strip sequence lines on input. */
  private static final Pattern WHITESPACE = Pattern.compile("\\s+");

  /**
   * Writes the sequences as FASTA, wrapping sequence text at 80 characters.
   *
   * @param os
   *          stream to write to; it is flushed but not closed
   * @param seqs
   *          sequences to write, in array order
   * @throws IOException
   *           if writing to the stream fails
   */
  public static void write_Fasta(OutputStream os, SequenceType[] seqs)
      throws IOException {
    write_Fasta(os, seqs, 80);
  }

  /**
   * Writes the sequences as FASTA, either wrapped at 80 characters or with
   * each sequence on a single line.
   *
   * @param os
   *          stream to write to; it is flushed but not closed
   * @param seqs
   *          sequences to write, in array order
   * @param width80
   *          true to wrap sequence text at 80 characters, false for no
   *          wrapping
   * @throws IOException
   *           if writing to the stream fails
   */
  public static void write_Fasta(OutputStream os, SequenceType[] seqs,
      boolean width80) throws IOException {
    write_Fasta(os, seqs, width80 ? 80 : 0);
  }

  /**
   * Writes the sequences as FASTA. Each record is a "&gt;name" line followed
   * by the sequence text, every line terminated by a single "\n".
   *
   * @param os
   *          stream to write to; it is flushed but not closed. Characters are
   *          encoded with the platform default charset.
   * @param seqs
   *          sequences to write, in array order
   * @param width
   *          maximum number of sequence characters per line; zero or a
   *          negative value writes each sequence on one line
   * @throws IOException
   *           if writing to the stream fails
   */
  public static void write_Fasta(OutputStream os, SequenceType[] seqs,
      int width) throws IOException {
    int nseq = seqs.length;
    BufferedWriter fasta_out = new BufferedWriter(new OutputStreamWriter(os));
    System.err.println("NOT FULLY IMPLEMENTED!");
    // TODO: Finish adapting this method
    for (int i = 0; i < nseq; i++) {
      fasta_out.write(">" + seqs[i].getName() + "\n");
      String residues = seqs[i].getSequence();
      if (width <= 0) {
        fasta_out.write(residues + "\n");
      } else {
        // TODO: adapt to SymbolDictionary labelwidths
        int length = residues.length();
        for (int start = 0; start < length; start += width) {
          // The last chunk may be shorter than the requested width.
          fasta_out.write(residues, start, Math.min(width, length - start));
          fasta_out.write("\n");
        }
      }
    }
    // The caller owns the stream, so only flush what was buffered here.
    fasta_out.flush();
  }

  /**
   * Reads a set of sequences from FASTA formatted text.
   *
   * A line starting with "&gt;" begins a new record and the remainder of that
   * line becomes the sequence name. All following lines up to the next header
   * (or end of input) are concatenated, with whitespace removed, to form the
   * sequence text. Lines that appear before the first header are skipped. No
   * filtering of symbols is performed: every non-whitespace character is
   * kept. Each record is created through Seq.newSequence using
   * SymbolDictionary.STANDARD_AA.
   *
   * TODO: introduce a dictionary parameter for qualified sequence symbols
   *
   * @param os
   *          stream to read from (despite the name, this is the input); it is
   *          decoded with the platform default charset and is not closed
   * @return the sequences in file order, or null if the input contained no
   *         header line
   * @throws IOException
   *           if reading from the stream fails
   */
  public static Sequence[] read_SeqFasta(InputStream os) throws IOException {
    Vector seqs = new Vector();
    BufferedReader infasta = new BufferedReader(new InputStreamReader(os));
    System.err.println("NOT FULLY IMPLEMENTED!");
    // TODO: Finish adapting this method
    // TODO: decide on return type - SequenceType is a partly complete vamsas
    // Vobject - either for a dataset or alignment sequence
    // so could go in either!

    // Name of the record currently being accumulated (without the leading
    // '>'), or null while no header has been seen yet.
    String name = null;
    StringBuffer residues = new StringBuffer();

    String line;
    while ((line = infasta.readLine()) != null) {
      if (line.startsWith(">")) {
        // A new header closes the record that was being built, if any.
        if (name != null) {
          seqs.add(Seq.newSequence(name, residues.toString(),
              SymbolDictionary.STANDARD_AA, 0, 0));
        }
        name = line.substring(1);
        residues.setLength(0);
      } else if (name != null) {
        residues.append(WHITESPACE.matcher(line).replaceAll(""));
      }
      // Anything before the first header has no record to belong to and is
      // skipped.
    }
    // End of input closes the final record.
    if (name != null) {
      seqs.add(Seq.newSequence(name, residues.toString(),
          SymbolDictionary.STANDARD_AA, 0, 0));
    }

    if (seqs.isEmpty()) {
      return null;
    }
    // TODO:POSS: should really return a sequence if there's only one in the
    // file.
    Sequence[] seqset = new Sequence[seqs.size()];
    seqs.copyInto(seqset);
    return seqset;
  }

  /**
   * Renames every sequence to a generated placeholder name ("Sequence0",
   * "Sequence1", ... by array index) and records how to undo the renaming.
   * The sequences are modified in place.
   *
   * @param sequences
   *          sequences to rename
   * @return table mapping each placeholder name to the name it replaced
   */
  public static Hashtable uniquify(SequenceType[] sequences) {
    System.err.println("NOT FULLY IMPLEMENTED!");
    // TODO: Finish adapting this method
    // TODO: do we need this with vamsas sequences ?
    Hashtable map = new Hashtable();
    for (int i = 0; i < sequences.length; i++) {
      String safename = "Sequence" + i;
      map.put(safename, sequences[i].getName());
      sequences[i].setName(safename);
    }
    return map;
  }

  /**
   * Restores the names recorded in the given table. Each sequence whose
   * current name is a key in the table is renamed to the associated value;
   * sequences with no entry are left unchanged. The sequences are modified in
   * place.
   *
   * @param map
   *          table of current name to replacement name, as returned by
   *          {@link #uniquify(SequenceType[])}
   * @param sequences
   *          sequences to rename
   * @return true if every sequence had an entry in the table, false if at
   *         least one did not
   */
  public static boolean deuniquify(Hashtable map, SequenceType[] sequences) {
    System.err.println("NOT FULLY IMPLEMENTED!");
    // TODO: Finish adapting this method
    // TODO: do we need this with vamsas sequences ?
    boolean allfound = true;
    for (int i = 0; i < sequences.length; i++) {
      // A Hashtable cannot hold null values, so a null result means the key
      // is absent.
      String unsafename = (String) map.get(sequences[i].getName());
      if (unsafename != null) {
        sequences[i].setName(unsafename);
      } else {
        allfound = false;
      }
    }
    return allfound;
  }
}