/*
 * This file is part of the Vamsas Client version 0.2.
 * Copyright 2010 by Jim Procter, Iain Milne, Pierre Marguerite,
 * Andrew Waterhouse and Dominik Lindner.
 *
 * Earlier versions have also been incorporated into Jalview version 2.4
 * since 2008, and TOPALi version 2 since 2007.
 *
 * The Vamsas Client is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * The Vamsas Client is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with the Vamsas Client. If not, see <http://www.gnu.org/licenses/>.
 */
package uk.ac.vamsas.objects.utils;

import java.io.BufferedOutputStream;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.Hashtable;
import java.util.StringTokenizer;
import java.util.Vector;
import java.util.regex.Pattern;

import uk.ac.vamsas.objects.core.*;

/**
 * Static helpers for reading CLUSTAL and FASTA formatted alignments into
 * {@link Alignment} objects, and for writing an alignment out in CLUSTAL
 * format.
 *
 * Every method here still prints "NOT FULLY IMPLEMENTED!" to System.err when
 * called; see the TODO markers in each method.
 *
 * @author jimp
 */
public class SeqAln extends uk.ac.vamsas.objects.core.Alignment {

  /**
   * Matches every character that is treated as a sequence symbol (letters of
   * either case, digits and '*'). Whatever is left after removing these from
   * a sequence string is treated as gap characters.
   */
  private static final Pattern NON_GAP = Pattern.compile("[A-Z*0-9]",
      Pattern.CASE_INSENSITIVE);

  /** Number of alignment columns written per block of CLUSTAL output. */
  private static final int CLUSTAL_BLOCK_WIDTH = 60;

  /** Minimum width of the sequence id column in CLUSTAL output. */
  private static final int CLUSTAL_MIN_ID_WIDTH = 15;

  /**
   * Read sequences from a CLUSTAL formatted stream.
   *
   * Lines beginning with a space are ignored. Nothing is parsed until a line
   * whose first space-delimited token is "CLUSTAL" has been seen. After that,
   * the first token of each line is taken as a sequence id and the second
   * token is appended to the sequence collected for that id. Lines that carry
   * an id but no second token are skipped. Sequences are returned in the
   * order in which their ids were first encountered.
   *
   * The stream is read to its end but is not closed.
   *
   * @param os
   *          stream to read the CLUSTAL file from
   * @return the sequences that were read, or null if no "CLUSTAL" header line
   *         was found
   * @throws Exception
   *           if reading from the stream fails
   */
  public static Sequence[] ReadClustalFile(InputStream os) throws Exception {
    System.err.println("NOT FULLY IMPLEMENTED!"); // TODO: Finish adapting this
    // method
    boolean headerSeen = false;
    Vector headers = new Vector(); // ids in first-seen order
    Hashtable seqhash = new Hashtable(); // id -> StringBuffer of residues
    String line;

    try {
      BufferedReader ins = new BufferedReader(new InputStreamReader(os));
      while ((line = ins.readLine()) != null) {
        if (line.indexOf(" ") == 0) {
          // line starts with a space - not a sequence line
          continue;
        }
        StringTokenizer str = new StringTokenizer(line, " ");
        if (!str.hasMoreTokens()) {
          continue;
        }
        String id = str.nextToken();
        if (id.equals("CLUSTAL")) {
          headerSeen = true;
          continue;
        }
        // Ignore anything before the header, and any line that has an id but
        // no sequence data (calling nextToken() on it would throw
        // NoSuchElementException).
        if (!headerSeen || !str.hasMoreTokens()) {
          continue;
        }
        StringBuffer tempseq = (StringBuffer) seqhash.get(id);
        if (tempseq == null) {
          tempseq = new StringBuffer();
          seqhash.put(id, tempseq);
          headers.addElement(id);
        }
        tempseq.append(str.nextToken());
      }
    } catch (IOException e) {
      throw (new Exception("Exception parsing clustal file ", e));
    }

    if (!headerSeen) {
      return null;
    }

    Sequence[] seqs = new Sequence[headers.size()];
    for (int i = 0; i < headers.size(); i++) {
      String id = headers.elementAt(i).toString();
      Object residues = seqhash.get(id);
      if (residues == null) {
        throw (new Exception("Bizarreness! Can't find sequence for " + id));
      }
      // TODO: develop automatic dictionary typing for sequences
      seqs[i] = Seq.newSequence(id, residues.toString(),
          SymbolDictionary.STANDARD_AA, 0, 0);
    }
    return seqs;
  }

  /**
   * Write an alignment to a stream in CLUSTAL format.
   *
   * Output starts with a "CLUSTAL" header line followed by a blank line. The
   * sequences are then written in blocks of at most 60 columns; each row is
   * the sequence id, left-justified in a column at least 16 characters wide,
   * followed by that block's slice of the sequence. Each block is followed by
   * a blank line. A sequence that has no characters left for a block is
   * omitted from that block. Only the leading run of non-null entries of the
   * alignment's sequence array is written.
   *
   * The output is flushed before returning; the stream is not closed.
   *
   * @param os
   *          stream to write to
   * @param seqAl
   *          alignment whose sequences are written
   * @throws IOException
   *           if writing to the stream fails
   */
  public static void WriteClustalWAlignment(java.io.OutputStream os,
      Alignment seqAl) throws IOException {
    System.err.println("NOT FULLY IMPLEMENTED!"); // TODO: Finish adapting this
    // method
    AlignmentSequence[] s = seqAl.getAlignmentSequence();

    java.io.BufferedWriter out = new BufferedWriter(
        new java.io.OutputStreamWriter(os));

    out.write("CLUSTAL\n\n");

    // Find the longest sequence and the longest id among the leading non-null
    // entries.
    // NOTE(review): getId() is dereferenced without a null check, as in the
    // original code - confirm ids are always set on sequences passed in here.
    int max = 0;
    int maxid = 0;
    int nseqs = 0;
    while (nseqs < s.length && s[nseqs] != null) {
      int seqLen = s[nseqs].getSequence().length();
      if (seqLen > max) {
        max = seqLen;
      }
      int idLen = s[nseqs].getId().length();
      if (idLen > maxid) {
        maxid = idLen;
      }
      nseqs++;
    }

    if (maxid < CLUSTAL_MIN_ID_WIDTH) {
      maxid = CLUSTAL_MIN_ID_WIDTH;
    }
    maxid++;

    int len = CLUSTAL_BLOCK_WIDTH;
    // Round up, so that a length that is an exact multiple of the block width
    // does not produce a trailing block with no residues in it.
    int nochunks = (max + len - 1) / len;

    for (int i = 0; i < nochunks; i++) {
      int start = i * len;
      for (int j = 0; j < nseqs; j++) {
        String seq = s[j].getSequence();
        if (start >= seq.length()) {
          // Nothing left of this sequence; writing only the id would leave an
          // unterminated row in the output.
          continue;
        }
        int end = Math.min(start + len, seq.length());
        out.write(new Format("%-" + maxid + "s").form(s[j].getId() + " "));
        out.write(seq.substring(start, end) + "\n");
      }
      out.write("\n");
    }
    // Push buffered text through to the caller's stream; without this a short
    // alignment never leaves the BufferedWriter.
    out.flush();
  }

  /**
   * Manufacture an alignment/dataset from an array of sequences.
   *
   * Each sequence is copied into a new AlignmentSequence (sequence string,
   * name taken from the sequence id, reference to the sequence's Vorba id,
   * start and end). All sequences must have the same length, and all
   * characters not matched by the sequence-symbol pattern must be the same
   * single gap character across the whole alignment. That character ('-' if
   * no gaps are present) is set as the alignment's gap character.
   *
   * @param origin
   *          entry used to create the alignment's provenance
   * @param seqs
   *          the aligned sequences
   * @return the new alignment
   * @throws Exception
   *           if the sequences differ in length; an IOException is thrown if
   *           more than one gap character is used
   */
  public static Alignment make_Alignment(Entry origin, Sequence[] seqs)
      throws Exception {
    System.err.println("NOT FULLY IMPLEMENTED!"); // TODO: Finish adapting this
    // method
    Alignment al = new Alignment();
    al.setProvenance(ProvenanceStuff.newProvenance(origin));

    boolean gapsset = false;
    char gapchar = '-';
    int seqLength = 0;

    for (int i = 0, nseq = seqs.length; i < nseq; i++) {
      String seq = seqs[i].getSequence();
      String gaps = NON_GAP.matcher(seq).replaceAll("");
      if (seqLength == 0) {
        seqLength = seq.length();
      } else if (seqLength != seq.length()) {
        // TODO: move this to assertions part of Alignment
        throw (new Exception(i + "th Sequence (>" + seqs[i].getId()
            + ") is not aligned.\n"));
      }

      // common check for any sequence...
      if (gaps != null && gaps.length() > 0) {
        if (!gapsset) {
          gapchar = gaps.charAt(0);
          // Remember the choice so that later sequences are checked against
          // it rather than each picking their own gap character.
          gapsset = true;
        }
        for (int c = 0, gc = gaps.length(); c < gc; c++) {
          if (gapchar != gaps.charAt(c)) {
            throw (new IOException("Inconsistent gap characters in sequence "
                + i + ": '" + seq + "'"));
          }
        }
      }
      // TODO: use as basis of default AlignSequence(Sequence) constructor.
      AlignmentSequence sq = new AlignmentSequence();
      sq.setSequence(seq);
      sq.setName(seqs[i].getId());
      sq.setRefid(seqs[i].getVorbaId());
      sq.setStart(seqs[i].getStart());
      sq.setEnd(seqs[i].getEnd());
      al.addAlignmentSequence(sq);
    }
    al.setGapChar(String.valueOf(gapchar));
    return al;
  }

  /**
   * Read a FASTA formatted alignment from a stream.
   *
   * @param os
   *          stream to read the FASTA data from
   * @param entry
   *          entry used to create the alignment's provenance
   * @return the alignment built from the sequences that were read
   * @throws Exception
   *           if the stream cannot be parsed or yields no sequences, or if
   *           the sequences do not form a valid alignment
   */
  public static Alignment read_FastaAlignment(InputStream os, Entry entry)
      throws Exception {
    Sequence[] seqs;
    System.err.println("NOT FULLY IMPLEMENTED!"); // TODO: Finish adapting this
    // method
    try {
      seqs = SeqSet.read_SeqFasta(os);
      if (seqs == null) {
        throw (new Exception("Empty alignment stream!\n"));
      }
    } catch (Exception e) {
      throw new Exception("Invalid fasta alignment\n", e);
    }

    return make_Alignment(entry, seqs);
  }

  /**
   * Read a CLUSTAL formatted alignment from a stream.
   *
   * @param os
   *          stream to read the CLUSTAL data from
   * @param entry
   *          entry used to create the alignment's provenance
   * @return the alignment built from the sequences that were read
   * @throws Exception
   *           if the stream cannot be parsed or yields no sequences, or if
   *           the sequences do not form a valid alignment
   */
  public static Alignment read_ClustalAlignment(InputStream os, Entry entry)
      throws Exception {
    Sequence[] seqs;
    try {
      seqs = SeqAln.ReadClustalFile(os);
      if (seqs == null) {
        throw (new Exception("Empty alignment stream!\n"));
      }
    } catch (Exception e) {
      throw new Exception("Invalid clustal alignment\n", e);
    }
    System.err.println("NOT FULLY IMPLEMENTED!"); // TODO: Finish adapting this
    // method
    return make_Alignment(entry, seqs);
  }
}