import java.util.Vector;
import java.util.regex.Pattern;
-import vamsas.objects.simple.Alignment;
-import vamsas.objects.simple.Sequence;
-import vamsas.objects.simple.SequenceSet;
+import org.vamsas.objects.core.*;
/**
* @author jimp
- *
- * TODO To change the template for this generated type comment go to
- * Window - Preferences - Java - Code Style - Code Templates
+ *
+ * TODO To change the template for this generated type comment go to Window -
+ * Preferences - Java - Code Style - Code Templates
*/
-public class SeqAln extends vamsas.objects.simple.Alignment {
-
- public static Sequence[] ReadClustalFile(InputStream os) throws Exception {
-
- Pattern nonGap = Pattern.compile("[A-Z*0-9]", Pattern.CASE_INSENSITIVE);
- String gapchars = "";
- char gapchar='-';
-
- int i = 0;
- boolean flag = false;
-
- Vector headers = new Vector();
- Hashtable seqhash = new Hashtable();
- Sequence[] seqs=null;
- int noSeqs = 0;
- String line;
-
- try {
- BufferedReader ins = new BufferedReader(new InputStreamReader(os));
- while ((line = ins.readLine()) != null) {
- if (line.indexOf(" ") != 0) {
- java.util.StringTokenizer str = new StringTokenizer(line," ");
- String id = "";
-
- if (str.hasMoreTokens()) {
- id = str.nextToken();
- if (id.equals("CLUSTAL")) {
- flag = true;
- } else {
- if (flag) {
- StringBuffer tempseq;
- if (seqhash.containsKey(id)) {
- tempseq = (StringBuffer)seqhash.get(id);
- } else {
- tempseq = new StringBuffer();
- seqhash.put(id,tempseq);
- }
-
- if (!(headers.contains(id))) {
- headers.addElement(id);
- }
-
- tempseq.append(str.nextToken());
- }
- }
- }
- }
- }
-
- } catch (IOException e) {
- throw(new Exception("Exception parsing clustal file ",e));
- }
-
- if (flag) {
- noSeqs = headers.size();
-
- //Add sequences to the hash
- seqs = new Sequence[headers.size()];
- for (i = 0; i < headers.size(); i++ ) {
- if ( seqhash.get(headers.elementAt(i)) != null) {
-
- Sequence newSeq = new Sequence(headers.elementAt(i).toString(),
- seqhash.get(headers.elementAt(i).toString()).toString());
-
- seqs[i]=newSeq;
-
- } else {
- throw(new Exception("Bizarreness! Can't find sequence for " + headers.elementAt(i)));
- }
- }
- }
- return seqs;
- }
-
- public static void WriteClustalWAlignment(java.io.OutputStream os, Alignment seqAl) throws IOException {
- Sequence[] s = seqAl.getSeqs().getSeqs();
-
- java.io.BufferedWriter out = new BufferedWriter(new java.io.OutputStreamWriter(os));
-
- out.write("CLUSTAL\n\n");
-
- int max = 0;
- int maxid = 0;
-
- int i = 0;
-
- while (i < s.length && s[i] != null) {
- String tmp = s[i].getId();
-
- if (s[i].getSeq().length() > max) {
- max = s[i].getSeq().length();
- }
- if (tmp.length() > maxid) {
- maxid = tmp.length();
- }
- i++;
- }
-
- if (maxid < 15) {
- maxid = 15;
- }
- maxid++;
- int len = 60;
- int nochunks = max / len + 1;
-
- for (i = 0; i < nochunks; i++) {
- int j = 0;
- while ( j < s.length && s[j] != null) {
- out.write(new Format("%-" + maxid + "s").form(s[j].getId()+" "));
- int start = i*len;
- int end = start + len;
-
- if (end < s[j].getSeq().length() && start < s[j].getSeq().length() ) {
- out.write(s[j].getSeq().substring(start,end) + "\n");
- } else {
- if (start < s[j].getSeq().length()) {
- out.write(s[j].getSeq().substring(start) + "\n");
- }
- }
- j++;
- }
- out.write("\n");
-
- }
- }
-
-
- public static Alignment make_Alignment(vamsas.objects.simple.Action origin, Sequence[] seqs, String[] source) throws Exception {
- Pattern nonGap = Pattern.compile("[A-Z*0-9]", Pattern.CASE_INSENSITIVE);
- boolean gapsset = false;
- char gapchar='-';
- int seqLength = 0;
-
-
- for (int i=0, nseq=seqs.length; i<nseq; i++) {
- String seq = seqs[i].getSeq();
- String gaps = nonGap.matcher(seq).replaceAll("");
- if (seqLength==0) {
- seqLength=seq.length();
- } else
- if (seqLength!=seq.length())
- throw(new Exception(i+"th Sequence (>"+seqs[i].getId()+") is not aligned.\n"));//TODO: move this to assertions part of Alignment
-
- // common check for any sequence...
- if (gaps!=null && gaps.length()>0) {
- if (!gapsset)
- gapchar = gaps.charAt(0);
- for (int c=0, gc=gaps.length(); c<gc; c++) {
- if (gapchar!=gaps.charAt(c)) {
- throw(new IOException("Inconsistent gap characters in sequence "+i+": '"+seq+"'"));
- }
- }
- }
- }
-
- return new Alignment(origin, new SequenceSet(seqs), source, new String(""+gapchar));
- }
-
- public static Alignment read_FastaAlignment(InputStream os, String[] source) throws Exception {
- Sequence[] seqs;
- try {
- seqs = SeqSet.read_SeqFasta(os);
- if (seqs==null)
- throw(new Exception("Empty alignment stream!\n"));
- } catch (Exception e) {
- throw new Exception("Invalid fasta alignment\n",e);
- }
-
- return make_Alignment(new vamsas.objects.simple.Action(source[0]), seqs, source);
- }
- public static Alignment read_ClustalAlignment(InputStream os, String[] source) throws Exception {
- Sequence[] seqs;
- try {
- seqs = SeqAln.ReadClustalFile(os);
- if (seqs==null)
- throw(new Exception("Empty alignment stream!\n"));
- } catch (Exception e) {
- throw new Exception("Invalid fasta alignment\n",e);
- }
-
- return make_Alignment(new vamsas.objects.simple.Action(source[0]), seqs, source);
- }
+public class SeqAln extends org.vamsas.objects.core.Alignment {
+
+ public static Sequence[] ReadClustalFile(InputStream os) throws Exception {
+ System.err.println("NOT FULLY IMPLEMENTED!"); // TODO: Finish adapting this method
+ Pattern nonGap = Pattern.compile("[A-Z*0-9]", Pattern.CASE_INSENSITIVE);
+ String gapchars = "";
+ char gapchar = '-';
+
+ int i = 0;
+ boolean flag = false;
+
+ Vector headers = new Vector();
+ Hashtable seqhash = new Hashtable();
+ Sequence[] seqs = null;
+ int noSeqs = 0;
+ String line;
+
+ try {
+ BufferedReader ins = new BufferedReader(new InputStreamReader(os));
+ while ((line = ins.readLine()) != null) {
+ if (line.indexOf(" ") != 0) {
+ java.util.StringTokenizer str = new StringTokenizer(line, " ");
+ String id = "";
+
+ if (str.hasMoreTokens()) {
+ id = str.nextToken();
+ if (id.equals("CLUSTAL")) {
+ flag = true;
+ } else {
+ if (flag) {
+ StringBuffer tempseq;
+ if (seqhash.containsKey(id)) {
+ tempseq = (StringBuffer) seqhash.get(id);
+ } else {
+ tempseq = new StringBuffer();
+ seqhash.put(id, tempseq);
+ }
+
+ if (!(headers.contains(id))) {
+ headers.addElement(id);
+ }
+
+ tempseq.append(str.nextToken());
+ }
+ }
+ }
+ }
+ }
+
+ } catch (IOException e) {
+ throw (new Exception("Exception parsing clustal file ", e));
+ }
+
+ if (flag) {
+ noSeqs = headers.size();
+
+ // Add sequences to the hash
+ seqs = new Sequence[headers.size()];
+ for (i = 0; i < headers.size(); i++) {
+ if (seqhash.get(headers.elementAt(i)) != null) {
+ // TODO: develop automatic dictionary typing for sequences
+ Sequence newSeq = Seq.newSequence(headers.elementAt(i).toString(),
+ seqhash.get(headers.elementAt(i).toString()).toString(),
+ SymbolDictionary.STANDARD_AA,0,0);
+
+ seqs[i] = newSeq;
+
+ } else {
+ throw (new Exception("Bizarreness! Can't find sequence for "
+ + headers.elementAt(i)));
+ }
+ }
+ }
+ return seqs;
+ }
+
+ /**
+  * Write an alignment to a stream in CLUSTAL style: a "CLUSTAL" header
+  * line followed by blocks of at most 60 columns, one row per sequence,
+  * each row prefixed by the sequence id left-justified in a column at least
+  * 16 characters wide.
+  * <p>
+  * Only the leading run of non-null entries of the alignment's sequence
+  * array is written. The stream is flushed but not closed.
+  *
+  * @param os stream to write to
+  * @param seqAl alignment whose sequences are written
+  * @throws IOException if writing to the stream fails
+  */
+ public static void WriteClustalWAlignment(java.io.OutputStream os,
+     Alignment seqAl) throws IOException {
+   System.err.println("NOT FULLY IMPLEMENTED!"); // TODO: Finish adapting this method
+   AlignmentSequence[] s = seqAl.getAlignmentSequence();
+
+   java.io.BufferedWriter out = new BufferedWriter(
+       new java.io.OutputStreamWriter(os));
+
+   out.write("CLUSTAL\n\n");
+
+   // Find the longest sequence and the longest id
+   int max = 0;
+   int maxid = 0;
+   for (int i = 0; i < s.length && s[i] != null; i++) {
+     int seqlen = s[i].getSequence().length();
+     if (seqlen > max) {
+       max = seqlen;
+     }
+     int idlen = s[i].getId().length();
+     if (idlen > maxid) {
+       maxid = idlen;
+     }
+   }
+
+   if (maxid < 15) {
+     maxid = 15;
+   }
+   maxid++;
+
+   int len = 60;
+   // Round up, so that a length which is an exact multiple of len does not
+   // produce a trailing block of ids with no residues
+   int nochunks = (max + len - 1) / len;
+
+   for (int i = 0; i < nochunks; i++) {
+     int start = i * len;
+     int end = start + len;
+     for (int j = 0; j < s.length && s[j] != null; j++) {
+       String seq = s[j].getSequence();
+       out.write(new Format("%-" + maxid + "s").form(s[j].getId() + " "));
+       if (start < seq.length()) {
+         if (end < seq.length()) {
+           out.write(seq.substring(start, end));
+         } else {
+           out.write(seq.substring(start));
+         }
+       }
+       // Always end the row, even when this sequence is shorter than the
+       // block start, so that the next id begins on its own line
+       out.write("\n");
+     }
+     out.write("\n");
+   }
+   // Without this, anything still held in the BufferedWriter never reaches os
+   out.flush();
+ }
+ /**
+  * Manufacture an alignment from an array of sequences.
+  * <p>
+  * Every sequence must have the same length. Any character that does not
+  * match [A-Z*0-9] (case-insensitively) is treated as a gap character, and
+  * the same single gap character must be used throughout all sequences. If
+  * no sequence contains a gap, the gap character defaults to '-'.
+  *
+  * @param origin entry used to create the new alignment's provenance
+  * @param seqs aligned sequences; each is copied into an AlignmentSequence
+  *          that refers back to it by its Vorba id
+  * @return new alignment holding one AlignmentSequence per input sequence
+  * @throws Exception if the sequences differ in length, or (as an
+  *           IOException) if more than one gap character is used
+  */
+ public static Alignment make_Alignment(Entry origin,
+     Sequence[] seqs) throws Exception {
+   System.err.println("NOT FULLY IMPLEMENTED!"); // TODO: Finish adapting this method
+   Alignment al = new Alignment();
+   al.setProvenance(ProvenanceStuff.newProvenance(origin));
+
+   Pattern nonGap = Pattern.compile("[A-Z*0-9]", Pattern.CASE_INSENSITIVE);
+   boolean gapsset = false; // true once gapchar has been taken from the data
+   char gapchar = '-';
+   int seqLength = 0; // 0 doubles as "not yet known"
+
+   for (int i = 0, nseq = seqs.length; i < nseq; i++) {
+     String seq = seqs[i].getSequence();
+     // Strip all residue symbols; what remains are this sequence's gaps
+     String gaps = nonGap.matcher(seq).replaceAll("");
+     if (seqLength == 0) {
+       seqLength = seq.length();
+     } else if (seqLength != seq.length()) {
+       // TODO: move this to assertions part of Alignment
+       throw (new Exception(i + "th Sequence (>" + seqs[i].getId()
+           + ") is not aligned.\n"));
+     }
+
+     // common check for any sequence...
+     if (gaps != null && gaps.length() > 0) {
+       if (!gapsset) {
+         gapchar = gaps.charAt(0);
+         // Remember the choice so later sequences are checked against it,
+         // instead of each sequence re-seeding gapchar from its own gaps
+         gapsset = true;
+       }
+       for (int c = 0, gc = gaps.length(); c < gc; c++) {
+         if (gapchar != gaps.charAt(c)) {
+           throw (new IOException("Inconsistent gap characters in sequence "
+               + i + ": '" + seq + "'"));
+         }
+       }
+     }
+     AlignmentSequence sq = new AlignmentSequence();
+     // TODO: use as basis of default AlignSequence(Sequence) constructor.
+     sq.setSequence(seq);
+     sq.setName(seqs[i].getId());
+     sq.setRefid(seqs[i].getVorbaId());
+     sq.setStart(seqs[i].getStart());
+     sq.setEnd(seqs[i].getEnd());
+     al.addAlignmentSequence(sq);
+   }
+   al.setGapChar(String.valueOf(gapchar));
+   return al;
+ }
+
+ /**
+  * Read sequences with SeqSet.read_SeqFasta and build an alignment from
+  * them with make_Alignment.
+  *
+  * @param os stream to read the sequences from
+  * @param entry passed to make_Alignment as the alignment's origin
+  * @return the alignment built by make_Alignment
+  * @throws Exception wrapping any parse failure (including a null result
+  *           from the parser), or whatever make_Alignment throws
+  */
+ public static Alignment read_FastaAlignment(InputStream os, Entry entry)
+     throws Exception {
+   Sequence[] parsed;
+   System.err.println("NOT FULLY IMPLEMENTED!"); // TODO: Finish adapting this method
+   try {
+     parsed = SeqSet.read_SeqFasta(os);
+     if (parsed == null) {
+       // caught just below and reported as the cause of the parse failure
+       throw (new Exception("Empty alignment stream!\n"));
+     }
+   } catch (Exception cause) {
+     throw new Exception("Invalid fasta alignment\n", cause);
+   }
+
+   Alignment built = make_Alignment(entry, parsed);
+   return built;
+ }
+
+ /**
+  * Read sequences with ReadClustalFile and build an alignment from them
+  * with make_Alignment.
+  *
+  * @param os stream to read the sequences from
+  * @param entry passed to make_Alignment as the alignment's origin
+  * @return the alignment built by make_Alignment
+  * @throws Exception wrapping any parse failure (including a null result
+  *           from the parser), or whatever make_Alignment throws
+  */
+ public static Alignment read_ClustalAlignment(InputStream os, Entry entry)
+     throws Exception {
+   Sequence[] seqs;
+   try {
+     seqs = SeqAln.ReadClustalFile(os);
+     if (seqs == null) {
+       throw (new Exception("Empty alignment stream!\n"));
+     }
+   } catch (Exception e) {
+     // The message used to say "fasta", copied from read_FastaAlignment
+     throw new Exception("Invalid clustal alignment\n", e);
+   }
+   System.err.println("NOT FULLY IMPLEMENTED!"); // TODO: Finish adapting this method
+   return make_Alignment(entry, seqs);
+ }
}