refactored and incorporated basic io routines from vamsas web services stack.
[vamsas.git] / src / org / vamsas / objects / utils / SeqAln.java
index dfe5e5b..5e351f9 100644 (file)
@@ -17,196 +17,222 @@ import java.util.StringTokenizer;
 import java.util.Vector;
 import java.util.regex.Pattern;
 
-import vamsas.objects.simple.Alignment;
-import vamsas.objects.simple.Sequence;
-import vamsas.objects.simple.SequenceSet;
+import org.vamsas.objects.core.*;
 
 /**
  * @author jimp
- *
- * TODO To change the template for this generated type comment go to
- * Window - Preferences - Java - Code Style - Code Templates
+ * 
+ * Static helpers that read and write sequence alignments (CLUSTAL, FASTA)
+ * using the org.vamsas.objects.core object model.
  */
-public class SeqAln extends vamsas.objects.simple.Alignment {
-       
-       public static Sequence[] ReadClustalFile(InputStream os) throws Exception {
-
-               Pattern nonGap = Pattern.compile("[A-Z*0-9]", Pattern.CASE_INSENSITIVE);        
-               String gapchars = "";
-               char gapchar='-';
-       
-               int     i    = 0;
-           boolean flag = false;
-           
-           Vector    headers = new Vector();
-           Hashtable seqhash = new Hashtable();
-           Sequence[] seqs=null;
-           int noSeqs = 0;
-           String line;
-
-           try {
-               BufferedReader ins = new BufferedReader(new InputStreamReader(os));
-               while ((line = ins.readLine()) != null) {
-                       if (line.indexOf(" ") != 0) {
-                               java.util.StringTokenizer str = new StringTokenizer(line," ");
-                               String id = "";
-                               
-                               if (str.hasMoreTokens()) {
-                                       id = str.nextToken();
-                                       if (id.equals("CLUSTAL")) {
-                                               flag = true;
-                                       } else {
-                                               if (flag) {
-                                                       StringBuffer tempseq;
-                                                       if (seqhash.containsKey(id)) {
-                                                               tempseq = (StringBuffer)seqhash.get(id);
-                                                       } else {
-                                                               tempseq = new StringBuffer();
-                                                               seqhash.put(id,tempseq);
-                                                       }
-                                                       
-                                                       if (!(headers.contains(id))) {
-                                                               headers.addElement(id);
-                                                       }
-                                                       
-                                                       tempseq.append(str.nextToken());
-                                               }
-                                       }
-                               }
-                       }
-               }
-               
-           } catch (IOException e) {
-               throw(new Exception("Exception parsing clustal file ",e));
-           }
-           
-           if (flag) {
-               noSeqs = headers.size();
-               
-               //Add sequences to the hash
-               seqs = new Sequence[headers.size()];
-             for (i = 0; i < headers.size(); i++ ) {
-               if ( seqhash.get(headers.elementAt(i)) != null) {
-                       
-                       Sequence newSeq = new Sequence(headers.elementAt(i).toString(),
-                                                seqhash.get(headers.elementAt(i).toString()).toString());
-                       
-                       seqs[i]=newSeq;
-
-               } else {
-                 throw(new Exception("Bizarreness! Can't find sequence for " + headers.elementAt(i)));
-               }
-             }
-           }
-           return seqs;
-         }
-
-         public static void WriteClustalWAlignment(java.io.OutputStream os, Alignment seqAl) throws IOException {
-               Sequence[] s = seqAl.getSeqs().getSeqs();
-               
-               java.io.BufferedWriter out = new BufferedWriter(new java.io.OutputStreamWriter(os));
-               
-               out.write("CLUSTAL\n\n");
-
-           int max = 0;
-           int maxid = 0;
-
-           int i = 0;
-
-           while (i < s.length && s[i] != null) {
-             String tmp = s[i].getId();
-
-             if (s[i].getSeq().length() > max) {
-               max = s[i].getSeq().length();
-             }
-             if (tmp.length() > maxid) {
-               maxid = tmp.length();
-             }
-             i++;
-           }
-
-           if (maxid < 15) {
-             maxid = 15;
-           }
-           maxid++;
-           int len = 60;
-           int nochunks =  max / len + 1;
-
-           for (i = 0; i < nochunks; i++) {
-             int j = 0;
-             while ( j < s.length && s[j] != null) {
-               out.write(new Format("%-" + maxid + "s").form(s[j].getId()+" "));
-               int start = i*len;
-               int end = start + len;
-
-               if (end < s[j].getSeq().length() && start < s[j].getSeq().length() ) {
-                 out.write(s[j].getSeq().substring(start,end) + "\n");
-               } else {
-                 if (start < s[j].getSeq().length()) {
-                   out.write(s[j].getSeq().substring(start) + "\n");
-                 }
-               }
-               j++;
-             }
-             out.write("\n");
-
-           }
-        }
-
-
-         public static Alignment make_Alignment(vamsas.objects.simple.Action origin, Sequence[] seqs, String[] source) throws Exception {
-               Pattern nonGap = Pattern.compile("[A-Z*0-9]", Pattern.CASE_INSENSITIVE);        
-               boolean gapsset = false;
-               char gapchar='-';
-               int seqLength = 0;
-               
-               
-               for (int i=0, nseq=seqs.length; i<nseq; i++) {
-                       String seq = seqs[i].getSeq();
-                       String gaps = nonGap.matcher(seq).replaceAll("");
-                       if (seqLength==0) {
-                               seqLength=seq.length();
-                       } else 
-                               if (seqLength!=seq.length())
-                                       throw(new Exception(i+"th Sequence (>"+seqs[i].getId()+") is not aligned.\n"));//TODO: move this to assertions part of Alignment
-                       
-                       // common check for any sequence...
-                       if (gaps!=null && gaps.length()>0) {
-                               if (!gapsset)
-                                       gapchar = gaps.charAt(0);
-                               for (int c=0, gc=gaps.length(); c<gc; c++) {
-                                       if (gapchar!=gaps.charAt(c)) {
-                                               throw(new IOException("Inconsistent gap characters in sequence "+i+": '"+seq+"'"));
-                                       }
-                               }
-                       }
-               }
-
-               return new Alignment(origin, new SequenceSet(seqs), source, new String(""+gapchar));
-         }
-         
-         public static Alignment read_FastaAlignment(InputStream os, String[] source) throws Exception {
-               Sequence[] seqs;
-               try {
-                       seqs = SeqSet.read_SeqFasta(os);
-                       if (seqs==null)
-                               throw(new Exception("Empty alignment stream!\n"));
-               } catch (Exception e) {
-                       throw new Exception("Invalid fasta alignment\n",e);
-               }
-               
-               return make_Alignment(new vamsas.objects.simple.Action(source[0]), seqs, source);
-         }
-         public static Alignment read_ClustalAlignment(InputStream os, String[] source) throws Exception {
-               Sequence[] seqs;
-               try {
-                       seqs = SeqAln.ReadClustalFile(os);
-                       if (seqs==null)
-                               throw(new Exception("Empty alignment stream!\n"));
-               } catch (Exception e) {
-                       throw new Exception("Invalid fasta alignment\n",e);
-               }
-               
-               return make_Alignment(new vamsas.objects.simple.Action(source[0]), seqs, source);
-         }
+public class SeqAln extends org.vamsas.objects.core.Alignment {
+
+  /**
+   * Reads a CLUSTAL-format alignment from a stream.
+   * <p>
+   * Lines before the first line whose leading token is "CLUSTAL" are
+   * ignored, as are lines that begin with a space. Every other line is
+   * split on spaces: the first token is the sequence id and the second
+   * token is appended to the residues collected so far for that id.
+   * Each Sequence is created through Seq.newSequence with the
+   * SymbolDictionary.STANDARD_AA dictionary and 0, 0 as the last two
+   * arguments.
+   *
+   * @param os stream to read from; it is not closed here
+   * @return one Sequence per id, in order of first appearance, or null
+   *         if no "CLUSTAL" header line was found
+   * @throws Exception if the stream cannot be read, or if a line gives
+   *           an id with no sequence data after it
+   */
+  public static Sequence[] ReadClustalFile(InputStream os) throws Exception {
+    System.err.println("NOT FULLY IMPLEMENTED!"); // TODO: Finish adapting this method
+    boolean flag = false; // true once the "CLUSTAL" header has been seen
+    Vector headers = new Vector(); // ids, in order of first appearance
+    Hashtable seqhash = new Hashtable(); // id -> StringBuffer of residues
+    String line;
+
+    try {
+      BufferedReader ins = new BufferedReader(new InputStreamReader(os));
+      while ((line = ins.readLine()) != null) {
+        if (line.indexOf(" ") == 0) {
+          continue; // leading space: not a sequence line
+        }
+        StringTokenizer str = new StringTokenizer(line, " ");
+        if (!str.hasMoreTokens()) {
+          continue; // empty line
+        }
+        String id = str.nextToken();
+        if (id.equals("CLUSTAL")) {
+          flag = true;
+          continue;
+        }
+        if (!flag) {
+          continue; // nothing is collected until the header has been seen
+        }
+        if (!str.hasMoreTokens()) {
+          // used to escape as a bare NoSuchElementException from nextToken()
+          throw new Exception("Clustal line for '" + id
+              + "' has no sequence data: '" + line + "'");
+        }
+        StringBuffer tempseq = (StringBuffer) seqhash.get(id);
+        if (tempseq == null) {
+          tempseq = new StringBuffer();
+          seqhash.put(id, tempseq);
+          headers.addElement(id);
+        }
+        tempseq.append(str.nextToken());
+      }
+    } catch (IOException e) {
+      throw (new Exception("Exception parsing clustal file ", e));
+    }
+
+    if (!flag) {
+      return null; // no header line: not treated as a clustal file
+    }
+
+    // Turn the collected residues into Sequence objects, in header order.
+    Sequence[] seqs = new Sequence[headers.size()];
+    for (int i = 0; i < seqs.length; i++) {
+      String id = headers.elementAt(i).toString();
+      // TODO: develop automatic dictionary typing for sequences
+      seqs[i] = Seq.newSequence(id, seqhash.get(id).toString(),
+          SymbolDictionary.STANDARD_AA, 0, 0);
+    }
+    return seqs;
+  }
+
+  /**
+   * Writes an alignment to a stream in CLUSTAL layout: a "CLUSTAL" header
+   * line, then blocks of up to 60 residues per sequence, each line starting
+   * with the sequence id left-justified in a fixed-width column.
+   * <p>
+   * Only the leading run of non-null entries of the alignment's sequence
+   * array is written. The stream is flushed but not closed. No charset is
+   * passed to the OutputStreamWriter, so the platform default is used.
+   *
+   * @param os stream to write to
+   * @param seqAl alignment whose AlignmentSequence entries are written
+   * @throws IOException if writing to the stream fails
+   */
+  public static void WriteClustalWAlignment(java.io.OutputStream os,
+      Alignment seqAl) throws IOException {
+    System.err.println("NOT FULLY IMPLEMENTED!"); // TODO: Finish adapting this method
+    AlignmentSequence[] s = seqAl.getAlignmentSequence();
+    java.io.BufferedWriter out = new BufferedWriter(
+        new java.io.OutputStreamWriter(os));
+    out.write("CLUSTAL\n\n");
+
+    // Count usable entries, and find the longest sequence and longest id.
+    int count = 0;
+    int max = 0;
+    int maxid = 0;
+    while (count < s.length && s[count] != null) {
+      max = Math.max(max, s[count].getSequence().length());
+      maxid = Math.max(maxid, s[count].getId().length());
+      count++;
+    }
+    // id column: at least 15 wide, plus one character of separation
+    maxid = Math.max(maxid, 15) + 1;
+
+    int len = 60;
+    // Round up, so that a length that is an exact multiple of len does not
+    // produce a trailing block made of ids with no residues.
+    int nochunks = (max + len - 1) / len;
+    for (int i = 0; i < nochunks; i++) {
+      int start = i * len;
+      for (int j = 0; j < count; j++) {
+        String seq = s[j].getSequence();
+        if (start >= seq.length()) {
+          continue; // nothing left of this sequence: skip its id as well
+        }
+        int end = Math.min(start + len, seq.length());
+        out.write(new Format("%-" + maxid + "s").form(s[j].getId() + " "));
+        out.write(seq.substring(start, end) + "\n");
+      }
+      out.write("\n");
+    }
+
+    // Without a flush the BufferedWriter may never pass its contents to os.
+    out.flush();
+  }
+  /**
+   * Manufactures an alignment from an array of sequences. Sequences must
+   * all have the same length and share one gap character (a gap being any
+   * character other than A-Z, a-z, 0-9 or '*'); '-' is recorded if none occurs.
+   *
+   * @param origin entry from which the alignment's provenance is created
+   * @param seqs sequences to place in the alignment
+   * @return the new alignment
+   * @throws Exception if lengths differ; IOException if gap characters are mixed
+   */
+  public static Alignment make_Alignment(Entry origin,
+      Sequence[] seqs) throws Exception {
+    System.err.println("NOT FULLY IMPLEMENTED!"); // TODO: Finish adapting this method
+    Alignment al = new Alignment();
+    al.setProvenance(ProvenanceStuff.newProvenance(origin));
+    Pattern nonGap = Pattern.compile("[A-Z*0-9]", Pattern.CASE_INSENSITIVE);
+    boolean gapsset = false;
+    char gapchar = '-';
+    int seqLength = 0;
+    for (int i = 0, nseq = seqs.length; i < nseq; i++) {
+      String seq = seqs[i].getSequence();
+      if (i > 0 && seq.length() != seqLength) {
+        throw (new Exception(i + "th Sequence (>" + seqs[i].getId()
+            + ") is not aligned.\n")); // TODO: move to assertions part of Alignment
+      }
+      seqLength = seq.length();
+      String gaps = nonGap.matcher(seq).replaceAll(""); // what remains are gaps
+      if (gaps.length() > 0) {
+        if (!gapsset) {
+          gapchar = gaps.charAt(0);
+          gapsset = true; // fix the gap character for the whole alignment
+        }
+        for (int c = 0, gc = gaps.length(); c < gc; c++) {
+          if (gapchar != gaps.charAt(c)) {
+            throw (new IOException("Inconsistent gap characters in sequence "
+                + i + ": '" + seq + "'"));
+          }
+        }
+      }
+      AlignmentSequence sq = new AlignmentSequence();
+      // TODO: use as basis of default AlignSequence(Sequence) constructor.
+      sq.setSequence(seq);
+      sq.setName(seqs[i].getId());
+      sq.setRefid(seqs[i].getVorbaId());
+      sq.setStart(seqs[i].getStart());
+      sq.setEnd(seqs[i].getEnd());
+      al.addAlignmentSequence(sq);
+    }
+    al.setGapChar(String.valueOf(gapchar));
+    return al;
+  }
+
+  /** Builds an alignment for entry from the sequences SeqSet.read_SeqFasta returns
+   *  for os; a null result or any read failure is rethrown as an invalid alignment. */
+  public static Alignment read_FastaAlignment(InputStream os, Entry entry)
+      throws Exception {
+    System.err.println("NOT FULLY IMPLEMENTED!"); // TODO: Finish adapting this method
+    Sequence[] parsed;
+    try {
+      parsed = SeqSet.read_SeqFasta(os);
+      if (parsed == null) throw new Exception("Empty alignment stream!\n");
+    } catch (Exception cause) {
+      throw new Exception("Invalid fasta alignment\n", cause);
+    }
+    return make_Alignment(entry, parsed);
+  }
+
+  /** Reads a CLUSTAL stream via ReadClustalFile and builds an alignment for entry. */
+  public static Alignment read_ClustalAlignment(InputStream os, Entry entry)
+      throws Exception {
+    Sequence[] seqs;
+    try {
+      seqs = SeqAln.ReadClustalFile(os);
+      if (seqs == null) throw new Exception("Empty alignment stream!\n");
+    } catch (Exception e) {
+      throw new Exception("Invalid clustal alignment\n", e); // message used to say "fasta"
+    }
+    System.err.println("NOT FULLY IMPLEMENTED!"); // TODO: Finish adapting this method
+    return make_Alignment(entry, seqs);
+  }
 }