import java.util.Vector;
import java.util.regex.Pattern;
-import vamsas.objects.simple.Alignment;
-import vamsas.objects.simple.Sequence;
-import vamsas.objects.simple.SequenceSet;
+import org.vamsas.objects.core.*;
/**
* @author jimp
- *
- * TODO To change the template for this generated type comment go to
- * Window - Preferences - Java - Code Style - Code Templates
+ *
+ * TODO To change the template for this generated type comment go to Window -
+ * Preferences - Java - Code Style - Code Templates
*/
-public class SeqAln extends vamsas.objects.simple.Alignment {
-
- public static Sequence[] ReadClustalFile(InputStream os) throws Exception {
-
- Pattern nonGap = Pattern.compile("[A-Z*0-9]", Pattern.CASE_INSENSITIVE);
- String gapchars = "";
- char gapchar='-';
-
- int i = 0;
- boolean flag = false;
-
- Vector headers = new Vector();
- Hashtable seqhash = new Hashtable();
- Sequence[] seqs=null;
- int noSeqs = 0;
- String line;
-
- try {
- BufferedReader ins = new BufferedReader(new InputStreamReader(os));
- while ((line = ins.readLine()) != null) {
- if (line.indexOf(" ") != 0) {
- java.util.StringTokenizer str = new StringTokenizer(line," ");
- String id = "";
-
- if (str.hasMoreTokens()) {
- id = str.nextToken();
- if (id.equals("CLUSTAL")) {
- flag = true;
- } else {
- if (flag) {
- StringBuffer tempseq;
- if (seqhash.containsKey(id)) {
- tempseq = (StringBuffer)seqhash.get(id);
- } else {
- tempseq = new StringBuffer();
- seqhash.put(id,tempseq);
- }
-
- if (!(headers.contains(id))) {
- headers.addElement(id);
- }
-
- tempseq.append(str.nextToken());
- }
- }
- }
- }
- }
-
- } catch (IOException e) {
- throw(new Exception("Exception parsing clustal file ",e));
- }
-
- if (flag) {
- noSeqs = headers.size();
-
- //Add sequences to the hash
- seqs = new Sequence[headers.size()];
- for (i = 0; i < headers.size(); i++ ) {
- if ( seqhash.get(headers.elementAt(i)) != null) {
-
- Sequence newSeq = new Sequence(headers.elementAt(i).toString(),
- seqhash.get(headers.elementAt(i).toString()).toString());
-
- seqs[i]=newSeq;
-
- } else {
- throw(new Exception("Bizarreness! Can't find sequence for " + headers.elementAt(i)));
- }
- }
- }
- return seqs;
- }
-
- public static void WriteClustalWAlignment(java.io.OutputStream os, Alignment seqAl) throws IOException {
- Sequence[] s = seqAl.getSeqs().getSeqs();
-
- java.io.BufferedWriter out = new BufferedWriter(new java.io.OutputStreamWriter(os));
-
- out.write("CLUSTAL\n\n");
-
- int max = 0;
- int maxid = 0;
-
- int i = 0;
-
- while (i < s.length && s[i] != null) {
- String tmp = s[i].getId();
-
- if (s[i].getSeq().length() > max) {
- max = s[i].getSeq().length();
- }
- if (tmp.length() > maxid) {
- maxid = tmp.length();
- }
- i++;
- }
-
- if (maxid < 15) {
- maxid = 15;
- }
- maxid++;
- int len = 60;
- int nochunks = max / len + 1;
-
- for (i = 0; i < nochunks; i++) {
- int j = 0;
- while ( j < s.length && s[j] != null) {
- out.write(new Format("%-" + maxid + "s").form(s[j].getId()+" "));
- int start = i*len;
- int end = start + len;
-
- if (end < s[j].getSeq().length() && start < s[j].getSeq().length() ) {
- out.write(s[j].getSeq().substring(start,end) + "\n");
- } else {
- if (start < s[j].getSeq().length()) {
- out.write(s[j].getSeq().substring(start) + "\n");
- }
- }
- j++;
- }
- out.write("\n");
-
- }
- }
-
-
- public static Alignment make_Alignment(vamsas.objects.simple.Action origin, Sequence[] seqs, String[] source) throws Exception {
- Pattern nonGap = Pattern.compile("[A-Z*0-9]", Pattern.CASE_INSENSITIVE);
- boolean gapsset = false;
- char gapchar='-';
- int seqLength = 0;
-
-
- for (int i=0, nseq=seqs.length; i<nseq; i++) {
- String seq = seqs[i].getSeq();
- String gaps = nonGap.matcher(seq).replaceAll("");
- if (seqLength==0) {
- seqLength=seq.length();
- } else
- if (seqLength!=seq.length())
- throw(new Exception(i+"th Sequence (>"+seqs[i].getId()+") is not aligned.\n"));//TODO: move this to assertions part of Alignment
-
- // common check for any sequence...
- if (gaps!=null && gaps.length()>0) {
- if (!gapsset)
- gapchar = gaps.charAt(0);
- for (int c=0, gc=gaps.length(); c<gc; c++) {
- if (gapchar!=gaps.charAt(c)) {
- throw(new IOException("Inconsistent gap characters in sequence "+i+": '"+seq+"'"));
- }
- }
- }
- }
-
- return new Alignment(origin, new SequenceSet(seqs), source, new String(""+gapchar));
- }
-
- public static Alignment read_FastaAlignment(InputStream os, String[] source) throws Exception {
- Sequence[] seqs;
- try {
- seqs = SeqSet.read_SeqFasta(os);
- if (seqs==null)
- throw(new Exception("Empty alignment stream!\n"));
- } catch (Exception e) {
- throw new Exception("Invalid fasta alignment\n",e);
- }
-
- return make_Alignment(new vamsas.objects.simple.Action(source[0]), seqs, source);
- }
- public static Alignment read_ClustalAlignment(InputStream os, String[] source) throws Exception {
- Sequence[] seqs;
- try {
- seqs = SeqAln.ReadClustalFile(os);
- if (seqs==null)
- throw(new Exception("Empty alignment stream!\n"));
- } catch (Exception e) {
- throw new Exception("Invalid fasta alignment\n",e);
- }
-
- return make_Alignment(new vamsas.objects.simple.Action(source[0]), seqs, source);
- }
+public class SeqAln extends org.vamsas.objects.core.Alignment {
+
+ /**
+  * Reads a CLUSTAL-format alignment from a stream and returns its sequences.
+  * <p>
+  * Lines that begin with a space are ignored. The first space-delimited token
+  * of every other line is taken as a sequence id and the second token as a
+  * block of residues; blocks sharing an id are concatenated in the order they
+  * are read. Nothing is collected until a line whose first token is exactly
+  * "CLUSTAL" has been seen. A line that carries an id but no second token is
+  * skipped instead of aborting the parse with a NoSuchElementException.
+  * <p>
+  * Every sequence is built by Seq.newSequence with
+  * SymbolDictionary.STANDARD_AA and 0 for both trailing integer arguments.
+  * The stream is not closed by this method.
+  *
+  * @param os stream to read the alignment text from
+  * @return the sequences in order of first appearance (possibly an empty
+  *         array), or null if no "CLUSTAL" header line was found
+  * @throws Exception if the stream cannot be read
+  */
+ public static Sequence[] ReadClustalFile(InputStream os) throws Exception {
+   System.err.println("NOT FULLY IMPLEMENTED!"); // TODO: Finish adapting this method
+
+   boolean flag = false; // true once the CLUSTAL header token has been seen
+   Vector headers = new Vector(); // sequence ids, in order of first appearance
+   Hashtable seqhash = new Hashtable(); // id -> StringBuffer of residues so far
+   String line;
+
+   try {
+     BufferedReader ins = new BufferedReader(new InputStreamReader(os));
+     while ((line = ins.readLine()) != null) {
+       if (line.indexOf(" ") == 0) {
+         continue; // lines starting with a space hold no sequence data
+       }
+       StringTokenizer str = new StringTokenizer(line, " ");
+       if (!str.hasMoreTokens()) {
+         continue; // blank line
+       }
+       String id = str.nextToken();
+       if (id.equals("CLUSTAL")) {
+         flag = true;
+         continue;
+       }
+       // Ignore everything before the header, and any line that has an id
+       // but no residue column (previously an unchecked exception).
+       if (!flag || !str.hasMoreTokens()) {
+         continue;
+       }
+
+       StringBuffer tempseq = (StringBuffer) seqhash.get(id);
+       if (tempseq == null) {
+         tempseq = new StringBuffer();
+         seqhash.put(id, tempseq);
+         headers.addElement(id);
+       }
+       tempseq.append(str.nextToken());
+     }
+   } catch (IOException e) {
+     throw (new Exception("Exception parsing clustal file ", e));
+   }
+
+   if (!flag) {
+     return null; // not a CLUSTAL file
+   }
+
+   Sequence[] seqs = new Sequence[headers.size()];
+   for (int i = 0; i < seqs.length; i++) {
+     Object id = headers.elementAt(i);
+     Object residues = seqhash.get(id);
+     if (residues == null) {
+       throw (new Exception("Bizarreness! Can't find sequence for " + id));
+     }
+     // TODO: develop automatic dictionary typing for sequences
+     seqs[i] = Seq.newSequence(id.toString(), residues.toString(),
+         SymbolDictionary.STANDARD_AA, 0, 0);
+   }
+   return seqs;
+ }
+
+ /**
+  * Writes an alignment to a stream in CLUSTAL block format.
+  * <p>
+  * Output starts with a "CLUSTAL" header line followed by a blank line, then
+  * the sequences in blocks of at most 60 columns. Each row is the sequence id
+  * left-justified in a column at least 16 characters wide, followed by that
+  * block's residues. Only the leading run of non-null entries of the
+  * alignment's sequence array is written.
+  * <p>
+  * The writer is flushed before returning so that buffered text reaches
+  * {@code os}; the stream itself is left open for the caller.
+  * <p>
+  * NOTE(review): rows are labelled with getId(), whereas make_Alignment
+  * stores the sequence label via setName() - confirm which accessor holds
+  * the display name.
+  *
+  * @param os stream to write the alignment text to
+  * @param seqAl alignment whose sequences are written
+  * @throws IOException if writing to the stream fails
+  */
+ public static void WriteClustalWAlignment(java.io.OutputStream os,
+     Alignment seqAl) throws IOException {
+   System.err.println("NOT FULLY IMPLEMENTED!"); // TODO: Finish adapting this method
+   AlignmentSequence[] s = seqAl.getAlignmentSequence();
+
+   java.io.BufferedWriter out = new BufferedWriter(
+       new java.io.OutputStreamWriter(os));
+
+   out.write("CLUSTAL\n\n");
+
+   // Count the leading non-null entries; anything after the first null is ignored.
+   int nseqs = 0;
+   while (nseqs < s.length && s[nseqs] != null) {
+     nseqs++;
+   }
+
+   // Longest sequence and longest id decide block count and id column width.
+   int max = 0;
+   int maxid = 0;
+   for (int i = 0; i < nseqs; i++) {
+     max = Math.max(max, s[i].getSequence().length());
+     maxid = Math.max(maxid, s[i].getId().length());
+   }
+   if (maxid < 15) {
+     maxid = 15;
+   }
+   maxid++;
+
+   int len = 60;
+   // Round up, so a length that is an exact multiple of len does not
+   // produce a trailing block that contains ids but no residues.
+   int nochunks = (max + len - 1) / len;
+
+   for (int i = 0; i < nochunks; i++) {
+     int start = i * len;
+     for (int j = 0; j < nseqs; j++) {
+       String residues = s[j].getSequence();
+       int end = Math.min(start + len, residues.length());
+
+       out.write(new Format("%-" + maxid + "s").form(s[j].getId() + " "));
+       if (start < end) {
+         out.write(residues.substring(start, end));
+       }
+       // Always end the row, even when this sequence is shorter than the
+       // block start, so the next id does not run onto the same line.
+       out.write("\n");
+     }
+     out.write("\n");
+   }
+
+   // Without this the BufferedWriter may still hold the whole alignment.
+   out.flush();
+ }
+ /**
+  * Manufactures an alignment from an array of sequences.
+  * <p>
+  * All sequences must have the same length as the first one. Any character
+  * that does not match [A-Z*0-9] (case-insensitive) is treated as a gap, and
+  * the whole set must use a single gap character: the first one encountered
+  * is remembered and every later gap, in any sequence, must equal it. When no
+  * gaps are present the alignment's gap character is '-'.
+  * <p>
+  * Each input sequence is copied into a new AlignmentSequence carrying its
+  * sequence string, its id (stored as the name), its VorbaId (stored as the
+  * refid) and its start and end.
+  *
+  * @param origin entry passed to ProvenanceStuff.newProvenance to create the
+  *               alignment's provenance
+  * @param seqs sequences to place in the alignment, in order
+  * @return a new alignment containing one AlignmentSequence per input
+  *         sequence
+  * @throws Exception if a sequence differs in length from the first one; an
+  *                   IOException if more than one gap character is used
+  */
+ public static Alignment make_Alignment(Entry origin,
+     Sequence[] seqs) throws Exception {
+   System.err.println("NOT FULLY IMPLEMENTED!"); // TODO: Finish adapting this method
+   Alignment al = new Alignment();
+   al.setProvenance(ProvenanceStuff.newProvenance(origin));
+
+   Pattern nonGap = Pattern.compile("[A-Z*0-9]", Pattern.CASE_INSENSITIVE);
+   boolean gapsset = false;
+   char gapchar = '-';
+   int seqLength = 0;
+
+   for (int i = 0, nseq = seqs.length; i < nseq; i++) {
+     String seq = seqs[i].getSequence();
+
+     // The first sequence fixes the alignment width (even if it is empty).
+     if (i == 0) {
+       seqLength = seq.length();
+     } else if (seqLength != seq.length()) {
+       // TODO: move this to assertions part of Alignment
+       throw (new Exception(i + "th Sequence (>" + seqs[i].getId()
+           + ") is not aligned.\n"));
+     }
+
+     // Whatever is left after deleting residue symbols is gap characters.
+     String gaps = nonGap.matcher(seq).replaceAll("");
+     if (gaps.length() > 0) {
+       if (!gapsset) {
+         gapchar = gaps.charAt(0);
+         // Remember the choice so later sequences are held to the same gap.
+         gapsset = true;
+       }
+       for (int c = 0, gc = gaps.length(); c < gc; c++) {
+         if (gapchar != gaps.charAt(c)) {
+           throw (new IOException("Inconsistent gap characters in sequence "
+               + i + ": '" + seq + "'"));
+         }
+       }
+     }
+
+     AlignmentSequence sq = new AlignmentSequence();
+     // TODO: use as basis of default AlignSequence(Sequence) constructor.
+     sq.setSequence(seq);
+     sq.setName(seqs[i].getId());
+     sq.setRefid(seqs[i].getVorbaId());
+     sq.setStart(seqs[i].getStart());
+     sq.setEnd(seqs[i].getEnd());
+     al.addAlignmentSequence(sq);
+   }
+   al.setGapChar(String.valueOf(gapchar));
+   return al;
+ }
+
+ /**
+  * Parses a FASTA stream with SeqSet.read_SeqFasta and turns the result into
+  * an alignment via make_Alignment.
+  *
+  * @param os stream holding the FASTA text
+  * @param entry entry handed on to make_Alignment as the alignment's origin
+  * @return the alignment built from the parsed sequences
+  * @throws Exception wrapping any parse failure, including a null parse
+  *                   result; also whatever make_Alignment throws
+  */
+ public static Alignment read_FastaAlignment(InputStream os, Entry entry)
+     throws Exception {
+   System.err.println("NOT FULLY IMPLEMENTED!"); // TODO: Finish adapting this method
+   Sequence[] parsed = null;
+   try {
+     parsed = SeqSet.read_SeqFasta(os);
+     if (parsed == null) {
+       // Caught just below and reported as the cause of the wrapper.
+       throw new Exception("Empty alignment stream!\n");
+     }
+   } catch (Exception cause) {
+     throw new Exception("Invalid fasta alignment\n", cause);
+   }
+   return make_Alignment(entry, parsed);
+ }
+
+ /**
+  * Parses a CLUSTAL stream with ReadClustalFile and turns the result into an
+  * alignment via make_Alignment.
+  *
+  * @param os stream holding the CLUSTAL text
+  * @param entry entry handed on to make_Alignment as the alignment's origin
+  * @return the alignment built from the parsed sequences
+  * @throws Exception wrapping any parse failure, including a null parse
+  *                   result; also whatever make_Alignment throws
+  */
+ public static Alignment read_ClustalAlignment(InputStream os, Entry entry)
+     throws Exception {
+   Sequence[] seqs;
+   try {
+     seqs = SeqAln.ReadClustalFile(os);
+     if (seqs == null) {
+       throw (new Exception("Empty alignment stream!\n"));
+     }
+   } catch (Exception e) {
+     // This is the CLUSTAL reader; the message used to say "fasta".
+     throw new Exception("Invalid clustal alignment\n", e);
+   }
+   System.err.println("NOT FULLY IMPLEMENTED!"); // TODO: Finish adapting this method
+   return make_Alignment(entry, seqs);
+ }
}
/*
* Created on 17-May-2005
+ * Slurped into VamsasClient object set on 12th Jan 2006
*
* TODO To change the template for this generated file go to
* Window - Preferences - Java - Code Style - Code Templates
import java.util.Vector;
import java.util.regex.Pattern;
-import vamsas.objects.simple.Sequence;
+import org.vamsas.objects.core.*;
/**
* @author jimp
* TODO To change the template for this generated type comment go to
* Window - Preferences - Java - Code Style - Code Templates
*/
-public class SeqSet extends vamsas.objects.simple.SequenceSet {
+public class SeqSet {
- public static void write_Fasta(OutputStream os, Sequence[] seqs) throws IOException {
+ public static void write_Fasta(OutputStream os, SequenceType[] seqs) throws IOException { // convenience overload: delegates to write_Fasta(os, seqs, int) with a width of 80
write_Fasta(os, seqs, 80);
}
- public static void write_Fasta(OutputStream os, Sequence[] seqs, boolean width80) throws IOException {
+ public static void write_Fasta(OutputStream os, SequenceType[] seqs, boolean width80) throws IOException { // width80 == true passes a width of 80, false passes 0 to write_Fasta(os, seqs, int)
write_Fasta(os, seqs, (width80) ? 80 : 0);
}
- public static void write_Fasta(OutputStream os, Sequence[] seqs, int width) throws IOException {
+ public static void write_Fasta(OutputStream os, SequenceType[] seqs, int width) throws IOException {
int i, nseq = seqs.length;
BufferedWriter fasta_out = new BufferedWriter(new OutputStreamWriter(os));
+ System.err.println("NOT FULLY IMPLEMENTED!"); // TODO: Finish adapting this method
for (i=0; i<nseq; i++) {
- fasta_out.write(">"+seqs[i].getId()+"\n");
+ fasta_out.write(">"+seqs[i].getName()+"\n");
if (width<=0) {
- fasta_out.write(seqs[i].getSeq()+"\n");
+ fasta_out.write(seqs[i].getSequence()+"\n");
} else {
- String tempseq = seqs[i].getSeq();
+ // TODO: adapt to SymbolDictionary labelwidths
+ String tempseq = seqs[i].getSequence();
int j=0, k=tempseq.length();
while (j<k) {
int d = k-j;
Vector seqs = new Vector();
int nseq = 0;
BufferedReader infasta = new BufferedReader(new InputStreamReader(os));
-
- /// TODO: decide on where this routine should live... current best guess is vamsas.objects.io
+ System.err.println("NOT FULLY IMPLEMENTED!"); // TODO: Finish adapting this method
+ // TODO: decide on return type - SequenceType is a partly complete vamsas object - either for a dataset or alignment sequence
+ // so could go in either!
String line;
Sequence seq = null;
Pattern aaMatch = Pattern.compile("[ARNDCQEGHILKMFPSTUWYV]", Pattern.CASE_INSENSITIVE);
line = infasta.readLine();
if (line==null || line.startsWith(">")) {
if (seqstr!=null)
- seqs.add((Object) new Sequence(sname.substring(1), seqstr));
+ seqs.add((Object) Seq.newSequence(sname.substring(1), seqstr, SymbolDictionary.STANDARD_AA, 0,0));
sname = line; // remove >
seqstr="";
} else {
return null;
}
- public static Hashtable uniquify(Sequence[] sequences) {
+ public static Hashtable uniquify(SequenceType[] sequences) { // renames every sequence to "Sequence<i>" and returns the map needed to undo that
+ System.err.println("NOT FULLY IMPLEMENTED!"); // TODO: Finish adapting this method
+ // TODO: do we need this with vamsas sequences ?
// Generate a safely named sequence set and a hash to recover the sequence names
Hashtable map = new Hashtable();
for (int i = 0; i < sequences.length; i++) {
String safename = new String("Sequence" + i);
- map.put(safename, sequences[i].getId());
- sequences[i].setId(safename);
+ map.put(safename, sequences[i].getName()); // key: placeholder name, value: the name the sequence had on entry
+ sequences[i].setName(safename); // the caller's sequence objects are renamed in place
}
return map;
}
- public static boolean deuniquify(Hashtable map, Sequence[] sequences) {
+ public static boolean deuniquify(Hashtable map, SequenceType[] sequences) {
+ System.err.println("NOT FULLY IMPLEMENTED!"); // TODO: Finish adapting this method
+ // TODO: do we need this with vamsas sequences ?
// recover unsafe sequence names for a sequence set
boolean allfound = true;
for (int i = 0; i < sequences.length; i++) {
- if (map.containsKey(sequences[i].getId())) {
- String unsafename = (String) map.get(sequences[i].getId());
- sequences[i].setId(unsafename);
+ if (map.containsKey(sequences[i].getName())) {
+ String unsafename = (String) map.get(sequences[i].getName());
+ sequences[i].setName(unsafename);
} else {
allfound = false;
}