* method
*/
public static final String[] WRITEABLE_FORMATS = new String[]
- { "BLC", "CLUSTAL", "FASTA", "MSF", "PileUp", "PIR", "PFAM", "AMSA" };
+ { "BLC", "CLUSTAL", "FASTA", "MSF", "PileUp", "PIR", "PFAM", "STH", "AMSA" };
/**
* List of extensions corresponding to file format types in WRITABLE_FNAMES
* that are writable by the application.
*/
public static final String[] WRITABLE_EXTENSIONS = new String[]
- { "fa, fasta, fastq", "aln", "pfam", "msf", "pir", "blc", "amsa", "jar" };
+ { "fa, fasta, fastq", "aln", "pfam", "msf", "pir", "blc", "amsa", "jar", "sto,stk" };
/**
* List of writable formats by the application. Order must correspond with the
* WRITABLE_EXTENSIONS list of formats.
*/
public static final String[] WRITABLE_FNAMES = new String[]
- { "Fasta", "Clustal", "PFAM", "MSF", "PIR", "BLC", "AMSA", "Jalview" };
+ { "Fasta", "Clustal", "PFAM", "MSF", "PIR", "BLC", "AMSA", "Jalview", "STH"};
/**
* List of readable format file extensions by application in order
}
else if (format.equalsIgnoreCase("STH"))
{
- afile = new StockholmFile();
+ afile = new StockholmFile(alignment);
}
else if (format.equalsIgnoreCase("AMSA"))
{
{
System.out.println("Reading file: " + f);
AppletFormatAdapter afa = new AppletFormatAdapter();
+ String fName = f.getName();
+ String extension = fName.substring(fName.lastIndexOf(".") + 1, fName.length());
+ if (extension.equals("stk") || extension.equals("sto"))
+ {
+ afa.test(f);
+ }
+ else
+ {
Runtime r = Runtime.getRuntime();
System.gc();
long memf = -r.totalMemory() + r.freeMemory();
System.out
.println("Difference between free memory now and before is "
+ (memf / (1024.0 * 1024.0) * 1.0) + " MB");
-
+ }
} catch (Exception e)
{
System.err.println("Exception when dealing with " + i
+ "'th argument: " + args[i] + "\n" + e);
}
+
}
else
{
}
}
+ /**
+  * Round-trip check for Stockholm output. Reads the alignment in {@code f},
+  * prints it with a {@link StockholmFile}, parses the printed text again and
+  * reports on standard output how the re-read alignment differs from the
+  * original (sequences, sequence features and annotation rows).
+  *
+  * @param f
+  *          file holding the alignment to read
+  */
+ private void test(File f)
+ {
+   System.out.println("Reading file: " + f);
+   String ff = f.getPath();
+   try
+   {
+     Alignment al = readFile(ff, FILE, new IdentifyFile().Identify(ff, FILE));
+     if (al == null)
+     {
+       // Must be checked before the first dereference below.
+       System.out.println("Couldn't read alignment");
+       return;
+     }
+     int height = al.getSequencesArray().length;
+     for (int i = 0; i < height; ++i)
+     {
+       al.getSequenceAt(i).setDatasetSequence(al.getSequenceAt(i));
+     }
+     AlignFile stFile = new StockholmFile(al);
+     stFile.setSeqs(al.getSequencesArray());
+
+     String stockholmoutput = stFile.print();
+     Alignment al_input = readFile(stockholmoutput, AppletFormatAdapter.PASTE, "STH");
+     if (al_input == null)
+     {
+       System.out.println("Couldn't read alignment");
+       return;
+     }
+     System.out.println("Alignment contains: " + al.getHeight() + " and " + al_input.getHeight()
+             + " sequences; " + al.getWidth() + " and " + al_input.getWidth() + " columns.");
+
+     // check Alignment annotation
+     AlignmentAnnotation[] aa_new = al_input.getAlignmentAnnotation();
+     AlignmentAnnotation[] aa_original = al.getAlignmentAnnotation();
+     if (aa_new != null && aa_original != null)
+     {
+       System.out.println("Alignment contains: " + aa_new.length
+               + " and " + aa_original.length + " alignment annotation(s)");
+       if (aa_new.length != aa_original.length)
+       {
+         System.out.println("Different alignment annotation");
+       }
+       // Only compare rows present on both sides so a shorter re-read
+       // alignment is reported instead of aborting the whole check.
+       int sharedRows = Math.min(aa_original.length, aa_new.length);
+       for (int i = 0; i < sharedRows; i++)
+       {
+         if (!equalss(aa_original[i], aa_new[i]))
+         {
+           System.out.println("Different alignment annotation");
+         }
+       }
+     }
+
+     // check sequences, annotation and features
+     SequenceI[] seq_original = al.getSequencesArray();
+     SequenceI[] seq_new = al_input.getSequencesArray();
+     for (int i = 0; i < seq_original.length; i++)
+     {
+       String name = seq_original[i].getName();
+       int start = seq_original[i].getStart();
+       int end = seq_original[i].getEnd();
+       System.out.println("Check sequence: " + name + "/" + start + "-" + end);
+
+       // search equal sequence
+       for (int in = 0; in < seq_new.length; in++)
+       {
+         if (!name.equals(seq_new[in].getName())
+                 || start != seq_new[in].getStart()
+                 || end != seq_new[in].getEnd())
+         {
+           continue;
+         }
+
+         String ss_original = seq_original[i].getSequenceAsString();
+         String ss_new = seq_new[in].getSequenceAsString();
+         if (!ss_original.equals(ss_new))
+         {
+           System.out.println("The sequences " + name + "/" + start + "-" + end + " are not equal");
+         }
+
+         // compare sequence features
+         SequenceFeature[] sequenceFeatures_original = seq_original[i].getSequenceFeatures();
+         SequenceFeature[] sequenceFeatures_new = seq_new[in].getSequenceFeatures();
+         if (sequenceFeatures_original != null && sequenceFeatures_new != null)
+         {
+           System.out.println("There are feature!!!");
+           if (sequenceFeatures_original.length == sequenceFeatures_new.length)
+           {
+             for (int feat = 0; feat < sequenceFeatures_original.length; feat++)
+             {
+               if (!sequenceFeatures_original[feat].equals(sequenceFeatures_new[feat]))
+               {
+                 System.out.println("Different features");
+                 break;
+               }
+             }
+           }
+           else
+           {
+             System.out.println("different number of features");
+           }
+         }
+         else if (sequenceFeatures_original == null && sequenceFeatures_new == null)
+         {
+           System.out.println("No sequence features");
+         }
+         else if (sequenceFeatures_original != null)
+         {
+           // original has features, the re-read sequence has none
+           System.out.println("Coudn't compare sequence features new one");
+         }
+
+         // compare alignment annotation
+         AlignmentAnnotation[] annots_original = al.getSequenceAt(i).getAnnotation();
+         AlignmentAnnotation[] annots_new = al_input.getSequenceAt(in).getAnnotation();
+         if (annots_original != null && annots_new != null)
+         {
+           if (annots_original.length != annots_new.length)
+           {
+             System.out.println("Different annotation");
+           }
+           int sharedAnnots = Math.min(annots_original.length, annots_new.length);
+           for (int j = 0; j < sharedAnnots; j++)
+           {
+             if (annots_original[j] != null && annots_new[j] != null
+                     && !equalss(annots_original[j], annots_new[j]))
+             {
+               System.out.println("Different annotation");
+             }
+           }
+         }
+         else if (annots_original == null && annots_new == null)
+         {
+           System.out.println("No annotations");
+         }
+         else if (annots_original != null)
+         {
+           // original has annotation rows, the re-read sequence has none
+           System.out.println("Coudn't compare annotations new one");
+         }
+         break;
+       }
+     }
+   } catch (Exception e)
+   {
+     System.err.println("Couln't format the alignment for output file.");
+     e.printStackTrace(System.err);
+   }
+ }
+
+ /**
+  * Compares two annotation rows position by position. The rows are equal
+  * when they have the same number of positions and, at every position,
+  * either both entries are null or both have the same display character,
+  * secondary structure and description.
+  *
+  * @param annot_or
+  *          annotation row from the original alignment
+  * @param annot_new
+  *          annotation row from the re-read alignment
+  * @return true if the rows match, false at the first difference
+  */
+ private boolean equalss(AlignmentAnnotation annot_or, AlignmentAnnotation annot_new)
+ {
+   if (annot_or == null || annot_new == null)
+   {
+     return annot_or == annot_new;
+   }
+   if (annot_or.annotations == null || annot_new.annotations == null)
+   {
+     return annot_or.annotations == annot_new.annotations;
+   }
+   if (annot_or.annotations.length != annot_new.annotations.length)
+   {
+     return false;
+   }
+   for (int i = 0; i < annot_or.annotations.length; i++)
+   {
+     if (annot_or.annotations[i] == null && annot_new.annotations[i] == null)
+     {
+       continue;
+     }
+     if (annot_or.annotations[i] == null || annot_new.annotations[i] == null)
+     {
+       return false;
+     }
+     // Any single differing field makes the entries unequal (the fields were
+     // previously combined with &&, which required all three to differ).
+     if (!sameText(annot_or.annotations[i].displayCharacter, annot_new.annotations[i].displayCharacter)
+             || annot_or.annotations[i].secondaryStructure != annot_new.annotations[i].secondaryStructure
+             || !sameText(annot_or.annotations[i].description, annot_new.annotations[i].description))
+     {
+       return false;
+     }
+   }
+   return true;
+ }
+
+ /*
+  * null-safe string equality: two nulls are equal, null never equals non-null
+  */
+ private static boolean sameText(String a, String b)
+ {
+   return (a == null) ? (b == null) : a.equals(b);
+ }
/**
* try to discover how to access the given file as a valid datasource that
* will be identified as the given type.
import com.stevesoft.pat.*;
import jalview.datamodel.*;
+import jalview.util.Format;
// import org.apache.log4j.*;
public class StockholmFile extends AlignFile
{
// static Logger logger = Logger.getLogger("jalview.io.StockholmFile");
-
+ StringBuffer out; // output buffer
+ AlignmentI al;
+
public StockholmFile()
{
}
+ /**
+ * Creates a new StockholmFile object for output.
+ *
+ * @param al
+ *          alignment kept in the {@code al} field; print(SequenceI[]) reads
+ *          its properties and its alignment annotation when writing
+ */
+ public StockholmFile(AlignmentI al)
+ {
+ this.al = al;
+ }
+
public StockholmFile(String inFile, String type) throws IOException
{
super(inFile, type);
if (rend.search(line))
{
// End of the alignment, pass stuff back
-
this.noSeqs = seqs.size();
+
+ String propety = null;
+ Regex pf = new Regex("PF[0-9]{5}(.*)"); // Finds AC for Pfam
+ Regex rf = new Regex("RF[0-9]{5}(.*)"); // Finds AC for Rfam
+ if (getAlignmentProperty("AC") != null)
+ {
+ String dbType = getAlignmentProperty("AC").toString();
+ if (pf.search(dbType))
+ {
+ propety = "PFAM";
+ }
+ else if (rf.search(dbType))
+ {
+ propety = "RFAM";
+ }
+ }
// logger.debug("Number of sequences: " + this.noSeqs);
Enumeration accs = seqs.keys();
while (accs.hasMoreElements())
String desc = (String) accAnnotations.get("DE");
seqO.setDescription((desc == null) ? "" : desc);
}
+
+
+
// Add DB References (if any)
if (accAnnotations != null && accAnnotations.containsKey("DR"))
{
String src = dbr.substring(0, dbr.indexOf(";"));
String acn = dbr.substring(dbr.indexOf(";") + 1);
jalview.util.DBRefUtils.parseToDbRef(seqO, src, "0", acn);
- // seqO.addDBRef(dbref);
- }
+ }
}
- if (accAnnotations != null && accAnnotations.containsKey("SS"))
+
+ if (accAnnotations != null && accAnnotations.containsKey("AC") && propety != null)
{
- Vector v = (Vector) accAnnotations.get("SS");
-
- for (int i = 0; i < v.size(); i++)
+ String dbr = (String) accAnnotations.get("AC");
+ if (dbr != null)
{
- AlignmentAnnotation an = (AlignmentAnnotation) v.elementAt(i);
- seqO.addAlignmentAnnotation(an);
- // annotations.add(an);
- }
+ String src = propety;
+ String acn = dbr.toString();
+ jalview.util.DBRefUtils.parseToDbRef(seqO, src, "0", acn);
+ }
}
-
+
+
Hashtable features = null;
// We need to adjust the positions of all features to account for gaps
try
// TODO: map coding region to core jalview feature types
String type = i.nextElement().toString();
Hashtable content = (Hashtable) features.remove(type);
+
+ // add alignment annotation for this feature
+ String key = type2id(type);
+ if (key != null)
+ {
+ if (accAnnotations != null && accAnnotations.containsKey(key))
+ {
+ Vector vv = (Vector) accAnnotations.get(key);
+ for (int ii = 0; ii < vv.size(); ii++)
+ {
+ AlignmentAnnotation an = (AlignmentAnnotation) vv.elementAt(ii);
+ seqO.addAlignmentAnnotation(an);
+ }
+ }
+ }
+
Enumeration j = content.keys();
while (j.hasMoreElements())
{
}
ns += seq;
content.put(description, ns);
-
- if (type.equals("SS"))
+ Hashtable strucAnn;
+ if (seqAnn.containsKey(acc))
{
- Hashtable strucAnn;
- if (seqAnn.containsKey(acc))
- {
- strucAnn = (Hashtable) seqAnn.get(acc);
- }
- else
- {
- strucAnn = new Hashtable();
- }
-
- Vector newStruc = new Vector();
- parseAnnotationRow(newStruc, type, ns);
-
- strucAnn.put(type, newStruc);
- seqAnn.put(acc, strucAnn);
+ strucAnn = (Hashtable) seqAnn.get(acc);
}
+ else
+ {
+ strucAnn = new Hashtable();
+ }
+
+ Vector newStruc = new Vector();
+ parseAnnotationRow(newStruc, type, ns);
+ strucAnn.put(type, newStruc);
+ seqAnn.put(acc, strucAnn);
}
else
{
convert1 = openparen.replaceAll(annots);
convert2 = closeparen.replaceAll(convert1);
annots = convert2;
-
- String type = (label.indexOf("_cons") == label.length() - 5) ? label
- .substring(0, label.length() - 5) : label;
+
+ String type = label;
+ if (label.contains("_cons")) {
+ type = (label.indexOf("_cons") == label.length() - 5) ? label
+ .substring(0, label.length() - 5) : label;
+ }
boolean ss = false;
type = id2type(type);
if (type.equals("secondary structure"))
}
return annot;
}
-
- public static String print(SequenceI[] s)
+
+ /**
+  * Appends the Stockholm body for the given sequences to the output buffer
+  * and returns the buffer contents so far. In order, it writes: "#=GF" lines
+  * from the alignment properties, "#=GS" lines from sequence database
+  * references, then for each sequence its "#=GR" annotation rows followed by
+  * the sequence line, and finally "#=GC" rows for the alignment annotation
+  * that is visible and not auto-calculated.
+  *
+  * @param s
+  *          sequences to write; output stops at the first null entry
+  * @return everything accumulated in the output buffer
+  */
+ public String print(SequenceI[] s)
 {
- return "not yet implemented";
+   // print() normally creates the buffer; cover direct callers as well.
+   if (out == null)
+   {
+     out = new StringBuffer();
+   }
+
+   // find max length of id
+   int maxid = 0;
+   int in = 0;
+   Hashtable dataRef = null;
+   while ((in < s.length) && (s[in] != null))
+   {
+     String tmp = printId(s[in]);
+     if (tmp.length() > maxid)
+     {
+       maxid = tmp.length();
+     }
+     if (s[in].getDBRef() != null)
+     {
+       for (int idb = 0; idb < s[in].getDBRef().length; idb++)
+       {
+         if (dataRef == null)
+         {
+           dataRef = new Hashtable();
+         }
+         // NOTE(review): keyed by sequence id, so only the last database
+         // reference of a sequence is kept - confirm this is intended.
+         String datAs1 = s[in].getDBRef()[idb].getSource().toString() + " ; " + s[in].getDBRef()[idb].getAccessionId().toString();
+         dataRef.put(tmp, datAs1);
+       }
+     }
+     in++;
+   }
+   maxid += 9;
+
+   // output database type
+   // al is only set by the StockholmFile(AlignmentI) constructor
+   if (al != null && al.getProperties() != null && !al.getProperties().isEmpty())
+   {
+     Enumeration key = al.getProperties().keys();
+     while (key.hasMoreElements())
+     {
+       Object name = key.nextElement();
+       out.append("#=GF " + name + " " + al.getProperties().get(name));
+       out.append(newline);
+     }
+   }
+
+   // output database accessions
+   if (dataRef != null)
+   {
+     Enumeration en = dataRef.keys();
+     while (en.hasMoreElements())
+     {
+       Object idd = en.nextElement();
+       // get(), not remove(): changing the table while enumerating its keys
+       // leaves the enumeration in an undefined state
+       String type = (String) dataRef.get(idd);
+       out.append(new Format("%-" + (maxid - 2) + "s").form("#=GS " + idd.toString() + " "));
+       if (type.contains("PFAM") || type.contains("RFAM"))
+       {
+         out.append(" AC " + type.substring(type.indexOf(";") + 1));
+       }
+       else
+       {
+         out.append(" DR " + type + " ");
+       }
+       out.append(newline);
+     }
+   }
+
+   // output annotations
+   int i = 0;
+   while (i < s.length && s[i] != null)
+   {
+     if (s[i].getDatasetSequence() != null)
+     {
+       SequenceI ds = s[i].getDatasetSequence();
+       AlignmentAnnotation[] alAnot = s[i].getAnnotation();
+       if (alAnot != null && alAnot.length > 0)
+       {
+         // The row key comes from the first sequence feature and is the
+         // same for every row, so look it up once per sequence.
+         String feature = "";
+         if (ds.getSequenceFeatures() != null && ds.getSequenceFeatures().length > 0)
+         {
+           feature = ds.getSequenceFeatures()[0].type;
+         }
+         String key = type2id(feature);
+         if (key != null)
+         {
+           for (int j = 0; j < alAnot.length; j++)
+           {
+             out.append(new Format("%-" + maxid + "s").form("#=GR " + printId(s[i]) + " " + key + " "));
+             Annotation[] ann = alAnot[j].annotations;
+             StringBuffer seq = new StringBuffer();
+             for (int k = 0; k < ann.length; k++)
+             {
+               Annotation annot = ann[k];
+               // positions without annotation show the sequence character
+               String ch = (annot == null) ? Character.toString(s[i].getCharAt(k)) : annot.displayCharacter;
+               if (ch.length() == 0)
+               {
+                 if (key.equals("SS"))
+                 {
+                   char ll = annot.secondaryStructure;
+                   seq.append((ll == ' ') ? 'C' : ll);
+                 }
+                 else
+                 {
+                   seq.append(".");
+                 }
+               }
+               else if (ch.length() == 1)
+               {
+                 seq.append(ch);
+               }
+               else if (ch.length() > 1)
+               {
+                 seq.append(ch.charAt(1));
+               }
+             }
+             out.append(seq.toString());
+             out.append(newline);
+           }
+         }
+       }
+     }
+
+     out.append(new Format("%-" + maxid + "s").form(printId(s[i]) + " "));
+     out.append(s[i].getSequenceAsString());
+     out.append(newline);
+     i++;
+   }
+
+   // alignment annotation
+   if (al != null && al.getAlignmentAnnotation() != null)
+   {
+     for (int ia = 0; ia < al.getAlignmentAnnotation().length; ia++)
+     {
+       AlignmentAnnotation aa = al.getAlignmentAnnotation()[ia];
+       if (aa.autoCalculated || !aa.visible)
+       {
+         continue;
+       }
+
+       String label;
+       if (aa.label.equals("seq"))
+       {
+         label = "seq_cons";
+       }
+       else
+       {
+         // Test the lookup result before appending "_cons"; concatenating
+         // first turned an unknown type into the literal "null_cons".
+         String consKey = type2id(aa.label.toLowerCase());
+         label = (consKey == null) ? aa.label : consKey + "_cons";
+       }
+
+       out.append(new Format("%-" + maxid + "s").form("#=GC " + label + " "));
+       StringBuffer seq = new StringBuffer();
+       for (int j = 0; j < aa.annotations.length; j++)
+       {
+         String ch = (aa.annotations[j] == null) ? "-" : aa.annotations[j].displayCharacter;
+         if (ch.length() == 0)
+         {
+           char ll = aa.annotations[j].secondaryStructure;
+           seq.append((ll == ' ') ? 'C' : ll);
+         }
+         else if (ch.length() == 1)
+         {
+           seq.append(ch);
+         }
+         else if (ch.length() > 1)
+         {
+           seq.append(ch.charAt(1));
+         }
+       }
+       out.append(seq.toString());
+       out.append(newline);
+     }
+   }
+   return out.toString();
 }
public String print()
{
- return print(getSeqsAsArray());
+ out = new StringBuffer();
+ out.append("# STOCKHOLM 1.0");
+ out.append(newline);
+ print(getSeqsAsArray());
+
+ out.append("//");
+ out.append(newline);
+ return out.toString();
}
private static Hashtable typeIds = null;
+ id);
return id;
}
+
+ /**
+  * Reverse lookup in the typeIds table: finds the key whose value, as a
+  * string, equals the given type.
+  *
+  * @param type
+  *          value to look for
+  * @return the matching key, or null (after a warning on standard error)
+  *         when no entry matches or the table has not been created yet
+  */
+ protected static String type2id(String type)
+ {
+   // typeIds is declared null and is populated elsewhere in this class;
+   // a lookup made before that reports "unknown" instead of throwing.
+   if (typeIds != null)
+   {
+     Enumeration e = typeIds.keys();
+     while (e.hasMoreElements())
+     {
+       Object ll = e.nextElement();
+       if (typeIds.get(ll).toString().equals(type))
+       {
+         return (String) ll;
+       }
+     }
+   }
+   System.err.println("Warning : Unknown Stockholm annotation type: "
+           + type);
+   return null;
+ }
/**
* //ssline is complete secondary structure line private AlignmentAnnotation
* addHelices(Vector annotation, String label, String ssline) {