X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=src%2Fjalview%2Fio%2FStockholmFile.java;h=344c4e1e940f9c295bf35b4f8ef6f34a26b179b6;hb=3609d4b908fa64cab35f2348401baab3347188fc;hp=3afb9673f10f5f1709162c0af5c9248035f84f5b;hpb=247737b13c78af1123f9e41edc44d9f16c1a2358;p=jalview.git diff --git a/src/jalview/io/StockholmFile.java b/src/jalview/io/StockholmFile.java index 3afb967..344c4e1 100644 --- a/src/jalview/io/StockholmFile.java +++ b/src/jalview/io/StockholmFile.java @@ -35,6 +35,7 @@ import jalview.datamodel.SequenceFeature; import jalview.datamodel.SequenceI; import jalview.schemes.ResidueProperties; import jalview.util.Comparison; +import jalview.util.DBRefUtils; import jalview.util.Format; import jalview.util.MessageManager; @@ -43,12 +44,10 @@ import java.io.FileReader; import java.io.IOException; import java.util.ArrayList; import java.util.Enumeration; -import java.util.HashMap; import java.util.Hashtable; import java.util.LinkedHashMap; import java.util.List; import java.util.Map; -import java.util.Map.Entry; import java.util.Vector; import com.stevesoft.pat.Regex; @@ -340,17 +339,14 @@ public class StockholmFile extends AlignFile if (accAnnotations != null && accAnnotations.containsKey("AC")) { - if (dbsource != null) + String dbr = (String) accAnnotations.get("AC"); + if (dbr != null) { - String dbr = (String) accAnnotations.get("AC"); - if (dbr != null) - { - // we could get very clever here - but for now - just try to - // guess accession type from source of alignment plus structure - // of accession - guessDatabaseFor(seqO, dbr, dbsource); - - } + // we could get very clever here - but for now - just try to + // guess accession type from type of sequence, source of alignment plus + // structure + // of accession + guessDatabaseFor(seqO, dbr, dbsource); } // else - do what ? add the data anyway and prompt the user to // specify what references these are ? @@ -535,6 +531,9 @@ public class StockholmFile extends AlignFile treeName = an.stringMatched(2); treeString = new StringBuffer(); } + // TODO: JAL-3532 - this is where GF comments and database references are lost + // suggest overriding this method for Stockholm files to catch and properly + // process CC, DR etc into multivalued properties setAlignmentProperty(an.stringMatched(1), an.stringMatched(2)); } } @@ -764,6 +763,12 @@ public class StockholmFile extends AlignFile st = -1; } } + if (dbsource == null) + { + // make up an origin based on whether the sequence looks like it is nucleotide + // or protein + dbsource = (seqO.isProtein()) ? "PFAM" : "RFAM"; + } if (dbsource.equals("PFAM")) { seqdb = "UNIPROT"; @@ -943,6 +948,11 @@ public class StockholmFile extends AlignFile return annot; } + private String dbref_to_ac_record(DBRefEntry ref) + { + return ref.getSource().toString() + " ; " + + ref.getAccessionId().toString(); + } @Override public String print(SequenceI[] s, boolean jvSuffix) { @@ -955,8 +965,10 @@ public class StockholmFile extends AlignFile int maxid = 0; int in = 0; Hashtable dataRef = null; + boolean isAA = s[in].isProtein(); while ((in < s.length) && (s[in] != null)) { + String tmp = printId(s[in], jvSuffix); max = Math.max(max, s[in].getLength()); @@ -966,17 +978,33 @@ public class StockholmFile extends AlignFile } if (s[in].getDBRefs() != null) { - for (int idb = 0; idb < s[in].getDBRefs().length; idb++) + if (dataRef == null) + { + dataRef = new Hashtable(); + } + List primrefs = s[in].getPrimaryDBRefs(); + if (primrefs.size() >= 1) { - if (dataRef == null) + dataRef.put(tmp, dbref_to_ac_record(primrefs.get(0))); + } + else + { + for (int idb = 0; idb < s[in].getDBRefs().length; idb++) { - dataRef = new Hashtable(); + DBRefEntry dbref = s[in].getDBRefs()[idb]; + dataRef.put(tmp, dbref_to_ac_record(dbref)); + // if we put in a uniprot or EMBL record then we're done: + if (isAA && DBRefSource.UNIPROT + .equals(DBRefUtils.getCanonicalName(dbref.getSource()))) + { + break; + } + if (!isAA && DBRefSource.EMBL + .equals(DBRefUtils.getCanonicalName(dbref.getSource()))) + { + break; + } } - - String datAs1 = s[in].getDBRefs()[idb].getSource().toString() - + " ; " - + s[in].getDBRefs()[idb].getAccessionId().toString(); - dataRef.put(tmp, datAs1); } } in++; @@ -1009,7 +1037,8 @@ public class StockholmFile extends AlignFile String type = (String) dataRef.remove(idd); out.append(new Format("%-" + (maxid - 2) + "s") .form("#=GS " + idd.toString() + " ")); - if (type.contains("PFAM") || type.contains("RFAM")) + if (isAA && type.contains("UNIPROT") + || (!isAA && type.contains("EMBL"))) { out.append(" AC " + type.substring(type.indexOf(";") + 1)); @@ -1029,36 +1058,40 @@ public class StockholmFile extends AlignFile if (alAnot != null) { Annotation[] ann; + for (int j = 0; j < alAnot.length; j++) { - - String key = type2id(alAnot[j].label); - boolean isrna = alAnot[j].isValidStruc(); - - if (isrna) - { - // hardwire to secondary structure if there is RNA secondary - // structure on the annotation - key = "SS"; - } - if (key == null) + if (alAnot[j].annotations != null) { + String key = type2id(alAnot[j].label); + boolean isrna = alAnot[j].isValidStruc(); - continue; - } + if (isrna) + { + // hardwire to secondary structure if there is RNA secondary + // structure on the annotation + key = "SS"; + } + if (key == null) + { - // out.append("#=GR "); - out.append(new Format("%-" + maxid + "s").form( - "#=GR " + printId(s[i], jvSuffix) + " " + key + " ")); - ann = alAnot[j].annotations; - String seq = ""; - for (int k = 0; k < ann.length; k++) - { - seq += outputCharacter(key, k, isrna, ann, s[i]); + continue; + } + + // out.append("#=GR "); + out.append(new Format("%-" + maxid + "s").form( + "#=GR " + printId(s[i], jvSuffix) + " " + key + " ")); + ann = alAnot[j].annotations; + String seq = ""; + for (int k = 0; k < ann.length; k++) + { + seq += outputCharacter(key, k, isrna, ann, s[i]); + } + out.append(seq); + out.append(newline); } - out.append(seq); - out.append(newline); } + } out.append(new Format("%-" + maxid + "s")