JAL-3692 parse multiline feature qualifiers and escaped quotes

[jalview.git] / src / jalview / io / StockholmFile.java
diff --git a/src/jalview/io/StockholmFile.java b/src/jalview/io/StockholmFile.java

index 0e73af1..4697262 100644 (file)
--- a/src/jalview/io/StockholmFile.java
+++ b/src/jalview/io/StockholmFile.java
@@ -28,12 +28,14 @@ import jalview.datamodel.AlignmentAnnotation;
  import jalview.datamodel.AlignmentI;
  import jalview.datamodel.Annotation;
  import jalview.datamodel.DBRefEntry;
+import jalview.datamodel.DBRefSource;
  import jalview.datamodel.Mapping;
  import jalview.datamodel.Sequence;
  import jalview.datamodel.SequenceFeature;
  import jalview.datamodel.SequenceI;
  import jalview.schemes.ResidueProperties;
  import jalview.util.Comparison;
+import jalview.util.DBRefUtils;
  import jalview.util.Format;
  import jalview.util.MessageManager;
  
@@ -332,17 +334,14 @@ public class StockholmFile extends AlignFile
  
            if (accAnnotations != null && accAnnotations.containsKey("AC"))
            {
-            if (dbsource != null)
+            String dbr = (String) accAnnotations.get("AC");
+            if (dbr != null)
              {
-              String dbr = (String) accAnnotations.get("AC");
-              if (dbr != null)
-              {
-                // we could get very clever here - but for now - just try to
-                // guess accession type from source of alignment plus structure
-                // of accession
-                guessDatabaseFor(seqO, dbr, dbsource);
-
-              }
+              // we could get very clever here - but for now we just try to
+              // guess the accession's database from three things: the type of
+              // sequence, the source of the alignment, and the structure of
+              // the accession itself
+              guessDatabaseFor(seqO, dbr, dbsource);
              }
              // else - do what ? add the data anyway and prompt the user to
              // specify what references these are ?
@@ -527,6 +526,9 @@ public class StockholmFile extends AlignFile
                treeName = an.stringMatched(2);
                treeString = new StringBuffer();
              }
+            // TODO: JAL-3532 - this is where GF comments and database references
+            // are lost. Suggest overriding this method (setAlignmentProperty) for
+            // Stockholm files to process CC, DR etc. into multivalued properties
              setAlignmentProperty(an.stringMatched(1), an.stringMatched(2));
            }
          }
@@ -755,6 +757,12 @@ public class StockholmFile extends AlignFile
          st = -1;
        }
      }
+    if (dbsource == null)
+    {
+      // make up an origin based on whether the sequence looks like it is nucleotide
+      // or protein
+      dbsource = (seqO.isProtein()) ? "PFAM" : "RFAM";
+    }
      if (dbsource.equals("PFAM"))
      {
        seqdb = "UNIPROT";
@@ -930,6 +938,11 @@ public class StockholmFile extends AlignFile
      return annot;
    }
  
+  /**
+   * Formats a database reference as "<source> ; <accession>", the string
+   * that print() stores per sequence id in its dataRef table. The ';'
+   * separator matters: the output code later takes the text after the first
+   * ';' as the accession to write.
+   *
+   * @param ref
+   *          database reference to format; its source and accession id are
+   *          dereferenced without a null check
+   * @return the source and accession id joined by " ; "
+   */
+  private String dbref_to_ac_record(DBRefEntry ref)
+  {
+    return ref.getSource().toString() + " ; "
+            + ref.getAccessionId().toString();
+  }
    @Override
    public String print(SequenceI[] s, boolean jvSuffix)
    {
@@ -942,8 +955,10 @@ public class StockholmFile extends AlignFile
      int maxid = 0;
      int in = 0;
      Hashtable dataRef = null;
+    // mirror the loop guard below so that an empty array or a null first
+    // entry cannot throw before the loop gets the chance to check for it
+    boolean isAA = (in < s.length) && (s[in] != null) && s[in].isProtein();
      while ((in < s.length) && (s[in] != null))
      {
+
        String tmp = printId(s[in], jvSuffix);
        max = Math.max(max, s[in].getLength());
  
@@ -953,17 +968,33 @@ public class StockholmFile extends AlignFile
        }
        if (s[in].getDBRefs() != null)
        {
-        for (int idb = 0; idb < s[in].getDBRefs().length; idb++)
+        if (dataRef == null)
+        {
+          dataRef = new Hashtable();
+        }
+        List<DBRefEntry> primrefs = s[in].getPrimaryDBRefs();
+        if (primrefs.size() >= 1)
          {
-          if (dataRef == null)
+          dataRef.put(tmp, dbref_to_ac_record(primrefs.get(0)));
+        }
+        else
+        {
+          for (int idb = 0; idb < s[in].getDBRefs().length; idb++)
            {
-            dataRef = new Hashtable();
+            DBRefEntry dbref = s[in].getDBRefs()[idb];
+            dataRef.put(tmp, dbref_to_ac_record(dbref));
+            // if we put in a uniprot or EMBL record then we're done:
+            if (isAA && DBRefSource.UNIPROT
+                    .equals(DBRefUtils.getCanonicalName(dbref.getSource())))
+            {
+              break;
+            }
+            if (!isAA && DBRefSource.EMBL
+                    .equals(DBRefUtils.getCanonicalName(dbref.getSource())))
+            {
+              break;
+            }
            }
-
-          String datAs1 = s[in].getDBRefs()[idb].getSource().toString()
-                  + " ; "
-                  + s[in].getDBRefs()[idb].getAccessionId().toString();
-          dataRef.put(tmp, datAs1);
          }
        }
        in++;
@@ -996,7 +1027,8 @@ public class StockholmFile extends AlignFile
          String type = (String) dataRef.remove(idd);
          out.append(new Format("%-" + (maxid - 2) + "s")
                  .form("#=GS " + idd.toString() + " "));
-        if (type.contains("PFAM") || type.contains("RFAM"))
+        if (isAA && type.contains("UNIPROT")
+                || (!isAA && type.contains("EMBL")))
          {
  
            out.append(" AC " + type.substring(type.indexOf(";") + 1));