JAL-3017 parse multiple variant elements for uniprot feature
author Jim Procter <jprocter@issues.jalview.org>
Sat, 2 Jun 2018 13:17:31 +0000 (14:17 +0100)
committer Jim Procter <jprocter@issues.jalview.org>
Mon, 4 Jun 2018 09:00:42 +0000 (10:00 +0100)
 Conflicts:
src/jalview/datamodel/xdb/uniprot/UniprotFeature.java
Merged JAL-3017 into upstream JAL-2920 patches for HGVS nomenclature
Note: multi-residue replacements (delins variants) still need to be dealt with properly - see http://varnomen.hgvs.org/recommendations/protein/variant/delins/

resources/uniprot_mapping.xml
src/jalview/datamodel/xdb/uniprot/UniprotFeature.java
src/jalview/ws/dbsources/Uniprot.java
test/jalview/ws/dbsources/UniprotTest.java

index 832d3e5..4c9ad5f 100755 (executable)
@@ -69,7 +69,7 @@
     <field name="end">
       <bind-xml name="position" node="attribute" location="location/end"/>
     </field>
-    <field name="variation">
+    <field name="variation" collection="vector" type="string">
      <bind-xml name="variation"/>
     </field>
     <field name="original">
index 91d2e19..8bd5652 100644 (file)
@@ -20,6 +20,8 @@
  */
 package jalview.datamodel.xdb.uniprot;
 
+import java.util.Vector;
+
 /**
  * A data model class for binding from Uniprot XML via uniprot_mapping.xml
  */
@@ -31,7 +33,7 @@ public class UniprotFeature
 
   private String original = null;
 
-  private String variation = null;
+  private Vector<String> variation = null;
 
   private String status;
 
@@ -110,12 +112,12 @@ public class UniprotFeature
     this.original = original;
   }
 
-  public String getVariation()
+  public Vector<String> getVariation()
   {
     return variation;
   }
 
-  public void setVariation(String variant)
+  public void setVariation(Vector<String> variant)
   {
     this.variation = variant;
   }
index 11fff9d..fc4f318 100644 (file)
@@ -42,6 +42,7 @@ import java.io.Reader;
 import java.net.URL;
 import java.net.URLConnection;
 import java.util.ArrayList;
+import java.util.List;
 import java.util.Vector;
 
 import org.exolab.castor.mapping.Mapping;
@@ -302,24 +303,41 @@ public class Uniprot extends DbSourceProxyImpl
   protected static String getDescription(UniprotFeature uf)
   {
     String orig = uf.getOriginal();
-    String var = uf.getVariation();
+    List<String> variants = uf.getVariation();
     StringBuilder sb = new StringBuilder();
 
     /*
      * append variant in standard format if present
      * e.g. p.Arg59Lys
      */
-    if (orig != null && !orig.isEmpty() && var != null && !var.isEmpty())
+    if (orig != null && !orig.isEmpty() && variants != null
+            && !variants.isEmpty())
     {
-      sb.append("p.");
-      String orig3 = ResidueProperties.aa2Triplet.get(orig);
-      sb.append(orig3 == null ? orig : StringUtils.toSentenceCase(orig3));
-      sb.append(Integer.toString(uf.getPosition()));
-      String var3 = ResidueProperties.aa2Triplet.get(var);
-      sb.append(var3 == null ? var : StringUtils.toSentenceCase(var3));
-      sb.append(" ");
-    }
+      int p = 0;
+      for (String var : variants)
+      {
+        sb.append("p.");
+        String orig3 = ResidueProperties.aa2Triplet.get(orig);
+        sb.append(orig3 == null ? orig : StringUtils.toSentenceCase(orig3));
+        sb.append(Integer.toString(uf.getPosition()));
+        for (int c = 0, clen = var.length(); c < clen; c++)
+        {
+          char varchar = var.charAt(c);
+          String var3 = ResidueProperties.aa2Triplet.get("" + varchar);
 
+          sb.append(var3 != null ? StringUtils.toSentenceCase(var3)
+                  : "" + varchar);
+        }
+        if (++p != variants.size())
+        {
+          sb.append("<br/>");
+        }
+        else
+        {
+          sb.append(" ");
+        }
+      }
+    }
     String description = uf.getDescription();
     if (description != null)
     {
index 0ee4388..b187b67 100644 (file)
@@ -35,6 +35,7 @@ import java.io.Reader;
 import java.io.StringReader;
 import java.util.Vector;
 
+import org.testng.Assert;
 import org.testng.annotations.BeforeClass;
 import org.testng.annotations.Test;
 
@@ -66,6 +67,7 @@ public class UniprotTest
           + "<feature type=\"sequence variant\"><original>M</original><variation>L</variation><location><position position=\"41\"/></location></feature>"
           + "<feature type=\"sequence variant\" description=\"Pathogenic\"><original>M</original><variation>L</variation><location><position position=\"41\"/></location></feature>"
           + "<feature type=\"sequence variant\" description=\"Pathogenic\"><original>M</original><location><position position=\"41\"/></location></feature>"
+          + "<feature type=\"sequence variant\" description=\"Foo\"><variation>L</variation><variation>LMV</variation><original>M</original><location><position position=\"42\"/></location></feature>"
           + "<sequence length=\"10\" mass=\"27410\" checksum=\"8CB760AACF88FE6C\" modified=\"2008-01-15\" version=\"1\">MHAPL VSKDL</sequence></entry>"
           + "</uniprot>";
 
@@ -101,7 +103,7 @@ public class UniprotTest
      * Check sequence features
      */
     Vector<UniprotFeature> features = entry.getFeature();
-    assertEquals(6, features.size());
+    assertEquals(7, features.size());
     UniprotFeature sf = features.get(0);
     assertEquals("signal peptide", sf.getType());
     assertNull(sf.getDescription());
@@ -141,6 +143,16 @@ public class UniprotTest
     assertEquals(41, sf.getPosition());
     assertEquals(41, sf.getBegin());
     assertEquals(41, sf.getEnd());
+
+    sf = features.get(6);
+    assertEquals("sequence variant", sf.getType());
+    assertEquals("Foo",
+            sf.getDescription());
+    assertEquals(42, sf.getPosition());
+    assertEquals(42, sf.getBegin());
+    assertEquals(42, sf.getEnd());
+    Assert.assertEquals(Uniprot.getDescription(sf),
+            "p.Met42Leu<br/>p.Met42LeuMetVal Foo");
     /*
      * Check cross-references
      */
@@ -221,10 +233,14 @@ public class UniprotTest
 
     uf.setPosition(23);
     uf.setOriginal("K");
-    uf.setVariation("y");
+    Vector<String> vars = new Vector();
+    vars.add("y");
+    uf.setVariation(vars);
     assertEquals("p.Lys23Tyr Hello", Uniprot.getDescription(uf));
 
-    uf.setVariation("z"); // unknown variant - fails gracefully
+    vars.clear();
+    vars.add("z"); // unknown variant - fails gracefully
+    uf.setVariation(vars);
     assertEquals("p.Lys23z Hello", Uniprot.getDescription(uf));
 
     uf.setVariation(null); // variant missing - is ignored