JAL-2920 use HGVS notation for protein variant feature
author gmungoc <g.m.carstairs@dundee.ac.uk>
Mon, 21 May 2018 11:17:01 +0000 (12:17 +0100)
committer gmungoc <g.m.carstairs@dundee.ac.uk>
Mon, 21 May 2018 11:17:01 +0000 (12:17 +0100)
src/jalview/datamodel/xdb/uniprot/UniprotFeature.java
src/jalview/ws/dbsources/Uniprot.java
test/jalview/ws/dbsources/UniprotTest.java

index 4c2ae24..91d2e19 100644 (file)
@@ -51,19 +51,7 @@ public class UniprotFeature
 
   public String getDescription()
   {
-    if (description == null && variation == null && original == null)
-    {
-      return null;
-    }
-    return (description == null ? "" : description)
-            + (variation != null
-                    ? (description != null ? " " : "") + "Variation: '"
-                            + variation + "'"
-                    : "")
-            + (original != null
-                    ? ((description != null || variation != null) ? " "
-                            : "") + "Original: '" + original + "'"
-                    : "");
+    return description; // raw description only, with no variant text appended
   }
 
   public void setDescription(String d)
index 6b09eb6..11fff9d 100644 (file)
@@ -32,6 +32,8 @@ import jalview.datamodel.SequenceI;
 import jalview.datamodel.xdb.uniprot.UniprotEntry;
 import jalview.datamodel.xdb.uniprot.UniprotFeature;
 import jalview.datamodel.xdb.uniprot.UniprotFile;
+import jalview.schemes.ResidueProperties;
+import jalview.util.StringUtils;
 import jalview.ws.seqfetcher.DbSourceProxyImpl;
 
 import java.io.InputStream;
@@ -278,7 +280,7 @@ public class Uniprot extends DbSourceProxyImpl
       for (UniprotFeature uf : entry.getFeature())
       {
         SequenceFeature copy = new SequenceFeature(uf.getType(),
-                uf.getDescription(), uf.getBegin(), uf.getEnd(), "Uniprot");
+                getDescription(uf), uf.getBegin(), uf.getEnd(), "Uniprot");
         copy.setStatus(uf.getStatus());
         sequence.addSequenceFeature(copy);
       }
@@ -291,6 +293,56 @@ public class Uniprot extends DbSourceProxyImpl
   }
 
   /**
+   * Builds the description for a sequence feature from a Uniprot XML
+   * feature. If both the original and variant residues are present, the
+   * result starts with the variant in HGVS protein notation (e.g.
+   * p.Arg59Lys); residue codes with no three-letter equivalent in
+   * ResidueProperties.aa2Triplet are output unchanged. The feature's own
+   * description, if any, follows, separated by a single space.
+   * 
+   * @param uf
+   *          the Uniprot feature to describe
+   * @return the variant and/or description text, or an empty string if the
+   *         feature has neither
+   */
+  protected static String getDescription(UniprotFeature uf)
+  {
+    String orig = uf.getOriginal();
+    String var = uf.getVariation();
+    String description = uf.getDescription();
+    StringBuilder sb = new StringBuilder();
+
+    /*
+     * append variant in standard format if present
+     * e.g. p.Arg59Lys
+     */
+    if (orig != null && !orig.isEmpty() && var != null && !var.isEmpty())
+    {
+      String orig3 = ResidueProperties.aa2Triplet.get(orig);
+      String var3 = ResidueProperties.aa2Triplet.get(var);
+      sb.append("p.");
+      sb.append(orig3 == null ? orig : StringUtils.toSentenceCase(orig3));
+      sb.append(Integer.toString(uf.getPosition()));
+      sb.append(var3 == null ? var : StringUtils.toSentenceCase(var3));
+    }
+
+    if (description != null && !description.isEmpty())
+    {
+      /*
+       * add the separator only between variant and description, so that
+       * a variant with no description has no trailing space
+       */
+      if (sb.length() > 0)
+      {
+        sb.append(' ');
+      }
+      sb.append(description);
+    }
+
+    return sb.toString();
+  }
+
+  /**
    * 
    * @param entry
    *          UniportEntry
index c603a11..0ee4388 100644 (file)
@@ -123,22 +123,21 @@ public class UniprotTest
 
     sf = features.get(3);
     assertEquals("sequence variant", sf.getType());
-    assertEquals("Variation: 'L' Original: 'M'", sf.getDescription());
+    assertNull(sf.getDescription());
     assertEquals(41, sf.getPosition());
     assertEquals(41, sf.getBegin());
     assertEquals(41, sf.getEnd());
 
     sf = features.get(4);
     assertEquals("sequence variant", sf.getType());
-    assertEquals("Pathogenic Variation: 'L' Original: 'M'",
-            sf.getDescription());
+    assertEquals("Pathogenic", sf.getDescription());
     assertEquals(41, sf.getPosition());
     assertEquals(41, sf.getBegin());
     assertEquals(41, sf.getEnd());
 
     sf = features.get(5);
     assertEquals("sequence variant", sf.getType());
-    assertEquals("Pathogenic Original: 'M'", sf.getDescription());
+    assertEquals("Pathogenic", sf.getDescription());
     assertEquals(41, sf.getPosition());
     assertEquals(41, sf.getBegin());
     assertEquals(41, sf.getEnd());
@@ -210,4 +209,25 @@ public class UniprotTest
     assertEquals(expectedDescription,
             Uniprot.getUniprotEntryDescription(entry));
   }
+
+  @Test(groups = { "Functional" })
+  public void testGetDescription()
+  {
+    UniprotFeature uf = new UniprotFeature();
+    assertEquals("", Uniprot.getDescription(uf)); // no fields set
+
+    uf.setDescription("Hello");
+    assertEquals("Hello", Uniprot.getDescription(uf)); // description only
+
+    uf.setPosition(23);
+    uf.setOriginal("K");
+    uf.setVariation("y"); // lower case code - still expected to give Tyr
+    assertEquals("p.Lys23Tyr Hello", Uniprot.getDescription(uf));
+
+    uf.setVariation("z"); // unknown variant - fails gracefully
+    assertEquals("p.Lys23z Hello", Uniprot.getDescription(uf));
+
+    uf.setVariation(null); // variant missing - is ignored
+    assertEquals("Hello", Uniprot.getDescription(uf));
+  }
 }