From: Jim Procter Date: Wed, 6 Jun 2018 15:12:58 +0000 (+0100) Subject: Merge branch 'bug/JAL-2920_uniprotvariantfeature' into releases/Release_2_10_4_Branch X-Git-Tag: Release_2_10_5~60 X-Git-Url: http://source.jalview.org/gitweb/?a=commitdiff_plain;h=782db63cc15bc2261c398a87da13fd729da7a18d;hp=efffcd38913c0e67f670a66fb29c96e9cf1508b1;p=jalview.git Merge branch 'bug/JAL-2920_uniprotvariantfeature' into releases/Release_2_10_4_Branch --- diff --git a/resources/uniprot_mapping.xml b/resources/uniprot_mapping.xml index 4c9ad5f..68868c4 100755 --- a/resources/uniprot_mapping.xml +++ b/resources/uniprot_mapping.xml @@ -18,6 +18,7 @@ * The Jalview Authors are detailed in the 'AUTHORS' file. --> + diff --git a/src/jalview/datamodel/xdb/uniprot/UniprotFeature.java b/src/jalview/datamodel/xdb/uniprot/UniprotFeature.java index 3bae87e..8bd5652 100644 --- a/src/jalview/datamodel/xdb/uniprot/UniprotFeature.java +++ b/src/jalview/datamodel/xdb/uniprot/UniprotFeature.java @@ -53,40 +53,7 @@ public class UniprotFeature public String getDescription() { - if (description == null && variation == null && original == null) - { - return null; - } - StringBuilder sb = new StringBuilder(); - if (description != null) - { - sb.append(description); - } - if (variation != null && variation.size() > 0) - { - int i = 0; - for (String var : variation) - { - if (i++ > 0) - { - sb.append(","); - } - if (sb.length() > 0) - { - sb.append(" "); - } - sb.append("Variation: '" + var + "'"); - } - } - if (original != null) - { - if (sb.length() > 0) - { - sb.append(" "); - } - sb.append("Original: '" + original + "'"); - } - return sb.toString(); + return description; } public void setDescription(String d) diff --git a/src/jalview/ws/dbsources/Uniprot.java b/src/jalview/ws/dbsources/Uniprot.java index 6b09eb6..c311ea9 100644 --- a/src/jalview/ws/dbsources/Uniprot.java +++ b/src/jalview/ws/dbsources/Uniprot.java @@ -32,6 +32,8 @@ import jalview.datamodel.SequenceI; import jalview.datamodel.xdb.uniprot.UniprotEntry; import jalview.datamodel.xdb.uniprot.UniprotFeature; import jalview.datamodel.xdb.uniprot.UniprotFile; +import jalview.schemes.ResidueProperties; +import jalview.util.StringUtils; import jalview.ws.seqfetcher.DbSourceProxyImpl; import java.io.InputStream; @@ -40,6 +42,7 @@ import java.io.Reader; import java.net.URL; import java.net.URLConnection; import java.util.ArrayList; +import java.util.List; import java.util.Vector; import org.exolab.castor.mapping.Mapping; @@ -278,7 +281,7 @@ public class Uniprot extends DbSourceProxyImpl for (UniprotFeature uf : entry.getFeature()) { SequenceFeature copy = new SequenceFeature(uf.getType(), - uf.getDescription(), uf.getBegin(), uf.getEnd(), "Uniprot"); + getDescription(uf), uf.getBegin(), uf.getEnd(), "Uniprot"); copy.setStatus(uf.getStatus()); sequence.addSequenceFeature(copy); } @@ -291,6 +294,86 @@ public class Uniprot extends DbSourceProxyImpl } /** + * Constructs a feature description from the description and (optionally) + * original and variant fields of the Uniprot XML feature + * + * @param uf + * @return + */ + protected static String getDescription(UniprotFeature uf) + { + String orig = uf.getOriginal(); + List variants = uf.getVariation(); + StringBuilder sb = new StringBuilder(); + + /* + * append variant in standard format if present + * e.g. p.Arg59Lys + */ + if (orig != null && !orig.isEmpty() && variants != null + && !variants.isEmpty()) + { + int p = 0; + for (String var : variants) + { + // TODO proper HGVS nomenclature for delins structural variations + // http://varnomen.hgvs.org/recommendations/protein/variant/delins/ + // for now we are pragmatic - any orig/variant sequence longer than + // three characters is shown with single-character notation rather than + // three-letter notation + sb.append("p."); + if (orig.length() < 4) + { + for (int c = 0, clen = orig.length(); c < clen; c++) + { + char origchar = orig.charAt(c); + String orig3 = ResidueProperties.aa2Triplet.get("" + origchar); + sb.append(orig3 == null ? origchar + : StringUtils.toSentenceCase(orig3)); + } + } + else + { + sb.append(orig); + } + + sb.append(Integer.toString(uf.getPosition())); + + if (var.length() < 4) + { + for (int c = 0, clen = var.length(); c < clen; c++) + { + char varchar = var.charAt(c); + String var3 = ResidueProperties.aa2Triplet.get("" + varchar); + + sb.append(var3 != null ? StringUtils.toSentenceCase(var3) + : "" + varchar); + } + } + else + { + sb.append(var); + } + if (++p != variants.size()) + { + sb.append("\n"); + } + else + { + sb.append(" "); + } + } + } + String description = uf.getDescription(); + if (description != null) + { + sb.append(description); + } + + return sb.toString(); + } + + /** * * @param entry * UniportEntry diff --git a/test/jalview/ws/dbsources/UniprotTest.java b/test/jalview/ws/dbsources/UniprotTest.java index b70e581..ab79f10 100644 --- a/test/jalview/ws/dbsources/UniprotTest.java +++ b/test/jalview/ws/dbsources/UniprotTest.java @@ -35,6 +35,7 @@ import java.io.Reader; import java.io.StringReader; import java.util.Vector; +import org.testng.Assert; import org.testng.annotations.BeforeClass; import org.testng.annotations.Test; @@ -67,6 +68,8 @@ public class UniprotTest + "ML" + "M" + "LLMVM" + + "LLLMVML" + + "LLLMVKMLML" + "MHAPL VSKDL" + ""; @@ -102,7 +105,7 @@ public class UniprotTest * Check sequence features */ Vector features = entry.getFeature(); - assertEquals(7, features.size()); + assertEquals(9, features.size()); UniprotFeature sf = features.get(0); assertEquals("signal peptide", sf.getType()); assertNull(sf.getDescription()); @@ -124,33 +127,47 @@ public class UniprotTest sf = features.get(3); assertEquals("sequence variant", sf.getType()); - assertEquals("Variation: 'L' Original: 'M'", sf.getDescription()); + assertNull(sf.getDescription()); assertEquals(41, sf.getPosition()); assertEquals(41, sf.getBegin()); assertEquals(41, sf.getEnd()); sf = features.get(4); assertEquals("sequence variant", sf.getType()); - assertEquals("Pathogenic Variation: 'L' Original: 'M'", - sf.getDescription()); + assertEquals("Pathogenic", sf.getDescription()); assertEquals(41, sf.getPosition()); assertEquals(41, sf.getBegin()); assertEquals(41, sf.getEnd()); sf = features.get(5); assertEquals("sequence variant", sf.getType()); - assertEquals("Pathogenic Original: 'M'", sf.getDescription()); + assertEquals("Pathogenic", sf.getDescription()); assertEquals(41, sf.getPosition()); assertEquals(41, sf.getBegin()); assertEquals(41, sf.getEnd()); sf = features.get(6); assertEquals("sequence variant", sf.getType()); - assertEquals("Foo Variation: 'L', Variation: 'LMV' Original: 'M'", + assertEquals("Foo", sf.getDescription()); assertEquals(42, sf.getPosition()); assertEquals(42, sf.getBegin()); assertEquals(42, sf.getEnd()); + Assert.assertEquals(Uniprot.getDescription(sf), + "p.Met42Leu" + "\n" + "p.Met42LeuMetVal Foo"); + + sf = features.get(7); + assertEquals(42, sf.getBegin()); + assertEquals(43, sf.getEnd()); + Assert.assertEquals(Uniprot.getDescription(sf), + "p.MetLeu42LeuLeu" + "\n" + "p.MetLeu42LeuMetVal Foo"); + + sf = features.get(8); + assertEquals(42, sf.getBegin()); + assertEquals(45, sf.getEnd()); + Assert.assertEquals(Uniprot.getDescription(sf), + "p.MLML42LeuLeu" + "\n" + "p.MLML42LMVK Foo Too"); + /* * Check cross-references */ @@ -219,4 +236,29 @@ public class UniprotTest assertEquals(expectedDescription, Uniprot.getUniprotEntryDescription(entry)); } + + @Test(groups = { "Functional" }) + public void testGetDescription() + { + UniprotFeature uf = new UniprotFeature(); + assertEquals("", Uniprot.getDescription(uf)); + + uf.setDescription("Hello"); + assertEquals("Hello", Uniprot.getDescription(uf)); + + uf.setPosition(23); + uf.setOriginal("K"); + Vector vars = new Vector(); + vars.add("y"); + uf.setVariation(vars); + assertEquals("p.Lys23Tyr Hello", Uniprot.getDescription(uf)); + + vars.clear(); + vars.add("z"); // unknown variant - fails gracefully + uf.setVariation(vars); + assertEquals("p.Lys23z Hello", Uniprot.getDescription(uf)); + + uf.setVariation(null); // variant missing - is ignored + assertEquals("Hello", Uniprot.getDescription(uf)); + } }