From ceaa7e68d47ff6a236c7445bd99c9e8d8bd7558e Mon Sep 17 00:00:00 2001 From: gmungoc Date: Mon, 21 May 2018 12:17:01 +0100 Subject: [PATCH 1/1] JAL-2920 use HGVS notation for protein variant feature --- .../datamodel/xdb/uniprot/UniprotFeature.java | 14 +------ src/jalview/ws/dbsources/Uniprot.java | 41 +++++++++++++++++++- test/jalview/ws/dbsources/UniprotTest.java | 28 +++++++++++-- 3 files changed, 65 insertions(+), 18 deletions(-) diff --git a/src/jalview/datamodel/xdb/uniprot/UniprotFeature.java b/src/jalview/datamodel/xdb/uniprot/UniprotFeature.java index 4c2ae24..91d2e19 100644 --- a/src/jalview/datamodel/xdb/uniprot/UniprotFeature.java +++ b/src/jalview/datamodel/xdb/uniprot/UniprotFeature.java @@ -51,19 +51,7 @@ public class UniprotFeature public String getDescription() { - if (description == null && variation == null && original == null) - { - return null; - } - return (description == null ? "" : description) - + (variation != null - ? (description != null ? " " : "") + "Variation: '" - + variation + "'" - : "") - + (original != null - ? ((description != null || variation != null) ? " " - : "") + "Original: '" + original + "'" - : ""); + return description; } public void setDescription(String d) diff --git a/src/jalview/ws/dbsources/Uniprot.java b/src/jalview/ws/dbsources/Uniprot.java index 6b09eb6..11fff9d 100644 --- a/src/jalview/ws/dbsources/Uniprot.java +++ b/src/jalview/ws/dbsources/Uniprot.java @@ -32,6 +32,8 @@ import jalview.datamodel.SequenceI; import jalview.datamodel.xdb.uniprot.UniprotEntry; import jalview.datamodel.xdb.uniprot.UniprotFeature; import jalview.datamodel.xdb.uniprot.UniprotFile; +import jalview.schemes.ResidueProperties; +import jalview.util.StringUtils; import jalview.ws.seqfetcher.DbSourceProxyImpl; import java.io.InputStream; @@ -278,7 +280,7 @@ public class Uniprot extends DbSourceProxyImpl for (UniprotFeature uf : entry.getFeature()) { SequenceFeature copy = new SequenceFeature(uf.getType(), - uf.getDescription(), uf.getBegin(), uf.getEnd(), "Uniprot"); + getDescription(uf), uf.getBegin(), uf.getEnd(), "Uniprot"); copy.setStatus(uf.getStatus()); sequence.addSequenceFeature(copy); } @@ -291,6 +293,43 @@ public class Uniprot extends DbSourceProxyImpl } /** + * Constructs a feature description from the description and (optionally) + * original and variant fields of the Uniprot XML feature + * + * @param uf + * @return + */ + protected static String getDescription(UniprotFeature uf) + { + String orig = uf.getOriginal(); + String var = uf.getVariation(); + StringBuilder sb = new StringBuilder(); + + /* + * append variant in standard format if present + * e.g. p.Arg59Lys + */ + if (orig != null && !orig.isEmpty() && var != null && !var.isEmpty()) + { + sb.append("p."); + String orig3 = ResidueProperties.aa2Triplet.get(orig); + sb.append(orig3 == null ? orig : StringUtils.toSentenceCase(orig3)); + sb.append(Integer.toString(uf.getPosition())); + String var3 = ResidueProperties.aa2Triplet.get(var); + sb.append(var3 == null ? var : StringUtils.toSentenceCase(var3)); + sb.append(" "); + } + + String description = uf.getDescription(); + if (description != null) + { + sb.append(description); + } + + return sb.toString(); + } + + /** * * @param entry * UniportEntry diff --git a/test/jalview/ws/dbsources/UniprotTest.java b/test/jalview/ws/dbsources/UniprotTest.java index c603a11..0ee4388 100644 --- a/test/jalview/ws/dbsources/UniprotTest.java +++ b/test/jalview/ws/dbsources/UniprotTest.java @@ -123,22 +123,21 @@ public class UniprotTest sf = features.get(3); assertEquals("sequence variant", sf.getType()); - assertEquals("Variation: 'L' Original: 'M'", sf.getDescription()); + assertNull(sf.getDescription()); assertEquals(41, sf.getPosition()); assertEquals(41, sf.getBegin()); assertEquals(41, sf.getEnd()); sf = features.get(4); assertEquals("sequence variant", sf.getType()); - assertEquals("Pathogenic Variation: 'L' Original: 'M'", - sf.getDescription()); + assertEquals("Pathogenic", sf.getDescription()); assertEquals(41, sf.getPosition()); assertEquals(41, sf.getBegin()); assertEquals(41, sf.getEnd()); sf = features.get(5); assertEquals("sequence variant", sf.getType()); - assertEquals("Pathogenic Original: 'M'", sf.getDescription()); + assertEquals("Pathogenic", sf.getDescription()); assertEquals(41, sf.getPosition()); assertEquals(41, sf.getBegin()); assertEquals(41, sf.getEnd()); @@ -210,4 +209,25 @@ public class UniprotTest assertEquals(expectedDescription, Uniprot.getUniprotEntryDescription(entry)); } + + @Test(groups = { "Functional" }) + public void testGetDescription() + { + UniprotFeature uf = new UniprotFeature(); + assertEquals("", Uniprot.getDescription(uf)); + + uf.setDescription("Hello"); + assertEquals("Hello", Uniprot.getDescription(uf)); + + uf.setPosition(23); + uf.setOriginal("K"); + uf.setVariation("y"); + assertEquals("p.Lys23Tyr Hello", Uniprot.getDescription(uf)); + + uf.setVariation("z"); // unknown variant - fails gracefully + assertEquals("p.Lys23z Hello", Uniprot.getDescription(uf)); + + uf.setVariation(null); // variant missing - is ignored + assertEquals("Hello", Uniprot.getDescription(uf)); + } } -- 1.7.10.2