From be77f1e015d9b97391e23541850f37ea03b41b9b Mon Sep 17 00:00:00 2001 From: Jim Procter Date: Sat, 2 Jun 2018 14:17:31 +0100 Subject: [PATCH] JAL-3017 parse multiple variant elements for uniprot feature Conflicts: src/jalview/datamodel/xdb/uniprot/UniprotFeature.java Merged JAL-3017 into upstream JAL-2920 patches for HGVS nomenclature note - should properly deal with delins variants http://varnomen.hgvs.org/recommendations/protein/variant/delins/ --- resources/uniprot_mapping.xml | 2 +- .../datamodel/xdb/uniprot/UniprotFeature.java | 8 +++-- src/jalview/ws/dbsources/Uniprot.java | 38 ++++++++++++++------ test/jalview/ws/dbsources/UniprotTest.java | 22 ++++++++++-- 4 files changed, 53 insertions(+), 17 deletions(-) diff --git a/resources/uniprot_mapping.xml b/resources/uniprot_mapping.xml index 832d3e5..4c9ad5f 100755 --- a/resources/uniprot_mapping.xml +++ b/resources/uniprot_mapping.xml @@ -69,7 +69,7 @@ - + diff --git a/src/jalview/datamodel/xdb/uniprot/UniprotFeature.java b/src/jalview/datamodel/xdb/uniprot/UniprotFeature.java index 91d2e19..8bd5652 100644 --- a/src/jalview/datamodel/xdb/uniprot/UniprotFeature.java +++ b/src/jalview/datamodel/xdb/uniprot/UniprotFeature.java @@ -20,6 +20,8 @@ */ package jalview.datamodel.xdb.uniprot; +import java.util.Vector; + /** * A data model class for binding from Uniprot XML via uniprot_mapping.xml */ @@ -31,7 +33,7 @@ public class UniprotFeature private String original = null; - private String variation = null; + private Vector variation = null; private String status; @@ -110,12 +112,12 @@ public class UniprotFeature this.original = original; } - public String getVariation() + public Vector getVariation() { return variation; } - public void setVariation(String variant) + public void setVariation(Vector variant) { this.variation = variant; } diff --git a/src/jalview/ws/dbsources/Uniprot.java b/src/jalview/ws/dbsources/Uniprot.java index 11fff9d..fc4f318 100644 --- a/src/jalview/ws/dbsources/Uniprot.java +++ 
b/src/jalview/ws/dbsources/Uniprot.java @@ -42,6 +42,7 @@ import java.io.Reader; import java.net.URL; import java.net.URLConnection; import java.util.ArrayList; +import java.util.List; import java.util.Vector; import org.exolab.castor.mapping.Mapping; @@ -302,24 +303,41 @@ public class Uniprot extends DbSourceProxyImpl protected static String getDescription(UniprotFeature uf) { String orig = uf.getOriginal(); - String var = uf.getVariation(); + List variants = uf.getVariation(); StringBuilder sb = new StringBuilder(); /* * append variant in standard format if present * e.g. p.Arg59Lys */ - if (orig != null && !orig.isEmpty() && var != null && !var.isEmpty()) + if (orig != null && !orig.isEmpty() && variants != null + && !variants.isEmpty()) { - sb.append("p."); - String orig3 = ResidueProperties.aa2Triplet.get(orig); - sb.append(orig3 == null ? orig : StringUtils.toSentenceCase(orig3)); - sb.append(Integer.toString(uf.getPosition())); - String var3 = ResidueProperties.aa2Triplet.get(var); - sb.append(var3 == null ? var : StringUtils.toSentenceCase(var3)); - sb.append(" "); - } + int p = 0; + for (String var : variants) + { + sb.append("p."); + String orig3 = ResidueProperties.aa2Triplet.get(orig); + sb.append(orig3 == null ? orig : StringUtils.toSentenceCase(orig3)); + sb.append(Integer.toString(uf.getPosition())); + for (int c = 0, clen = var.length(); c < clen; c++) + { + char varchar = var.charAt(c); + String var3 = ResidueProperties.aa2Triplet.get("" + varchar); + sb.append(var3 != null ? StringUtils.toSentenceCase(var3) + : "" + varchar); + } + if (++p != variants.size()) + { + sb.append("
"); + } + else + { + sb.append(" "); + } + } + } String description = uf.getDescription(); if (description != null) { diff --git a/test/jalview/ws/dbsources/UniprotTest.java b/test/jalview/ws/dbsources/UniprotTest.java index 0ee4388..b187b67 100644 --- a/test/jalview/ws/dbsources/UniprotTest.java +++ b/test/jalview/ws/dbsources/UniprotTest.java @@ -35,6 +35,7 @@ import java.io.Reader; import java.io.StringReader; import java.util.Vector; +import org.testng.Assert; import org.testng.annotations.BeforeClass; import org.testng.annotations.Test; @@ -66,6 +67,7 @@ public class UniprotTest + "ML" + "ML" + "M" + + "LLMVM" + "MHAPL VSKDL" + ""; @@ -101,7 +103,7 @@ public class UniprotTest * Check sequence features */ Vector features = entry.getFeature(); - assertEquals(6, features.size()); + assertEquals(7, features.size()); UniprotFeature sf = features.get(0); assertEquals("signal peptide", sf.getType()); assertNull(sf.getDescription()); @@ -141,6 +143,16 @@ public class UniprotTest assertEquals(41, sf.getPosition()); assertEquals(41, sf.getBegin()); assertEquals(41, sf.getEnd()); + + sf = features.get(6); + assertEquals("sequence variant", sf.getType()); + assertEquals("Foo", + sf.getDescription()); + assertEquals(42, sf.getPosition()); + assertEquals(42, sf.getBegin()); + assertEquals(42, sf.getEnd()); + Assert.assertEquals(Uniprot.getDescription(sf), + "p.Met42Leu
p.Met42LeuMetVal Foo"); /* * Check cross-references */ @@ -221,10 +233,14 @@ public class UniprotTest uf.setPosition(23); uf.setOriginal("K"); - uf.setVariation("y"); + Vector vars = new Vector(); + vars.add("y"); + uf.setVariation(vars); assertEquals("p.Lys23Tyr Hello", Uniprot.getDescription(uf)); - uf.setVariation("z"); // unknown variant - fails gracefully + vars.clear(); + vars.add("z"); // unknown variant - fails gracefully + uf.setVariation(vars); assertEquals("p.Lys23z Hello", Uniprot.getDescription(uf)); uf.setVariation(null); // variant missing - is ignored -- 1.7.10.2