From 57af23f63ef752b8530bd7eae9722d04fd537368 Mon Sep 17 00:00:00 2001 From: Jim Procter Date: Tue, 17 Mar 2015 15:59:19 +0000 Subject: [PATCH] JAL-1617 hack to add a fake xref if the uniprot xref isn't present on the embl entry --- src/jalview/datamodel/xdb/embl/EmblEntry.java | 40 ++++++++++++++++++++-- test/jalview/ws/seqfetcher/DbRefFetcherTest.java | 23 ++++++++++++- 2 files changed, 59 insertions(+), 4 deletions(-) diff --git a/src/jalview/datamodel/xdb/embl/EmblEntry.java b/src/jalview/datamodel/xdb/embl/EmblEntry.java index d501ef1..fc57b27 100644 --- a/src/jalview/datamodel/xdb/embl/EmblEntry.java +++ b/src/jalview/datamodel/xdb/embl/EmblEntry.java @@ -590,8 +590,10 @@ public class EmblEntry } } Sequence product = null; + DBRefEntry protEMBLCDS = null; exon = adjustForPrStart(prstart, exon); - + boolean noProteinDbref=true; + if (prseq != null && prname != null && prid != null) { // extract proteins. @@ -669,8 +671,12 @@ public class EmblEntry if (product != null) { product.addDBRef(pcdnaref); - } - + protEMBLCDS = new DBRefEntry(pcdnaref); + protEMBLCDS.setSource(DBRefSource.EMBLCDSProduct); + product.addDBRef(protEMBLCDS); + + } + } } // add cds feature to dna seq - this may include the stop codon @@ -724,6 +730,7 @@ public class EmblEntry + ref.getAccessionId()); } } + noProteinDbref = false; } if (product != null) { @@ -746,6 +753,33 @@ public class EmblEntry } dna.addDBRef(ref); } + if (noProteinDbref && product != null) + { + // add protein coding reference to dna sequence so xref matches + if (protEMBLCDS == null) + { + protEMBLCDS = new DBRefEntry(); + protEMBLCDS.setAccessionId(prid); + protEMBLCDS.setSource(DBRefSource.EMBLCDSProduct); + protEMBLCDS.setVersion(getVersion()); + protEMBLCDS + .setMap(new Mapping(product, map.getMap().getInverse())); + } + product.addDBRef(protEMBLCDS); + + // Add converse mapping reference + if (map != null) + { + Mapping pmap = new Mapping(product, protEMBLCDS.getMap().getMap() + .getInverse()); + DBRefEntry ncMap = new DBRefEntry(protEMBLCDS); + ncMap.setMap(pmap); + if (map.getTo() != null) + { + dna.addDBRef(ncMap); + } + } + } } } diff --git a/test/jalview/ws/seqfetcher/DbRefFetcherTest.java b/test/jalview/ws/seqfetcher/DbRefFetcherTest.java index afaadbb..f58ead5 100644 --- a/test/jalview/ws/seqfetcher/DbRefFetcherTest.java +++ b/test/jalview/ws/seqfetcher/DbRefFetcherTest.java @@ -50,6 +50,7 @@ public class DbRefFetcherTest @BeforeClass public static void setUpBeforeClass() throws Exception { + jalview.bin.Cache.initLogger(); } /** @@ -102,13 +103,33 @@ public class DbRefFetcherTest { String retrievalId = "CAA23748"; // "V00488"; DbSourceProxy embl = new SequenceFetcher().getSourceProxy(DBRefSource.EMBL).get(0); - assertNotNull("Couldn't find the EMBL retrieval client",embl); + assertNotNull("Couldn't find the EMBL retrieval client", embl); + verifyProteinNucleotideXref(retrievalId, embl); + } + + @Test + public void testEmblCDSUniprotProductRecovery() throws Exception + { + String retrievalId = "AAH29712"; + DbSourceProxy embl = new SequenceFetcher().getSourceProxy( + DBRefSource.EMBLCDS).get(0); + assertNotNull("Couldn't find the EMBL retrieval client", embl); + verifyProteinNucleotideXref(retrievalId, embl); + } + + private void verifyProteinNucleotideXref(String retrievalId, + DbSourceProxy embl) throws Exception + { AlignmentI alsq = embl.getSequenceRecords(retrievalId); assertNotNull("Couldn't find the EMBL record " + retrievalId, alsq); assertEquals("Didn't retrieve right number of records", 1, alsq.getHeight()); DBRefEntry[] dr = DBRefUtils.selectRefs(alsq.getSequenceAt(0).getDBRef(), DBRefSource.PROTEINSEQ); assertNotNull(dr); assertEquals("Expected a single Uniprot cross reference", 1, dr.length); + assertEquals("Expected cross refernce map to be one amino acid", dr[0] + .getMap().getMappedWidth(), 1); + assertEquals("Expected local refernce map to be 3 nucleotides", dr[0] + .getMap().getWidth(), 3); AlignmentI sprods = CrossRef.findXrefSequences(alsq.getSequencesArray(), true, dr[0].getSource(), alsq.getDataset()); assertNotNull( "Couldn't recover cross reference sequence from dataset. Was it ever added ?", -- 1.7.10.2