From 9ad1e437d5d6366f0b06fbfbdb446a720ca57104 Mon Sep 17 00:00:00 2001 From: gmungoc Date: Wed, 18 Mar 2015 20:27:08 +0000 Subject: [PATCH] Merge hack/JAL-1617 fake DBRef to develop --- src/jalview/datamodel/DBRefEntry.java | 5 +++ src/jalview/datamodel/DBRefSource.java | 13 ++++--- src/jalview/datamodel/xdb/embl/EmblEntry.java | 40 ++++++++++++++++++++-- src/jalview/util/DBRefUtils.java | 26 ++++++++------ test/jalview/ws/seqfetcher/DbRefFetcherTest.java | 23 ++++++++++++- 5 files changed, 88 insertions(+), 19 deletions(-) diff --git a/src/jalview/datamodel/DBRefEntry.java b/src/jalview/datamodel/DBRefEntry.java index 54bdf20..0581845 100755 --- a/src/jalview/datamodel/DBRefEntry.java +++ b/src/jalview/datamodel/DBRefEntry.java @@ -193,4 +193,9 @@ public class DBRefEntry return ((source != null) ? source : "") + ":" + ((accessionId != null) ? accessionId : ""); } + + public String toString() + { + return getSrcAccString(); + } } diff --git a/src/jalview/datamodel/DBRefSource.java b/src/jalview/datamodel/DBRefSource.java index b1824f7..6982594 100755 --- a/src/jalview/datamodel/DBRefSource.java +++ b/src/jalview/datamodel/DBRefSource.java @@ -38,12 +38,15 @@ public class DBRefSource /** * UNIPROT Entry Name */ - public static String UP_NAME = "UNIPROT_NAME"; + public static String UP_NAME = "UNIPROT_NAME".toUpperCase(); /** * Uniprot Knowledgebase/TrEMBL as served from EMBL protein products. */ - public static final String UNIPROTKB = "UniProtKB/TrEMBL"; + public static final String UNIPROTKB = "UniProtKB/TrEMBL".toUpperCase(); + + public static final String EMBLCDSProduct = "EMBLCDSProtein" + .toUpperCase(); /** * PDB Entry Code @@ -73,7 +76,7 @@ public class DBRefSource /** * GeneDB ID */ - public static final String GENEDB = "GeneDB"; + public static final String GENEDB = "GeneDB".toUpperCase(); /** * List of databases whose sequences might have coding regions annotated @@ -85,10 +88,10 @@ public class DBRefSource { EMBLCDS, GENEDB }; public static final String[] PROTEINDBS = - { UNIPROT, PDB, UNIPROTKB }; + { UNIPROT, PDB, UNIPROTKB, EMBLCDSProduct }; public static final String[] PROTEINSEQ = - { UNIPROT, UNIPROTKB }; + { UNIPROT, UNIPROTKB, EMBLCDSProduct }; public static final String[] PROTEINSTR = { PDB }; diff --git a/src/jalview/datamodel/xdb/embl/EmblEntry.java b/src/jalview/datamodel/xdb/embl/EmblEntry.java index d501ef1..fc57b27 100644 --- a/src/jalview/datamodel/xdb/embl/EmblEntry.java +++ b/src/jalview/datamodel/xdb/embl/EmblEntry.java @@ -590,8 +590,10 @@ public class EmblEntry } } Sequence product = null; + DBRefEntry protEMBLCDS = null; exon = adjustForPrStart(prstart, exon); - + boolean noProteinDbref=true; + if (prseq != null && prname != null && prid != null) { // extract proteins. @@ -669,8 +671,12 @@ public class EmblEntry if (product != null) { product.addDBRef(pcdnaref); - } - + protEMBLCDS = new DBRefEntry(pcdnaref); + protEMBLCDS.setSource(DBRefSource.EMBLCDSProduct); + product.addDBRef(protEMBLCDS); + + } + } } // add cds feature to dna seq - this may include the stop codon @@ -724,6 +730,7 @@ public class EmblEntry + ref.getAccessionId()); } } + noProteinDbref = false; } if (product != null) { @@ -746,6 +753,33 @@ public class EmblEntry } dna.addDBRef(ref); } + if (noProteinDbref && product != null) + { + // add protein coding reference to dna sequence so xref matches + if (protEMBLCDS == null) + { + protEMBLCDS = new DBRefEntry(); + protEMBLCDS.setAccessionId(prid); + protEMBLCDS.setSource(DBRefSource.EMBLCDSProduct); + protEMBLCDS.setVersion(getVersion()); + protEMBLCDS + .setMap(new Mapping(product, map.getMap().getInverse())); + } + product.addDBRef(protEMBLCDS); + + // Add converse mapping reference + if (map != null) + { + Mapping pmap = new Mapping(product, protEMBLCDS.getMap().getMap() + .getInverse()); + DBRefEntry ncMap = new DBRefEntry(protEMBLCDS); + ncMap.setMap(pmap); + if (map.getTo() != null) + { + dna.addDBRef(ncMap); + } + } + } } } diff --git a/src/jalview/util/DBRefUtils.java b/src/jalview/util/DBRefUtils.java index 9a4ffc8..8163f05 100755 --- a/src/jalview/util/DBRefUtils.java +++ b/src/jalview/util/DBRefUtils.java @@ -20,9 +20,15 @@ */ package jalview.util; -import java.util.*; +import jalview.datamodel.DBRefEntry; +import jalview.datamodel.PDBEntry; +import jalview.datamodel.SequenceI; -import jalview.datamodel.*; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.Hashtable; +import java.util.Map; +import java.util.Vector; public class DBRefUtils { @@ -48,8 +54,8 @@ public class DBRefUtils { return dbrefs; } - Hashtable srcs = new Hashtable(); - Vector res = new Vector(); + Map srcs = new HashMap(); + ArrayList res = new ArrayList(); for (int i = 0; i < sources.length; i++) { @@ -59,18 +65,14 @@ public class DBRefUtils { if (srcs.containsKey(dbrefs[i].getSource())) { - res.addElement(dbrefs[i]); + res.add(dbrefs[i]); } } if (res.size() > 0) { DBRefEntry[] reply = new DBRefEntry[res.size()]; - for (int i = 0; i < res.size(); i++) - { - reply[i] = (DBRefEntry) res.elementAt(i); - } - return reply; + return res.toArray(reply); } res = null; // there are probable memory leaks in the hashtable! @@ -169,7 +171,9 @@ public class DBRefUtils DbRefComp comparator) { if (ref == null || entry == null) + { return null; + } Vector rfs = new Vector(); for (int i = 0; i < ref.length; i++) { @@ -303,6 +307,7 @@ public class DBRefUtils { if ((refa.getMap() == null && refb.getMap() == null) || (refa.getMap() != null && refb.getMap() != null)) + { if ((refb.getMap().getMap() == null && refa.getMap().getMap() == null) || (refb.getMap().getMap() != null && refa.getMap().getMap() != null && refb @@ -311,6 +316,7 @@ public class DBRefUtils { return true; } + } } } return false; diff --git a/test/jalview/ws/seqfetcher/DbRefFetcherTest.java b/test/jalview/ws/seqfetcher/DbRefFetcherTest.java index afaadbb..f58ead5 100644 --- a/test/jalview/ws/seqfetcher/DbRefFetcherTest.java +++ b/test/jalview/ws/seqfetcher/DbRefFetcherTest.java @@ -50,6 +50,7 @@ public class DbRefFetcherTest @BeforeClass public static void setUpBeforeClass() throws Exception { + jalview.bin.Cache.initLogger(); } /** @@ -102,13 +103,33 @@ public class DbRefFetcherTest { String retrievalId = "CAA23748"; // "V00488"; DbSourceProxy embl = new SequenceFetcher().getSourceProxy(DBRefSource.EMBL).get(0); - assertNotNull("Couldn't find the EMBL retrieval client",embl); + assertNotNull("Couldn't find the EMBL retrieval client", embl); + verifyProteinNucleotideXref(retrievalId, embl); + } + + @Test + public void testEmblCDSUniprotProductRecovery() throws Exception + { + String retrievalId = "AAH29712"; + DbSourceProxy embl = new SequenceFetcher().getSourceProxy( + DBRefSource.EMBLCDS).get(0); + assertNotNull("Couldn't find the EMBL retrieval client", embl); + verifyProteinNucleotideXref(retrievalId, embl); + } + + private void verifyProteinNucleotideXref(String retrievalId, + DbSourceProxy embl) throws Exception + { AlignmentI alsq = embl.getSequenceRecords(retrievalId); assertNotNull("Couldn't find the EMBL record " + retrievalId, alsq); assertEquals("Didn't retrieve right number of records", 1, alsq.getHeight()); DBRefEntry[] dr = DBRefUtils.selectRefs(alsq.getSequenceAt(0).getDBRef(), DBRefSource.PROTEINSEQ); assertNotNull(dr); assertEquals("Expected a single Uniprot cross reference", 1, dr.length); + assertEquals("Expected cross refernce map to be one amino acid", dr[0] + .getMap().getMappedWidth(), 1); + assertEquals("Expected local refernce map to be 3 nucleotides", dr[0] + .getMap().getWidth(), 3); AlignmentI sprods = CrossRef.findXrefSequences(alsq.getSequencesArray(), true, dr[0].getSource(), alsq.getDataset()); assertNotNull( "Couldn't recover cross reference sequence from dataset. Was it ever added ?", -- 1.7.10.2