From a61896507a80dd6dcafaee02a872678022afe943 Mon Sep 17 00:00:00 2001 From: gmungoc Date: Fri, 1 Apr 2016 11:26:16 +0100 Subject: [PATCH] JAL-1705 fetch all Ensembl xrefs (except GO terms) --- src/jalview/ext/ensembl/EnsemblCdna.java | 14 ------------ src/jalview/ext/ensembl/EnsemblGene.java | 12 ---------- src/jalview/ext/ensembl/EnsemblProtein.java | 10 --------- src/jalview/ext/ensembl/EnsemblSeqProxy.java | 19 +--------------- src/jalview/ext/ensembl/EnsemblSymbol.java | 4 ++-- src/jalview/ext/ensembl/EnsemblXref.java | 26 ++++++++++------------ test/jalview/ext/ensembl/EnsemblXrefTest.java | 29 ++++--------------------- 7 files changed, 18 insertions(+), 96 deletions(-) diff --git a/src/jalview/ext/ensembl/EnsemblCdna.java b/src/jalview/ext/ensembl/EnsemblCdna.java index 856be74..e141db4 100644 --- a/src/jalview/ext/ensembl/EnsemblCdna.java +++ b/src/jalview/ext/ensembl/EnsemblCdna.java @@ -4,9 +4,6 @@ import jalview.datamodel.SequenceFeature; import jalview.io.gff.SequenceOntologyFactory; import jalview.io.gff.SequenceOntologyI; -import java.util.Arrays; -import java.util.List; - import com.stevesoft.pat.Regex; /** @@ -19,9 +16,6 @@ import com.stevesoft.pat.Regex; */ public class EnsemblCdna extends EnsemblSeqProxy { - private static final List CROSS_REFERENCES = Arrays - .asList(new String[] { "Uniprot/SWISSPROT", "Uniprot/SPTREMBL" }); - /* * accepts ENST or ENSTG with 11 digits * or ENSMUST or similar for other species @@ -114,12 +108,4 @@ public class EnsemblCdna extends EnsemblSeqProxy return false; } - @Override - protected List getCrossReferenceDatabases() - { - return CROSS_REFERENCES; - // 30/01/16 also found Vega_transcript, OTTT, ENS_LRG_transcript, UCSC, - // HGNC_trans_name, RefSeq_mRNA, RefSeq_mRNA_predicted - } - } diff --git a/src/jalview/ext/ensembl/EnsemblGene.java b/src/jalview/ext/ensembl/EnsemblGene.java index fa1e474..0bc6a74 100644 --- a/src/jalview/ext/ensembl/EnsemblGene.java +++ b/src/jalview/ext/ensembl/EnsemblGene.java @@ -29,9 +29,6 @@ import com.stevesoft.pat.Regex; */ public class EnsemblGene extends EnsemblSeqProxy { - private static final List CROSS_REFERENCES = Arrays - .asList(new String[] { "CCDS" }); - private static final String GENE_PREFIX = "gene:"; /* @@ -465,15 +462,6 @@ public class EnsemblGene extends EnsemblSeqProxy return false; } - @Override - protected List getCrossReferenceDatabases() - { - // found these for ENSG00000157764 on 30/01/2016: - // return new String[] {"Vega_gene", "OTTG", "ENS_LRG_gene", "ArrayExpress", - // "EntrezGene", "HGNC", "MIM_GENE", "MIM_MORBID", "WikiGene"}; - return CROSS_REFERENCES; - } - /** * Override to do nothing as Ensembl doesn't return a protein sequence for a * gene identifier diff --git a/src/jalview/ext/ensembl/EnsemblProtein.java b/src/jalview/ext/ensembl/EnsemblProtein.java index 97796a5..0facbb5 100644 --- a/src/jalview/ext/ensembl/EnsemblProtein.java +++ b/src/jalview/ext/ensembl/EnsemblProtein.java @@ -3,7 +3,6 @@ package jalview.ext.ensembl; import jalview.datamodel.AlignmentI; import jalview.datamodel.SequenceFeature; -import java.util.Arrays; import java.util.List; import com.stevesoft.pat.Regex; @@ -24,9 +23,6 @@ public class EnsemblProtein extends EnsemblSeqProxy private static final Regex ACCESSION_REGEX = new Regex( "(ENS([A-Z]{3}|)P[0-9]{11}$)" + "|" + "(CCDS[0-9.]{3,}$)"); - private static final List CROSSREFS = Arrays.asList(new String[] { - "PDB", "Uniprot/SPTREMBL", "Uniprot/SWISSPROT" }); - /** * Default constructor (to use rest.ensembl.org) */ @@ -99,12 +95,6 @@ public class EnsemblProtein extends EnsemblSeqProxy } @Override - protected List getCrossReferenceDatabases() - { - return CROSSREFS; - } - - @Override public Regex getAccessionValidator() { return ACCESSION_REGEX; diff --git a/src/jalview/ext/ensembl/EnsemblSeqProxy.java b/src/jalview/ext/ensembl/EnsemblSeqProxy.java index 5f3f1c8..0ceb29c 100644 --- a/src/jalview/ext/ensembl/EnsemblSeqProxy.java +++ b/src/jalview/ext/ensembl/EnsemblSeqProxy.java @@ -36,10 +36,6 @@ public abstract class EnsemblSeqProxy extends EnsemblRestClient { private static final String ALLELES = "alleles"; - private static final List CROSS_REFERENCES = Arrays - .asList(new String[] { "CCDS", "Uniprot/SWISSPROT", - "Uniprot/SPTREMBL" }); - protected static final String CONSEQUENCE_TYPE = "consequence_type"; protected static final String PARENT = "Parent"; @@ -316,8 +312,7 @@ public abstract class EnsemblSeqProxy extends EnsemblRestClient } EnsemblXref xrefFetcher = new EnsemblXref(getDomain()); - List xrefs = xrefFetcher.getCrossReferences(seq.getName(), - getCrossReferenceDatabases()); + List xrefs = xrefFetcher.getCrossReferences(seq.getName()); for (DBRefEntry xref : xrefs) { seq.addDBRef(xref); @@ -338,18 +333,6 @@ public abstract class EnsemblSeqProxy extends EnsemblRestClient } /** - * Returns a list of database names to be used when fetching cross-references. - * Specifically, the names are used to filter data returned by the Ensembl - * xrefs REST service on the value in field 'dbname'. - * - * @return - */ - protected List getCrossReferenceDatabases() - { - return CROSS_REFERENCES; - } - - /** * Fetches sequences for the list of accession ids and adds them to the * alignment. Returns the extended (or created) alignment. * diff --git a/src/jalview/ext/ensembl/EnsemblSymbol.java b/src/jalview/ext/ensembl/EnsemblSymbol.java index 08f26c7..1c47f11 100644 --- a/src/jalview/ext/ensembl/EnsemblSymbol.java +++ b/src/jalview/ext/ensembl/EnsemblSymbol.java @@ -40,7 +40,7 @@ public class EnsemblSymbol extends EnsemblXref * @return * @throws IOException */ - protected String parseResponse(BufferedReader br) + protected String parseSymbolResponse(BufferedReader br) throws IOException { JSONParser jp = new JSONParser(); @@ -108,7 +108,7 @@ public class EnsemblSymbol extends EnsemblXref { br = getHttpResponse(url, ids); } - String geneId = parseResponse(br); + String geneId = parseSymbolResponse(br); if (geneId != null) { result.add(geneId); diff --git a/src/jalview/ext/ensembl/EnsemblXref.java b/src/jalview/ext/ensembl/EnsemblXref.java index 7b5f9da..fa86865 100644 --- a/src/jalview/ext/ensembl/EnsemblXref.java +++ b/src/jalview/ext/ensembl/EnsemblXref.java @@ -27,6 +27,8 @@ import org.json.simple.parser.ParseException; class EnsemblXref extends EnsemblRestClient { + private static final String GO_GENE_ONTOLOGY = "GO"; + /** * Constructor given the target domain to fetch data from * @@ -77,17 +79,14 @@ class EnsemblXref extends EnsemblRestClient * Calls the Ensembl xrefs REST endpoint and retrieves any cross-references * ("primary_id") for the given identifier (Ensembl accession id) and database * names. The "dbname" returned by Ensembl is canonicalised to Jalview's - * standard version, and a DBRefEntry constructed. If no databases are - * specified, all available cross-references are retrieved. + * standard version, and a DBRefEntry constructed. Currently takes all + * identifiers apart from GO terms and synonyms. * * @param identifier * an Ensembl stable identifier - * @param databases - * optional list of Ensembl cross-referenced databases of interest * @return */ - public List getCrossReferences(String identifier, - List databases) + public List getCrossReferences(String identifier) { List result = new ArrayList(); List ids = new ArrayList(); @@ -101,7 +100,7 @@ class EnsemblXref extends EnsemblRestClient { br = getHttpResponse(url, ids); } - return (parseResponse(br, databases)); + return (parseResponse(br)); } catch (IOException e) { // ignore @@ -124,17 +123,15 @@ class EnsemblXref extends EnsemblRestClient /** * Parses "primary_id" and "dbname" values from the JSON response and - * constructs a DBRefEntry if the dbname is in the list supplied. Returns a - * list of the DBRefEntry created. Note we don't parse "synonyms" as they - * appear to be either redirected or obsolete in Uniprot. + * constructs a DBRefEntry. Returns a list of the DBRefEntry created. Note we + * don't parse "synonyms" as they appear to be either redirected or obsolete + * in Uniprot. * * @param br - * @param databases * @return * @throws IOException */ - protected List parseResponse(BufferedReader br, - List databases) + protected List parseResponse(BufferedReader br) throws IOException { JSONParser jp = new JSONParser(); @@ -147,8 +144,7 @@ class EnsemblXref extends EnsemblRestClient { JSONObject val = (JSONObject) rvals.next(); String dbName = val.get("dbname").toString(); - if (databases != null && !databases.isEmpty() - && !databases.contains(dbName)) + if (dbName.equals(GO_GENE_ONTOLOGY)) { continue; } diff --git a/test/jalview/ext/ensembl/EnsemblXrefTest.java b/test/jalview/ext/ensembl/EnsemblXrefTest.java index 5073423..cde4afe 100644 --- a/test/jalview/ext/ensembl/EnsemblXrefTest.java +++ b/test/jalview/ext/ensembl/EnsemblXrefTest.java @@ -1,7 +1,6 @@ package jalview.ext.ensembl; import static org.testng.AssertJUnit.assertEquals; -import static org.testng.AssertJUnit.assertTrue; import jalview.datamodel.DBRefEntry; @@ -9,7 +8,6 @@ import java.io.BufferedReader; import java.io.IOException; import java.io.StringReader; import java.net.URL; -import java.util.Arrays; import java.util.List; import org.testng.annotations.Test; @@ -19,7 +17,8 @@ public class EnsemblXrefTest //@formatter:off private static final String JSON = "[{\"primary_id\":\"CCDS5863\",\"dbname\":\"CCDS\"}," + - "{\"primary_id\":\"P15056\",\"dbname\":\"Uniprot/SWISSPROT\"}]"; + "{\"primary_id\":\"P15056\",\"dbname\":\"Uniprot/SWISSPROT\",\"synonyms\":[\"C21\"]}," + + "{\"primary_id\":\"GO:0000165\",\"dbname\":\"GO\"}]"; //@formatter:on @Test(groups = "functional") @@ -36,33 +35,13 @@ public class EnsemblXrefTest } }; - /* - * with no filter - */ - List dbrefs = testee.getCrossReferences("ABCDE", null); + // synonyms and GO terms are not returned + List dbrefs = testee.getCrossReferences("ABCDE"); assertEquals(2, dbrefs.size()); assertEquals("CCDS", dbrefs.get(0).getSource()); assertEquals("CCDS5863", dbrefs.get(0).getAccessionId()); // Uniprot name should get converted to Jalview canonical form assertEquals("UNIPROT", dbrefs.get(1).getSource()); assertEquals("P15056", dbrefs.get(1).getAccessionId()); - - /* - * filter for Uniprot only - */ - dbrefs = testee.getCrossReferences( - "ABCDE", - Arrays.asList(new String[] { "Uniprot/SWISSPROT", - "Uniprot/SPTREMBL" })); - assertEquals(1, dbrefs.size()); - assertEquals("UNIPROT", dbrefs.get(0).getSource()); - assertEquals("P15056", dbrefs.get(0).getAccessionId()); - - /* - * filter for PDB only - */ - dbrefs = testee.getCrossReferences("ABCDE", - Arrays.asList(new String[] { "PDB" })); - assertTrue(dbrefs.isEmpty()); } } -- 1.7.10.2