git://source.jalview.org
/
jalview.git
/ commitdiff
commit
grep
author
committer
pickaxe
?
search:
re
summary
|
shortlog
|
log
|
commit
| commitdiff |
tree
raw
|
patch
|
inline
| side by side (parent:
a9f0472
)
JAL-1705 refactored cross-reference fetching (CCDS, Uniprot, PDB)
author
gmungoc
<g.m.carstairs@dundee.ac.uk>
Sat, 30 Jan 2016 06:33:01 +0000
(06:33 +0000)
committer
gmungoc
<g.m.carstairs@dundee.ac.uk>
Sat, 30 Jan 2016 06:33:01 +0000
(06:33 +0000)
src/jalview/ext/ensembl/EnsemblCdna.java
patch
|
blob
|
history
src/jalview/ext/ensembl/EnsemblGene.java
patch
|
blob
|
history
src/jalview/ext/ensembl/EnsemblProtein.java
patch
|
blob
|
history
src/jalview/ext/ensembl/EnsemblSeqProxy.java
patch
|
blob
|
history
src/jalview/ext/ensembl/EnsemblXref.java
patch
|
blob
|
history
diff --git
a/src/jalview/ext/ensembl/EnsemblCdna.java
b/src/jalview/ext/ensembl/EnsemblCdna.java
index
a2ecfcd
..
139e44f
100644
(file)
--- a/
src/jalview/ext/ensembl/EnsemblCdna.java
+++ b/
src/jalview/ext/ensembl/EnsemblCdna.java
@@
-3,6
+3,8
@@
package jalview.ext.ensembl;
import jalview.datamodel.SequenceFeature;
import jalview.io.gff.SequenceOntology;
import jalview.datamodel.SequenceFeature;
import jalview.io.gff.SequenceOntology;
+import java.util.List;
+
import com.stevesoft.pat.Regex;
public class EnsemblCdna extends EnsemblSeqProxy
import com.stevesoft.pat.Regex;
public class EnsemblCdna extends EnsemblSeqProxy
@@
-78,4
+80,12
@@
public class EnsemblCdna extends EnsemblSeqProxy
return false;
}
return false;
}
+ @Override
+ protected List<String> getCrossReferenceDatabases()
+ {
+ return super.getCrossReferenceDatabases();
+ // 30/01/16 also found Vega_transcript, OTTT, ENS_LRG_transcript, UCSC,
+ // HGNC_trans_name, RefSeq_mRNA, RefSeq_mRNA_predicted
+ }
+
}
}
diff --git
a/src/jalview/ext/ensembl/EnsemblGene.java
b/src/jalview/ext/ensembl/EnsemblGene.java
index
b5ea686
..
1325bec
100644
(file)
--- a/
src/jalview/ext/ensembl/EnsemblGene.java
+++ b/
src/jalview/ext/ensembl/EnsemblGene.java
@@
-290,4
+290,13
@@
public class EnsemblGene extends EnsemblSeqProxy
return false;
}
return false;
}
+ @Override
+ protected List<String> getCrossReferenceDatabases()
+ {
+ // found these for ENSG00000157764 on 30/01/2016:
+ // return new String[] {"Vega_gene", "OTTG", "ENS_LRG_gene", "ArrayExpress",
+ // "EntrezGene", "HGNC", "MIM_GENE", "MIM_MORBID", "WikiGene"};
+ return super.getCrossReferenceDatabases();
+ }
+
}
}
diff --git
a/src/jalview/ext/ensembl/EnsemblProtein.java
b/src/jalview/ext/ensembl/EnsemblProtein.java
index
5238f98
..
c40fdd0
100644
(file)
--- a/
src/jalview/ext/ensembl/EnsemblProtein.java
+++ b/
src/jalview/ext/ensembl/EnsemblProtein.java
@@
-3,9
+3,15
@@
package jalview.ext.ensembl;
import jalview.datamodel.AlignmentI;
import jalview.datamodel.SequenceFeature;
import jalview.datamodel.AlignmentI;
import jalview.datamodel.SequenceFeature;
+import java.util.Arrays;
+import java.util.List;
+
public class EnsemblProtein extends EnsemblSeqProxy
{
public class EnsemblProtein extends EnsemblSeqProxy
{
+ private static final List<String> CROSSREFS = Arrays.asList(new String[] {
+ "PDB", "Uniprot/SPTREMBL", "Uniprot/SWISSPROT" });
+
public EnsemblProtein()
{
super();
public EnsemblProtein()
{
super();
@@
-64,4
+70,10
@@
public class EnsemblProtein extends EnsemblSeqProxy
return false;
}
return false;
}
+ @Override
+ protected List<String> getCrossReferenceDatabases()
+ {
+ return CROSSREFS;
+ }
+
}
}
diff --git
a/src/jalview/ext/ensembl/EnsemblSeqProxy.java
b/src/jalview/ext/ensembl/EnsemblSeqProxy.java
index
8698b78
..
0bfeda1
100644
(file)
--- a/
src/jalview/ext/ensembl/EnsemblSeqProxy.java
+++ b/
src/jalview/ext/ensembl/EnsemblSeqProxy.java
@@
-36,6
+36,9
@@
import java.util.Map.Entry;
*/
public abstract class EnsemblSeqProxy extends EnsemblRestClient
{
*/
public abstract class EnsemblSeqProxy extends EnsemblRestClient
{
+ private static final List<String> CROSS_REFERENCES = Arrays
+ .asList(new String[] { "CCDS" });
+
protected static final String CONSEQUENCE_TYPE = "consequence_type";
protected static final String PARENT = "Parent";
protected static final String CONSEQUENCE_TYPE = "consequence_type";
protected static final String PARENT = "Parent";
@@
-173,7
+176,11
@@
public abstract class EnsemblSeqProxy extends EnsemblRestClient
addFeaturesAndProduct(accId, alignment);
}
addFeaturesAndProduct(accId, alignment);
}
- inProgress = false;
+ for (SequenceI seq : alignment.getSequences())
+ {
+ getCrossReferences(seq);
+ }
+
System.out.println(getClass().getName() + " took "
+ (System.currentTimeMillis() - now) + "ms to fetch");
return alignment;
System.out.println(getClass().getName() + " took "
+ (System.currentTimeMillis() - now) + "ms to fetch");
return alignment;
@@
-265,8
+272,6
@@
public abstract class EnsemblSeqProxy extends EnsemblRestClient
proteinSeq.createDatasetSequence();
querySeq.createDatasetSequence();
proteinSeq.createDatasetSequence();
querySeq.createDatasetSequence();
- getProteinCrossReferences(proteinSeq);
-
MapList mapList = mapCdsToProtein(querySeq, proteinSeq);
if (mapList != null)
{
MapList mapList = mapCdsToProtein(querySeq, proteinSeq);
if (mapList != null)
{
@@
-293,26
+298,35
@@
public abstract class EnsemblSeqProxy extends EnsemblRestClient
* Get Uniprot and PDB xrefs from Ensembl, and attach them to the protein
* sequence
*
* Get Uniprot and PDB xrefs from Ensembl, and attach them to the protein
* sequence
*
- * @param proteinSeq
+ * @param seq
*/
*/
- protected void getProteinCrossReferences(SequenceI proteinSeq)
+ protected void getCrossReferences(SequenceI seq)
{
{
- while (proteinSeq.getDatasetSequence() != null)
+ while (seq.getDatasetSequence() != null)
{
{
- proteinSeq = proteinSeq.getDatasetSequence();
+ seq = seq.getDatasetSequence();
}
EnsemblXref xrefFetcher = new EnsemblXref();
}
EnsemblXref xrefFetcher = new EnsemblXref();
- List<DBRefEntry> xrefs = xrefFetcher.getCrossReferences(
- proteinSeq.getName(), "PDB", "Uniprot/SPTREMBL",
- "Uniprot/SWISSPROT");
+ List<DBRefEntry> xrefs = xrefFetcher.getCrossReferences(seq.getName(),
+ getCrossReferenceDatabases());
for (DBRefEntry xref : xrefs)
{
for (DBRefEntry xref : xrefs)
{
- proteinSeq.addDBRef(xref);
+ seq.addDBRef(xref);
}
}
/**
}
}
/**
+ * Returns a list of database names to be used when fetching cross-references.
+ *
+ * @return the list of database names
+ */
+ protected List<String> getCrossReferenceDatabases()
+ {
+ return CROSS_REFERENCES;
+ }
+
+ /**
* Returns a mapping from dna to protein by inspecting sequence features of
* type "CDS" on the dna.
*
* Returns a mapping from dna to protein by inspecting sequence features of
* type "CDS" on the dna.
*
diff --git
a/src/jalview/ext/ensembl/EnsemblXref.java
b/src/jalview/ext/ensembl/EnsemblXref.java
index
6a4f369
..
36bd7c5
100644
(file)
--- a/
src/jalview/ext/ensembl/EnsemblXref.java
+++ b/
src/jalview/ext/ensembl/EnsemblXref.java
@@
-60,15
+60,16
@@
public class EnsemblXref extends EnsemblRestClient
/**
* Calls the Ensembl xrefs REST endpoint and retrieves any cross-references
* ("primary_id") for the given identifier (Ensembl accession id) and database
/**
* Calls the Ensembl xrefs REST endpoint and retrieves any cross-references
* ("primary_id") for the given identifier (Ensembl accession id) and database
- * name. The "dbname" returned by Ensembl is canonicalised to Jalview's
- * standard version, and a DBRefEntry constructed.
+ * names. The "dbname" returned by Ensembl is canonicalised to Jalview's
+ * standard version, and a DBRefEntry constructed. If no databases are
+ * specified, all available cross-references are retrieved.
*
* @param identifier
*
* @param identifier
- * @param database
+ * @param databases
* @return
*/
public List<DBRefEntry> getCrossReferences(String identifier,
* @return
*/
public List<DBRefEntry> getCrossReferences(String identifier,
- String... database)
+ List<String> databases)
{
List<DBRefEntry> result = new ArrayList<DBRefEntry>();
List<String> ids = new ArrayList<String>();
{
List<DBRefEntry> result = new ArrayList<DBRefEntry>();
List<String> ids = new ArrayList<String>();
@@
-77,22
+78,12
@@
public class EnsemblXref extends EnsemblRestClient
BufferedReader br = null;
try
{
BufferedReader br = null;
try
{
- for (String db : database)
- {
- URL url = getUrl(identifier, db);
+ URL url = getUrl(identifier);
if (url != null)
{
br = getHttpResponse(url, ids);
}
if (url != null)
{
br = getHttpResponse(url, ids);
}
- for (DBRefEntry xref : parseResponse(br))
- {
- if (!result.contains(xref))
- {
- result.add(xref);
- }
- }
- br.close();
- }
+ return (parseResponse(br, databases));
} catch (IOException e)
{
// ignore
} catch (IOException e)
{
// ignore
@@
-114,14
+105,17
@@
public class EnsemblXref extends EnsemblRestClient
}
/**
}
/**
- * Parses "primary_id" and "dbname" values from the JSON response and returns
- * a list of DBRefEntry constructed.
+ * Parses "primary_id" and "dbname" values from the JSON response and
+ * constructs a DBRefEntry if the dbname is in the list supplied (or if the
+ * list is null or empty). Returns a list of DBRefEntry created.
*
* @param br
*
* @param br
+ * @param databases
* @return
* @throws IOException
*/
* @return
* @throws IOException
*/
- protected List<DBRefEntry> parseResponse(BufferedReader br)
+ protected List<DBRefEntry> parseResponse(BufferedReader br,
+ List<String> databases)
throws IOException
{
JSONParser jp = new JSONParser();
throws IOException
{
JSONParser jp = new JSONParser();
@@
-134,6
+128,11
@@
public class EnsemblXref extends EnsemblRestClient
{
JSONObject val = (JSONObject) rvals.next();
String dbName = val.get("dbname").toString();
{
JSONObject val = (JSONObject) rvals.next();
String dbName = val.get("dbname").toString();
+ if (databases != null && !databases.isEmpty()
+ && !databases.contains(dbName))
+ {
+ continue;
+ }
String id = val.get("primary_id").toString();
if (dbName != null && id != null)
{
String id = val.get("primary_id").toString();
if (dbName != null && id != null)
{
@@
-149,10
+148,18
@@
public class EnsemblXref extends EnsemblRestClient
return result;
}
return result;
}
- protected URL getUrl(String identifier, String db)
+ /**
+ * Returns the URL for the REST endpoint to fetch all cross-references for an
+ * identifier. Note that this may return protein cross-references for a
+ * nucleotide identifier, so filter the returned list as required.
+ *
+ * @param identifier
+ * @return
+ */
+ protected URL getUrl(String identifier)
{
String url = ENSEMBL_REST + "/xrefs/id/" + identifier
{
String url = ENSEMBL_REST + "/xrefs/id/" + identifier
- + "?content-type=application/json&external_db=" + db;
+ + "?content-type=application/json&all_levels=1";
try
{
return new URL(url);
try
{
return new URL(url);