import jalview.datamodel.SequenceFeature;
import jalview.io.gff.SequenceOntology;
+import java.util.List;
+
import com.stevesoft.pat.Regex;
public class EnsemblCdna extends EnsemblSeqProxy
return false;
}
+ @Override
+ protected List<String> getCrossReferenceDatabases()
+ {
+ return super.getCrossReferenceDatabases(); // no cDNA-specific list: reuse the inherited one
+ // other database names found on 30/01/16, not included in the returned list:
+ // Vega_transcript, OTTT, ENS_LRG_transcript, UCSC, HGNC_trans_name, RefSeq_mRNA, RefSeq_mRNA_predicted
+ }
+
}
return false;
}
+ @Override
+ protected List<String> getCrossReferenceDatabases()
+ {
+ // database names found for ENSG00000157764 on 30/01/2016 (kept as a note only):
+ // Vega_gene, OTTG, ENS_LRG_gene, ArrayExpress,
+ // EntrezGene, HGNC, MIM_GENE, MIM_MORBID, WikiGene
+ return super.getCrossReferenceDatabases(); // defers to the superclass list unchanged
+ }
+
}
import jalview.datamodel.AlignmentI;
import jalview.datamodel.SequenceFeature;
+import java.util.Arrays;
+import java.util.List;
+
public class EnsemblProtein extends EnsemblSeqProxy
{
+ private static final List<String> CROSSREFS = Arrays.asList(new String[] {
+ "PDB", "Uniprot/SPTREMBL", "Uniprot/SWISSPROT" }); // database names returned by getCrossReferenceDatabases()
+
public EnsemblProtein()
{
super();
return false;
}
+ @Override
+ protected List<String> getCrossReferenceDatabases()
+ {
+ return CROSSREFS; // protein list (PDB, Uniprot/SPTREMBL, Uniprot/SWISSPROT); the shared constant itself, not a copy
+ }
+
}
*/
public abstract class EnsemblSeqProxy extends EnsemblRestClient
{
+ private static final List<String> CROSS_REFERENCES = Arrays
+ .asList(new String[] { "CCDS" }); // default list returned by getCrossReferenceDatabases()
+
protected static final String CONSEQUENCE_TYPE = "consequence_type";
protected static final String PARENT = "Parent";
addFeaturesAndProduct(accId, alignment);
}
- inProgress = false;
+ for (SequenceI seq : alignment.getSequences())
+ {
+ getCrossReferences(seq);
+ }
+
System.out.println(getClass().getName() + " took "
+ (System.currentTimeMillis() - now) + "ms to fetch");
return alignment;
proteinSeq.createDatasetSequence();
querySeq.createDatasetSequence();
- getProteinCrossReferences(proteinSeq);
-
MapList mapList = mapCdsToProtein(querySeq, proteinSeq);
if (mapList != null)
{
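- * Get Uniprot and PDB xrefs from Ensembl, and attach them to the protein
- * sequence
+ * Get cross-references from Ensembl for the databases named by
+ * getCrossReferenceDatabases(), and attach them to the sequence (or to its
+ * dataset sequence, if it has one)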
*
- * @param proteinSeq
+ * @param seq
*/
- protected void getProteinCrossReferences(SequenceI proteinSeq)
+ protected void getCrossReferences(SequenceI seq)
{
- while (proteinSeq.getDatasetSequence() != null)
+ while (seq.getDatasetSequence() != null) // climb until there is no further dataset sequence
{
- proteinSeq = proteinSeq.getDatasetSequence();
+ seq = seq.getDatasetSequence();
}
EnsemblXref xrefFetcher = new EnsemblXref();
- List<DBRefEntry> xrefs = xrefFetcher.getCrossReferences(
- proteinSeq.getName(), "PDB", "Uniprot/SPTREMBL",
- "Uniprot/SWISSPROT");
+ List<DBRefEntry> xrefs = xrefFetcher.getCrossReferences(seq.getName(),
+ getCrossReferenceDatabases()); // database names come from the overridable getCrossReferenceDatabases()
for (DBRefEntry xref : xrefs)
{
- proteinSeq.addDBRef(xref);
+ seq.addDBRef(xref); // seq was reassigned above, so refs land on the dataset sequence when one exists
}
}
/**
+ * Returns a list of database names to be used when fetching cross-references.
+ * This default returns CROSS_REFERENCES; subclasses may override it to
+ * supply their own list.
+ *
+ * @return the database names passed to the cross-reference fetch
+ */
+ protected List<String> getCrossReferenceDatabases()
+ {
+ return CROSS_REFERENCES; // the shared constant itself, not a copy
+ }
+
+ /**
* Returns a mapping from dna to protein by inspecting sequence features of
* type "CDS" on the dna.
*
/**
* Calls the Ensembl xrefs REST endpoint and retrieves any cross-references
* ("primary_id") for the given identifier (Ensembl accession id) and database
- * name. The "dbname" returned by Ensembl is canonicalised to Jalview's
- * standard version, and a DBRefEntry constructed.
+ * names. The "dbname" returned by Ensembl is canonicalised to Jalview's
+ * standard version, and a DBRefEntry constructed. If no databases are
+ * specified, all available cross-references are retrieved.
*
* @param identifier
- * @param database
+ * @param databases
* @return
*/
public List<DBRefEntry> getCrossReferences(String identifier,
- String... database)
+ List<String> databases)
{
List<DBRefEntry> result = new ArrayList<DBRefEntry>();
List<String> ids = new ArrayList<String>();
BufferedReader br = null;
try
{
- for (String db : database)
- {
- URL url = getUrl(identifier, db);
+ URL url = getUrl(identifier);
if (url != null)
{
br = getHttpResponse(url, ids);
}
- for (DBRefEntry xref : parseResponse(br))
- {
- if (!result.contains(xref))
- {
- result.add(xref);
- }
- }
- br.close();
- }
+ return (parseResponse(br, databases));
} catch (IOException e)
{
// ignore
}
/**
- * Parses "primary_id" and "dbname" values from the JSON response and returns
- * a list of DBRefEntry constructed.
+ * Parses "primary_id" and "dbname" values from the JSON response and
+ * constructs a DBRefEntry if the dbname is in the list supplied. Returns a
+ * list of DBRefEntry created.
*
* @param br
+ * @param databases
* @return
* @throws IOException
*/
- protected List<DBRefEntry> parseResponse(BufferedReader br)
+ protected List<DBRefEntry> parseResponse(BufferedReader br,
+ List<String> databases)
throws IOException
{
JSONParser jp = new JSONParser();
{
JSONObject val = (JSONObject) rvals.next();
String dbName = val.get("dbname").toString();
+ if (databases != null && !databases.isEmpty()
+ && !databases.contains(dbName))
+ {
+ continue;
+ }
String id = val.get("primary_id").toString();
if (dbName != null && id != null)
{
return result;
}
- protected URL getUrl(String identifier, String db)
+ /**
+ * Returns the URL for the REST endpoint to fetch all cross-references for an
+ * identifier. Note this may return protein cross-references for nucleotide.
+ * Filter the returned list as required.
+ *
+ * @param identifier
+ * @return
+ */
+ protected URL getUrl(String identifier)
{
String url = ENSEMBL_REST + "/xrefs/id/" + identifier
- + "?content-type=application/json&external_db=" + db;
+ + "?content-type=application/json&all_levels=1";
try
{
return new URL(url);