import jalview.io.gff.SequenceOntologyFactory;
import jalview.io.gff.SequenceOntologyI;
-import java.util.Arrays;
-import java.util.List;
-
import com.stevesoft.pat.Regex;
/**
*/
public class EnsemblCdna extends EnsemblSeqProxy
{
- private static final List<String> CROSS_REFERENCES = Arrays
- .asList(new String[] { "Uniprot/SWISSPROT", "Uniprot/SPTREMBL" });
-
/*
* accepts ENST or ENSTG with 11 digits
* or ENSMUST or similar for other species
return false;
}
- @Override
- protected List<String> getCrossReferenceDatabases()
- {
- return CROSS_REFERENCES;
- // 30/01/16 also found Vega_transcript, OTTT, ENS_LRG_transcript, UCSC,
- // HGNC_trans_name, RefSeq_mRNA, RefSeq_mRNA_predicted
- }
-
}
*/
public class EnsemblGene extends EnsemblSeqProxy
{
- private static final List<String> CROSS_REFERENCES = Arrays
- .asList(new String[] { "CCDS" });
-
private static final String GENE_PREFIX = "gene:";
/*
return false;
}
- @Override
- protected List<String> getCrossReferenceDatabases()
- {
- // found these for ENSG00000157764 on 30/01/2016:
- // return new String[] {"Vega_gene", "OTTG", "ENS_LRG_gene", "ArrayExpress",
- // "EntrezGene", "HGNC", "MIM_GENE", "MIM_MORBID", "WikiGene"};
- return CROSS_REFERENCES;
- }
-
/**
* Override to do nothing as Ensembl doesn't return a protein sequence for a
* gene identifier
import jalview.datamodel.AlignmentI;
import jalview.datamodel.SequenceFeature;
-import java.util.Arrays;
import java.util.List;
import com.stevesoft.pat.Regex;
private static final Regex ACCESSION_REGEX = new Regex(
"(ENS([A-Z]{3}|)P[0-9]{11}$)" + "|" + "(CCDS[0-9.]{3,}$)");
- private static final List<String> CROSSREFS = Arrays.asList(new String[] {
- "PDB", "Uniprot/SPTREMBL", "Uniprot/SWISSPROT" });
-
/**
* Default constructor (to use rest.ensembl.org)
*/
}
@Override
- protected List<String> getCrossReferenceDatabases()
- {
- return CROSSREFS;
- }
-
- @Override
public Regex getAccessionValidator()
{
return ACCESSION_REGEX;
{
private static final String ALLELES = "alleles";
- private static final List<String> CROSS_REFERENCES = Arrays
- .asList(new String[] { "CCDS", "Uniprot/SWISSPROT",
- "Uniprot/SPTREMBL" });
-
protected static final String CONSEQUENCE_TYPE = "consequence_type";
protected static final String PARENT = "Parent";
}
EnsemblXref xrefFetcher = new EnsemblXref(getDomain());
- List<DBRefEntry> xrefs = xrefFetcher.getCrossReferences(seq.getName(),
- getCrossReferenceDatabases());
+ List<DBRefEntry> xrefs = xrefFetcher.getCrossReferences(seq.getName());
for (DBRefEntry xref : xrefs)
{
seq.addDBRef(xref);
}
/**
- * Returns a list of database names to be used when fetching cross-references.
- * Specifically, the names are used to filter data returned by the Ensembl
- * xrefs REST service on the value in field 'dbname'.
- *
- * @return
- */
- protected List<String> getCrossReferenceDatabases()
- {
- return CROSS_REFERENCES;
- }
-
- /**
* Fetches sequences for the list of accession ids and adds them to the
* alignment. Returns the extended (or created) alignment.
*
* @return
* @throws IOException
*/
- protected String parseResponse(BufferedReader br)
+ protected String parseSymbolResponse(BufferedReader br)
throws IOException
{
JSONParser jp = new JSONParser();
{
br = getHttpResponse(url, ids);
}
- String geneId = parseResponse(br);
+ String geneId = parseSymbolResponse(br);
if (geneId != null)
{
result.add(geneId);
class EnsemblXref extends EnsemblRestClient
{
+ private static final String GO_GENE_ONTOLOGY = "GO"; // Ensembl "dbname" of xrefs skipped by parseResponse
+
/**
* Constructor given the target domain to fetch data from
*
* Calls the Ensembl xrefs REST endpoint and retrieves any cross-references
* ("primary_id") for the given identifier (Ensembl accession id). The
* "dbname" returned by Ensembl is canonicalised to Jalview's
- * standard version, and a DBRefEntry constructed. If no databases are
- * specified, all available cross-references are retrieved.
+ * standard version, and a DBRefEntry constructed. Currently takes all
+ * identifiers apart from GO terms and synonyms.
*
* @param identifier
* an Ensembl stable identifier
- * @param databases
- * optional list of Ensembl cross-referenced databases of interest
* @return
*/
- public List<DBRefEntry> getCrossReferences(String identifier,
- List<String> databases)
+ public List<DBRefEntry> getCrossReferences(String identifier)
{
List<DBRefEntry> result = new ArrayList<DBRefEntry>();
List<String> ids = new ArrayList<String>();
{
br = getHttpResponse(url, ids);
}
- return (parseResponse(br, databases));
+ return (parseResponse(br));
} catch (IOException e)
{
// ignore
/**
* Parses "primary_id" and "dbname" values from the JSON response and
- * constructs a DBRefEntry if the dbname is in the list supplied. Returns a
- * list of the DBRefEntry created. Note we don't parse "synonyms" as they
- * appear to be either redirected or obsolete in Uniprot.
+ * constructs a DBRefEntry for each entry, except those whose "dbname" is
+ * "GO", which are skipped. Returns a list of the DBRefEntry created. Note we
+ * don't parse "synonyms" as they appear to be either redirected or obsolete
+ * in Uniprot.
*
* @param br
- * @param databases
* @return
* @throws IOException
*/
- protected List<DBRefEntry> parseResponse(BufferedReader br,
- List<String> databases)
+ protected List<DBRefEntry> parseResponse(BufferedReader br)
throws IOException
{
JSONParser jp = new JSONParser();
{
JSONObject val = (JSONObject) rvals.next();
String dbName = val.get("dbname").toString();
- if (databases != null && !databases.isEmpty()
- && !databases.contains(dbName))
+ if (dbName.equals(GO_GENE_ONTOLOGY))
{
continue;
}
package jalview.ext.ensembl;
import static org.testng.AssertJUnit.assertEquals;
-import static org.testng.AssertJUnit.assertTrue;
import jalview.datamodel.DBRefEntry;
import java.io.IOException;
import java.io.StringReader;
import java.net.URL;
-import java.util.Arrays;
import java.util.List;
import org.testng.annotations.Test;
//@formatter:off
private static final String JSON =
"[{\"primary_id\":\"CCDS5863\",\"dbname\":\"CCDS\"}," +
- "{\"primary_id\":\"P15056\",\"dbname\":\"Uniprot/SWISSPROT\"}]";
+ "{\"primary_id\":\"P15056\",\"dbname\":\"Uniprot/SWISSPROT\",\"synonyms\":[\"C21\"]}," +
+ "{\"primary_id\":\"GO:0000165\",\"dbname\":\"GO\"}]";
//@formatter:on
@Test(groups = "functional")
}
};
- /*
- * with no filter
- */
- List<DBRefEntry> dbrefs = testee.getCrossReferences("ABCDE", null);
+ // synonyms and GO terms are not returned
+ List<DBRefEntry> dbrefs = testee.getCrossReferences("ABCDE");
assertEquals(2, dbrefs.size());
assertEquals("CCDS", dbrefs.get(0).getSource());
assertEquals("CCDS5863", dbrefs.get(0).getAccessionId());
// Uniprot name should get converted to Jalview canonical form
assertEquals("UNIPROT", dbrefs.get(1).getSource());
assertEquals("P15056", dbrefs.get(1).getAccessionId());
-
- /*
- * filter for Uniprot only
- */
- dbrefs = testee.getCrossReferences(
- "ABCDE",
- Arrays.asList(new String[] { "Uniprot/SWISSPROT",
- "Uniprot/SPTREMBL" }));
- assertEquals(1, dbrefs.size());
- assertEquals("UNIPROT", dbrefs.get(0).getSource());
- assertEquals("P15056", dbrefs.get(0).getAccessionId());
-
- /*
- * filter for PDB only
- */
- dbrefs = testee.getCrossReferences("ABCDE",
- Arrays.asList(new String[] { "PDB" }));
- assertTrue(dbrefs.isEmpty());
}
}