}
/**
- * Converts a query, which may contain one or more gene or transcript
- * identifiers, into a non-redundant list of gene identifiers.
+ * Converts a query, which may contain one or more gene, transcript, or
+ * external (to Ensembl) identifiers, into a non-redundant list of gene
+ * identifiers.
*
* @param accessions
* @return
for (String acc : accessions.split(getAccessionSeparator()))
{
- if (isGeneIdentifier(acc))
- {
- if (!geneIds.contains(acc))
- {
- geneIds.add(acc);
- }
- }
-
/*
- * if given a transcript id, look up its gene parent
+ * First try lookup as an Ensembl (gene or transcript) identifier
*/
- else if (isTranscriptIdentifier(acc))
+ String geneId = new EnsemblLookup(getDomain()).getGeneId(acc);
+ if (geneId != null)
{
- String geneId = new EnsemblLookup(getDomain()).getParent(acc);
- if (geneId != null && !geneIds.contains(geneId))
+ if (!geneIds.contains(geneId))
{
geneIds.add(geneId);
}
}
- else if (isProteinIdentifier(acc))
- {
- String tscriptId = new EnsemblLookup(getDomain()).getParent(acc);
- if (tscriptId != null)
- {
- String geneId = new EnsemblLookup(getDomain())
- .getParent(tscriptId);
-
- if (geneId != null && !geneIds.contains(geneId))
- {
- geneIds.add(geneId);
- }
- }
- // NOTE - acc is lost if it resembles an ENS.+ ID but isn't actually
- // resolving to one... e.g. ENSMICP00000009241
- }
- /*
- * if given a gene or other external name, lookup and fetch
- * the corresponding gene for all model organisms
- */
else
{
+ /*
+ * if given a gene or other external name, lookup and fetch
+ * the corresponding gene for all model organisms
+ */
List<String> ids = new EnsemblSymbol(getDomain(), getDbSource(),
- getDbVersion()).getIds(acc);
- for (String geneId : ids)
+ getDbVersion()).getGeneIds(acc);
+ for (String id : ids)
{
- if (!geneIds.contains(geneId))
+ if (!geneIds.contains(id))
{
- geneIds.add(geneId);
+ geneIds.add(id);
}
}
}
}
/**
- * Attempts to get Ensembl stable identifiers for model organisms for a gene
- * name by calling the xrefs symbol REST service to resolve the gene name.
- *
- * @param query
- * @return
- */
- protected String getGeneIdentifiersForName(String query)
- {
- List<String> ids = new EnsemblSymbol(getDomain(), getDbSource(),
- getDbVersion()).getIds(query);
- if (ids != null)
- {
- for (String id : ids)
- {
- if (isGeneIdentifier(id))
- {
- return id;
- }
- }
- }
- return null;
- }
-
- /**
* Constructs all transcripts for the gene, as identified by "transcript"
* features whose Parent is the requested gene. The coding transcript
* sequences (i.e. with introns omitted) are added to the alignment.
/**
* Returns a list of the transcript features on the sequence whose Parent is
* the gene for the accession id.
+ * <p>
+ * Transcript features are those of type "transcript", or any of its sub-types
+ * in the Sequence Ontology e.g. "mRNA", "processed_transcript". We also
+ * include "NMD_transcript_variant", because this type behaves like a
+ * transcript identifier in Ensembl, although strictly speaking it is not in
+ * the SO.
*
* @param accId
* @param geneSequence
List<SequenceFeature> transcriptFeatures = new ArrayList<SequenceFeature>();
String parentIdentifier = GENE_PREFIX + accId;
- // todo optimise here by transcript type!
+
List<SequenceFeature> sfs = geneSequence.getFeatures()
- .getPositionalFeatures();
+ .getFeaturesByOntology(SequenceOntologyI.TRANSCRIPT);
+ sfs.addAll(geneSequence.getFeatures().getPositionalFeatures(
+ SequenceOntologyI.NMD_TRANSCRIPT_VARIANT));
for (SequenceFeature sf : sfs)
{
- if (isTranscript(sf.getType()))
+ String parent = (String) sf.getValue(PARENT);
+ if (parentIdentifier.equals(parent))
{
- String parent = (String) sf.getValue(PARENT);
- if (parentIdentifier.equals(parent))
- {
- transcriptFeatures.add(sf);
- }
+ transcriptFeatures.add(sf);
}
}