X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=src%2Fjalview%2Fext%2Fensembl%2FEnsemblGene.java;h=6507ff5dfb4e1a2a75142069f458b3387790043d;hb=1e8c7a9ab9f5da589d0aa2482fd2e3361c320d57;hp=5b57e5e135135b99ea498947b2934f5cee86c25e;hpb=0b8abe58b934e03c34575b06d532a99f0ba70196;p=jalview.git diff --git a/src/jalview/ext/ensembl/EnsemblGene.java b/src/jalview/ext/ensembl/EnsemblGene.java index 5b57e5e..6507ff5 100644 --- a/src/jalview/ext/ensembl/EnsemblGene.java +++ b/src/jalview/ext/ensembl/EnsemblGene.java @@ -1,16 +1,25 @@ package jalview.ext.ensembl; +import jalview.api.FeatureColourI; +import jalview.api.FeatureSettingsI; import jalview.datamodel.AlignmentI; import jalview.datamodel.Sequence; import jalview.datamodel.SequenceFeature; import jalview.datamodel.SequenceI; -import jalview.io.gff.SequenceOntology; +import jalview.io.gff.SequenceOntologyFactory; +import jalview.io.gff.SequenceOntologyI; +import jalview.schemes.FeatureColourAdapter; +import jalview.schemes.FeatureSettingsAdapter; import jalview.util.MapList; +import jalview.util.StringUtils; +import java.awt.Color; import java.util.ArrayList; import java.util.Arrays; import java.util.List; +import com.stevesoft.pat.Regex; + /** * A class that fetches genomic sequence and all transcripts for an Ensembl gene * @@ -18,6 +27,14 @@ import java.util.List; */ public class EnsemblGene extends EnsemblSeqProxy { + private static final String GENE_PREFIX = "gene:"; + + /* + * accepts anything as we will attempt lookup of gene or + * transcript id or gene name + */ + private static final Regex ACCESSION_REGEX = new Regex(".*"); + private static final EnsemblFeatureType[] FEATURES_TO_FETCH = { EnsemblFeatureType.gene, EnsemblFeatureType.transcript, EnsemblFeatureType.exon, EnsemblFeatureType.cds, @@ -42,8 +59,15 @@ public class EnsemblGene extends EnsemblSeqProxy } /** - * Builds an alignment of all transcripts for the requested gene: + * Returns an alignment containing the gene(s) for the given gene or + * transcript identifier, or external identifier (e.g. Uniprot id). If given a + * gene name or external identifier, returns any related gene sequences found + * for model organisms. If only a single gene is queried for, then its + * transcripts are also retrieved and added to the alignment.
+ * Method: * + * + * @param query + * one or more identifiers separated by a space + * @return an alignment containing one or more genes, and possibly + * transcripts, or null */ @Override public AlignmentI getSequenceRecords(String query) throws Exception { - // TODO ? if an ENST identifier is supplied, convert to ENSG? + // todo: tidy up handling of one or multiple accession ids + String[] queries = query.split(getAccessionSeparator()); + + /* + * if given a transcript id, look up its gene parent + */ + if (isTranscriptIdentifier(query)) + { + // we are assuming all transcripts have the same gene parent here + query = new EnsemblLookup().getParent(queries[0]); + if (query == null) + { + return null; + } + } + + /* + * if given a gene or other external name, lookup and fetch + * the corresponding gene for all model organisms + */ + if (!isGeneIdentifier(query)) + { + List geneIds = new EnsemblSymbol().getIds(query); + if (geneIds.isEmpty()) + { + return null; + } + String theIds = StringUtils.listToDelimitedString(geneIds, + getAccessionSeparator()); + return getSequenceRecords(theIds); + } + + /* + * fetch the gene sequence(s) with features and xrefs + */ AlignmentI al = super.getSequenceRecords(query); - if (al.getHeight() > 0) + + /* + * if we retrieved a single gene, get its transcripts as well + */ + if (al.getHeight() == 1) { getTranscripts(al, query); } @@ -70,6 +137,29 @@ public class EnsemblGene extends EnsemblSeqProxy } /** + * Attempts to get Ensembl stable identifiers for model organisms for a gene + * name by calling the xrefs symbol REST service to resolve the gene name. + * + * @param query + * @return + */ + protected String getGeneIdentifiersForName(String query) + { + List ids = new EnsemblSymbol().getIds(query); + if (ids != null) + { + for (String id : ids) + { + if (isGeneIdentifier(id)) + { + return id; + } + } + } + return null; + } + + /** * Constructs all transcripts for the gene, as identified by "transcript" * features whose Parent is the requested gene. The coding transcript * sequences (i.e. with introns omitted) are added to the alignment. @@ -89,6 +179,36 @@ public class EnsemblGene extends EnsemblSeqProxy { makeTranscript(transcriptFeature, al, gene); } + + clearGeneFeatures(gene); + } + + /** + * Remove unwanted features (transcript, exon, CDS) from the gene sequence + * after we have used them to derive transcripts and transfer features + * + * @param gene + */ + protected void clearGeneFeatures(SequenceI gene) + { + SequenceFeature[] sfs = gene.getSequenceFeatures(); + if (sfs != null) + { + SequenceOntologyI so = SequenceOntologyFactory.getInstance(); + List filtered = new ArrayList(); + for (SequenceFeature sf : sfs) + { + String type = sf.getType(); + if (!isTranscript(type) && !so.isA(type, SequenceOntologyI.EXON) + && !so.isA(type, SequenceOntologyI.CDS)) + { + filtered.add(sf); + } + } + gene.setSequenceFeatures(filtered + .toArray(new SequenceFeature[filtered + .size()])); + } } /** @@ -107,7 +227,7 @@ public class EnsemblGene extends EnsemblSeqProxy SequenceI makeTranscript(SequenceFeature transcriptFeature, AlignmentI al, SequenceI gene) { - String accId = (String) transcriptFeature.getValue("transcript_id"); + String accId = getTranscriptId(transcriptFeature); if (accId == null) { return null; @@ -132,10 +252,10 @@ public class EnsemblGene extends EnsemblSeqProxy */ String parentId = "transcript:" + accId; List splices = findFeatures(gene, - SequenceOntology.EXON, parentId); + SequenceOntologyI.EXON, parentId); if (splices.isEmpty()) { - splices = findFeatures(gene, SequenceOntology.CDS, parentId); + splices = findFeatures(gene, SequenceOntologyI.CDS, parentId); } int transcriptLength = 0; @@ -154,6 +274,11 @@ public class EnsemblGene extends EnsemblSeqProxy } Sequence transcript = new Sequence(accId, seqChars, 1, transcriptLength); + String geneName = (String) transcriptFeature.getValue(NAME); + if (geneName != null) + { + transcript.setDescription(geneName); + } transcript.createDatasetSequence(); al.addSequence(transcript); @@ -169,18 +294,32 @@ public class EnsemblGene extends EnsemblSeqProxy transcript.getDatasetSequence(), mapping, parentId); /* + * fetch and save cross-references + */ + super.getCrossReferences(transcript); + + /* * and finally fetch the protein product and save as a cross-reference */ - addProteinProduct(transcript); + new EnsemblCdna().addProteinProduct(transcript); return transcript; } /** + * Returns the 'transcript_id' property of the sequence feature (or null) + * + * @param feature + * @return + */ + protected String getTranscriptId(SequenceFeature feature) + { + return (String) feature.getValue("transcript_id"); + } + + /** * Returns a list of the transcript features on the sequence whose Parent is - * the gene for the accession id. Also removes all transcript features from - * the gene sequence, as we have no further need for them and they obscure - * more useful features on the display. + * the gene for the accession id. * * @param accId * @param geneSequence @@ -191,8 +330,7 @@ public class EnsemblGene extends EnsemblSeqProxy { List transcriptFeatures = new ArrayList(); - List keptFeatures = new ArrayList(); - String parentIdentifier = "gene:" + accId; + String parentIdentifier = GENE_PREFIX + accId; SequenceFeature[] sfs = geneSequence.getSequenceFeatures(); if (sfs != null) @@ -207,14 +345,8 @@ public class EnsemblGene extends EnsemblSeqProxy transcriptFeatures.add(sf); } } - else - { - keptFeatures.add(sf); - } } } - SequenceFeature[] featuresRetained = keptFeatures.toArray(new SequenceFeature[keptFeatures.size()]); - geneSequence.getDatasetSequence().setSequenceFeatures(featuresRetained); return transcriptFeatures; } @@ -222,11 +354,11 @@ public class EnsemblGene extends EnsemblSeqProxy @Override public String getDescription() { - return "Fetches all transcripts and variant features for a gene"; + return "Fetches all transcripts and variant features for a gene or transcript"; } /** - * Default test query is a transcript + * Default test query is a gene id (can also enter a transcript id) */ @Override public String getTestQuery() @@ -244,11 +376,11 @@ public class EnsemblGene extends EnsemblSeqProxy @Override protected boolean identifiesSequence(SequenceFeature sf, String accId) { - if (SequenceOntology.getInstance().isA(sf.getType(), - SequenceOntology.GENE)) + if (SequenceOntologyFactory.getInstance().isA(sf.getType(), + SequenceOntologyI.GENE)) { String id = (String) sf.getValue(ID); - if (("gene:" + accId).equals(id)) + if ((GENE_PREFIX + accId).equals(id)) { return true; } @@ -266,16 +398,16 @@ public class EnsemblGene extends EnsemblSeqProxy @Override protected boolean retainFeature(SequenceFeature sf, String accessionId) { - if (SequenceOntology.getInstance().isA(sf.getType(), - SequenceOntology.GENE)) + SequenceOntologyI so = SequenceOntologyFactory.getInstance(); + String type = sf.getType(); + if (so.isA(type, SequenceOntologyI.GENE)) { return false; } - - if (isTranscript(sf.getType())) + if (isTranscript(type)) { String parent = (String) sf.getValue(PARENT); - if (!("gene:" + accessionId).equals(parent)) + if (!(GENE_PREFIX + accessionId).equals(parent)) { return false; } @@ -294,4 +426,97 @@ public class EnsemblGene extends EnsemblSeqProxy return false; } + @Override + protected List getCrossReferenceDatabases() + { + // found these for ENSG00000157764 on 30/01/2016: + // return new String[] {"Vega_gene", "OTTG", "ENS_LRG_gene", "ArrayExpress", + // "EntrezGene", "HGNC", "MIM_GENE", "MIM_MORBID", "WikiGene"}; + return super.getCrossReferenceDatabases(); + } + + /** + * Override to do nothing as Ensembl doesn't return a protein sequence for a + * gene identifier + */ + @Override + protected void addProteinProduct(SequenceI querySeq) + { + } + + @Override + public Regex getAccessionValidator() + { + return ACCESSION_REGEX; + } + + @Override + public FeatureSettingsI getFeatureColourScheme() + { + return new FeatureSettingsAdapter() + { + SequenceOntologyI so = SequenceOntologyFactory.getInstance(); + @Override + public boolean isFeatureDisplayed(String type) + { + return (so.isA(type, SequenceOntologyI.EXON) || so.isA(type, + SequenceOntologyI.SEQUENCE_VARIANT)); + } + + @Override + public FeatureColourI getFeatureColour(String type) + { + if (so.isA(type, SequenceOntologyI.EXON)) + { + return new FeatureColourAdapter() + { + @Override + public boolean isColourByLabel() + { + return true; + } + }; + } + if (so.isA(type, SequenceOntologyI.SEQUENCE_VARIANT)) + { + return new FeatureColourAdapter() + { + + @Override + public Color getColour() + { + return Color.RED; + } + }; + } + return null; + } + + /** + * order to render sequence_variant after exon after the rest + */ + @Override + public int compare(String feature1, String feature2) + { + if (so.isA(feature1, SequenceOntologyI.SEQUENCE_VARIANT)) + { + return +1; + } + if (so.isA(feature2, SequenceOntologyI.SEQUENCE_VARIANT)) + { + return -1; + } + if (so.isA(feature1, SequenceOntologyI.EXON)) + { + return +1; + } + if (so.isA(feature2, SequenceOntologyI.EXON)) + { + return -1; + } + return 0; + } + }; + } + }