X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=src%2Fjalview%2Fext%2Fensembl%2FEnsemblCds.java;h=2086eba7ad667f1a2cb0557a7ddaaa0bab43e65d;hb=479c44e27a46c7cbee33e293dbc38cbca4fd2e0b;hp=dc9234811cdde92b7419c5b7eb283ad4a40162af;hpb=409fd993c6e32e999b24082aae107a043a590f8f;p=jalview.git diff --git a/src/jalview/ext/ensembl/EnsemblCds.java b/src/jalview/ext/ensembl/EnsemblCds.java index dc92348..2086eba 100644 --- a/src/jalview/ext/ensembl/EnsemblCds.java +++ b/src/jalview/ext/ensembl/EnsemblCds.java @@ -1,15 +1,51 @@ package jalview.ext.ensembl; -import jalview.ext.ensembl.SeqFetcher.EnsemblSeqType; +import jalview.datamodel.SequenceFeature; +import jalview.datamodel.SequenceI; +import jalview.io.gff.SequenceOntologyFactory; +import jalview.io.gff.SequenceOntologyI; +import java.util.ArrayList; +import java.util.List; + +/** + * A client for direct fetching of CDS sequences from Ensembl (i.e. that part of + * the genomic sequence that is translated to protein) + * + * TODO: not currently used as CDS sequences are computed from CDS features on + * transcripts - delete this class? 
+ * + * @author gmcarstairs + * + */ public class EnsemblCds extends EnsemblSeqProxy { + /* + * fetch cds features on genomic sequence (to identify the CDS regions) + * and exon and variation features (to retain for display) + */ + private static final EnsemblFeatureType[] FEATURES_TO_FETCH = { + EnsemblFeatureType.cds, EnsemblFeatureType.exon, + EnsemblFeatureType.variation }; + /** + * Default constructor (to use rest.ensembl.org) + */ public EnsemblCds() { super(); } + /** + * Constructor given the target domain to fetch data from + * + * @param d + */ + public EnsemblCds(String d) + { + super(d); + } + @Override public String getDbName() { @@ -22,4 +58,61 @@ public class EnsemblCds extends EnsemblSeqProxy return EnsemblSeqType.CDS; } + @Override + protected EnsemblFeatureType[] getFeaturesToFetch() + { + return FEATURES_TO_FETCH; + } + + /** + * Answers true unless the feature type is 'CDS' (or a sub-type of CDS in the + * Sequence Ontology). CDS features are only retrieved in order to identify + * the cds sequence range, and are redundant information on the cds sequence + * itself. + */ + @Override + protected boolean retainFeature(SequenceFeature sf, String accessionId) + { + if (SequenceOntologyFactory.getInstance().isA(sf.getType(), + SequenceOntologyI.CDS)) + { + return false; + } + return featureMayBelong(sf, accessionId); + } + + /** + * Answers true if the sequence feature type is 'CDS' (or a subtype of CDS in + * the Sequence Ontology), and the Parent of the feature is the transcript we + * are retrieving + */ + @Override + protected boolean identifiesSequence(SequenceFeature sf, String accId) + { + if (SequenceOntologyFactory.getInstance().isA(sf.getType(), + SequenceOntologyI.CDS)) + { + String parentFeature = (String) sf.getValue(PARENT); + if (("transcript:" + accId).equals(parentFeature)) + { + return true; + } + } + return false; + } + + /** + * Overrides this method to trivially return a range which is the whole of the + * nucleotide sequence. 
This is both faster than scanning for CDS features, + * and also means we don't need to keep CDS features on CDS sequence (where + * they are redundant information). + */ + protected List<int[]> getCdsRanges(SequenceI dnaSeq) + { + int len = dnaSeq.getLength(); + List<int[]> ranges = new ArrayList<int[]>(); + ranges.add(new int[] { 1, len }); + return ranges; + } + }