package jalview.ext.ensembl;

import jalview.datamodel.SequenceFeature;
import jalview.io.gff.SequenceOntologyFactory;
import jalview.io.gff.SequenceOntologyI;

import java.util.List;

import com.stevesoft.pat.Regex;
11 public class EnsemblCdna extends EnsemblSeqProxy
13 // TODO modify to accept other species e.g. ENSMUSPnnn
14 private static final Regex ACCESSION_REGEX = new Regex(
15 "(ENST|ENSG|CCDS)[0-9.]{3,}$");
18 * fetch exon features on genomic sequence (to identify the cdna regions)
19 * and cds and variation features (to retain)
21 private static final EnsemblFeatureType[] FEATURES_TO_FETCH = {
22 EnsemblFeatureType.exon, EnsemblFeatureType.cds,
23 EnsemblFeatureType.variation };
31 public String getDbName()
33 return "ENSEMBL (CDNA)";
37 protected EnsemblSeqType getSourceEnsemblType()
39 return EnsemblSeqType.CDNA;
43 public Regex getAccessionValidator()
45 return ACCESSION_REGEX;
49 protected EnsemblFeatureType[] getFeaturesToFetch()
51 return FEATURES_TO_FETCH;
55 * Answers true unless the feature type is 'transcript' (or a sub-type in the
59 protected boolean retainFeature(SequenceFeature sf, String accessionId)
61 if (isTranscript(sf.getType()))
65 return featureMayBelong(sf, accessionId);
69 * Answers true if the sequence feature type is 'exon' (or a subtype of exon
70 * in the Sequence Ontology), and the Parent of the feature is the transcript
74 protected boolean identifiesSequence(SequenceFeature sf, String accId)
76 if (SequenceOntologyFactory.getInstance().isA(sf.getType(),
77 SequenceOntologyI.EXON))
79 String parentFeature = (String) sf.getValue(PARENT);
80 if (("transcript:" + accId).equals(parentFeature))
89 protected List<String> getCrossReferenceDatabases()
91 return super.getCrossReferenceDatabases();
92 // 30/01/16 also found Vega_transcript, OTTT, ENS_LRG_transcript, UCSC,
93 // HGNC_trans_name, RefSeq_mRNA, RefSeq_mRNA_predicted