+package jalview.ext.ensembl;
+
+import jalview.datamodel.AlignedCodonFrame;
+import jalview.datamodel.AlignmentI;
+import jalview.datamodel.SequenceFeature;
+import jalview.datamodel.SequenceI;
+import jalview.io.gff.SequenceOntology;
+import jalview.util.MapList;
+
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * A class that fetches genomic sequence and all transcripts for an Ensembl gene
+ *
+ * @author gmcarstairs
+ */
+public class EnsemblGene extends EnsemblSeqProxy
+{
+ private static final EnsemblFeatureType[] FEATURES_TO_FETCH = {
+ EnsemblFeatureType.gene, EnsemblFeatureType.transcript,
+ EnsemblFeatureType.exon, EnsemblFeatureType.cds,
+ EnsemblFeatureType.variation };
+
+ @Override
+ public String getDbName()
+ {
+ return "ENSEMBL (GENE)";
+ }
+
+ @Override
+ protected EnsemblFeatureType[] getFeaturesToFetch()
+ {
+ return FEATURES_TO_FETCH;
+ }
+
+ @Override
+ protected EnsemblSeqType getSourceEnsemblType()
+ {
+ return EnsemblSeqType.GENOMIC;
+ }
+
+ /**
+ * Builds an alignment of all transcripts for the requested gene:
+ * <ul>
+ * <li>fetches the gene sequence</li>
+ * <li>fetches features on the sequence</li>
+ * <li>identifies "transcript" features whose Parent is the requested gene</li>
+ * <li>fetches the transcript sequence for each transcript</li>
+ * <li>makes a mapping from the gene to each transcript</li>
+ * <li>copies features from gene to transcript sequences</li>
+ * <li>fetches the protein sequence for each transcript, maps and saves it as
+ * a cross-reference</li>
+ * <li>aligns each transcript against the gene sequence based on the position
+ * mappings</li>
+ * </ul>
+ */
+ @Override
+ public AlignmentI getSequenceRecords(String query) throws Exception
+ {
+ AlignmentI al = super.getSequenceRecords(query);
+ if (al.getHeight() > 0)
+ {
+ getTranscripts(al, query);
+ }
+
+ return al;
+ }
+
+ /**
+ * Find and fetch all transcripts for the gene, as identified by "transcript"
+ * features whose Parent is the requested gene
+ *
+ * @param al
+ * @param accId
+ * @throws Exception
+ */
+ protected void getTranscripts(AlignmentI al, String accId)
+ throws Exception
+ {
+ SequenceI gene = al.getSequenceAt(0);
+ List<String> transcriptIds = getTranscriptIds(accId, gene);
+
+ // TODO: could just use features and genomic sequence
+ // to generate the transcript sequences - faster
+ // could also grab "Name" as transcript description (gene name)
+ for (String transcriptId : transcriptIds)
+ {
+ /*
+ * fetch and map the transcript sequence; we can pass in the gene
+ * sequence with features marked to save fetching it again
+ */
+ EnsemblCdna cdnaFetcher = new EnsemblCdna();
+ AlignmentI al2 = cdnaFetcher.getSequenceRecords(transcriptId,
+ gene);
+ for (SequenceI seq : al2.getSequences())
+ {
+ /*
+ * build mapping from gene sequence to transcript
+ */
+ MapList mapping = cdnaFetcher.getGenomicRanges(gene, transcriptId,
+ seq.getStart());
+
+ /*
+ * align the transcript to the gene
+ */
+ AlignedCodonFrame acf = new AlignedCodonFrame();
+ acf.addMap(gene, seq, mapping);
+ char gap = al.getGapCharacter();
+ // AlignmentUtils.alignSequenceAs(seq, gene, acf, String.valueOf(gap),
+ // gap, false, false);
+
+ al.addSequence(seq);
+ }
+ }
+ }
+
+ /**
+ * Returns a list of the ids of transcript features on the sequence whose
+ * Parent is the gene for the accession id
+ *
+ * @param accId
+ * @param geneSequence
+ * @return
+ */
+ protected List<String> getTranscriptIds(String accId, SequenceI geneSequence)
+ {
+ SequenceOntology so = SequenceOntology.getInstance();
+ List<String> transcriptIds = new ArrayList<String>();
+
+ /*
+ * scan for transcript features belonging to our gene;
+ * also remove any which belong to other genes
+ */
+ SequenceFeature[] sfs = geneSequence.getSequenceFeatures();
+ List<SequenceFeature> keptFeatures = new ArrayList<SequenceFeature>();
+ boolean featureDropped = false;
+ String parentIdentifier = "gene:" + accId;
+ for (SequenceFeature sf : sfs)
+ {
+ if (so.isA(sf.getType(), SequenceOntology.TRANSCRIPT))
+ {
+ String parent = (String) sf.getValue(PARENT);
+ if (parentIdentifier.equals(parent))
+ {
+ transcriptIds.add((String) sf.getValue("transcript_id"));
+ keptFeatures.add(sf);
+ }
+ else
+ {
+ featureDropped = true;
+ }
+ }
+ else
+ {
+ keptFeatures.add(sf);
+ }
+ }
+ if (featureDropped)
+ {
+ geneSequence.getDatasetSequence().setSequenceFeatures(
+ keptFeatures.toArray(new SequenceFeature[keptFeatures
+ .size()]));
+ }
+ return transcriptIds;
+ }
+
+ @Override
+ public String getDescription()
+ {
+ return "Fetches all transcripts and variant features for a gene";
+ }
+
+ /**
+ * Default test query is a transcript
+ */
+ @Override
+ public String getTestQuery()
+ {
+ return "ENSG00000157764"; // reverse strand
+ // ENSG00000090266 // forward strand
+ }
+
+ /**
+ * Answers true for a feature of type 'gene' (or a sub-type of gene in the
+ * Sequence Ontology), whose ID is the accession we are retrieving
+ */
+ @Override
+ protected boolean identifiesSequence(SequenceFeature sf, String accId)
+ {
+ if (SequenceOntology.getInstance().isA(sf.getType(),
+ SequenceOntology.GENE))
+ {
+ String id = (String) sf.getValue(ID);
+ if (("gene:" + accId).equals(id))
+ {
+ return true;
+ }
+ }
+ return false;
+ }
+
+ /**
+ * Answers true unless feature type is 'gene'. We need the gene features to
+ * identify the range, but it is redundant information on the gene sequence.
+ */
+ @Override
+ protected boolean retainFeature(SequenceFeature sf, String accessionId)
+ {
+ return !SequenceOntology.getInstance().isA(sf.getType(),
+ SequenceOntology.GENE);
+ }
+
+}