1 package jalview.ext.ensembl;
3 import jalview.datamodel.AlignedCodonFrame;
4 import jalview.datamodel.AlignmentI;
5 import jalview.datamodel.SequenceFeature;
6 import jalview.datamodel.SequenceI;
7 import jalview.io.gff.SequenceOntology;
8 import jalview.util.MapList;
10 import java.util.ArrayList;
11 import java.util.List;
14 * A class that fetches genomic sequence and all transcripts for an Ensembl gene
18 public class EnsemblGene extends EnsemblSeqProxy
20 private static final EnsemblFeatureType[] FEATURES_TO_FETCH = {
21 EnsemblFeatureType.gene, EnsemblFeatureType.transcript,
22 EnsemblFeatureType.exon, EnsemblFeatureType.cds,
23 EnsemblFeatureType.variation };
26 public String getDbName()
28 return "ENSEMBL (GENE)";
32 protected EnsemblFeatureType[] getFeaturesToFetch()
34 return FEATURES_TO_FETCH;
38 protected EnsemblSeqType getSourceEnsemblType()
40 return EnsemblSeqType.GENOMIC;
44 * Builds an alignment of all transcripts for the requested gene:
46 * <li>fetches the gene sequence</li>
47 * <li>fetches features on the sequence</li>
48 * <li>identifies "transcript" features whose Parent is the requested gene</li>
49 * <li>fetches the transcript sequence for each transcript</li>
50 * <li>makes a mapping from the gene to each transcript</li>
51 * <li>copies features from gene to transcript sequences</li>
52 * <li>fetches the protein sequence for each transcript, maps and saves it as
53 * a cross-reference</li>
54 * <li>aligns each transcript against the gene sequence based on the position
59 public AlignmentI getSequenceRecords(String query) throws Exception
61 AlignmentI al = super.getSequenceRecords(query);
62 if (al.getHeight() > 0)
64 getTranscripts(al, query);
71 * Find and fetch all transcripts for the gene, as identified by "transcript"
72 * features whose Parent is the requested gene
78 protected void getTranscripts(AlignmentI al, String accId)
81 SequenceI gene = al.getSequenceAt(0);
82 List<String> transcriptIds = getTranscriptIds(accId, gene);
84 // TODO: could just use features and genomic sequence
85 // to generate the transcript sequences - faster
86 // could also grab "Name" as transcript description (gene name)
87 for (String transcriptId : transcriptIds)
90 * fetch and map the transcript sequence; we can pass in the gene
91 * sequence with features marked to save fetching it again
93 EnsemblCdna cdnaFetcher = new EnsemblCdna();
94 AlignmentI al2 = cdnaFetcher.getSequenceRecords(transcriptId,
96 for (SequenceI seq : al2.getSequences())
99 * build mapping from gene sequence to transcript
101 MapList mapping = cdnaFetcher.getGenomicRanges(gene, transcriptId,
105 * align the transcript to the gene
107 AlignedCodonFrame acf = new AlignedCodonFrame();
108 acf.addMap(gene, seq, mapping);
109 char gap = al.getGapCharacter();
110 // AlignmentUtils.alignSequenceAs(seq, gene, acf, String.valueOf(gap),
111 // gap, false, false);
119 * Returns a list of the ids of transcript features on the sequence whose
120 * Parent is the gene for the accession id
123 * @param geneSequence
126 protected List<String> getTranscriptIds(String accId, SequenceI geneSequence)
128 SequenceOntology so = SequenceOntology.getInstance();
129 List<String> transcriptIds = new ArrayList<String>();
132 * scan for transcript features belonging to our gene;
133 * also remove any which belong to other genes
135 SequenceFeature[] sfs = geneSequence.getSequenceFeatures();
136 List<SequenceFeature> keptFeatures = new ArrayList<SequenceFeature>();
137 boolean featureDropped = false;
138 String parentIdentifier = "gene:" + accId;
139 for (SequenceFeature sf : sfs)
141 if (so.isA(sf.getType(), SequenceOntology.TRANSCRIPT))
143 String parent = (String) sf.getValue(PARENT);
144 if (parentIdentifier.equals(parent))
146 transcriptIds.add((String) sf.getValue("transcript_id"));
147 keptFeatures.add(sf);
151 featureDropped = true;
156 keptFeatures.add(sf);
161 geneSequence.getDatasetSequence().setSequenceFeatures(
162 keptFeatures.toArray(new SequenceFeature[keptFeatures
165 return transcriptIds;
169 public String getDescription()
171 return "Fetches all transcripts and variant features for a gene";
175 * Default test query is a transcript
178 public String getTestQuery()
180 return "ENSG00000157764"; // reverse strand
181 // ENSG00000090266 // forward strand
185 * Answers true for a feature of type 'gene' (or a sub-type of gene in the
186 * Sequence Ontology), whose ID is the accession we are retrieving
189 protected boolean identifiesSequence(SequenceFeature sf, String accId)
191 if (SequenceOntology.getInstance().isA(sf.getType(),
192 SequenceOntology.GENE))
194 String id = (String) sf.getValue(ID);
195 if (("gene:" + accId).equals(id))
204 * Answers true unless feature type is 'gene'. We need the gene features to
205 * identify the range, but it is redundant information on the gene sequence.
208 protected boolean retainFeature(SequenceFeature sf, String accessionId)
210 return !SequenceOntology.getInstance().isA(sf.getType(),
211 SequenceOntology.GENE);