package jalview.ext.ensembl; import jalview.datamodel.AlignedCodonFrame; import jalview.datamodel.AlignmentI; import jalview.datamodel.SequenceFeature; import jalview.datamodel.SequenceI; import jalview.io.gff.SequenceOntology; import jalview.util.MapList; import java.util.ArrayList; import java.util.List; /** * A class that fetches genomic sequence and all transcripts for an Ensembl gene * * @author gmcarstairs */ public class EnsemblGene extends EnsemblSeqProxy { private static final EnsemblFeatureType[] FEATURES_TO_FETCH = { EnsemblFeatureType.gene, EnsemblFeatureType.transcript, EnsemblFeatureType.exon, EnsemblFeatureType.cds, EnsemblFeatureType.variation }; @Override public String getDbName() { return "ENSEMBL (GENE)"; } @Override protected EnsemblFeatureType[] getFeaturesToFetch() { return FEATURES_TO_FETCH; } @Override protected EnsemblSeqType getSourceEnsemblType() { return EnsemblSeqType.GENOMIC; } /** * Builds an alignment of all transcripts for the requested gene: * */ @Override public AlignmentI getSequenceRecords(String query) throws Exception { AlignmentI al = super.getSequenceRecords(query); if (al.getHeight() > 0) { getTranscripts(al, query); } return al; } /** * Find and fetch all transcripts for the gene, as identified by "transcript" * features whose Parent is the requested gene * * @param al * @param accId * @throws Exception */ protected void getTranscripts(AlignmentI al, String accId) throws Exception { SequenceI gene = al.getSequenceAt(0); List transcriptIds = getTranscriptIds(accId, gene); // TODO: could just use features and genomic sequence // to generate the transcript sequences - faster // could also grab "Name" as transcript description (gene name) for (String transcriptId : transcriptIds) { /* * fetch and map the transcript sequence; we can pass in the gene * sequence with features marked to save fetching it again */ EnsemblCdna cdnaFetcher = new EnsemblCdna(); AlignmentI al2 = cdnaFetcher.getSequenceRecords(transcriptId, gene); for (SequenceI seq : al2.getSequences()) { /* * build mapping from gene sequence to transcript */ MapList mapping = cdnaFetcher.getGenomicRanges(gene, transcriptId, seq.getStart()); /* * align the transcript to the gene */ AlignedCodonFrame acf = new AlignedCodonFrame(); acf.addMap(gene, seq, mapping); char gap = al.getGapCharacter(); // AlignmentUtils.alignSequenceAs(seq, gene, acf, String.valueOf(gap), // gap, false, false); al.addSequence(seq); } } } /** * Returns a list of the ids of transcript features on the sequence whose * Parent is the gene for the accession id * * @param accId * @param geneSequence * @return */ protected List getTranscriptIds(String accId, SequenceI geneSequence) { SequenceOntology so = SequenceOntology.getInstance(); List transcriptIds = new ArrayList(); /* * scan for transcript features belonging to our gene; * also remove any which belong to other genes */ SequenceFeature[] sfs = geneSequence.getSequenceFeatures(); List keptFeatures = new ArrayList(); boolean featureDropped = false; String parentIdentifier = "gene:" + accId; for (SequenceFeature sf : sfs) { if (so.isA(sf.getType(), SequenceOntology.TRANSCRIPT)) { String parent = (String) sf.getValue(PARENT); if (parentIdentifier.equals(parent)) { transcriptIds.add((String) sf.getValue("transcript_id")); keptFeatures.add(sf); } else { featureDropped = true; } } else { keptFeatures.add(sf); } } if (featureDropped) { geneSequence.getDatasetSequence().setSequenceFeatures( keptFeatures.toArray(new SequenceFeature[keptFeatures .size()])); } return transcriptIds; } @Override public String getDescription() { return "Fetches all transcripts and variant features for a gene"; } /** * Default test query is a transcript */ @Override public String getTestQuery() { return "ENSG00000157764"; // reverse strand // ENSG00000090266 // forward strand } /** * Answers true for a feature of type 'gene' (or a sub-type of gene in the * Sequence Ontology), whose ID is the accession we are retrieving */ @Override protected boolean identifiesSequence(SequenceFeature sf, String accId) { if (SequenceOntology.getInstance().isA(sf.getType(), SequenceOntology.GENE)) { String id = (String) sf.getValue(ID); if (("gene:" + accId).equals(id)) { return true; } } return false; } /** * Answers true unless feature type is 'gene'. We need the gene features to * identify the range, but it is redundant information on the gene sequence. */ @Override protected boolean retainFeature(SequenceFeature sf, String accessionId) { return !SequenceOntology.getInstance().isA(sf.getType(), SequenceOntology.GENE); } }