+/*
+ * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
+ * Copyright (C) $$Year-Rel$$ The Jalview Authors
+ *
+ * This file is part of Jalview.
+ *
+ * Jalview is free software: you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, either version 3
+ * of the License, or (at your option) any later version.
+ *
+ * Jalview is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty
+ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
+ * PURPOSE. See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
+ * The Jalview Authors are detailed in the 'AUTHORS' file.
+ */
package jalview.ext.ensembl;
import jalview.api.FeatureColourI;
import jalview.api.FeatureSettingsModelI;
import jalview.datamodel.AlignmentI;
+import jalview.datamodel.GeneLoci;
import jalview.datamodel.Sequence;
import jalview.datamodel.SequenceFeature;
import jalview.datamodel.SequenceI;
+import jalview.datamodel.features.SequenceFeatures;
import jalview.io.gff.SequenceOntologyFactory;
import jalview.io.gff.SequenceOntologyI;
import jalview.schemes.FeatureColour;
* <li>resolves an external identifier by looking up xref-ed gene ids</li>
* <li>fetches the gene sequence</li>
* <li>fetches features on the sequence</li>
- * <li>identifies "transcript" features whose Parent is the requested gene</li>
+ * <li>identifies "transcript" features whose Parent is the requested
+ * gene</li>
* <li>fetches the transcript sequence for each transcript</li>
* <li>makes a mapping from the gene to each transcript</li>
* <li>copies features from gene to transcript sequences</li>
geneIds.add(geneId);
}
}
+ else if (isProteinIdentifier(acc))
+ {
+ String tscriptId = new EnsemblLookup(getDomain()).getParent(acc);
+ if (tscriptId != null)
+ {
+ String geneId = new EnsemblLookup(getDomain())
+ .getParent(tscriptId);
+ if (geneId != null && !geneIds.contains(geneId))
+ {
+ geneIds.add(geneId);
+ }
+ }
+ // NOTE - acc is lost if it resembles an ENS.+ ID but does not
+ // actually resolve to one... e.g. ENSMICP00000009241
+ }
/*
* if given a gene or other external name, lookup and fetch
* the corresponding gene for all model organisms
*/
protected void clearGeneFeatures(SequenceI gene)
{
- SequenceFeature[] sfs = gene.getSequenceFeatures();
- if (sfs != null)
+ /*
+ * Delete from the gene every feature that getFeaturesByOntology returns
+ * for the Sequence Ontology terms listed below (NMD_transcript_variant,
+ * transcript, exon and CDS); features not returned for those terms are
+ * left on the gene.
+ *
+ * Note we include NMD_transcript_variant here because it behaves like
+ * 'transcript' in Ensembl, although strictly speaking it is not
+ * (it is a sub-type of sequence_variant)
+ */
+ String[] soTerms = new String[] {
+ SequenceOntologyI.NMD_TRANSCRIPT_VARIANT,
+ SequenceOntologyI.TRANSCRIPT, SequenceOntologyI.EXON,
+ SequenceOntologyI.CDS };
+ List<SequenceFeature> sfs = gene.getFeatures().getFeaturesByOntology(
+ soTerms);
+ for (SequenceFeature sf : sfs)
{
- SequenceOntologyI so = SequenceOntologyFactory.getInstance();
- List<SequenceFeature> filtered = new ArrayList<SequenceFeature>();
- for (SequenceFeature sf : sfs)
- {
- String type = sf.getType();
- if (!isTranscript(type) && !so.isA(type, SequenceOntologyI.EXON)
- && !so.isA(type, SequenceOntologyI.CDS))
- {
- filtered.add(sf);
- }
- }
- gene.setSequenceFeatures(filtered
- .toArray(new SequenceFeature[filtered
- .size()]));
+ gene.deleteFeature(sf);
}
}
* the parent gene sequence, with features
* @return
*/
- SequenceI makeTranscript(SequenceFeature transcriptFeature,
- AlignmentI al, SequenceI gene)
+ SequenceI makeTranscript(SequenceFeature transcriptFeature, AlignmentI al,
+ SequenceI gene)
{
String accId = getTranscriptId(transcriptFeature);
if (accId == null)
{
splices = findFeatures(gene, SequenceOntologyI.CDS, parentId);
}
+ SequenceFeatures.sortFeatures(splices, true);
int transcriptLength = 0;
final char[] geneChars = gene.getSequence();
mappedFrom.add(new int[] { sf.getBegin(), sf.getEnd() });
}
- Sequence transcript = new Sequence(accId, seqChars, 1, transcriptLength);
+ Sequence transcript = new Sequence(accId, seqChars, 1,
+ transcriptLength);
/*
* Ensembl has gene name as transcript Name
mapTo.add(new int[] { 1, transcriptLength });
MapList mapping = new MapList(mappedFrom, mapTo, 1, 1);
EnsemblCdna cdna = new EnsemblCdna(getDomain());
- cdna.transferFeatures(gene.getSequenceFeatures(),
+ cdna.transferFeatures(gene.getFeatures().getPositionalFeatures(),
transcript.getDatasetSequence(), mapping, parentId);
+ mapTranscriptToChromosome(transcript, gene, mapping);
+
/*
* fetch and save cross-references
*/
}
/**
+ * If the gene has a mapping to chromosome coordinates, derive the transcript
+ * chromosome regions and save on the transcript sequence. Does nothing if
+ * the gene has no gene loci.
+ * <p>
+ * Each 'from' range of the supplied gene-to-transcript mapping is located in
+ * the 'to' side of the gene's loci mapping; the located regions, paired with
+ * the transcript's own start-end range, form the mapping of the new GeneLoci
+ * set on the transcript. Species, assembly and chromosome are copied from
+ * the gene's loci.
+ *
+ * @param transcript
+ * the transcript sequence on which the derived gene loci are set
+ * @param gene
+ * the gene sequence whose gene loci (if any) supply the mapping
+ * @param mapping
+ * the mapping from gene to transcript positions
+ */
+ protected void mapTranscriptToChromosome(SequenceI transcript,
+ SequenceI gene, MapList mapping)
+ {
+ GeneLoci loci = gene.getGeneLoci();
+ if (loci == null)
+ {
+ return;
+ }
+
+ /*
+ * patch to ensure gene to chromosome mapping is complete
+ * (in case created before gene length was known):
+ * an end value of 0 in the first 'from' range is taken to mean the
+ * range was never filled in, so its start and end are overwritten in
+ * place with the gene's start and end.
+ * NOTE(review): this writes into the array returned by getFromRanges(),
+ * so presumably it updates the gene's own loci mapping - confirm.
+ * NOTE(review): get(0) assumes at least one 'from' range - confirm.
+ */
+ MapList geneMapping = loci.mapping;
+ if (geneMapping.getFromRanges().get(0)[1] == 0)
+ {
+ geneMapping.getFromRanges().get(0)[0] = gene.getStart();
+ geneMapping.getFromRanges().get(0)[1] = gene.getEnd();
+ }
+
+ List<int[]> exons = mapping.getFromRanges(); // gene-side ranges of the gene-to-transcript mapping
+ List<int[]> transcriptLoci = new ArrayList<>();
+
+ for (int[] exon : exons)
+ {
+ transcriptLoci.add(geneMapping.locateInTo(exon[0], exon[1])); // NOTE(review): result is added unchecked - confirm locateInTo cannot return null here
+ }
+
+ List<int[]> transcriptRange = Arrays.asList(new int[] {
+ transcript.getStart(), transcript.getEnd() }); // a single range spanning the whole transcript
+ MapList mapList = new MapList(transcriptRange, transcriptLoci, 1, 1);
+ GeneLoci gl = new GeneLoci(loci.species, loci.assembly,
+ loci.chromosome, mapList);
+
+ transcript.setGeneLoci(gl);
+ }
+
+ /**
* Returns the 'transcript_id' property of the sequence feature (or null)
*
* @param feature
List<SequenceFeature> transcriptFeatures = new ArrayList<SequenceFeature>();
String parentIdentifier = GENE_PREFIX + accId;
- SequenceFeature[] sfs = geneSequence.getSequenceFeatures();
+ // todo optimise here by transcript type!
+ List<SequenceFeature> sfs = geneSequence.getFeatures()
+ .getPositionalFeatures();
- if (sfs != null)
+ for (SequenceFeature sf : sfs)
{
- for (SequenceFeature sf : sfs)
+ if (isTranscript(sf.getType()))
{
- if (isTranscript(sf.getType()))
+ String parent = (String) sf.getValue(PARENT);
+ if (parentIdentifier.equals(parent))
{
- String parent = (String) sf.getValue(PARENT);
- if (parentIdentifier.equals(parent))
- {
- transcriptFeatures.add(sf);
- }
+ transcriptFeatures.add(sf);
}
}
}
return new FeatureSettingsAdapter()
{
SequenceOntologyI so = SequenceOntologyFactory.getInstance();
+
@Override
public boolean isFeatureDisplayed(String type)
{
- return (so.isA(type, SequenceOntologyI.EXON) || so.isA(type,
- SequenceOntologyI.SEQUENCE_VARIANT));
+ return (so.isA(type, SequenceOntologyI.EXON)
+ || so.isA(type, SequenceOntologyI.SEQUENCE_VARIANT));
}
@Override