X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=src%2Fjalview%2Fext%2Fensembl%2FEnsemblGene.java;h=0d5fc26facc10310199f1cf1a03d02443ec95cb2;hb=3440eae5b69bdca30f480f9e0d4f0df226630ef9;hp=b4d278344a6b37214f0773b31da3a921485bc36c;hpb=bf0d052fef43e9809b7170dbfd372b3ea116391b;p=jalview.git diff --git a/src/jalview/ext/ensembl/EnsemblGene.java b/src/jalview/ext/ensembl/EnsemblGene.java index b4d2783..0d5fc26 100644 --- a/src/jalview/ext/ensembl/EnsemblGene.java +++ b/src/jalview/ext/ensembl/EnsemblGene.java @@ -1,3 +1,23 @@ +/* + * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$) + * Copyright (C) $$Year-Rel$$ The Jalview Authors + * + * This file is part of Jalview. + * + * Jalview is free software: you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, either version 3 + * of the License, or (at your option) any later version. + * + * Jalview is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty + * of MERCHANTABILITY or FITNESS FOR A PARTICULAR + * PURPOSE. See the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Jalview. If not, see <http://www.gnu.org/licenses/>. + * The Jalview Authors are detailed in the 'AUTHORS' file. 
+ */ package jalview.ext.ensembl; import jalview.api.FeatureColourI; @@ -6,6 +26,7 @@ import jalview.datamodel.AlignmentI; import jalview.datamodel.Sequence; import jalview.datamodel.SequenceFeature; import jalview.datamodel.SequenceI; +import jalview.datamodel.features.SequenceFeatures; import jalview.io.gff.SequenceOntologyFactory; import jalview.io.gff.SequenceOntologyI; import jalview.schemes.FeatureColour; @@ -77,6 +98,12 @@ public class EnsemblGene extends EnsemblSeqProxy return EnsemblSeqType.GENOMIC; } + @Override + protected String getObjectType() + { + return OBJECT_TYPE_GENE; + } + /** * Returns an alignment containing the gene(s) for the given gene or * transcript identifier, or external identifier (e.g. Uniprot id). If given a @@ -89,7 +116,8 @@ public class EnsemblGene extends EnsemblSeqProxy *
  • resolves an external identifier by looking up xref-ed gene ids
  • *
  • fetches the gene sequence
  • *
  • fetches features on the sequence
  • - *
  • identifies "transcript" features whose Parent is the requested gene
  • + *
  • identifies "transcript" features whose Parent is the requested + * gene
  • *
  • fetches the transcript sequence for each transcript
  • *
  • makes a mapping from the gene to each transcript
  • *
  • copies features from gene to transcript sequences
  • @@ -118,9 +146,14 @@ public class EnsemblGene extends EnsemblSeqProxy * fetch the gene sequence(s) with features and xrefs */ AlignmentI geneAlignment = super.getSequenceRecords(geneId); - + if (geneAlignment == null) + { + continue; + } if (geneAlignment.getHeight() == 1) { + // ensure id has 'correct' case for the Ensembl identifier + geneId = geneAlignment.getSequenceAt(0).getName(); getTranscripts(geneAlignment, geneId); } if (al == null) @@ -136,50 +169,43 @@ public class EnsemblGene extends EnsemblSeqProxy } /** - * Converts a query, which may contain one or more gene or transcript - * identifiers, into a non-redundant list of gene identifiers. + * Converts a query, which may contain one or more gene, transcript, or + * external (to Ensembl) identifiers, into a non-redundant list of gene + * identifiers. * * @param accessions * @return */ List getGeneIds(String accessions) { - List geneIds = new ArrayList(); + List geneIds = new ArrayList<>(); for (String acc : accessions.split(getAccessionSeparator())) { - if (isGeneIdentifier(acc)) - { - if (!geneIds.contains(acc)) - { - geneIds.add(acc); - } - } - /* - * if given a transcript id, look up its gene parent + * First try lookup as an Ensembl (gene or transcript) identifier */ - else if (isTranscriptIdentifier(acc)) + String geneId = new EnsemblLookup(getDomain()).getGeneId(acc); + if (geneId != null) { - String geneId = new EnsemblLookup(getDomain()).getParent(acc); - if (geneId != null && !geneIds.contains(geneId)) + if (!geneIds.contains(geneId)) { geneIds.add(geneId); } } - - /* - * if given a gene or other external name, lookup and fetch - * the corresponding gene for all model organisms - */ else { - List ids = new EnsemblSymbol(getDomain()).getIds(acc); - for (String geneId : ids) + /* + * if given a gene or other external name, lookup and fetch + * the corresponding gene for all model organisms + */ + List ids = new EnsemblSymbol(getDomain(), getDbSource(), + getDbVersion()).getGeneIds(acc); + for 
(String id : ids) { - if (!geneIds.contains(geneId)) + if (!geneIds.contains(id)) { - geneIds.add(geneId); + geneIds.add(id); } } } @@ -188,29 +214,6 @@ public class EnsemblGene extends EnsemblSeqProxy } /** - * Attempts to get Ensembl stable identifiers for model organisms for a gene - * name by calling the xrefs symbol REST service to resolve the gene name. - * - * @param query - * @return - */ - protected String getGeneIdentifiersForName(String query) - { - List ids = new EnsemblSymbol(getDomain()).getIds(query); - if (ids != null) - { - for (String id : ids) - { - if (isGeneIdentifier(id)) - { - return id; - } - } - } - return null; - } - - /** * Constructs all transcripts for the gene, as identified by "transcript" * features whose Parent is the requested gene. The coding transcript * sequences (i.e. with introns omitted) are added to the alignment. @@ -242,23 +245,20 @@ public class EnsemblGene extends EnsemblSeqProxy */ protected void clearGeneFeatures(SequenceI gene) { - SequenceFeature[] sfs = gene.getSequenceFeatures(); - if (sfs != null) + /* + * Note we include NMD_transcript_variant here because it behaves like + * 'transcript' in Ensembl, although strictly speaking it is not + * (it is a sub-type of sequence_variant) + */ + String[] soTerms = new String[] { + SequenceOntologyI.NMD_TRANSCRIPT_VARIANT, + SequenceOntologyI.TRANSCRIPT, SequenceOntologyI.EXON, + SequenceOntologyI.CDS }; + List sfs = gene.getFeatures().getFeaturesByOntology( + soTerms); + for (SequenceFeature sf : sfs) { - SequenceOntologyI so = SequenceOntologyFactory.getInstance(); - List filtered = new ArrayList(); - for (SequenceFeature sf : sfs) - { - String type = sf.getType(); - if (!isTranscript(type) && !so.isA(type, SequenceOntologyI.EXON) - && !so.isA(type, SequenceOntologyI.CDS)) - { - filtered.add(sf); - } - } - gene.setSequenceFeatures(filtered - .toArray(new SequenceFeature[filtered - .size()])); + gene.deleteFeature(sf); } } @@ -275,8 +275,8 @@ public class EnsemblGene 
extends EnsemblSeqProxy * the parent gene sequence, with features * @return */ - SequenceI makeTranscript(SequenceFeature transcriptFeature, - AlignmentI al, SequenceI gene) + SequenceI makeTranscript(SequenceFeature transcriptFeature, AlignmentI al, + SequenceI gene) { String accId = getTranscriptId(transcriptFeature); if (accId == null) @@ -308,11 +308,12 @@ public class EnsemblGene extends EnsemblSeqProxy { splices = findFeatures(gene, SequenceOntologyI.CDS, parentId); } + SequenceFeatures.sortFeatures(splices, true); int transcriptLength = 0; final char[] geneChars = gene.getSequence(); int offset = gene.getStart(); // to convert to 0-based positions - List mappedFrom = new ArrayList(); + List mappedFrom = new ArrayList<>(); for (SequenceFeature sf : splices) { @@ -324,7 +325,8 @@ public class EnsemblGene extends EnsemblSeqProxy mappedFrom.add(new int[] { sf.getBegin(), sf.getEnd() }); } - Sequence transcript = new Sequence(accId, seqChars, 1, transcriptLength); + Sequence transcript = new Sequence(accId, seqChars, 1, + transcriptLength); /* * Ensembl has gene name as transcript Name @@ -353,11 +355,11 @@ public class EnsemblGene extends EnsemblSeqProxy * transfer features to the new sequence; we use EnsemblCdna to do this, * to filter out unwanted features types (see method retainFeature) */ - List mapTo = new ArrayList(); + List mapTo = new ArrayList<>(); mapTo.add(new int[] { 1, transcriptLength }); MapList mapping = new MapList(mappedFrom, mapTo, 1, 1); EnsemblCdna cdna = new EnsemblCdna(getDomain()); - cdna.transferFeatures(gene.getSequenceFeatures(), + cdna.transferFeatures(gene.getFeatures().getPositionalFeatures(), transcript.getDatasetSequence(), mapping, parentId); /* @@ -387,6 +389,12 @@ public class EnsemblGene extends EnsemblSeqProxy /** * Returns a list of the transcript features on the sequence whose Parent is * the gene for the accession id. + *

    + * Transcript features are those of type "transcript", or any of its sub-types + * in the Sequence Ontology e.g. "mRNA", "processed_transcript". We also + * include "NMD_transcript_variant", because this type behaves like a + * transcript identifier in Ensembl, although strictly speaking it is not in + * the SO. * * @param accId * @param geneSequence @@ -395,23 +403,21 @@ public class EnsemblGene extends EnsemblSeqProxy protected List getTranscriptFeatures(String accId, SequenceI geneSequence) { - List transcriptFeatures = new ArrayList(); + List transcriptFeatures = new ArrayList<>(); String parentIdentifier = GENE_PREFIX + accId; - SequenceFeature[] sfs = geneSequence.getSequenceFeatures(); - if (sfs != null) + List sfs = geneSequence.getFeatures() + .getFeaturesByOntology(SequenceOntologyI.TRANSCRIPT); + sfs.addAll(geneSequence.getFeatures().getPositionalFeatures( + SequenceOntologyI.NMD_TRANSCRIPT_VARIANT)); + + for (SequenceFeature sf : sfs) { - for (SequenceFeature sf : sfs) + String parent = (String) sf.getValue(PARENT); + if (parentIdentifier.equalsIgnoreCase(parent)) { - if (isTranscript(sf.getType())) - { - String parent = (String) sf.getValue(PARENT); - if (parentIdentifier.equals(parent)) - { - transcriptFeatures.add(sf); - } - } + transcriptFeatures.add(sf); } } @@ -446,8 +452,9 @@ public class EnsemblGene extends EnsemblSeqProxy if (SequenceOntologyFactory.getInstance().isA(sf.getType(), SequenceOntologyI.GENE)) { - String id = (String) sf.getValue(ID); - if ((GENE_PREFIX + accId).equals(id)) + // NB features as gff use 'ID'; rest services return as 'id' + String id = (String) sf.getValue("ID"); + if ((GENE_PREFIX + accId).equalsIgnoreCase(id)) { return true; } @@ -474,7 +481,7 @@ public class EnsemblGene extends EnsemblSeqProxy if (isTranscript(type)) { String parent = (String) sf.getValue(PARENT); - if (!(GENE_PREFIX + accessionId).equals(parent)) + if (!(GENE_PREFIX + accessionId).equalsIgnoreCase(parent)) { return false; } @@ -524,11 +531,12 
@@ public class EnsemblGene extends EnsemblSeqProxy return new FeatureSettingsAdapter() { SequenceOntologyI so = SequenceOntologyFactory.getInstance(); + @Override public boolean isFeatureDisplayed(String type) { - return (so.isA(type, SequenceOntologyI.EXON) || so.isA(type, - SequenceOntologyI.SEQUENCE_VARIANT)); + return (so.isA(type, SequenceOntologyI.EXON) + || so.isA(type, SequenceOntologyI.SEQUENCE_VARIANT)); } @Override