X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=src%2Fjalview%2Fext%2Fensembl%2FEnsemblGene.java;h=915fa0aa04e5067731f1ebd8dceae9dc7a79f9d0;hb=136c0793b90b72b928c4d77dc109dd5c644e00d3;hp=24e3e955fe442972c96f99ae6257cfaa0fecfa14;hpb=8677e6e34e291edc58c1da2fc9c958473754143f;p=jalview.git diff --git a/src/jalview/ext/ensembl/EnsemblGene.java b/src/jalview/ext/ensembl/EnsemblGene.java index 24e3e95..915fa0a 100644 --- a/src/jalview/ext/ensembl/EnsemblGene.java +++ b/src/jalview/ext/ensembl/EnsemblGene.java @@ -26,6 +26,7 @@ import jalview.datamodel.AlignmentI; import jalview.datamodel.Sequence; import jalview.datamodel.SequenceFeature; import jalview.datamodel.SequenceI; +import jalview.datamodel.features.SequenceFeatures; import jalview.io.gff.SequenceOntologyFactory; import jalview.io.gff.SequenceOntologyI; import jalview.schemes.FeatureColour; @@ -190,7 +191,22 @@ public class EnsemblGene extends EnsemblSeqProxy geneIds.add(geneId); } } + else if (isProteinIdentifier(acc)) + { + String tscriptId = new EnsemblLookup(getDomain()).getParent(acc); + if (tscriptId != null) + { + String geneId = new EnsemblLookup(getDomain()) + .getParent(tscriptId); + if (geneId != null && !geneIds.contains(geneId)) + { + geneIds.add(geneId); + } + } + // NOTE - acc is lost if it resembles an ENS.+ ID but isn't actually + // resolving to one... e.g. ENSMICP00000009241 + } /* * if given a gene or other external name, lookup and fetch * the corresponding gene for all model organisms @@ -267,22 +283,20 @@ public class EnsemblGene extends EnsemblSeqProxy */ protected void clearGeneFeatures(SequenceI gene) { - SequenceFeature[] sfs = gene.getSequenceFeatures(); - if (sfs != null) + /* + * Note we include NMD_transcript_variant here because it behaves like + * 'transcript' in Ensembl, although strictly speaking it is not + * (it is a sub-type of sequence_variant) + */ + String[] soTerms = new String[] { + SequenceOntologyI.NMD_TRANSCRIPT_VARIANT, + SequenceOntologyI.TRANSCRIPT, SequenceOntologyI.EXON, + SequenceOntologyI.CDS }; + List sfs = gene.getFeatures().getFeaturesByOntology( + soTerms); + for (SequenceFeature sf : sfs) { - SequenceOntologyI so = SequenceOntologyFactory.getInstance(); - List filtered = new ArrayList(); - for (SequenceFeature sf : sfs) - { - String type = sf.getType(); - if (!isTranscript(type) && !so.isA(type, SequenceOntologyI.EXON) - && !so.isA(type, SequenceOntologyI.CDS)) - { - filtered.add(sf); - } - } - gene.setSequenceFeatures(filtered - .toArray(new SequenceFeature[filtered.size()])); + gene.deleteFeature(sf); } } @@ -332,6 +346,7 @@ public class EnsemblGene extends EnsemblSeqProxy { splices = findFeatures(gene, SequenceOntologyI.CDS, parentId); } + SequenceFeatures.sortFeatures(splices, true); int transcriptLength = 0; final char[] geneChars = gene.getSequence(); @@ -381,7 +396,7 @@ public class EnsemblGene extends EnsemblSeqProxy mapTo.add(new int[] { 1, transcriptLength }); MapList mapping = new MapList(mappedFrom, mapTo, 1, 1); EnsemblCdna cdna = new EnsemblCdna(getDomain()); - cdna.transferFeatures(gene.getSequenceFeatures(), + cdna.transferFeatures(gene.getFeatures().getPositionalFeatures(), transcript.getDatasetSequence(), mapping, parentId); /* @@ -422,19 +437,18 @@ public class EnsemblGene extends EnsemblSeqProxy List transcriptFeatures = new ArrayList(); String parentIdentifier = GENE_PREFIX + accId; - SequenceFeature[] sfs = geneSequence.getSequenceFeatures(); + // todo optimise here by transcript type! + List sfs = geneSequence.getFeatures() + .getPositionalFeatures(); - if (sfs != null) + for (SequenceFeature sf : sfs) { - for (SequenceFeature sf : sfs) + if (isTranscript(sf.getType())) { - if (isTranscript(sf.getType())) + String parent = (String) sf.getValue(PARENT); + if (parentIdentifier.equals(parent)) { - String parent = (String) sf.getValue(PARENT); - if (parentIdentifier.equals(parent)) - { - transcriptFeatures.add(sf); - } + transcriptFeatures.add(sf); } } }