From: gmungoc Date: Fri, 10 Aug 2018 10:58:00 +0000 (+0100) Subject: JAL-3076 refactor for more efficient scan of 'gene' features X-Git-Tag: Release_2_11_0~20^2~2 X-Git-Url: http://source.jalview.org/gitweb/?p=jalview.git;a=commitdiff_plain;h=a4f2a7f356b8edab17a9a5bb6f2e71a1419792a9 JAL-3076 refactor for more efficient scan of 'gene' features --- diff --git a/src/jalview/datamodel/features/SequenceFeatures.java b/src/jalview/datamodel/features/SequenceFeatures.java index fcf1b53..727d3ef 100644 --- a/src/jalview/datamodel/features/SequenceFeatures.java +++ b/src/jalview/datamodel/features/SequenceFeatures.java @@ -87,7 +87,7 @@ public class SequenceFeatures implements SequenceFeaturesI */ // featureStore = Collections // .synchronizedSortedMap(new TreeMap()); - featureStore = new TreeMap(); + featureStore = new TreeMap<>(); } /** @@ -382,9 +382,10 @@ public class SequenceFeatures implements SequenceFeaturesI } /** - * Answers true if the given type is one of the specified sequence ontology - * terms (or a sub-type of one), or if no terms are supplied. Answers false if - * filter terms are specified and the given term does not match any of them. + * Answers true if the given type matches one of the specified terms (or is a + * sub-type of one in the Sequence Ontology), or if no terms are supplied. + * Answers false if filter terms are specified and the given term does not + * match any of them. 
* * @param type * @param soTerm @@ -399,7 +400,7 @@ public class SequenceFeatures implements SequenceFeaturesI SequenceOntologyI so = SequenceOntologyFactory.getInstance(); for (String term : soTerm) { - if (so.isA(type, term)) + if (type.equals(term) || so.isA(type, term)) { return true; } diff --git a/src/jalview/datamodel/features/SequenceFeaturesI.java b/src/jalview/datamodel/features/SequenceFeaturesI.java index 80c4f9a..31712b9 100644 --- a/src/jalview/datamodel/features/SequenceFeaturesI.java +++ b/src/jalview/datamodel/features/SequenceFeaturesI.java @@ -82,9 +82,9 @@ public interface SequenceFeaturesI String group, String... type); /** - * Answers a list of all features stored, whose type either matches one of the - * given ontology terms, or is a specialisation of a term in the Sequence - * Ontology. Results are returned in no particular guaranteed order. + * Answers a list of all features stored, whose type either matches, or is a + * specialisation (in the Sequence Ontology) of, one of the given terms. + * Results are returned in no particular order. 
* * @param ontologyTerm * @return diff --git a/src/jalview/ext/ensembl/EnsemblCdna.java b/src/jalview/ext/ensembl/EnsemblCdna.java index 952f01e..7384327 100644 --- a/src/jalview/ext/ensembl/EnsemblCdna.java +++ b/src/jalview/ext/ensembl/EnsemblCdna.java @@ -21,9 +21,13 @@ package jalview.ext.ensembl; import jalview.datamodel.SequenceFeature; +import jalview.datamodel.SequenceI; import jalview.io.gff.SequenceOntologyFactory; import jalview.io.gff.SequenceOntologyI; +import java.util.ArrayList; +import java.util.List; + import com.stevesoft.pat.Regex; /** @@ -109,23 +113,27 @@ public class EnsemblCdna extends EnsemblSeqProxy } /** - * Answers true if the sequence feature type is 'exon' (or a subtype of exon - * in the Sequence Ontology), and the Parent of the feature is the transcript - * we are retrieving + * Answers a list of sequence features (if any) whose type is 'exon' (or a + * subtype of exon in the Sequence Ontology), and whose Parent is the + * transcript we are retrieving */ @Override - protected boolean identifiesSequence(SequenceFeature sf, String accId) + protected List getIdentifyingFeatures(SequenceI seq, + String accId) { - if (SequenceOntologyFactory.getInstance().isA(sf.getType(), - SequenceOntologyI.EXON)) + List result = new ArrayList<>(); + List sfs = seq.getFeatures() + .getFeaturesByOntology(SequenceOntologyI.EXON); + for (SequenceFeature sf : sfs) { String parentFeature = (String) sf.getValue(PARENT); if (("transcript:" + accId).equals(parentFeature)) { - return true; + result.add(sf); } } - return false; + + return result; } /** diff --git a/src/jalview/ext/ensembl/EnsemblCds.java b/src/jalview/ext/ensembl/EnsemblCds.java index 8b2550d..8a71b64 100644 --- a/src/jalview/ext/ensembl/EnsemblCds.java +++ b/src/jalview/ext/ensembl/EnsemblCds.java @@ -102,23 +102,26 @@ public class EnsemblCds extends EnsemblSeqProxy } /** - * Answers true if the sequence feature type is 'CDS' (or a subtype of CDS in - * the Sequence Ontology), and the Parent of 
the feature is the transcript we - * are retrieving + * Answers a list of sequence features (if any) whose type is 'CDS' (or a + * subtype of CDS in the Sequence Ontology), and whose Parent is the + * transcript we are retrieving */ @Override - protected boolean identifiesSequence(SequenceFeature sf, String accId) + protected List getIdentifyingFeatures(SequenceI seq, + String accId) { - if (SequenceOntologyFactory.getInstance().isA(sf.getType(), - SequenceOntologyI.CDS)) + List result = new ArrayList<>(); + List sfs = seq.getFeatures() + .getFeaturesByOntology(SequenceOntologyI.CDS); + for (SequenceFeature sf : sfs) { String parentFeature = (String) sf.getValue(PARENT); if (("transcript:" + accId).equals(parentFeature)) { - return true; + result.add(sf); } } - return false; + return result; } /** @@ -130,7 +133,7 @@ public class EnsemblCds extends EnsemblSeqProxy protected List getCdsRanges(SequenceI dnaSeq) { int len = dnaSeq.getLength(); - List ranges = new ArrayList(); + List ranges = new ArrayList<>(); ranges.add(new int[] { 1, len }); return ranges; } diff --git a/src/jalview/ext/ensembl/EnsemblGene.java b/src/jalview/ext/ensembl/EnsemblGene.java index 7e6f653..36b19e2 100644 --- a/src/jalview/ext/ensembl/EnsemblGene.java +++ b/src/jalview/ext/ensembl/EnsemblGene.java @@ -548,23 +548,27 @@ public class EnsemblGene extends EnsemblSeqProxy } /** - * Answers true for a feature of type 'gene' (or a sub-type of gene in the - * Sequence Ontology), whose ID is the accession we are retrieving + * Answers a list of sequence features (if any) whose type is 'gene' (or a + * subtype of gene in the Sequence Ontology), and whose ID is the accession we + * are retrieving */ @Override - protected boolean identifiesSequence(SequenceFeature sf, String accId) + protected List getIdentifyingFeatures(SequenceI seq, + String accId) { - if (SequenceOntologyFactory.getInstance().isA(sf.getType(), - SequenceOntologyI.GENE)) + List result = new ArrayList<>(); + List sfs = 
seq.getFeatures() + .getFeaturesByOntology(SequenceOntologyI.GENE); + for (SequenceFeature sf : sfs) { // NB features as gff use 'ID'; rest services return as 'id' String id = (String) sf.getValue("ID"); if ((GENE_PREFIX + accId).equalsIgnoreCase(id)) { - return true; + result.add(sf); } } - return false; + return result; } /** @@ -595,17 +599,6 @@ public class EnsemblGene extends EnsemblSeqProxy } /** - * Answers false. This allows an optimisation - a single 'gene' feature is all - * that is needed to identify the positions of the gene on the genomic - * sequence. - */ - @Override - protected boolean isSpliceable() - { - return false; - } - - /** * Override to do nothing as Ensembl doesn't return a protein sequence for a * gene identifier */ diff --git a/src/jalview/ext/ensembl/EnsemblGenome.java b/src/jalview/ext/ensembl/EnsemblGenome.java index bde3c0f..6684e20 100644 --- a/src/jalview/ext/ensembl/EnsemblGenome.java +++ b/src/jalview/ext/ensembl/EnsemblGenome.java @@ -21,6 +21,11 @@ package jalview.ext.ensembl; import jalview.datamodel.SequenceFeature; +import jalview.datamodel.SequenceI; +import jalview.io.gff.SequenceOntologyI; + +import java.util.ArrayList; +import java.util.List; /** * A client to fetch genomic sequence from Ensembl @@ -94,22 +99,32 @@ public class EnsemblGenome extends EnsemblSeqProxy } /** - * Answers true if the sequence feature type is 'transcript' (or a subtype of - * transcript in the Sequence Ontology), and the ID of the feature is the - * transcript we are retrieving + * Answers a list of sequence features (if any) whose type is 'transcript' (or + * a subtype of transcript in the Sequence Ontology), and whose ID is the + * accession we are retrieving. + *

+ * Note we also include features of type "NMD_transcript_variant", although + * not strictly 'transcript' in the SO, as they are used in Ensembl as if they + * were. */ @Override - protected boolean identifiesSequence(SequenceFeature sf, String accId) + protected List getIdentifyingFeatures(SequenceI seq, + String accId) { - if (isTranscript(sf.getType())) + List result = new ArrayList<>(); + List sfs = seq.getFeatures().getFeaturesByOntology( + SequenceOntologyI.TRANSCRIPT, + SequenceOntologyI.NMD_TRANSCRIPT_VARIANT); + for (SequenceFeature sf : sfs) { + // NB features as gff use 'ID'; rest services return as 'id' String id = (String) sf.getValue("ID"); if (("transcript:" + accId).equals(id)) { - return true; + result.add(sf); } } - return false; + return result; } } diff --git a/src/jalview/ext/ensembl/EnsemblProtein.java b/src/jalview/ext/ensembl/EnsemblProtein.java index 99006aa..0280f16 100644 --- a/src/jalview/ext/ensembl/EnsemblProtein.java +++ b/src/jalview/ext/ensembl/EnsemblProtein.java @@ -22,6 +22,10 @@ package jalview.ext.ensembl; import jalview.datamodel.AlignmentI; import jalview.datamodel.SequenceFeature; +import jalview.datamodel.SequenceI; + +import java.util.ArrayList; +import java.util.List; import com.stevesoft.pat.Regex; @@ -106,10 +110,10 @@ public class EnsemblProtein extends EnsemblSeqProxy } @Override - protected boolean identifiesSequence(SequenceFeature sf, String accId) + protected List getIdentifyingFeatures(SequenceI seq, + String accId) { - // not applicable - protein sequence is not a 'subset' of genomic sequence - return false; + return new ArrayList<>(); } @Override diff --git a/src/jalview/ext/ensembl/EnsemblSeqProxy.java b/src/jalview/ext/ensembl/EnsemblSeqProxy.java index f96f1d5..19065f2 100644 --- a/src/jalview/ext/ensembl/EnsemblSeqProxy.java +++ b/src/jalview/ext/ensembl/EnsemblSeqProxy.java @@ -581,8 +581,8 @@ public abstract class EnsemblSeqProxy extends EnsemblRestClient protected MapList 
getGenomicRangesFromFeatures(SequenceI sourceSequence, String accId, int start) { - List sfs = sourceSequence.getFeatures() - .getPositionalFeatures(); + List sfs = getIdentifyingFeatures(sourceSequence, + accId); if (sfs.isEmpty()) { return null; @@ -599,47 +599,31 @@ public abstract class EnsemblSeqProxy extends EnsemblRestClient for (SequenceFeature sf : sfs) { + int strand = sf.getStrand(); + strand = strand == 0 ? 1 : strand; // treat unknown as forward + + if (directionSet && strand != direction) + { + // abort - mix of forward and backward + System.err + .println("Error: forward and backward strand for " + accId); + return null; + } + direction = strand; + directionSet = true; + /* - * accept the target feature type or a specialisation of it - * (e.g. coding_exon for exon) + * add to CDS ranges, semi-sorted forwards/backwards */ - if (identifiesSequence(sf, accId)) + if (strand < 0) { - int strand = sf.getStrand(); - strand = strand == 0 ? 1 : strand; // treat unknown as forward - - if (directionSet && strand != direction) - { - // abort - mix of forward and backward - System.err.println( - "Error: forward and backward strand for " + accId); - return null; - } - direction = strand; - directionSet = true; - - /* - * add to CDS ranges, semi-sorted forwards/backwards - */ - if (strand < 0) - { - regions.add(0, new int[] { sf.getEnd(), sf.getBegin() }); - } - else - { - regions.add(new int[] { sf.getBegin(), sf.getEnd() }); - } - mappedLength += Math.abs(sf.getEnd() - sf.getBegin() + 1); - - if (!isSpliceable()) - { - /* - * 'gene' sequence is contiguous so we can stop as soon as its - * identifying feature has been found - */ - break; - } + regions.add(0, new int[] { sf.getEnd(), sf.getBegin() }); } + else + { + regions.add(new int[] { sf.getBegin(), sf.getEnd() }); + } + mappedLength += Math.abs(sf.getEnd() - sf.getBegin() + 1); } if (regions.isEmpty()) @@ -664,28 +648,6 @@ public abstract class EnsemblSeqProxy extends EnsemblRestClient } /** - * Answers true 
if the sequence being retrieved may occupy discontiguous - * regions on the genomic sequence. - */ - protected boolean isSpliceable() - { - return true; - } - - /** - * Returns true if the sequence feature marks positions of the genomic - * sequence feature which are within the sequence being retrieved. For - * example, an 'exon' feature whose parent is the target transcript marks the - * cdna positions of the transcript. - * - * @param sf - * @param accId - * @return - */ - protected abstract boolean identifiesSequence(SequenceFeature sf, - String accId); - - /** * Answers a list of sequence features that mark positions of the genomic * sequence feature which are within the sequence being retrieved. For * example, an 'exon' feature whose parent is the target transcript marks the diff --git a/test/jalview/datamodel/features/SequenceFeaturesTest.java b/test/jalview/datamodel/features/SequenceFeaturesTest.java index 39d6dce..32987b0 100644 --- a/test/jalview/datamodel/features/SequenceFeaturesTest.java +++ b/test/jalview/datamodel/features/SequenceFeaturesTest.java @@ -13,10 +13,10 @@ import java.util.List; import java.util.Map; import java.util.Set; -import junit.extensions.PA; - import org.testng.annotations.Test; +import junit.extensions.PA; + public class SequenceFeaturesTest { @Test(groups = "Functional") @@ -1005,33 +1005,44 @@ public class SequenceFeaturesTest assertTrue(store.getFeaturesByOntology(new String[] {}).isEmpty()); assertTrue(store.getFeaturesByOntology((String[]) null).isEmpty()); - SequenceFeature sf1 = new SequenceFeature("transcript", "desc", 10, 20, + SequenceFeature transcriptFeature = new SequenceFeature("transcript", "desc", 10, 20, Float.NaN, null); - store.add(sf1); + store.add(transcriptFeature); - // mRNA isA transcript; added here 'as if' non-positional - // just to show that non-positional features are included in results - SequenceFeature sf2 = new SequenceFeature("mRNA", "desc", 0, 0, + /* + * mRNA is a sub-type of transcript; added 
here 'as if' non-positional + * just to show that non-positional features are included in results + */ + SequenceFeature mrnaFeature = new SequenceFeature("mRNA", "desc", 0, 0, Float.NaN, null); - store.add(sf2); + store.add(mrnaFeature); - SequenceFeature sf3 = new SequenceFeature("Pfam", "desc", 30, 40, + SequenceFeature pfamFeature = new SequenceFeature("Pfam", "desc", 30, 40, Float.NaN, null); - store.add(sf3); + store.add(pfamFeature); + /* + * "transcript" matches both itself and the sub-term "mRNA" + */ features = store.getFeaturesByOntology("transcript"); assertEquals(features.size(), 2); - assertTrue(features.contains(sf1)); - assertTrue(features.contains(sf2)); + assertTrue(features.contains(transcriptFeature)); + assertTrue(features.contains(mrnaFeature)); + /* + * "mRNA" matches itself but not parent term "transcript" + */ features = store.getFeaturesByOntology("mRNA"); assertEquals(features.size(), 1); - assertTrue(features.contains(sf2)); + assertTrue(features.contains(mrnaFeature)); + /* + * "Pfam" is not an SO term but is included as an exact match + */ features = store.getFeaturesByOntology("mRNA", "Pfam"); assertEquals(features.size(), 2); - assertTrue(features.contains(sf2)); - assertTrue(features.contains(sf3)); + assertTrue(features.contains(mrnaFeature)); + assertTrue(features.contains(pfamFeature)); features = store.getFeaturesByOntology("sequence_variant"); assertTrue(features.isEmpty()); @@ -1040,7 +1051,7 @@ public class SequenceFeaturesTest @Test(groups = "Functional") public void testSortFeatures() { - List sfs = new ArrayList(); + List sfs = new ArrayList<>(); SequenceFeature sf1 = new SequenceFeature("Pfam", "desc", 30, 80, Float.NaN, null); sfs.add(sf1); diff --git a/test/jalview/ext/ensembl/EnsemblCdnaTest.java b/test/jalview/ext/ensembl/EnsemblCdnaTest.java index 779962c..c9d8deb 100644 --- a/test/jalview/ext/ensembl/EnsemblCdnaTest.java +++ b/test/jalview/ext/ensembl/EnsemblCdnaTest.java @@ -25,6 +25,7 @@ import static 
org.testng.AssertJUnit.assertFalse; import static org.testng.AssertJUnit.assertNull; import static org.testng.AssertJUnit.assertTrue; +import jalview.datamodel.Sequence; import jalview.datamodel.SequenceDummy; import jalview.datamodel.SequenceFeature; import jalview.datamodel.SequenceI; @@ -241,37 +242,51 @@ public class EnsemblCdnaTest * accession id as parent */ @Test(groups = "Functional") - public void testIdentifiesSequence() + public void testGetIdentifyingFeatures() { String accId = "ABC123"; - EnsemblCdna testee = new EnsemblCdna(); + SequenceI seq = new Sequence(accId, "MKLNFRQIE"); - // exon with no parent not valid - SequenceFeature sf = new SequenceFeature("exon", "", 1, 2, 0f, null); - assertFalse(testee.identifiesSequence(sf, accId)); + // exon with no parent: not valid + SequenceFeature sf1 = new SequenceFeature("exon", "", 1, 2, 0f, null); + seq.addSequenceFeature(sf1); - // exon with wrong parent not valid - sf.setValue("Parent", "transcript:XYZ"); - assertFalse(testee.identifiesSequence(sf, accId)); + // exon with wrong parent: not valid + SequenceFeature sf2 = new SequenceFeature("exon", "", 1, 2, 0f, null); + sf2.setValue("Parent", "transcript:XYZ"); + seq.addSequenceFeature(sf2); // exon with right parent is valid - sf.setValue("Parent", "transcript:" + accId); - assertTrue(testee.identifiesSequence(sf, accId)); + SequenceFeature sf3 = new SequenceFeature("exon", "", 1, 2, 0f, null); + sf3.setValue("Parent", "transcript:" + accId); + seq.addSequenceFeature(sf3); // exon sub-type with right parent is valid - sf = new SequenceFeature("coding_exon", "", 1, 2, 0f, null); - sf.setValue("Parent", "transcript:" + accId); - assertTrue(testee.identifiesSequence(sf, accId)); + SequenceFeature sf4 = new SequenceFeature("coding_exon", "", 1, 2, 0f, + null); + sf4.setValue("Parent", "transcript:" + accId); + seq.addSequenceFeature(sf4); // transcript not valid: - sf = new SequenceFeature("transcript", "", 1, 2, 0f, null); - sf.setValue("Parent", 
"transcript:" + accId); - assertFalse(testee.identifiesSequence(sf, accId)); + SequenceFeature sf5 = new SequenceFeature("transcript", "", 1, 2, 0f, + null); + sf5.setValue("Parent", "transcript:" + accId); + seq.addSequenceFeature(sf5); // CDS not valid: - sf = new SequenceFeature("CDS", "", 1, 2, 0f, null); - sf.setValue("Parent", "transcript:" + accId); - assertFalse(testee.identifiesSequence(sf, accId)); + SequenceFeature sf6 = new SequenceFeature("transcript", "", 1, 2, 0f, + null); + sf6.setValue("Parent", "transcript:" + accId); + seq.addSequenceFeature(sf6); + + List sfs = new EnsemblCdna() + .getIdentifyingFeatures(seq, accId); + assertFalse(sfs.contains(sf1)); + assertFalse(sfs.contains(sf2)); + assertTrue(sfs.contains(sf3)); + assertTrue(sfs.contains(sf4)); + assertFalse(sfs.contains(sf5)); + assertFalse(sfs.contains(sf6)); } @Test(groups = "Functional") diff --git a/test/jalview/ext/ensembl/EnsemblCdsTest.java b/test/jalview/ext/ensembl/EnsemblCdsTest.java index 8482c90..a44ab7f 100644 --- a/test/jalview/ext/ensembl/EnsemblCdsTest.java +++ b/test/jalview/ext/ensembl/EnsemblCdsTest.java @@ -24,6 +24,7 @@ import static org.testng.AssertJUnit.assertEquals; import static org.testng.AssertJUnit.assertFalse; import static org.testng.AssertJUnit.assertTrue; +import jalview.datamodel.Sequence; import jalview.datamodel.SequenceDummy; import jalview.datamodel.SequenceFeature; import jalview.datamodel.SequenceI; @@ -152,37 +153,50 @@ public class EnsemblCdsTest * accession id as parent */ @Test(groups = "Functional") - public void testIdentifiesSequence() + public void testGetIdentifyingFeatures() { String accId = "ABC123"; - EnsemblCds testee = new EnsemblCds(); + SequenceI seq = new Sequence(accId, "MKDONS"); // cds with no parent not valid - SequenceFeature sf = new SequenceFeature("CDS", "", 1, 2, 0f, null); - assertFalse(testee.identifiesSequence(sf, accId)); + SequenceFeature sf1 = new SequenceFeature("CDS", "", 1, 2, 0f, null); + 
seq.addSequenceFeature(sf1); // cds with wrong parent not valid - sf.setValue("Parent", "transcript:XYZ"); - assertFalse(testee.identifiesSequence(sf, accId)); + SequenceFeature sf2 = new SequenceFeature("CDS", "", 1, 2, 0f, null); + sf2.setValue("Parent", "transcript:XYZ"); + seq.addSequenceFeature(sf2); // cds with right parent is valid - sf.setValue("Parent", "transcript:" + accId); - assertTrue(testee.identifiesSequence(sf, accId)); + SequenceFeature sf3 = new SequenceFeature("CDS", "", 1, 2, 0f, null); + sf3.setValue("Parent", "transcript:" + accId); + seq.addSequenceFeature(sf3); // cds sub-type with right parent is valid - sf = new SequenceFeature("CDS_predicted", "", 1, 2, 0f, null); - sf.setValue("Parent", "transcript:" + accId); - assertTrue(testee.identifiesSequence(sf, accId)); + SequenceFeature sf4 = new SequenceFeature("CDS_predicted", "", 1, 2, 0f, + null); + sf4.setValue("Parent", "transcript:" + accId); + seq.addSequenceFeature(sf4); // transcript not valid: - sf = new SequenceFeature("transcript", "", 1, 2, 0f, null); - sf.setValue("Parent", "transcript:" + accId); - assertFalse(testee.identifiesSequence(sf, accId)); + SequenceFeature sf5 = new SequenceFeature("transcript", "", 1, 2, 0f, + null); + sf5.setValue("Parent", "transcript:" + accId); + seq.addSequenceFeature(sf5); // exon not valid: - sf = new SequenceFeature("exon", "", 1, 2, 0f, null); - sf.setValue("Parent", "transcript:" + accId); - assertFalse(testee.identifiesSequence(sf, accId)); + SequenceFeature sf6 = new SequenceFeature("exon", "", 1, 2, 0f, null); + sf6.setValue("Parent", "transcript:" + accId); + seq.addSequenceFeature(sf6); + + List sfs = new EnsemblCds().getIdentifyingFeatures(seq, + accId); + assertFalse(sfs.contains(sf1)); + assertFalse(sfs.contains(sf2)); + assertTrue(sfs.contains(sf3)); + assertTrue(sfs.contains(sf4)); + assertFalse(sfs.contains(sf5)); + assertFalse(sfs.contains(sf6)); } @Test(groups = "Functional") diff --git 
a/test/jalview/ext/ensembl/EnsemblGeneTest.java b/test/jalview/ext/ensembl/EnsemblGeneTest.java index 217742d..446b4f7 100644 --- a/test/jalview/ext/ensembl/EnsemblGeneTest.java +++ b/test/jalview/ext/ensembl/EnsemblGeneTest.java @@ -26,6 +26,7 @@ import static org.testng.AssertJUnit.assertTrue; import jalview.api.FeatureSettingsModelI; import jalview.bin.Cache; +import jalview.datamodel.Sequence; import jalview.datamodel.SequenceDummy; import jalview.datamodel.SequenceFeature; import jalview.datamodel.SequenceI; @@ -77,17 +78,9 @@ public class EnsemblGeneTest genomic.setEnd(50000); String geneId = "ABC123"; - // gene at (start+20000) length 501 - // should be ignored - the first 'gene' found defines the whole range - // (note features are found in position order, not addition order) - SequenceFeature sf = new SequenceFeature("gene", "", 20000, 20500, 0f, - null); - sf.setValue("ID", "gene:" + geneId); - sf.setStrand("+"); - genomic.addSequenceFeature(sf); - // gene at (start + 10500) length 101 - sf = new SequenceFeature("gene", "", 10500, 10600, 0f, null); + SequenceFeature sf = new SequenceFeature("gene", "", 10500, 10600, 0f, + null); sf.setValue("ID", "gene:" + geneId); sf.setStrand("+"); genomic.addSequenceFeature(sf); @@ -117,17 +110,9 @@ public class EnsemblGeneTest genomic.setEnd(50000); String geneId = "ABC123"; - // gene at (start+20000) length 501 - // should be ignored - the first 'gene' found defines the whole range - // (real data would only have one such feature) - SequenceFeature sf = new SequenceFeature("ncRNA_gene", "", 20000, - 20500, 0f, null); - sf.setValue("ID", "gene:" + geneId); - sf.setStrand("-"); - genomic.addSequenceFeature(sf); - // gene at (start + 10500) length 101 - sf = new SequenceFeature("gene", "", 10500, 10600, 0f, null); + SequenceFeature sf = new SequenceFeature("gene", "", 10500, 10600, 0f, + null); sf.setValue("ID", "gene:" + geneId); sf.setStrand("+"); genomic.addSequenceFeature(sf); @@ -240,40 +225,48 @@ public class 
EnsemblGeneTest * accession id as ID */ @Test(groups = "Functional") - public void testIdentifiesSequence() + public void testGetIdentifyingFeatures() { String accId = "ABC123"; - EnsemblGene testee = new EnsemblGene(); + SequenceI seq = new Sequence(accId, "HIBEES"); // gene with no ID not valid - SequenceFeature sf = new SequenceFeature("gene", "", 1, 2, 0f, null); - assertFalse(testee.identifiesSequence(sf, accId)); + SequenceFeature sf1 = new SequenceFeature("gene", "", 1, 2, 0f, null); + seq.addSequenceFeature(sf1); // gene with wrong ID not valid - sf.setValue("ID", "gene:XYZ"); - assertFalse(testee.identifiesSequence(sf, accId)); + SequenceFeature sf2 = new SequenceFeature("gene", "", 1, 2, 0f, null); + sf2.setValue("ID", "gene:XYZ"); + seq.addSequenceFeature(sf2); // gene with right ID is valid - sf.setValue("ID", "gene:" + accId); - assertTrue(testee.identifiesSequence(sf, accId)); + SequenceFeature sf3 = new SequenceFeature("gene", "", 1, 2, 0f, null); + sf3.setValue("ID", "gene:" + accId); + seq.addSequenceFeature(sf3); // gene sub-type with right ID is valid - sf = new SequenceFeature("snRNA_gene", "", 1, 2, 0f, null); - sf.setValue("ID", "gene:" + accId); - assertTrue(testee.identifiesSequence(sf, accId)); - - // test is not case-sensitive - assertTrue(testee.identifiesSequence(sf, accId.toLowerCase())); + SequenceFeature sf4 = new SequenceFeature("snRNA_gene", "", 1, 2, 0f, null); + sf4.setValue("ID", "gene:" + accId); + seq.addSequenceFeature(sf4); // transcript not valid: - sf = new SequenceFeature("transcript", "", 1, 2, 0f, null); - sf.setValue("ID", "gene:" + accId); - assertFalse(testee.identifiesSequence(sf, accId)); + SequenceFeature sf5 = new SequenceFeature("transcript", "", 1, 2, 0f, null); + sf5.setValue("ID", "gene:" + accId); + seq.addSequenceFeature(sf5); // exon not valid: - sf = new SequenceFeature("exon", "", 1, 2, 0f, null); - sf.setValue("ID", "gene:" + accId); - assertFalse(testee.identifiesSequence(sf, accId)); + SequenceFeature 
sf6 = new SequenceFeature("exon", "", 1, 2, 0f, null); + sf6.setValue("ID", "gene:" + accId); + seq.addSequenceFeature(sf6); + + List sfs = new EnsemblGene() + .getIdentifyingFeatures(seq, accId); + assertFalse(sfs.contains(sf1)); + assertFalse(sfs.contains(sf2)); + assertTrue(sfs.contains(sf3)); + assertTrue(sfs.contains(sf4)); + assertFalse(sfs.contains(sf5)); + assertFalse(sfs.contains(sf6)); } /** diff --git a/test/jalview/ext/ensembl/EnsemblGenomeTest.java b/test/jalview/ext/ensembl/EnsemblGenomeTest.java index 8687da9..72ee492 100644 --- a/test/jalview/ext/ensembl/EnsemblGenomeTest.java +++ b/test/jalview/ext/ensembl/EnsemblGenomeTest.java @@ -24,6 +24,7 @@ import static org.testng.AssertJUnit.assertEquals; import static org.testng.AssertJUnit.assertFalse; import static org.testng.AssertJUnit.assertTrue; +import jalview.datamodel.Sequence; import jalview.datamodel.SequenceDummy; import jalview.datamodel.SequenceFeature; import jalview.datamodel.SequenceI; @@ -162,43 +163,58 @@ public class EnsemblGenomeTest * accession id as ID */ @Test(groups = "Functional") - public void testIdentifiesSequence() + public void testGetIdentifyingFeatures() { String accId = "ABC123"; - EnsemblGenome testee = new EnsemblGenome(); + SequenceI seq = new Sequence(accId, "HEARTS"); // transcript with no ID not valid - SequenceFeature sf = new SequenceFeature("transcript", "", 1, 2, 0f, + SequenceFeature sf1 = new SequenceFeature("transcript", "", 1, 2, 0f, null); - assertFalse(testee.identifiesSequence(sf, accId)); + seq.addSequenceFeature(sf1); // transcript with wrong ID not valid - sf.setValue("ID", "transcript"); - assertFalse(testee.identifiesSequence(sf, accId)); + SequenceFeature sf2 = new SequenceFeature("transcript", "", 1, 2, 0f, + null); + sf2.setValue("ID", "transcript"); + seq.addSequenceFeature(sf2); // transcript with right ID is valid - sf.setValue("ID", "transcript:" + accId); - assertTrue(testee.identifiesSequence(sf, accId)); + SequenceFeature sf3 = new 
SequenceFeature("transcript", "", 1, 2, 0f, + null); + sf3.setValue("ID", "transcript:" + accId); + seq.addSequenceFeature(sf3); // transcript sub-type with right ID is valid - sf = new SequenceFeature("ncRNA", "", 1, 2, 0f, null); - sf.setValue("ID", "transcript:" + accId); - assertTrue(testee.identifiesSequence(sf, accId)); + SequenceFeature sf4 = new SequenceFeature("ncRNA", "", 1, 2, 0f, null); + sf4.setValue("ID", "transcript:" + accId); + seq.addSequenceFeature(sf4); // Ensembl treats NMD_transcript_variant as if a transcript - sf = new SequenceFeature("NMD_transcript_variant", "", 1, 2, 0f, null); - sf.setValue("ID", "transcript:" + accId); - assertTrue(testee.identifiesSequence(sf, accId)); + SequenceFeature sf5 = new SequenceFeature("NMD_transcript_variant", "", + 1, 2, 0f, null); + sf5.setValue("ID", "transcript:" + accId); + seq.addSequenceFeature(sf5); // gene not valid: - sf = new SequenceFeature("gene", "", 1, 2, 0f, null); - sf.setValue("ID", "transcript:" + accId); - assertFalse(testee.identifiesSequence(sf, accId)); + SequenceFeature sf6 = new SequenceFeature("gene", "", 1, 2, 0f, null); + sf6.setValue("ID", "transcript:" + accId); + seq.addSequenceFeature(sf6); // exon not valid: - sf = new SequenceFeature("exon", "", 1, 2, 0f, null); - sf.setValue("ID", "transcript:" + accId); - assertFalse(testee.identifiesSequence(sf, accId)); + SequenceFeature sf7 = new SequenceFeature("exon", "", 1, 2, 0f, null); + sf7.setValue("ID", "transcript:" + accId); + seq.addSequenceFeature(sf7); + + List sfs = new EnsemblGenome() + .getIdentifyingFeatures(seq, accId); + assertFalse(sfs.contains(sf1)); + assertFalse(sfs.contains(sf2)); + assertTrue(sfs.contains(sf3)); + assertTrue(sfs.contains(sf4)); + assertTrue(sfs.contains(sf5)); + assertFalse(sfs.contains(sf6)); + assertFalse(sfs.contains(sf7)); } } diff --git a/test/jalview/ext/ensembl/EnsemblSeqProxyAdapter.java b/test/jalview/ext/ensembl/EnsemblSeqProxyAdapter.java index 9fad30e..be7bdf2 100644 --- 
a/test/jalview/ext/ensembl/EnsemblSeqProxyAdapter.java +++ b/test/jalview/ext/ensembl/EnsemblSeqProxyAdapter.java @@ -21,6 +21,10 @@ package jalview.ext.ensembl; import jalview.datamodel.SequenceFeature; +import jalview.datamodel.SequenceI; + +import java.util.ArrayList; +import java.util.List; /** * A convenience class to simplify writing unit tests (pending Mockito or @@ -65,9 +69,10 @@ public class EnsemblSeqProxyAdapter extends EnsemblSeqProxy } @Override - protected boolean identifiesSequence(SequenceFeature sf, String accId) + protected List getIdentifyingFeatures(SequenceI seq, + String accId) { - return false; + return new ArrayList<>(); } }