JAL-3076 refactor for more efficient scan of 'gene' features
author gmungoc <g.m.carstairs@dundee.ac.uk>
Fri, 10 Aug 2018 10:58:00 +0000 (11:58 +0100)
committer gmungoc <g.m.carstairs@dundee.ac.uk>
Fri, 10 Aug 2018 10:58:00 +0000 (11:58 +0100)
14 files changed:
src/jalview/datamodel/features/SequenceFeatures.java
src/jalview/datamodel/features/SequenceFeaturesI.java
src/jalview/ext/ensembl/EnsemblCdna.java
src/jalview/ext/ensembl/EnsemblCds.java
src/jalview/ext/ensembl/EnsemblGene.java
src/jalview/ext/ensembl/EnsemblGenome.java
src/jalview/ext/ensembl/EnsemblProtein.java
src/jalview/ext/ensembl/EnsemblSeqProxy.java
test/jalview/datamodel/features/SequenceFeaturesTest.java
test/jalview/ext/ensembl/EnsemblCdnaTest.java
test/jalview/ext/ensembl/EnsemblCdsTest.java
test/jalview/ext/ensembl/EnsemblGeneTest.java
test/jalview/ext/ensembl/EnsemblGenomeTest.java
test/jalview/ext/ensembl/EnsemblSeqProxyAdapter.java

index fcf1b53..727d3ef 100644 (file)
@@ -87,7 +87,7 @@ public class SequenceFeatures implements SequenceFeaturesI
      */
     // featureStore = Collections
     // .synchronizedSortedMap(new TreeMap<String, FeatureStore>());
-    featureStore = new TreeMap<String, FeatureStore>();
+    featureStore = new TreeMap<>();
   }
 
   /**
@@ -382,9 +382,10 @@ public class SequenceFeatures implements SequenceFeaturesI
   }
 
   /**
-   * Answers true if the given type is one of the specified sequence ontology
-   * terms (or a sub-type of one), or if no terms are supplied. Answers false if
-   * filter terms are specified and the given term does not match any of them.
+   * Answers true if the given type matches one of the specified terms (or is a
+   * sub-type of one in the Sequence Ontology), or if no terms are supplied.
+   * Answers false if filter terms are specified and the given term does not
+   * match any of them.
    * 
    * @param type
    * @param soTerm
@@ -399,7 +400,7 @@ public class SequenceFeatures implements SequenceFeaturesI
     SequenceOntologyI so = SequenceOntologyFactory.getInstance();
     for (String term : soTerm)
     {
-      if (so.isA(type, term))
+      if (type.equals(term) || so.isA(type, term))
       {
         return true;
       }
index 80c4f9a..31712b9 100644 (file)
@@ -82,9 +82,9 @@ public interface SequenceFeaturesI
           String group, String... type);
 
   /**
-   * Answers a list of all features stored, whose type either matches one of the
-   * given ontology terms, or is a specialisation of a term in the Sequence
-   * Ontology. Results are returned in no particular guaranteed order.
+   * Answers a list of all features stored, whose type either matches, or is a
+   * specialisation (in the Sequence Ontology) of, one of the given terms.
+   * Results are returned in no particular order.
    * 
    * @param ontologyTerm
    * @return
index 952f01e..7384327 100644 (file)
 package jalview.ext.ensembl;
 
 import jalview.datamodel.SequenceFeature;
+import jalview.datamodel.SequenceI;
 import jalview.io.gff.SequenceOntologyFactory;
 import jalview.io.gff.SequenceOntologyI;
 
+import java.util.ArrayList;
+import java.util.List;
+
 import com.stevesoft.pat.Regex;
 
 /**
@@ -109,23 +113,27 @@ public class EnsemblCdna extends EnsemblSeqProxy
   }
 
   /**
-   * Answers true if the sequence feature type is 'exon' (or a subtype of exon
-   * in the Sequence Ontology), and the Parent of the feature is the transcript
-   * we are retrieving
+   * Answers a list of sequence features (if any) whose type is 'exon' (or a
+   * subtype of exon in the Sequence Ontology), and whose Parent is the
+   * transcript we are retrieving
    */
   @Override
-  protected boolean identifiesSequence(SequenceFeature sf, String accId)
+  protected List<SequenceFeature> getIdentifyingFeatures(SequenceI seq,
+          String accId)
   {
-    if (SequenceOntologyFactory.getInstance().isA(sf.getType(),
-            SequenceOntologyI.EXON))
+    List<SequenceFeature> result = new ArrayList<>();
+    List<SequenceFeature> sfs = seq.getFeatures()
+            .getFeaturesByOntology(SequenceOntologyI.EXON);
+    for (SequenceFeature sf : sfs)
     {
       String parentFeature = (String) sf.getValue(PARENT);
       if (("transcript:" + accId).equals(parentFeature))
       {
-        return true;
+        result.add(sf);
       }
     }
-    return false;
+
+    return result;
   }
 
   /**
index 8b2550d..8a71b64 100644 (file)
@@ -102,23 +102,26 @@ public class EnsemblCds extends EnsemblSeqProxy
   }
 
   /**
-   * Answers true if the sequence feature type is 'CDS' (or a subtype of CDS in
-   * the Sequence Ontology), and the Parent of the feature is the transcript we
-   * are retrieving
+   * Answers a list of sequence features (if any) whose type is 'CDS' (or a
+   * subtype of CDS in the Sequence Ontology), and whose Parent is the
+   * transcript we are retrieving
    */
   @Override
-  protected boolean identifiesSequence(SequenceFeature sf, String accId)
+  protected List<SequenceFeature> getIdentifyingFeatures(SequenceI seq,
+          String accId)
   {
-    if (SequenceOntologyFactory.getInstance().isA(sf.getType(),
-            SequenceOntologyI.CDS))
+    List<SequenceFeature> result = new ArrayList<>();
+    List<SequenceFeature> sfs = seq.getFeatures()
+            .getFeaturesByOntology(SequenceOntologyI.CDS);
+    for (SequenceFeature sf : sfs)
     {
       String parentFeature = (String) sf.getValue(PARENT);
       if (("transcript:" + accId).equals(parentFeature))
       {
-        return true;
+        result.add(sf);
       }
     }
-    return false;
+    return result;
   }
 
   /**
@@ -130,7 +133,7 @@ public class EnsemblCds extends EnsemblSeqProxy
   protected List<int[]> getCdsRanges(SequenceI dnaSeq)
   {
     int len = dnaSeq.getLength();
-    List<int[]> ranges = new ArrayList<int[]>();
+    List<int[]> ranges = new ArrayList<>();
     ranges.add(new int[] { 1, len });
     return ranges;
   }
index 7e6f653..36b19e2 100644 (file)
@@ -548,23 +548,27 @@ public class EnsemblGene extends EnsemblSeqProxy
   }
 
   /**
-   * Answers true for a feature of type 'gene' (or a sub-type of gene in the
-   * Sequence Ontology), whose ID is the accession we are retrieving
+   * Answers a list of sequence features (if any) whose type is 'gene' (or a
+   * subtype of gene in the Sequence Ontology), and whose ID is the accession we
+   * are retrieving
    */
   @Override
-  protected boolean identifiesSequence(SequenceFeature sf, String accId)
+  protected List<SequenceFeature> getIdentifyingFeatures(SequenceI seq,
+          String accId)
   {
-    if (SequenceOntologyFactory.getInstance().isA(sf.getType(),
-            SequenceOntologyI.GENE))
+    List<SequenceFeature> result = new ArrayList<>();
+    List<SequenceFeature> sfs = seq.getFeatures()
+            .getFeaturesByOntology(SequenceOntologyI.GENE);
+    for (SequenceFeature sf : sfs)
     {
       // NB features as gff use 'ID'; rest services return as 'id'
       String id = (String) sf.getValue("ID");
       if ((GENE_PREFIX + accId).equalsIgnoreCase(id))
       {
-        return true;
+        result.add(sf);
       }
     }
-    return false;
+    return result;
   }
 
   /**
@@ -595,17 +599,6 @@ public class EnsemblGene extends EnsemblSeqProxy
   }
 
   /**
-   * Answers false. This allows an optimisation - a single 'gene' feature is all
-   * that is needed to identify the positions of the gene on the genomic
-   * sequence.
-   */
-  @Override
-  protected boolean isSpliceable()
-  {
-    return false;
-  }
-
-  /**
    * Override to do nothing as Ensembl doesn't return a protein sequence for a
    * gene identifier
    */
index bde3c0f..6684e20 100644 (file)
 package jalview.ext.ensembl;
 
 import jalview.datamodel.SequenceFeature;
+import jalview.datamodel.SequenceI;
+import jalview.io.gff.SequenceOntologyI;
+
+import java.util.ArrayList;
+import java.util.List;
 
 /**
  * A client to fetch genomic sequence from Ensembl
@@ -94,22 +99,32 @@ public class EnsemblGenome extends EnsemblSeqProxy
   }
 
   /**
-   * Answers true if the sequence feature type is 'transcript' (or a subtype of
-   * transcript in the Sequence Ontology), and the ID of the feature is the
-   * transcript we are retrieving
+   * Answers a list of sequence features (if any) whose type is 'transcript' (or
+   * a subtype of transcript in the Sequence Ontology), and whose ID is the
+   * accession we are retrieving.
+   * <p>
+   * Note we also include features of type "NMD_transcript_variant", although
+   * not strictly 'transcript' in the SO, as they are used in Ensembl as if they
+   * were.
    */
   @Override
-  protected boolean identifiesSequence(SequenceFeature sf, String accId)
+  protected List<SequenceFeature> getIdentifyingFeatures(SequenceI seq,
+          String accId)
   {
-    if (isTranscript(sf.getType()))
+    List<SequenceFeature> result = new ArrayList<>();
+    List<SequenceFeature> sfs = seq.getFeatures().getFeaturesByOntology(
+            SequenceOntologyI.TRANSCRIPT,
+            SequenceOntologyI.NMD_TRANSCRIPT_VARIANT);
+    for (SequenceFeature sf : sfs)
     {
+      // NB features as gff use 'ID'; rest services return as 'id'
       String id = (String) sf.getValue("ID");
       if (("transcript:" + accId).equals(id))
       {
-        return true;
+        result.add(sf);
       }
     }
-    return false;
+    return result;
   }
 
 }
index 99006aa..0280f16 100644 (file)
@@ -22,6 +22,10 @@ package jalview.ext.ensembl;
 
 import jalview.datamodel.AlignmentI;
 import jalview.datamodel.SequenceFeature;
+import jalview.datamodel.SequenceI;
+
+import java.util.ArrayList;
+import java.util.List;
 
 import com.stevesoft.pat.Regex;
 
@@ -106,10 +110,10 @@ public class EnsemblProtein extends EnsemblSeqProxy
   }
 
   @Override
-  protected boolean identifiesSequence(SequenceFeature sf, String accId)
+  protected List<SequenceFeature> getIdentifyingFeatures(SequenceI seq,
+          String accId)
   {
-    // not applicable - protein sequence is not a 'subset' of genomic sequence
-    return false;
+    return new ArrayList<>();
   }
 
   @Override
index f96f1d5..19065f2 100644 (file)
@@ -581,8 +581,8 @@ public abstract class EnsemblSeqProxy extends EnsemblRestClient
   protected MapList getGenomicRangesFromFeatures(SequenceI sourceSequence,
           String accId, int start)
   {
-    List<SequenceFeature> sfs = sourceSequence.getFeatures()
-            .getPositionalFeatures();
+    List<SequenceFeature> sfs = getIdentifyingFeatures(sourceSequence,
+            accId);
     if (sfs.isEmpty())
     {
       return null;
@@ -599,47 +599,31 @@ public abstract class EnsemblSeqProxy extends EnsemblRestClient
 
     for (SequenceFeature sf : sfs)
     {
+      int strand = sf.getStrand();
+      strand = strand == 0 ? 1 : strand; // treat unknown as forward
+
+      if (directionSet && strand != direction)
+      {
+        // abort - mix of forward and backward
+        System.err
+                .println("Error: forward and backward strand for " + accId);
+        return null;
+      }
+      direction = strand;
+      directionSet = true;
+
       /*
-       * accept the target feature type or a specialisation of it
-       * (e.g. coding_exon for exon)
+       * add to CDS ranges, semi-sorted forwards/backwards
        */
-      if (identifiesSequence(sf, accId))
+      if (strand < 0)
       {
-        int strand = sf.getStrand();
-        strand = strand == 0 ? 1 : strand; // treat unknown as forward
-
-        if (directionSet && strand != direction)
-        {
-          // abort - mix of forward and backward
-          System.err.println(
-                  "Error: forward and backward strand for " + accId);
-          return null;
-        }
-        direction = strand;
-        directionSet = true;
-
-        /*
-         * add to CDS ranges, semi-sorted forwards/backwards
-         */
-        if (strand < 0)
-        {
-          regions.add(0, new int[] { sf.getEnd(), sf.getBegin() });
-        }
-        else
-        {
-          regions.add(new int[] { sf.getBegin(), sf.getEnd() });
-        }
-        mappedLength += Math.abs(sf.getEnd() - sf.getBegin() + 1);
-
-        if (!isSpliceable())
-        {
-          /*
-           * 'gene' sequence is contiguous so we can stop as soon as its
-           * identifying feature has been found
-           */
-          break;
-        }
+        regions.add(0, new int[] { sf.getEnd(), sf.getBegin() });
       }
+      else
+      {
+        regions.add(new int[] { sf.getBegin(), sf.getEnd() });
+      }
+      mappedLength += Math.abs(sf.getEnd() - sf.getBegin() + 1);
     }
 
     if (regions.isEmpty())
@@ -664,28 +648,6 @@ public abstract class EnsemblSeqProxy extends EnsemblRestClient
   }
 
   /**
-   * Answers true if the sequence being retrieved may occupy discontiguous
-   * regions on the genomic sequence.
-   */
-  protected boolean isSpliceable()
-  {
-    return true;
-  }
-
-  /**
-   * Returns true if the sequence feature marks positions of the genomic
-   * sequence feature which are within the sequence being retrieved. For
-   * example, an 'exon' feature whose parent is the target transcript marks the
-   * cdna positions of the transcript.
-   * 
-   * @param sf
-   * @param accId
-   * @return
-   */
-  protected abstract boolean identifiesSequence(SequenceFeature sf,
-          String accId);
-
-  /**
    * Answers a list of sequence features that mark positions of the genomic
    * sequence feature which are within the sequence being retrieved. For
    * example, an 'exon' feature whose parent is the target transcript marks the
index 39d6dce..32987b0 100644 (file)
@@ -13,10 +13,10 @@ import java.util.List;
 import java.util.Map;
 import java.util.Set;
 
-import junit.extensions.PA;
-
 import org.testng.annotations.Test;
 
+import junit.extensions.PA;
+
 public class SequenceFeaturesTest
 {
   @Test(groups = "Functional")
@@ -1005,33 +1005,44 @@ public class SequenceFeaturesTest
     assertTrue(store.getFeaturesByOntology(new String[] {}).isEmpty());
     assertTrue(store.getFeaturesByOntology((String[]) null).isEmpty());
   
-    SequenceFeature sf1 = new SequenceFeature("transcript", "desc", 10, 20,
+    SequenceFeature transcriptFeature = new SequenceFeature("transcript", "desc", 10, 20,
             Float.NaN, null);
-    store.add(sf1);
+    store.add(transcriptFeature);
 
-    // mRNA isA transcript; added here 'as if' non-positional
-    // just to show that non-positional features are included in results
-    SequenceFeature sf2 = new SequenceFeature("mRNA", "desc", 0, 0,
+    /*
+     * mRNA is a sub-type of transcript; added here 'as if' non-positional
+     * just to show that non-positional features are included in results
+     */
+    SequenceFeature mrnaFeature = new SequenceFeature("mRNA", "desc", 0, 0,
             Float.NaN, null);
-    store.add(sf2);
+    store.add(mrnaFeature);
 
-    SequenceFeature sf3 = new SequenceFeature("Pfam", "desc", 30, 40,
+    SequenceFeature pfamFeature = new SequenceFeature("Pfam", "desc", 30, 40,
             Float.NaN, null);
-    store.add(sf3);
+    store.add(pfamFeature);
 
+    /*
+     * "transcript" matches both itself and the sub-term "mRNA"
+     */
     features = store.getFeaturesByOntology("transcript");
     assertEquals(features.size(), 2);
-    assertTrue(features.contains(sf1));
-    assertTrue(features.contains(sf2));
+    assertTrue(features.contains(transcriptFeature));
+    assertTrue(features.contains(mrnaFeature));
 
+    /*
+     * "mRNA" matches itself but not parent term "transcript"
+     */
     features = store.getFeaturesByOntology("mRNA");
     assertEquals(features.size(), 1);
-    assertTrue(features.contains(sf2));
+    assertTrue(features.contains(mrnaFeature));
 
+    /*
+     * "Pfam" is not an SO term but is included as an exact match
+     */
     features = store.getFeaturesByOntology("mRNA", "Pfam");
     assertEquals(features.size(), 2);
-    assertTrue(features.contains(sf2));
-    assertTrue(features.contains(sf3));
+    assertTrue(features.contains(mrnaFeature));
+    assertTrue(features.contains(pfamFeature));
 
     features = store.getFeaturesByOntology("sequence_variant");
     assertTrue(features.isEmpty());
@@ -1040,7 +1051,7 @@ public class SequenceFeaturesTest
   @Test(groups = "Functional")
   public void testSortFeatures()
   {
-    List<SequenceFeature> sfs = new ArrayList<SequenceFeature>();
+    List<SequenceFeature> sfs = new ArrayList<>();
     SequenceFeature sf1 = new SequenceFeature("Pfam", "desc", 30, 80,
             Float.NaN, null);
     sfs.add(sf1);
index 779962c..c9d8deb 100644 (file)
@@ -25,6 +25,7 @@ import static org.testng.AssertJUnit.assertFalse;
 import static org.testng.AssertJUnit.assertNull;
 import static org.testng.AssertJUnit.assertTrue;
 
+import jalview.datamodel.Sequence;
 import jalview.datamodel.SequenceDummy;
 import jalview.datamodel.SequenceFeature;
 import jalview.datamodel.SequenceI;
@@ -241,37 +242,51 @@ public class EnsemblCdnaTest
    * accession id as parent
    */
   @Test(groups = "Functional")
-  public void testIdentifiesSequence()
+  public void testGetIdentifyingFeatures()
   {
     String accId = "ABC123";
-    EnsemblCdna testee = new EnsemblCdna();
+    SequenceI seq = new Sequence(accId, "MKLNFRQIE");
 
-    // exon with no parent not valid
-    SequenceFeature sf = new SequenceFeature("exon", "", 1, 2, 0f, null);
-    assertFalse(testee.identifiesSequence(sf, accId));
+    // exon with no parent: not valid
+    SequenceFeature sf1 = new SequenceFeature("exon", "", 1, 2, 0f, null);
+    seq.addSequenceFeature(sf1);
 
-    // exon with wrong parent not valid
-    sf.setValue("Parent", "transcript:XYZ");
-    assertFalse(testee.identifiesSequence(sf, accId));
+    // exon with wrong parent: not valid
+    SequenceFeature sf2 = new SequenceFeature("exon", "", 1, 2, 0f, null);
+    sf2.setValue("Parent", "transcript:XYZ");
+    seq.addSequenceFeature(sf2);
 
     // exon with right parent is valid
-    sf.setValue("Parent", "transcript:" + accId);
-    assertTrue(testee.identifiesSequence(sf, accId));
+    SequenceFeature sf3 = new SequenceFeature("exon", "", 1, 2, 0f, null);
+    sf3.setValue("Parent", "transcript:" + accId);
+    seq.addSequenceFeature(sf3);
 
     // exon sub-type with right parent is valid
-    sf = new SequenceFeature("coding_exon", "", 1, 2, 0f, null);
-    sf.setValue("Parent", "transcript:" + accId);
-    assertTrue(testee.identifiesSequence(sf, accId));
+    SequenceFeature sf4 = new SequenceFeature("coding_exon", "", 1, 2, 0f,
+            null);
+    sf4.setValue("Parent", "transcript:" + accId);
+    seq.addSequenceFeature(sf4);
 
     // transcript not valid:
-    sf = new SequenceFeature("transcript", "", 1, 2, 0f, null);
-    sf.setValue("Parent", "transcript:" + accId);
-    assertFalse(testee.identifiesSequence(sf, accId));
+    SequenceFeature sf5 = new SequenceFeature("transcript", "", 1, 2, 0f,
+            null);
+    sf5.setValue("Parent", "transcript:" + accId);
+    seq.addSequenceFeature(sf5);
 
     // CDS not valid:
-    sf = new SequenceFeature("CDS", "", 1, 2, 0f, null);
-    sf.setValue("Parent", "transcript:" + accId);
-    assertFalse(testee.identifiesSequence(sf, accId));
+    SequenceFeature sf6 = new SequenceFeature("transcript", "", 1, 2, 0f,
+            null);
+    sf6.setValue("Parent", "transcript:" + accId);
+    seq.addSequenceFeature(sf6);
+
+    List<SequenceFeature> sfs = new EnsemblCdna()
+            .getIdentifyingFeatures(seq, accId);
+    assertFalse(sfs.contains(sf1));
+    assertFalse(sfs.contains(sf2));
+    assertTrue(sfs.contains(sf3));
+    assertTrue(sfs.contains(sf4));
+    assertFalse(sfs.contains(sf5));
+    assertFalse(sfs.contains(sf6));
   }
 
   @Test(groups = "Functional")
index 8482c90..a44ab7f 100644 (file)
@@ -24,6 +24,7 @@ import static org.testng.AssertJUnit.assertEquals;
 import static org.testng.AssertJUnit.assertFalse;
 import static org.testng.AssertJUnit.assertTrue;
 
+import jalview.datamodel.Sequence;
 import jalview.datamodel.SequenceDummy;
 import jalview.datamodel.SequenceFeature;
 import jalview.datamodel.SequenceI;
@@ -152,37 +153,50 @@ public class EnsemblCdsTest
    * accession id as parent
    */
   @Test(groups = "Functional")
-  public void testIdentifiesSequence()
+  public void testGetIdentifyingFeatures()
   {
     String accId = "ABC123";
-    EnsemblCds testee = new EnsemblCds();
+    SequenceI seq = new Sequence(accId, "MKDONS");
 
     // cds with no parent not valid
-    SequenceFeature sf = new SequenceFeature("CDS", "", 1, 2, 0f, null);
-    assertFalse(testee.identifiesSequence(sf, accId));
+    SequenceFeature sf1 = new SequenceFeature("CDS", "", 1, 2, 0f, null);
+    seq.addSequenceFeature(sf1);
 
     // cds with wrong parent not valid
-    sf.setValue("Parent", "transcript:XYZ");
-    assertFalse(testee.identifiesSequence(sf, accId));
+    SequenceFeature sf2 = new SequenceFeature("CDS", "", 1, 2, 0f, null);
+    sf2.setValue("Parent", "transcript:XYZ");
+    seq.addSequenceFeature(sf2);
 
     // cds with right parent is valid
-    sf.setValue("Parent", "transcript:" + accId);
-    assertTrue(testee.identifiesSequence(sf, accId));
+    SequenceFeature sf3 = new SequenceFeature("CDS", "", 1, 2, 0f, null);
+    sf3.setValue("Parent", "transcript:" + accId);
+    seq.addSequenceFeature(sf3);
 
     // cds sub-type with right parent is valid
-    sf = new SequenceFeature("CDS_predicted", "", 1, 2, 0f, null);
-    sf.setValue("Parent", "transcript:" + accId);
-    assertTrue(testee.identifiesSequence(sf, accId));
+    SequenceFeature sf4 = new SequenceFeature("CDS_predicted", "", 1, 2, 0f,
+            null);
+    sf4.setValue("Parent", "transcript:" + accId);
+    seq.addSequenceFeature(sf4);
 
     // transcript not valid:
-    sf = new SequenceFeature("transcript", "", 1, 2, 0f, null);
-    sf.setValue("Parent", "transcript:" + accId);
-    assertFalse(testee.identifiesSequence(sf, accId));
+    SequenceFeature sf5 = new SequenceFeature("transcript", "", 1, 2, 0f,
+            null);
+    sf5.setValue("Parent", "transcript:" + accId);
+    seq.addSequenceFeature(sf5);
 
     // exon not valid:
-    sf = new SequenceFeature("exon", "", 1, 2, 0f, null);
-    sf.setValue("Parent", "transcript:" + accId);
-    assertFalse(testee.identifiesSequence(sf, accId));
+    SequenceFeature sf6 = new SequenceFeature("exon", "", 1, 2, 0f, null);
+    sf6.setValue("Parent", "transcript:" + accId);
+    seq.addSequenceFeature(sf6);
+
+    List<SequenceFeature> sfs = new EnsemblCds().getIdentifyingFeatures(seq,
+            accId);
+    assertFalse(sfs.contains(sf1));
+    assertFalse(sfs.contains(sf2));
+    assertTrue(sfs.contains(sf3));
+    assertTrue(sfs.contains(sf4));
+    assertFalse(sfs.contains(sf5));
+    assertFalse(sfs.contains(sf6));
   }
 
   @Test(groups = "Functional")
index 217742d..446b4f7 100644 (file)
@@ -26,6 +26,7 @@ import static org.testng.AssertJUnit.assertTrue;
 
 import jalview.api.FeatureSettingsModelI;
 import jalview.bin.Cache;
+import jalview.datamodel.Sequence;
 import jalview.datamodel.SequenceDummy;
 import jalview.datamodel.SequenceFeature;
 import jalview.datamodel.SequenceI;
@@ -77,17 +78,9 @@ public class EnsemblGeneTest
     genomic.setEnd(50000);
     String geneId = "ABC123";
 
-    // gene at (start+20000) length 501
-    // should be ignored - the first 'gene' found defines the whole range
-    // (note features are found in position order, not addition order)
-    SequenceFeature sf = new SequenceFeature("gene", "", 20000, 20500, 0f,
-            null);
-    sf.setValue("ID", "gene:" + geneId);
-    sf.setStrand("+");
-    genomic.addSequenceFeature(sf);
-
     // gene at (start + 10500) length 101
-    sf = new SequenceFeature("gene", "", 10500, 10600, 0f, null);
+    SequenceFeature sf = new SequenceFeature("gene", "", 10500, 10600, 0f,
+            null);
     sf.setValue("ID", "gene:" + geneId);
     sf.setStrand("+");
     genomic.addSequenceFeature(sf);
@@ -117,17 +110,9 @@ public class EnsemblGeneTest
     genomic.setEnd(50000);
     String geneId = "ABC123";
 
-    // gene at (start+20000) length 501
-    // should be ignored - the first 'gene' found defines the whole range
-    // (real data would only have one such feature)
-    SequenceFeature sf = new SequenceFeature("ncRNA_gene", "", 20000,
-            20500, 0f, null);
-    sf.setValue("ID", "gene:" + geneId);
-    sf.setStrand("-");
-    genomic.addSequenceFeature(sf);
-
     // gene at (start + 10500) length 101
-    sf = new SequenceFeature("gene", "", 10500, 10600, 0f, null);
+    SequenceFeature sf = new SequenceFeature("gene", "", 10500, 10600, 0f,
+            null);
     sf.setValue("ID", "gene:" + geneId);
     sf.setStrand("+");
     genomic.addSequenceFeature(sf);
@@ -240,40 +225,48 @@ public class EnsemblGeneTest
    * accession id as ID
    */
   @Test(groups = "Functional")
-  public void testIdentifiesSequence()
+  public void testGetIdentifyingFeatures()
   {
     String accId = "ABC123";
-    EnsemblGene testee = new EnsemblGene();
+    SequenceI seq = new Sequence(accId, "HIBEES");
 
     // gene with no ID not valid
-    SequenceFeature sf = new SequenceFeature("gene", "", 1, 2, 0f, null);
-    assertFalse(testee.identifiesSequence(sf, accId));
+    SequenceFeature sf1 = new SequenceFeature("gene", "", 1, 2, 0f, null);
+    seq.addSequenceFeature(sf1);
 
     // gene with wrong ID not valid
-    sf.setValue("ID", "gene:XYZ");
-    assertFalse(testee.identifiesSequence(sf, accId));
+    SequenceFeature sf2 = new SequenceFeature("gene", "", 1, 2, 0f, null);
+    sf2.setValue("ID", "gene:XYZ");
+    seq.addSequenceFeature(sf2);
 
     // gene with right ID is valid
-    sf.setValue("ID", "gene:" + accId);
-    assertTrue(testee.identifiesSequence(sf, accId));
+    SequenceFeature sf3 = new SequenceFeature("gene", "", 1, 2, 0f, null);
+    sf3.setValue("ID", "gene:" + accId);
+    seq.addSequenceFeature(sf3);
 
     // gene sub-type with right ID is valid
-    sf = new SequenceFeature("snRNA_gene", "", 1, 2, 0f, null);
-    sf.setValue("ID", "gene:" + accId);
-    assertTrue(testee.identifiesSequence(sf, accId));
-
-    // test is not case-sensitive
-    assertTrue(testee.identifiesSequence(sf, accId.toLowerCase()));
+    SequenceFeature sf4 = new SequenceFeature("snRNA_gene", "", 1, 2, 0f, null);
+    sf4.setValue("ID", "gene:" + accId);
+    seq.addSequenceFeature(sf4);
 
     // transcript not valid:
-    sf = new SequenceFeature("transcript", "", 1, 2, 0f, null);
-    sf.setValue("ID", "gene:" + accId);
-    assertFalse(testee.identifiesSequence(sf, accId));
+    SequenceFeature sf5 = new SequenceFeature("transcript", "", 1, 2, 0f, null);
+    sf5.setValue("ID", "gene:" + accId);
+    seq.addSequenceFeature(sf5);
 
     // exon not valid:
-    sf = new SequenceFeature("exon", "", 1, 2, 0f, null);
-    sf.setValue("ID", "gene:" + accId);
-    assertFalse(testee.identifiesSequence(sf, accId));
+    SequenceFeature sf6 = new SequenceFeature("exon", "", 1, 2, 0f, null);
+    sf6.setValue("ID", "gene:" + accId);
+    seq.addSequenceFeature(sf6);
+    
+    List<SequenceFeature> sfs = new EnsemblGene()
+            .getIdentifyingFeatures(seq, accId);
+    assertFalse(sfs.contains(sf1));
+    assertFalse(sfs.contains(sf2));
+    assertTrue(sfs.contains(sf3));
+    assertTrue(sfs.contains(sf4));
+    assertFalse(sfs.contains(sf5));
+    assertFalse(sfs.contains(sf6));
   }
 
   /**
index 8687da9..72ee492 100644 (file)
@@ -24,6 +24,7 @@ import static org.testng.AssertJUnit.assertEquals;
 import static org.testng.AssertJUnit.assertFalse;
 import static org.testng.AssertJUnit.assertTrue;
 
+import jalview.datamodel.Sequence;
 import jalview.datamodel.SequenceDummy;
 import jalview.datamodel.SequenceFeature;
 import jalview.datamodel.SequenceI;
@@ -162,43 +163,58 @@ public class EnsemblGenomeTest
    * accession id as ID
    */
   @Test(groups = "Functional")
-  public void testIdentifiesSequence()
+  public void testGetIdentifyingFeatures()
   {
     String accId = "ABC123";
-    EnsemblGenome testee = new EnsemblGenome();
+    SequenceI seq = new Sequence(accId, "HEARTS");
 
     // transcript with no ID not valid
-    SequenceFeature sf = new SequenceFeature("transcript", "", 1, 2, 0f,
+    SequenceFeature sf1 = new SequenceFeature("transcript", "", 1, 2, 0f,
             null);
-    assertFalse(testee.identifiesSequence(sf, accId));
+    seq.addSequenceFeature(sf1);
 
     // transcript with wrong ID not valid
-    sf.setValue("ID", "transcript");
-    assertFalse(testee.identifiesSequence(sf, accId));
+    SequenceFeature sf2 = new SequenceFeature("transcript", "", 1, 2, 0f,
+            null);
+    sf2.setValue("ID", "transcript");
+    seq.addSequenceFeature(sf2);
 
     // transcript with right ID is valid
-    sf.setValue("ID", "transcript:" + accId);
-    assertTrue(testee.identifiesSequence(sf, accId));
+    SequenceFeature sf3 = new SequenceFeature("transcript", "", 1, 2, 0f,
+            null);
+    sf3.setValue("ID", "transcript:" + accId);
+    seq.addSequenceFeature(sf3);
 
     // transcript sub-type with right ID is valid
-    sf = new SequenceFeature("ncRNA", "", 1, 2, 0f, null);
-    sf.setValue("ID", "transcript:" + accId);
-    assertTrue(testee.identifiesSequence(sf, accId));
+    SequenceFeature sf4 = new SequenceFeature("ncRNA", "", 1, 2, 0f, null);
+    sf4.setValue("ID", "transcript:" + accId);
+    seq.addSequenceFeature(sf4);
 
     // Ensembl treats NMD_transcript_variant as if a transcript
-    sf = new SequenceFeature("NMD_transcript_variant", "", 1, 2, 0f, null);
-    sf.setValue("ID", "transcript:" + accId);
-    assertTrue(testee.identifiesSequence(sf, accId));
+    SequenceFeature sf5 = new SequenceFeature("NMD_transcript_variant", "",
+            1, 2, 0f, null);
+    sf5.setValue("ID", "transcript:" + accId);
+    seq.addSequenceFeature(sf5);
 
     // gene not valid:
-    sf = new SequenceFeature("gene", "", 1, 2, 0f, null);
-    sf.setValue("ID", "transcript:" + accId);
-    assertFalse(testee.identifiesSequence(sf, accId));
+    SequenceFeature sf6 = new SequenceFeature("gene", "", 1, 2, 0f, null);
+    sf6.setValue("ID", "transcript:" + accId);
+    seq.addSequenceFeature(sf6);
 
     // exon not valid:
-    sf = new SequenceFeature("exon", "", 1, 2, 0f, null);
-    sf.setValue("ID", "transcript:" + accId);
-    assertFalse(testee.identifiesSequence(sf, accId));
+    SequenceFeature sf7 = new SequenceFeature("exon", "", 1, 2, 0f, null);
+    sf7.setValue("ID", "transcript:" + accId);
+    seq.addSequenceFeature(sf7);
+
+    List<SequenceFeature> sfs = new EnsemblGenome()
+            .getIdentifyingFeatures(seq, accId);
+    assertFalse(sfs.contains(sf1));
+    assertFalse(sfs.contains(sf2));
+    assertTrue(sfs.contains(sf3));
+    assertTrue(sfs.contains(sf4));
+    assertTrue(sfs.contains(sf5));
+    assertFalse(sfs.contains(sf6));
+    assertFalse(sfs.contains(sf7));
   }
 
 }
index 9fad30e..be7bdf2 100644 (file)
 package jalview.ext.ensembl;
 
 import jalview.datamodel.SequenceFeature;
+import jalview.datamodel.SequenceI;
+
+import java.util.ArrayList;
+import java.util.List;
 
 /**
  * A convenience class to simplify writing unit tests (pending Mockito or
@@ -65,9 +69,10 @@ public class EnsemblSeqProxyAdapter extends EnsemblSeqProxy
   }
 
   @Override
-  protected boolean identifiesSequence(SequenceFeature sf, String accId)
+  protected List<SequenceFeature> getIdentifyingFeatures(SequenceI seq,
+          String accId)
   {
-    return false;
+    return new ArrayList<>();
   }
 
 }