JAL-2525 get sequence features for Ontology term(s), tidy feature sort

author gmungoc <g.m.carstairs@dundee.ac.uk>

Fri, 12 May 2017 15:00:20 +0000 (16:00 +0100)

committer gmungoc <g.m.carstairs@dundee.ac.uk>

Fri, 12 May 2017 15:00:20 +0000 (16:00 +0100)
author gmungoc <g.m.carstairs@dundee.ac.uk>
Fri, 12 May 2017 15:00:20 +0000 (16:00 +0100)
committer gmungoc <g.m.carstairs@dundee.ac.uk>
Fri, 12 May 2017 15:00:20 +0000 (16:00 +0100)
diff --git a/src/jalview/analysis/AlignmentUtils.java b/src/jalview/analysis/AlignmentUtils.java

index 955de28..7b867ac 100644 (file)
--- a/src/jalview/analysis/AlignmentUtils.java
+++ b/src/jalview/analysis/AlignmentUtils.java
@@ -35,14 +35,14 @@ import jalview.datamodel.Sequence;
  import jalview.datamodel.SequenceFeature;
  import jalview.datamodel.SequenceGroup;
  import jalview.datamodel.SequenceI;
-import jalview.io.gff.SequenceOntologyFactory;
+import jalview.datamodel.features.SequenceFeatures;
  import jalview.io.gff.SequenceOntologyI;
  import jalview.schemes.ResidueProperties;
  import jalview.util.Comparison;
  import jalview.util.DBRefUtils;
+import jalview.util.IntRangeComparator;
  import jalview.util.MapList;
  import jalview.util.MappingUtils;
-import jalview.util.RangeComparator;
  import jalview.util.StringUtils;
  
  import java.io.UnsupportedEncodingException;
@@ -51,7 +51,6 @@ import java.util.ArrayList;
  import java.util.Arrays;
  import java.util.Collection;
  import java.util.Collections;
-import java.util.Comparator;
  import java.util.HashMap;
  import java.util.HashSet;
  import java.util.Iterator;
@@ -2055,11 +2054,11 @@ public class AlignmentUtils
     * 
     * @param fromSeq
     * @param toSeq
+   * @param mapping
+   *          the mapping from 'fromSeq' to 'toSeq'
     * @param select
     *          if not null, only features of this type are copied (including
     *          subtypes in the Sequence Ontology)
-   * @param mapping
-   *          the mapping from 'fromSeq' to 'toSeq'
     * @param omitting
     */
    public static int transferFeatures(SequenceI fromSeq, SequenceI toSeq,
@@ -2071,76 +2070,74 @@ public class AlignmentUtils
        copyTo = copyTo.getDatasetSequence();
      }
  
-    SequenceOntologyI so = SequenceOntologyFactory.getInstance();
+    /*
+     * get features, optionally restricted by an ontology term
+     */
+    List<SequenceFeature> sfs = select == null ? fromSeq.getFeatures()
+            .getPositionalFeatures() : fromSeq.getFeatures()
+            .getFeaturesByOntology(select);
+
      int count = 0;
-    SequenceFeature[] sfs = fromSeq.getSequenceFeatures();
-    if (sfs != null)
+    for (SequenceFeature sf : sfs)
      {
-      for (SequenceFeature sf : sfs)
+      String type = sf.getType();
+      boolean omit = false;
+      for (String toOmit : omitting)
        {
-        String type = sf.getType();
-        if (select != null && !so.isA(type, select))
+        if (type.equals(toOmit))
          {
-          continue;
-        }
-        boolean omit = false;
-        for (String toOmit : omitting)
-        {
-          if (type.equals(toOmit))
-          {
-            omit = true;
-          }
-        }
-        if (omit)
-        {
-          continue;
+          omit = true;
          }
+      }
+      if (omit)
+      {
+        continue;
+      }
  
-        /*
-         * locate the mapped range - null if either start or end is
-         * not mapped (no partial overlaps are calculated)
-         */
-        int start = sf.getBegin();
-        int end = sf.getEnd();
-        int[] mappedTo = mapping.locateInTo(start, end);
-        /*
-         * if whole exon range doesn't map, try interpreting it
-         * as 5' or 3' exon overlapping the CDS range
-         */
-        if (mappedTo == null)
-        {
-          mappedTo = mapping.locateInTo(end, end);
-          if (mappedTo != null)
-          {
-            /*
-             * end of exon is in CDS range - 5' overlap
-             * to a range from the start of the peptide
-             */
-            mappedTo[0] = 1;
-          }
-        }
-        if (mappedTo == null)
+      /*
+       * locate the mapped range - null if either start or end is
+       * not mapped (no partial overlaps are calculated)
+       */
+      int start = sf.getBegin();
+      int end = sf.getEnd();
+      int[] mappedTo = mapping.locateInTo(start, end);
+      /*
+       * if whole exon range doesn't map, try interpreting it
+       * as 5' or 3' exon overlapping the CDS range
+       */
+      if (mappedTo == null)
+      {
+        mappedTo = mapping.locateInTo(end, end);
+        if (mappedTo != null)
          {
-          mappedTo = mapping.locateInTo(start, start);
-          if (mappedTo != null)
-          {
-            /*
-             * start of exon is in CDS range - 3' overlap
-             * to a range up to the end of the peptide
-             */
-            mappedTo[1] = toSeq.getLength();
-          }
+          /*
+           * end of exon is in CDS range - 5' overlap
+           * to a range from the start of the peptide
+           */
+          mappedTo[0] = 1;
          }
+      }
+      if (mappedTo == null)
+      {
+        mappedTo = mapping.locateInTo(start, start);
          if (mappedTo != null)
          {
-          int newBegin = Math.min(mappedTo[0], mappedTo[1]);
-          int newEnd = Math.max(mappedTo[0], mappedTo[1]);
-          SequenceFeature copy = new SequenceFeature(sf, newBegin, newEnd,
-                  sf.getFeatureGroup());
-          copyTo.addSequenceFeature(copy);
-          count++;
+          /*
+           * start of exon is in CDS range - 3' overlap
+           * to a range up to the end of the peptide
+           */
+          mappedTo[1] = toSeq.getLength();
          }
        }
+      if (mappedTo != null)
+      {
+        int newBegin = Math.min(mappedTo[0], mappedTo[1]);
+        int newEnd = Math.max(mappedTo[0], mappedTo[1]);
+        SequenceFeature copy = new SequenceFeature(sf, newBegin, newEnd,
+                sf.getFeatureGroup());
+        copyTo.addSequenceFeature(copy);
+        count++;
+      }
      }
      return count;
    }
@@ -2205,49 +2202,44 @@ public class AlignmentUtils
    public static List<int[]> findCdsPositions(SequenceI dnaSeq)
    {
      List<int[]> result = new ArrayList<int[]>();
-    SequenceFeature[] sfs = dnaSeq.getSequenceFeatures();
-    if (sfs == null)
+
+    List<SequenceFeature> sfs = dnaSeq.getFeatures().getFeaturesByOntology(
+            SequenceOntologyI.CDS);
+    if (sfs.isEmpty())
      {
        return result;
      }
-
-    SequenceOntologyI so = SequenceOntologyFactory.getInstance();
+    SequenceFeatures.sortFeatures(sfs, true);
      int startPhase = 0;
  
      for (SequenceFeature sf : sfs)
      {
+      int phase = 0;
+      try
+      {
+        phase = Integer.parseInt(sf.getPhase());
+      } catch (NumberFormatException e)
+      {
+        // ignore
+      }
        /*
-       * process a CDS feature (or a sub-type of CDS)
+       * phase > 0 on first codon means 5' incomplete - skip to the start
+       * of the next codon; example ENST00000496384
         */
-      if (so.isA(sf.getType(), SequenceOntologyI.CDS))
+      int begin = sf.getBegin();
+      int end = sf.getEnd();
+      if (result.isEmpty())
        {
-        int phase = 0;
-        try
-        {
-          phase = Integer.parseInt(sf.getPhase());
-        } catch (NumberFormatException e)
+        begin += phase;
+        if (begin > end)
          {
-          // ignore
-        }
-        /*
-         * phase > 0 on first codon means 5' incomplete - skip to the start
-         * of the next codon; example ENST00000496384
-         */
-        int begin = sf.getBegin();
-        int end = sf.getEnd();
-        if (result.isEmpty())
-        {
-          begin += phase;
-          if (begin > end)
-          {
-            // shouldn't happen!
-            System.err
-                    .println("Error: start phase extends beyond start CDS in "
-                            + dnaSeq.getName());
-          }
+          // shouldn't happen!
+          System.err
+                  .println("Error: start phase extends beyond start CDS in "
+                          + dnaSeq.getName());
          }
-        result.add(new int[] { begin, end });
        }
+      result.add(new int[] { begin, end });
      }
  
      /*
@@ -2267,7 +2259,7 @@ public class AlignmentUtils
       * ranges are assembled in order. Other cases should not use this method,
       * but instead construct an explicit mapping for CDS (e.g. EMBL parsing).
       */
-    Collections.sort(result, new RangeComparator(true));
+    Collections.sort(result, IntRangeComparator.ASCENDING);
      return result;
    }
  
@@ -2324,20 +2316,20 @@ public class AlignmentUtils
       * sort to get sequence features in start position order
       * - would be better to store in Sequence as a TreeSet or NCList?
       */
-    if (peptide.getSequenceFeatures() != null)
-    {
-      Arrays.sort(peptide.getSequenceFeatures(),
-              new Comparator<SequenceFeature>()
-              {
-                @Override
-                public int compare(SequenceFeature o1, SequenceFeature o2)
-                {
-                  int c = Integer.compare(o1.getBegin(), o2.getBegin());
-                  return c == 0 ? Integer.compare(o1.getEnd(), o2.getEnd())
-                          : c;
-                }
-              });
-    }
+    // if (peptide.getSequenceFeatures() != null)
+    // {
+    // Arrays.sort(peptide.getSequenceFeatures(),
+    // new Comparator<SequenceFeature>()
+    // {
+    // @Override
+    // public int compare(SequenceFeature o1, SequenceFeature o2)
+    // {
+    // int c = Integer.compare(o1.getBegin(), o2.getBegin());
+    // return c == 0 ? Integer.compare(o1.getEnd(), o2.getEnd())
+    // : c;
+    // }
+    // });
+    // }
      return count;
    }
  
@@ -2528,10 +2520,10 @@ public class AlignmentUtils
       * LinkedHashMap ensures we keep the peptide features in sequence order
       */
      LinkedHashMap<Integer, List<DnaVariant>[]> variants = new LinkedHashMap<Integer, List<DnaVariant>[]>();
-    SequenceOntologyI so = SequenceOntologyFactory.getInstance();
  
-    SequenceFeature[] dnaFeatures = dnaSeq.getSequenceFeatures();
-    if (dnaFeatures == null)
+    List<SequenceFeature> dnaFeatures = dnaSeq.getFeatures()
+            .getFeaturesByOntology(SequenceOntologyI.SEQUENCE_VARIANT);
+    if (dnaFeatures.isEmpty())
      {
        return variants;
      }
@@ -2551,84 +2543,80 @@ public class AlignmentUtils
          // not handling multi-locus variant features
          continue;
        }
-      if (so.isA(sf.getType(), SequenceOntologyI.SEQUENCE_VARIANT))
+      int[] mapsTo = dnaToProtein.locateInTo(dnaCol, dnaCol);
+      if (mapsTo == null)
        {
-        int[] mapsTo = dnaToProtein.locateInTo(dnaCol, dnaCol);
-        if (mapsTo == null)
-        {
-          // feature doesn't lie within coding region
-          continue;
-        }
-        int peptidePosition = mapsTo[0];
-        List<DnaVariant>[] codonVariants = variants.get(peptidePosition);
-        if (codonVariants == null)
-        {
-          codonVariants = new ArrayList[CODON_LENGTH];
-          codonVariants[0] = new ArrayList<DnaVariant>();
-          codonVariants[1] = new ArrayList<DnaVariant>();
-          codonVariants[2] = new ArrayList<DnaVariant>();
-          variants.put(peptidePosition, codonVariants);
-        }
+        // feature doesn't lie within coding region
+        continue;
+      }
+      int peptidePosition = mapsTo[0];
+      List<DnaVariant>[] codonVariants = variants.get(peptidePosition);
+      if (codonVariants == null)
+      {
+        codonVariants = new ArrayList[CODON_LENGTH];
+        codonVariants[0] = new ArrayList<DnaVariant>();
+        codonVariants[1] = new ArrayList<DnaVariant>();
+        codonVariants[2] = new ArrayList<DnaVariant>();
+        variants.put(peptidePosition, codonVariants);
+      }
  
-        /*
-         * extract dna variants to a string array
-         */
-        String alls = (String) sf.getValue("alleles");
-        if (alls == null)
-        {
-          continue;
-        }
-        String[] alleles = alls.toUpperCase().split(",");
-        int i = 0;
-        for (String allele : alleles)
-        {
-          alleles[i++] = allele.trim(); // lose any space characters "A, G"
-        }
+      /*
+       * extract dna variants to a string array
+       */
+      String alls = (String) sf.getValue("alleles");
+      if (alls == null)
+      {
+        continue;
+      }
+      String[] alleles = alls.toUpperCase().split(",");
+      int i = 0;
+      for (String allele : alleles)
+      {
+        alleles[i++] = allele.trim(); // lose any space characters "A, G"
+      }
  
-        /*
-         * get this peptide's codon positions e.g. [3, 4, 5] or [4, 7, 10]
-         */
-        int[] codon = peptidePosition == lastPeptidePostion ? lastCodon
-                : MappingUtils.flattenRanges(dnaToProtein.locateInFrom(
-                        peptidePosition, peptidePosition));
-        lastPeptidePostion = peptidePosition;
-        lastCodon = codon;
+      /*
+       * get this peptide's codon positions e.g. [3, 4, 5] or [4, 7, 10]
+       */
+      int[] codon = peptidePosition == lastPeptidePostion ? lastCodon
+              : MappingUtils.flattenRanges(dnaToProtein.locateInFrom(
+                      peptidePosition, peptidePosition));
+      lastPeptidePostion = peptidePosition;
+      lastCodon = codon;
  
-        /*
-         * save nucleotide (and any variant) for each codon position
-         */
-        for (int codonPos = 0; codonPos < CODON_LENGTH; codonPos++)
+      /*
+       * save nucleotide (and any variant) for each codon position
+       */
+      for (int codonPos = 0; codonPos < CODON_LENGTH; codonPos++)
+      {
+        String nucleotide = String.valueOf(
+                dnaSeq.getCharAt(codon[codonPos] - dnaStart)).toUpperCase();
+        List<DnaVariant> codonVariant = codonVariants[codonPos];
+        if (codon[codonPos] == dnaCol)
          {
-          String nucleotide = String.valueOf(
-                  dnaSeq.getCharAt(codon[codonPos] - dnaStart))
-                  .toUpperCase();
-          List<DnaVariant> codonVariant = codonVariants[codonPos];
-          if (codon[codonPos] == dnaCol)
+          if (!codonVariant.isEmpty()
+                  && codonVariant.get(0).variant == null)
            {
-            if (!codonVariant.isEmpty()
-                    && codonVariant.get(0).variant == null)
-            {
-              /*
-               * already recorded base value, add this variant
-               */
-              codonVariant.get(0).variant = sf;
-            }
-            else
-            {
-              /*
-               * add variant with base value
-               */
-              codonVariant.add(new DnaVariant(nucleotide, sf));
-            }
+            /*
+             * already recorded base value, add this variant
+             */
+            codonVariant.get(0).variant = sf;
            }
-          else if (codonVariant.isEmpty())
+          else
            {
              /*
-             * record (possibly non-varying) base value
+             * add variant with base value
               */
-            codonVariant.add(new DnaVariant(nucleotide));
+            codonVariant.add(new DnaVariant(nucleotide, sf));
            }
          }
+        else if (codonVariant.isEmpty())
+        {
+          /*
+           * record (possibly non-varying) base value
+           */
+          codonVariant.add(new DnaVariant(nucleotide));
+        }
        }
      }
      return variants;
diff --git a/src/jalview/datamodel/features/SequenceFeatures.java b/src/jalview/datamodel/features/SequenceFeatures.java

index 5fa9a3c..73ddac7 100644 (file)
--- a/src/jalview/datamodel/features/SequenceFeatures.java
+++ b/src/jalview/datamodel/features/SequenceFeatures.java
@@ -1,10 +1,13 @@
  package jalview.datamodel.features;
  
  import jalview.datamodel.SequenceFeature;
+import jalview.io.gff.SequenceOntologyFactory;
+import jalview.io.gff.SequenceOntologyI;
  
  import java.util.ArrayList;
  import java.util.Arrays;
  import java.util.Collections;
+import java.util.Comparator;
  import java.util.HashSet;
  import java.util.List;
  import java.util.Map;
@@ -22,6 +25,29 @@ import java.util.TreeMap;
   */
  public class SequenceFeatures implements SequenceFeaturesI
  {
+  /**
+   * A comparator that orders features by start position ascending (used to
+   * sort forward strand features). Only the start position is compared, so
+   * features with the same start position compare as equal.
+   */
+  private static final Comparator<ContiguousI> FORWARD_STRAND = new Comparator<ContiguousI>()
+  {
+    @Override
+    public int compare(ContiguousI o1, ContiguousI o2)
+    {
+      return Integer.compare(o1.getBegin(), o2.getBegin());
+    }
+  };
+
+  /**
+   * A comparator that orders features by end position descending (used to
+   * sort reverse strand features). Only the end position is compared, so
+   * features with the same end position compare as equal.
+   */
+  private static final Comparator<ContiguousI> REVERSE_STRAND = new Comparator<ContiguousI>()
+  {
+    @Override
+    public int compare(ContiguousI o1, ContiguousI o2)
+    {
+      // arguments are swapped to give descending order
+      return Integer.compare(o2.getEnd(), o1.getEnd());
+    }
+  };
  
    /*
     * map from feature type to structured store of features for that type
@@ -38,8 +64,9 @@ public class SequenceFeatures implements SequenceFeaturesI
       * use a TreeMap so that features are returned in alphabetical order of type
       * wrap as a synchronized map for add and delete operations
       */
-    featureStore = Collections
-            .synchronizedSortedMap(new TreeMap<String, FeatureStore>());
+    // featureStore = Collections
+    // .synchronizedSortedMap(new TreeMap<String, FeatureStore>());
+    featureStore = new TreeMap<String, FeatureStore>();
    }
  
    /**
@@ -102,6 +129,22 @@ public class SequenceFeatures implements SequenceFeaturesI
     * {@inheritDoc}
     */
    @Override
+  public List<SequenceFeature> getFeaturesByOntology(String... ontologyTerm)
+  {
+    /*
+     * no terms supplied means nothing is selected (not 'everything')
+     */
+    if (ontologyTerm == null || ontologyTerm.length == 0)
+    {
+      return new ArrayList<SequenceFeature>();
+    }
+
+    Set<String> featureTypes = getFeatureTypes(ontologyTerm);
+    if (featureTypes.isEmpty())
+    {
+      /*
+       * no stored feature type matches any of the terms; answer an empty
+       * list rather than call getAllFeatures() with an empty list of types,
+       * which presumably would not restrict by type at all (TODO confirm -
+       * other varargs filters in this class treat 'no types' as 'any type')
+       */
+      return new ArrayList<SequenceFeature>();
+    }
+
+    return getAllFeatures(featureTypes.toArray(new String[featureTypes
+            .size()]));
+  }
+
+  /**
+   * {@inheritDoc}
+   */
+  @Override
    public int getFeatureCount(boolean positional, String... type)
    {
      int result = 0;
@@ -309,20 +352,47 @@ public class SequenceFeatures implements SequenceFeaturesI
     * {@inheritDoc}
     */
    @Override
-  public Set<String> getFeatureTypes()
+  public Set<String> getFeatureTypes(String... soTerm)
    {
      Set<String> types = new HashSet<String>();
      for (Entry<String, FeatureStore> entry : featureStore.entrySet())
      {
-      if (!entry.getValue().isEmpty())
+      String type = entry.getKey();
+      // keep a type only if its store holds at least one feature and it passes
+      // the ontology filter (isOntologyTerm accepts any type if no terms given)
+      if (!entry.getValue().isEmpty() && isOntologyTerm(type, soTerm))
        {
-        types.add(entry.getKey());
+        types.add(type);
        }
      }
      return types;
    }
  
   /**
+   * Answers true if the given type is one of the specified sequence ontology
+   * terms (or a sub-type of one), or if no terms are supplied. Answers false if
+   * filter terms are specified and the given term does not match any of them.
+   * 
+   * @param type
+   *          the feature type to test
+   * @param soTerm
+   *          zero or more ontology terms to test against; null or an empty
+   *          array means that no filter is applied
+   * @return true if soTerm is null or empty, or if the ontology's isA test
+   *         succeeds for the type and at least one of the terms, else false
+   */
+  protected boolean isOntologyTerm(String type, String... soTerm)
+  {
+    // no filter terms: every type is accepted
+    if (soTerm == null || soTerm.length == 0)
+    {
+      return true;
+    }
+    SequenceOntologyI so = SequenceOntologyFactory.getInstance();
+    for (String term : soTerm)
+    {
+      // a match on any one of the terms is sufficient
+      if (so.isA(type, term))
+      {
+        return true;
+      }
+    }
+    return false;
+  }
+
+  /**
     * {@inheritDoc}
     */
    @Override
@@ -341,4 +411,18 @@ public class SequenceFeatures implements SequenceFeaturesI
      return featureStore.containsKey(type) ? featureStore.get(type)
              .getMaximumScore(positional) : Float.NaN;
    }
-}
+
+  /**
+   * A convenience method to sort features by start position ascending (if on
+   * forward strand), or end position descending (if on reverse strand). The
+   * supplied list is sorted in place. Only one position (start, or end) is
+   * compared, so features which share that position are not ordered further
+   * with respect to each other.
+   * 
+   * @param features
+   *          the list to be sorted (it is modified by this method)
+   * @param forwardStrand
+   *          if true, sort by start position ascending, else sort by end
+   *          position descending
+   */
+  public static void sortFeatures(List<SequenceFeature> features,
+          final boolean forwardStrand)
+  {
+    Collections.sort(features, forwardStrand ? FORWARD_STRAND
+            : REVERSE_STRAND);
+  }
+}
\ No newline at end of file
diff --git a/src/jalview/datamodel/features/SequenceFeaturesI.java b/src/jalview/datamodel/features/SequenceFeaturesI.java

index cfcdc76..ed966e7 100644 (file)
--- a/src/jalview/datamodel/features/SequenceFeaturesI.java
+++ b/src/jalview/datamodel/features/SequenceFeaturesI.java
@@ -42,6 +42,16 @@ public interface SequenceFeaturesI
    List<SequenceFeature> getAllFeatures(String... type);
  
   /**
+   * Answers a list of all features stored, whose type either matches one of the
+   * given ontology terms, or is a specialisation of a term in the Sequence
+   * Ontology. Results are returned in no particular guaranteed order. The
+   * SequenceFeatures implementation includes both positional and
+   * non-positional features, and answers an empty list if no term is given.
+   * 
+   * @param ontologyTerm
+   *          one or more terms to match; feature types are matched if they
+   *          are the same as, or a sub-type of, any one of the terms
+   * @return the matching features, or an empty list if there are none
+   */
+  List<SequenceFeature> getFeaturesByOntology(String... ontologyTerm);
+
+  /**
     * Answers the number of (positional or non-positional) features, optionally
     * restricted to specified feature types. Contact features are counted as 1.
     * 
@@ -134,11 +144,13 @@ public interface SequenceFeaturesI
            boolean positionalFeatures, String... groups);
  
   /**
-   * Answers a set of the distinct feature types for which a feature is stored
+   * Answers a set of the distinct feature types for which a feature is stored.
+   * The types may optionally be restricted to those which match, or are a
+   * subtype of, given sequence ontology terms
     * 
+   * @param soTerm
+   *          optional ontology terms to filter by; if none are supplied, the
+   *          types are not restricted
     * @return
     */
-  Set<String> getFeatureTypes();
+  Set<String> getFeatureTypes(String... soTerm);
  
    /**
     * Answers the minimum score held for positional or non-positional features
diff --git a/src/jalview/ext/ensembl/EnsemblGene.java b/src/jalview/ext/ensembl/EnsemblGene.java

index 24e3e95..223e54a 100644 (file)
--- a/src/jalview/ext/ensembl/EnsemblGene.java
+++ b/src/jalview/ext/ensembl/EnsemblGene.java
@@ -26,6 +26,7 @@ import jalview.datamodel.AlignmentI;
  import jalview.datamodel.Sequence;
  import jalview.datamodel.SequenceFeature;
  import jalview.datamodel.SequenceI;
+import jalview.datamodel.features.SequenceFeatures;
  import jalview.io.gff.SequenceOntologyFactory;
  import jalview.io.gff.SequenceOntologyI;
  import jalview.schemes.FeatureColour;
@@ -267,22 +268,19 @@ public class EnsemblGene extends EnsemblSeqProxy
     */
    protected void clearGeneFeatures(SequenceI gene)
    {
-    SequenceFeature[] sfs = gene.getSequenceFeatures();
-    if (sfs != null)
+    /*
+     * Note we include NMD_transcript_variant here because it behaves like 
+     * 'transcript' in Ensembl, although strictly speaking it is not 
+     * (it is a sub-type of sequence_variant)    
+     */
+    String[] soTerms = new String[] {
+        SequenceOntologyI.NMD_TRANSCRIPT_VARIANT, SequenceOntologyI.EXON,
+        SequenceOntologyI.CDS };
+    List<SequenceFeature> sfs = gene.getFeatures().getFeaturesByOntology(
+            soTerms);
+    for (SequenceFeature sf : sfs)
      {
-      SequenceOntologyI so = SequenceOntologyFactory.getInstance();
-      List<SequenceFeature> filtered = new ArrayList<SequenceFeature>();
-      for (SequenceFeature sf : sfs)
-      {
-        String type = sf.getType();
-        if (!isTranscript(type) && !so.isA(type, SequenceOntologyI.EXON)
-                && !so.isA(type, SequenceOntologyI.CDS))
-        {
-          filtered.add(sf);
-        }
-      }
-      gene.setSequenceFeatures(filtered
-              .toArray(new SequenceFeature[filtered.size()]));
+      gene.deleteFeature(sf);
      }
    }
  
@@ -332,6 +330,7 @@ public class EnsemblGene extends EnsemblSeqProxy
      {
        splices = findFeatures(gene, SequenceOntologyI.CDS, parentId);
      }
+    SequenceFeatures.sortFeatures(splices, true);
  
      int transcriptLength = 0;
      final char[] geneChars = gene.getSequence();
@@ -381,7 +380,7 @@ public class EnsemblGene extends EnsemblSeqProxy
      mapTo.add(new int[] { 1, transcriptLength });
      MapList mapping = new MapList(mappedFrom, mapTo, 1, 1);
      EnsemblCdna cdna = new EnsemblCdna(getDomain());
-    cdna.transferFeatures(gene.getSequenceFeatures(),
+    cdna.transferFeatures(gene.getFeatures().getPositionalFeatures(),
              transcript.getDatasetSequence(), mapping, parentId);
  
      /*
@@ -422,19 +421,18 @@ public class EnsemblGene extends EnsemblSeqProxy
      List<SequenceFeature> transcriptFeatures = new ArrayList<SequenceFeature>();
  
      String parentIdentifier = GENE_PREFIX + accId;
-    SequenceFeature[] sfs = geneSequence.getSequenceFeatures();
+    // todo optimise here by transcript type!
+    List<SequenceFeature> sfs = geneSequence.getFeatures()
+            .getPositionalFeatures();
  
-    if (sfs != null)
+    for (SequenceFeature sf : sfs)
      {
-      for (SequenceFeature sf : sfs)
+      if (isTranscript(sf.getType()))
        {
-        if (isTranscript(sf.getType()))
+        String parent = (String) sf.getValue(PARENT);
+        if (parentIdentifier.equals(parent))
          {
-          String parent = (String) sf.getValue(PARENT);
-          if (parentIdentifier.equals(parent))
-          {
-            transcriptFeatures.add(sf);
-          }
+          transcriptFeatures.add(sf);
          }
        }
      }
diff --git a/test/jalview/datamodel/features/SequenceFeaturesTest.java b/test/jalview/datamodel/features/SequenceFeaturesTest.java

index 0d1d89d..5ff2d7b 100644 (file)
--- a/test/jalview/datamodel/features/SequenceFeaturesTest.java
+++ b/test/jalview/datamodel/features/SequenceFeaturesTest.java
@@ -2,10 +2,12 @@ package jalview.datamodel.features;
  
  import static org.testng.Assert.assertEquals;
  import static org.testng.Assert.assertFalse;
+import static org.testng.Assert.assertSame;
  import static org.testng.Assert.assertTrue;
  
  import jalview.datamodel.SequenceFeature;
  
+import java.util.ArrayList;
  import java.util.Iterator;
  import java.util.List;
  import java.util.Set;
@@ -909,4 +911,105 @@ public class SequenceFeaturesTest
      assertEquals(iterator.next(), "Helix");
      assertFalse(iterator.hasNext());
    }
+
+  /**
+   * Tests getFeatureTypes when restricted by ontology term(s): a term selects
+   * the stored types equal to it or a sub-type of it, types held only by
+   * non-positional features are included, several terms may be supplied, and
+   * a term with no stored match gives an empty set
+   */
+  @Test(groups = "Functional")
+  public void testGetFeatureTypes_byOntology()
+  {
+    SequenceFeaturesI store = new SequenceFeatures();
+  
+    SequenceFeature sf1 = new SequenceFeature("transcript", "desc", 10, 20,
+            Float.NaN, null);
+    store.add(sf1);
+    // mRNA isA mature_transcript isA transcript
+    SequenceFeature sf2 = new SequenceFeature("mRNA", "desc", 10, 20,
+            Float.NaN, null);
+    store.add(sf2);
+    // just to prove non-positional feature types are included
+    SequenceFeature sf3 = new SequenceFeature("mRNA", "desc", 0, 0,
+            Float.NaN, null);
+    store.add(sf3);
+    SequenceFeature sf4 = new SequenceFeature("CDS", "desc", 0, 0,
+            Float.NaN, null);
+    store.add(sf4);
+
+    // 'transcript' selects itself and its sub-type mRNA, but not CDS
+    Set<String> types = store.getFeatureTypes("transcript");
+    assertEquals(types.size(), 2);
+    assertTrue(types.contains("transcript"));
+    assertTrue(types.contains("mRNA"));
+
+    // matches include arguments whether SO terms or not
+    types = store.getFeatureTypes("transcript", "CDS");
+    assertEquals(types.size(), 3);
+    assertTrue(types.contains("transcript"));
+    assertTrue(types.contains("mRNA"));
+    assertTrue(types.contains("CDS"));
+
+    // no stored type is 'exon' or a sub-type of it
+    types = store.getFeatureTypes("exon");
+    assertTrue(types.isEmpty());
+  }
+
+  /**
+   * Tests getFeaturesByOntology: no terms (no argument, an empty array, or
+   * null) gives an empty list; a term selects features whose type is the term
+   * or a sub-type of it, including non-positional features; and several terms
+   * may be supplied together
+   */
+  @Test(groups = "Functional")
+  public void testGetFeaturesByOntology()
+  {
+    SequenceFeaturesI store = new SequenceFeatures();
+    // with no ontology term supplied, nothing is returned
+    List<SequenceFeature> features = store.getFeaturesByOntology();
+    assertTrue(features.isEmpty());
+    assertTrue(store.getFeaturesByOntology(new String[] {}).isEmpty());
+    assertTrue(store.getFeaturesByOntology((String[]) null).isEmpty());
+  
+    SequenceFeature sf1 = new SequenceFeature("transcript", "desc", 10, 20,
+            Float.NaN, null);
+    store.add(sf1);
+
+    // mRNA isA transcript; added here 'as if' non-positional
+    // just to show that non-positional features are included in results
+    SequenceFeature sf2 = new SequenceFeature("mRNA", "desc", 0, 0,
+            Float.NaN, null);
+    store.add(sf2);
+
+    SequenceFeature sf3 = new SequenceFeature("Pfam", "desc", 30, 40,
+            Float.NaN, null);
+    store.add(sf3);
+
+    // 'transcript' selects the transcript and the mRNA feature, but not Pfam
+    features = store.getFeaturesByOntology("transcript");
+    assertEquals(features.size(), 2);
+    assertTrue(features.contains(sf1));
+    assertTrue(features.contains(sf2));
+
+    // the more specific term 'mRNA' does not select its parent 'transcript'
+    features = store.getFeaturesByOntology("mRNA");
+    assertEquals(features.size(), 1);
+    assertTrue(features.contains(sf2));
+
+    // multiple terms: features matching any one of them are returned
+    features = store.getFeaturesByOntology("mRNA", "Pfam");
+    assertEquals(features.size(), 2);
+    assertTrue(features.contains(sf2));
+    assertTrue(features.contains(sf3));
+  }
+
+  /**
+   * Tests the static sortFeatures method, for both reverse strand ordering
+   * (end position descending) and forward strand ordering (start position
+   * ascending) of the same list of three features
+   */
+  @Test(groups = "Functional")
+  public void testSortFeatures()
+  {
+    List<SequenceFeature> sfs = new ArrayList<SequenceFeature>();
+    SequenceFeature sf1 = new SequenceFeature("Pfam", "desc", 30, 80,
+            Float.NaN, null);
+    sfs.add(sf1);
+    SequenceFeature sf2 = new SequenceFeature("Rfam", "desc", 40, 50,
+            Float.NaN, null);
+    sfs.add(sf2);
+    SequenceFeature sf3 = new SequenceFeature("Rfam", "desc", 50, 60,
+            Float.NaN, null);
+    sfs.add(sf3);
+
+    // sort by end position descending
+    // expected order of end positions is 80, 60, 50
+    SequenceFeatures.sortFeatures(sfs, false);
+    assertSame(sfs.get(0), sf1);
+    assertSame(sfs.get(1), sf3);
+    assertSame(sfs.get(2), sf2);
+
+    // sort by start position ascending
+    // expected order of start positions is 30, 40, 50
+    SequenceFeatures.sortFeatures(sfs, true);
+    assertSame(sfs.get(0), sf1);
+    assertSame(sfs.get(1), sf2);
+    assertSame(sfs.get(2), sf3);
+  }
  }
diff --git a/test/jalview/ext/ensembl/EnsemblGeneTest.java b/test/jalview/ext/ensembl/EnsemblGeneTest.java

index 6cfd85b..edecc23 100644 (file)
--- a/test/jalview/ext/ensembl/EnsemblGeneTest.java
+++ b/test/jalview/ext/ensembl/EnsemblGeneTest.java
@@ -22,7 +22,6 @@ package jalview.ext.ensembl;
  
  import static org.testng.AssertJUnit.assertEquals;
  import static org.testng.AssertJUnit.assertFalse;
-import static org.testng.AssertJUnit.assertSame;
  import static org.testng.AssertJUnit.assertTrue;
  
  import jalview.api.FeatureSettingsModelI;
@@ -76,7 +75,9 @@ public class EnsemblGeneTest
      genomic.setEnd(50000);
      String geneId = "ABC123";
  
-    // gene at (start+10000) length 501
+    // gene at (start+20000) length 501
+    // should be ignored - the first 'gene' found defines the whole range
+    // (note features are found in position order, not addition order)
      SequenceFeature sf = new SequenceFeature("gene", "", 20000, 20500, 0f,
              null);
      sf.setValue("ID", "gene:" + geneId);
@@ -84,7 +85,6 @@ public class EnsemblGeneTest
      genomic.addSequenceFeature(sf);
  
      // gene at (start + 10500) length 101
-    // should be ignored - the first 'gene' found defines the whole range
      sf = new SequenceFeature("gene", "", 10500, 10600, 0f, null);
      sf.setValue("ID", "gene:" + geneId);
      sf.setStrand("+");
@@ -94,13 +94,13 @@ public class EnsemblGeneTest
              23);
      List<int[]> fromRanges = ranges.getFromRanges();
      assertEquals(1, fromRanges.size());
-    assertEquals(20000, fromRanges.get(0)[0]);
-    assertEquals(20500, fromRanges.get(0)[1]);
+    assertEquals(10500, fromRanges.get(0)[0]);
+    assertEquals(10600, fromRanges.get(0)[1]);
      // to range should start from given start numbering
      List<int[]> toRanges = ranges.getToRanges();
      assertEquals(1, toRanges.size());
      assertEquals(23, toRanges.get(0)[0]);
-    assertEquals(523, toRanges.get(0)[1]);
+    assertEquals(123, toRanges.get(0)[1]);
    }
  
    /**
@@ -115,7 +115,9 @@ public class EnsemblGeneTest
      genomic.setEnd(50000);
      String geneId = "ABC123";
  
-    // gene at (start+10000) length 501
+    // gene at (start+20000) length 501
+    // should be ignored - the first 'gene' found defines the whole range
+    // (real data would only have one such feature)
      SequenceFeature sf = new SequenceFeature("ncRNA_gene", "", 20000,
              20500, 0f, null);
      sf.setValue("ID", "gene:" + geneId);
@@ -123,8 +125,6 @@ public class EnsemblGeneTest
      genomic.addSequenceFeature(sf);
  
      // gene at (start + 10500) length 101
-    // should be ignored - the first 'gene' found defines the whole range
-    // (real data would only have one such feature)
      sf = new SequenceFeature("gene", "", 10500, 10600, 0f, null);
      sf.setValue("ID", "gene:" + geneId);
      sf.setStrand("+");
@@ -135,13 +135,13 @@ public class EnsemblGeneTest
      List<int[]> fromRanges = ranges.getFromRanges();
      assertEquals(1, fromRanges.size());
      // from range on reverse strand:
-    assertEquals(20500, fromRanges.get(0)[0]);
-    assertEquals(20000, fromRanges.get(0)[1]);
+    assertEquals(10500, fromRanges.get(0)[0]);
+    assertEquals(10600, fromRanges.get(0)[1]);
      // to range should start from given start numbering
      List<int[]> toRanges = ranges.getToRanges();
      assertEquals(1, toRanges.size());
      assertEquals(23, toRanges.get(0)[0]);
-    assertEquals(523, toRanges.get(0)[1]);
+    assertEquals(123, toRanges.get(0)[1]);
    }
  
    /**
@@ -164,7 +164,7 @@ public class EnsemblGeneTest
      genomic.addSequenceFeature(sf1);
  
      // transcript sub-type feature
-    SequenceFeature sf2 = new SequenceFeature("snRNA", "", 20000, 20500,
+    SequenceFeature sf2 = new SequenceFeature("snRNA", "", 21000, 21500,
              0f, null);
      sf2.setValue("Parent", "gene:" + geneId);
      sf2.setValue("transcript_id", "transcript2");
@@ -172,13 +172,13 @@ public class EnsemblGeneTest
  
      // NMD_transcript_variant treated like transcript in Ensembl
      SequenceFeature sf3 = new SequenceFeature("NMD_transcript_variant", "",
-            20000, 20500, 0f, null);
+            22000, 22500, 0f, null);
      sf3.setValue("Parent", "gene:" + geneId);
      sf3.setValue("transcript_id", "transcript3");
      genomic.addSequenceFeature(sf3);
  
      // transcript for a different gene - ignored
-    SequenceFeature sf4 = new SequenceFeature("snRNA", "", 20000, 20500,
+    SequenceFeature sf4 = new SequenceFeature("snRNA", "", 23000, 23500,
              0f, null);
      sf4.setValue("Parent", "gene:XYZ");
      sf4.setValue("transcript_id", "transcript4");
@@ -192,9 +192,9 @@ public class EnsemblGeneTest
      List<SequenceFeature> features = testee.getTranscriptFeatures(geneId,
              genomic);
      assertEquals(3, features.size());
-    assertSame(sf1, features.get(0));
-    assertSame(sf2, features.get(1));
-    assertSame(sf3, features.get(2));
+    assertTrue(features.contains(sf1));
+    assertTrue(features.contains(sf2));
+    assertTrue(features.contains(sf3));
    }
  
    /**
diff --git a/test/jalview/ext/ensembl/EnsemblSeqProxyTest.java b/test/jalview/ext/ensembl/EnsemblSeqProxyTest.java

index e977233..c8fa3c2 100644 (file)
--- a/test/jalview/ext/ensembl/EnsemblSeqProxyTest.java
+++ b/test/jalview/ext/ensembl/EnsemblSeqProxyTest.java
@@ -22,12 +22,13 @@ package jalview.ext.ensembl;
  
  import static org.testng.AssertJUnit.assertEquals;
  import static org.testng.AssertJUnit.assertFalse;
+import static org.testng.AssertJUnit.assertSame;
  import static org.testng.AssertJUnit.assertTrue;
-import static org.testng.internal.junit.ArrayAsserts.assertArrayEquals;
  
  import jalview.datamodel.Alignment;
  import jalview.datamodel.SequenceFeature;
  import jalview.datamodel.SequenceI;
+import jalview.datamodel.features.SequenceFeatures;
  import jalview.gui.JvOptionPane;
  import jalview.io.DataSourceType;
  import jalview.io.FastaFile;
@@ -37,6 +38,7 @@ import jalview.io.gff.SequenceOntologyLite;
  
  import java.lang.reflect.Method;
  import java.util.Arrays;
+import java.util.List;
  
  import org.testng.Assert;
  import org.testng.annotations.AfterClass;
@@ -166,6 +168,8 @@ public class EnsemblSeqProxyTest
      Alignment ral = new Alignment(sqs);
      for (SequenceI tr : trueSqs)
      {
+      // 12/05/2017 failing for EnsemblCdna which is returning protein
+      // Ensembl helpdesk ticket 187998
        SequenceI[] rseq;
        Assert.assertNotNull(
                rseq = ral.findSequenceMatch(tr.getName()),
@@ -269,15 +273,22 @@ public class EnsemblSeqProxyTest
      SequenceFeature sf2 = new SequenceFeature("", "", 8, 12, 0f, null);
      SequenceFeature sf3 = new SequenceFeature("", "", 8, 13, 0f, null);
      SequenceFeature sf4 = new SequenceFeature("", "", 11, 11, 0f, null);
-    SequenceFeature[] sfs = new SequenceFeature[] { sf1, sf2, sf3, sf4 };
+    List<SequenceFeature> sfs = Arrays.asList(new SequenceFeature[] { sf1,
+        sf2, sf3, sf4 });
  
      // sort by start position ascending (forward strand)
      // sf2 and sf3 tie and should not be reordered by sorting
-    EnsemblSeqProxy.sortFeatures(sfs, true);
-    assertArrayEquals(new SequenceFeature[] { sf2, sf3, sf1, sf4 }, sfs);
+    SequenceFeatures.sortFeatures(sfs, true);
+    assertSame(sfs.get(0), sf2);
+    assertSame(sfs.get(1), sf3);
+    assertSame(sfs.get(2), sf1);
+    assertSame(sfs.get(3), sf4);
  
      // sort by end position descending (reverse strand)
-    EnsemblSeqProxy.sortFeatures(sfs, false);
-    assertArrayEquals(new SequenceFeature[] { sf1, sf3, sf2, sf4 }, sfs);
+    SequenceFeatures.sortFeatures(sfs, false);
+    assertSame(sfs.get(0), sf1);
+    assertSame(sfs.get(1), sf3);
+    assertSame(sfs.get(2), sf2);
+    assertSame(sfs.get(3), sf4);
    }
  }
author	gmungoc <g.m.carstairs@dundee.ac.uk>
	Fri, 12 May 2017 15:00:20 +0000 (16:00 +0100)
committer	gmungoc <g.m.carstairs@dundee.ac.uk>
	Fri, 12 May 2017 15:00:20 +0000 (16:00 +0100)
src/jalview/analysis/AlignmentUtils.java		patch \| blob \| history
src/jalview/datamodel/features/SequenceFeatures.java		patch \| blob \| history
src/jalview/datamodel/features/SequenceFeaturesI.java		patch \| blob \| history
src/jalview/ext/ensembl/EnsemblGene.java		patch \| blob \| history
test/jalview/datamodel/features/SequenceFeaturesTest.java		patch \| blob \| history
test/jalview/ext/ensembl/EnsemblGeneTest.java		patch \| blob \| history
test/jalview/ext/ensembl/EnsemblSeqProxyTest.java		patch \| blob \| history