JAL-2446 merged to spike branch
[jalview.git] / src / jalview / ext / ensembl / EnsemblSeqProxy.java
index 233707b..79d6c0a 100644 (file)
@@ -30,6 +30,7 @@ import jalview.datamodel.DBRefSource;
 import jalview.datamodel.Mapping;
 import jalview.datamodel.SequenceFeature;
 import jalview.datamodel.SequenceI;
+import jalview.datamodel.features.SequenceFeatures;
 import jalview.exceptions.JalviewException;
 import jalview.io.FastaFile;
 import jalview.io.FileParse;
@@ -37,8 +38,8 @@ import jalview.io.gff.SequenceOntologyFactory;
 import jalview.io.gff.SequenceOntologyI;
 import jalview.util.Comparison;
 import jalview.util.DBRefUtils;
+import jalview.util.IntRangeComparator;
 import jalview.util.MapList;
-import jalview.util.RangeComparator;
 
 import java.io.IOException;
 import java.net.MalformedURLException;
@@ -46,8 +47,9 @@ import java.net.URL;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Collections;
-import java.util.Comparator;
 import java.util.List;
+import java.util.Map;
+import java.util.Map.Entry;
 
 /**
  * Base class for Ensembl sequence fetchers
@@ -468,11 +470,31 @@ public abstract class EnsemblSeqProxy extends EnsemblRestClient
     urlstring.append("?type=").append(getSourceEnsemblType().getType());
     urlstring.append(("&Accept=text/x-fasta"));
 
+    Map<String, String> params = getAdditionalParameters();
+    if (params != null)
+    {
+      for (Entry<String, String> entry : params.entrySet())
+      {
+        urlstring.append("&").append(entry.getKey()).append("=")
+                .append(entry.getValue());
+      }
+    }
+
     URL url = new URL(urlstring.toString());
     return url;
   }
 
   /**
+   * Override this method to add any additional x=y URL parameters needed
+   * 
+   * @return
+   */
+  protected Map<String, String> getAdditionalParameters()
+  {
+    return null;
+  }
+
+  /**
    * A sequence/id POST request currently allows up to 50 queries
    * 
    * @see http://rest.ensembl.org/documentation/info/sequence_id_post
@@ -536,8 +558,10 @@ public abstract class EnsemblSeqProxy extends EnsemblRestClient
   protected MapList getGenomicRangesFromFeatures(SequenceI sourceSequence,
           String accId, int start)
   {
-    SequenceFeature[] sfs = sourceSequence.getSequenceFeatures();
-    if (sfs == null)
+    // SequenceFeature[] sfs = sourceSequence.getSequenceFeatures();
+    List<SequenceFeature> sfs = sourceSequence.getFeatures()
+            .getPositionalFeatures();
+    if (sfs.isEmpty())
     {
       return null;
     }
@@ -607,7 +631,8 @@ public abstract class EnsemblSeqProxy extends EnsemblRestClient
      * a final sort is needed since Ensembl returns CDS sorted within source
      * (havana / ensembl_havana)
      */
-    Collections.sort(regions, new RangeComparator(direction == 1));
+    Collections.sort(regions, direction == 1 ? IntRangeComparator.ASCENDING
+            : IntRangeComparator.DESCENDING);
 
     List<int[]> to = Arrays.asList(new int[] { start,
         start + mappedLength - 1 });
@@ -658,13 +683,15 @@ public abstract class EnsemblSeqProxy extends EnsemblRestClient
 
     if (mappedRange != null)
     {
-      SequenceFeature copy = new SequenceFeature(sf);
-      copy.setBegin(Math.min(mappedRange[0], mappedRange[1]));
-      copy.setEnd(Math.max(mappedRange[0], mappedRange[1]));
-      if (".".equals(copy.getFeatureGroup()))
+      String group = sf.getFeatureGroup();
+      if (".".equals(group))
       {
-        copy.setFeatureGroup(getDbSource());
+        group = getDbSource();
       }
+      int newBegin = Math.min(mappedRange[0], mappedRange[1]);
+      int newEnd = Math.max(mappedRange[0], mappedRange[1]);
+      SequenceFeature copy = new SequenceFeature(sf, newBegin, newEnd,
+              group, sf.getScore());
       targetSequence.addSequenceFeature(copy);
 
       /*
@@ -763,8 +790,10 @@ public abstract class EnsemblSeqProxy extends EnsemblRestClient
       return false;
     }
 
-    // long start = System.currentTimeMillis();
-    SequenceFeature[] sfs = sourceSequence.getSequenceFeatures();
+    long start = System.currentTimeMillis();
+    // SequenceFeature[] sfs = sourceSequence.getSequenceFeatures();
+    List<SequenceFeature> sfs = sourceSequence.getFeatures()
+            .getPositionalFeatures();
     MapList mapping = getGenomicRangesFromFeatures(sourceSequence,
             accessionId, targetSequence.getStart());
     if (mapping == null)
@@ -774,10 +803,10 @@ public abstract class EnsemblSeqProxy extends EnsemblRestClient
 
     boolean result = transferFeatures(sfs, targetSequence, mapping,
             accessionId);
-    // System.out.println("transferFeatures (" + (sfs.length) + " --> "
-    // + targetSequence.getSequenceFeatures().length + ") to "
-    // + targetSequence.getName()
-    // + " took " + (System.currentTimeMillis() - start) + "ms");
+    System.out.println("transferFeatures (" + (sfs.size()) + " --> "
+            + targetSequence.getFeatures().getFeatureCount(true) + ") to "
+            + targetSequence.getName() + " took "
+            + (System.currentTimeMillis() - start) + "ms");
     return result;
   }
 
@@ -786,13 +815,13 @@ public abstract class EnsemblSeqProxy extends EnsemblRestClient
    * converted using the mapping. Features which do not overlap are ignored.
    * Features whose parent is not the specified identifier are also ignored.
    * 
-   * @param features
+   * @param sfs
    * @param targetSequence
    * @param mapping
    * @param parentId
    * @return
    */
-  protected boolean transferFeatures(SequenceFeature[] features,
+  protected boolean transferFeatures(List<SequenceFeature> sfs,
           SequenceI targetSequence, MapList mapping, String parentId)
   {
     final boolean forwardStrand = mapping.isFromForwardStrand();
@@ -802,10 +831,10 @@ public abstract class EnsemblSeqProxy extends EnsemblRestClient
      * position descending if reverse strand) so as to add them in
      * 'forwards' order to the target sequence
      */
-    sortFeatures(features, forwardStrand);
+    SequenceFeatures.sortFeatures(sfs, forwardStrand);
 
     boolean transferred = false;
-    for (SequenceFeature sf : features)
+    for (SequenceFeature sf : sfs)
     {
       if (retainFeature(sf, parentId))
       {
@@ -817,33 +846,6 @@ public abstract class EnsemblSeqProxy extends EnsemblRestClient
   }
 
   /**
-   * Sort features by start position ascending (if on forward strand), or end
-   * position descending (if on reverse strand)
-   * 
-   * @param features
-   * @param forwardStrand
-   */
-  protected static void sortFeatures(SequenceFeature[] features,
-          final boolean forwardStrand)
-  {
-    Arrays.sort(features, new Comparator<SequenceFeature>()
-    {
-      @Override
-      public int compare(SequenceFeature o1, SequenceFeature o2)
-      {
-        if (forwardStrand)
-        {
-          return Integer.compare(o1.getBegin(), o2.getBegin());
-        }
-        else
-        {
-          return Integer.compare(o2.getEnd(), o1.getEnd());
-        }
-      }
-    });
-  }
-
-  /**
    * Answers true if the feature type is one we want to keep for the sequence.
    * Some features are only retrieved in order to identify the sequence range,
    * and may then be discarded as redundant information (e.g. "CDS" feature for
@@ -885,35 +887,30 @@ public abstract class EnsemblSeqProxy extends EnsemblRestClient
 
   /**
    * Returns a (possibly empty) list of features on the sequence which have the
-   * specified sequence ontology type (or a sub-type of it), and the given
+   * specified sequence ontology term (or a sub-type of it), and the given
    * identifier as parent
    * 
    * @param sequence
-   * @param type
+   * @param term
    * @param parentId
    * @return
    */
   protected List<SequenceFeature> findFeatures(SequenceI sequence,
-          String type, String parentId)
+          String term, String parentId)
   {
     List<SequenceFeature> result = new ArrayList<SequenceFeature>();
 
-    SequenceFeature[] sfs = sequence.getSequenceFeatures();
-    if (sfs != null)
+    List<SequenceFeature> sfs = sequence.getFeatures()
+            .getFeaturesByOntology(term);
+    for (SequenceFeature sf : sfs)
     {
-      SequenceOntologyI so = SequenceOntologyFactory.getInstance();
-      for (SequenceFeature sf : sfs)
+      String parent = (String) sf.getValue(PARENT);
+      if (parent != null && parent.equals(parentId))
       {
-        if (so.isA(sf.getType(), type))
-        {
-          String parent = (String) sf.getValue(PARENT);
-          if (parent.equals(parentId))
-          {
-            result.add(sf);
-          }
-        }
+        result.add(sf);
       }
     }
+
     return result;
   }