JAL-3829 refactor and JAL-3391 JAL-2935 first stab at coverage based selection of...
authorJim Procter <j.procter@dundee.ac.uk>
Wed, 8 Sep 2021 15:58:22 +0000 (16:58 +0100)
committerJim Procter <j.procter@dundee.ac.uk>
Wed, 8 Sep 2021 15:58:22 +0000 (16:58 +0100)
src/jalview/gui/structurechooser/TDBResultAnalyser.java [new file with mode: 0644]
src/jalview/gui/structurechooser/ThreeDBStructureChooserQuerySource.java

diff --git a/src/jalview/gui/structurechooser/TDBResultAnalyser.java b/src/jalview/gui/structurechooser/TDBResultAnalyser.java
new file mode 100644 (file)
index 0000000..f73f397
--- /dev/null
@@ -0,0 +1,203 @@
+package jalview.gui.structurechooser;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.BitSet;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.Comparator;
+import java.util.List;
+
+import jalview.datamodel.SequenceI;
+import jalview.fts.api.FTSData;
+import jalview.fts.core.FTSRestRequest;
+
+public class TDBResultAnalyser
+{
+
+  /**
+   * model categories - update as needed. warnings output if unknown types
+   * encountered.
+   * 
+   * Order denotes 'trust'
+   */
+  private static List<String> EXP_CATEGORIES = Arrays
+          .asList(new String[]
+          { "EXPERIMENTALLY DETERMINED", "DEEP LEARNING",
+              "TEMPLATE-BASED" });
+
+  private SequenceI seq;
+
+  private Collection<FTSData> collectedResults;
+
+  private FTSRestRequest lastTdbRequest;
+
+  private int idx_ups;
+
+  private int idx_upe;
+
+  private int idx_mcat;
+
+  private int idx_mqual;
+
+  private int idx_resol;
+
+  /**
+   * Creates an analyser for the results collected for one sequence, and
+   * resolves the summary-data column indices used for filtering and ranking.
+   * 
+   * @param seq
+   *          sequence whose start/end range the results are matched against
+   * @param collectedResults
+   *          result rows to be filtered and ranked
+   * @param lastTdbRequest
+   *          request used to translate column names into field indices
+   */
+  public TDBResultAnalyser(SequenceI seq,
+          Collection<FTSData> collectedResults,
+          FTSRestRequest lastTdbRequest)
+  {
+    this.seq = seq;
+    this.collectedResults = collectedResults;
+    this.lastTdbRequest = lastTdbRequest;
+
+    // columns describing the region covered by a structure
+    this.idx_ups = lastTdbRequest.getFieldIndex("Uniprot Start");
+    this.idx_upe = lastTdbRequest.getFieldIndex("Uniprot End");
+
+    // columns used to rank structures against each other
+    this.idx_mcat = lastTdbRequest.getFieldIndex("Model Category");
+    this.idx_mqual = lastTdbRequest.getFieldIndex("Qmean");
+    this.idx_resol = lastTdbRequest.getFieldIndex("Resolution");
+  }
+  private final int scoreCategory(String cat)
+  {
+    // TODO: make quicker
+    int idx = EXP_CATEGORIES.indexOf(cat.toUpperCase());
+    if (idx == -1)
+    {
+      System.out.println("Unknown category: '" + cat + "'");
+    }
+    return -EXP_CATEGORIES.size() - idx;
+  }
+
+  /**
+   * sorts records discovered by 3D beacons and excludes any that don't
+   * intersect with the sequence's start/end rage
+   * 
+   * @return
+   */
+  public List<FTSData> getFilteredResponse()
+  {
+    List<FTSData> filteredResponse = new ArrayList<FTSData>();
+
+    // ignore anything outside the sequence region
+    for (FTSData row : collectedResults)
+    {
+      int up_s = (Integer) row.getSummaryData()[idx_ups];
+      int up_e = (Integer) row.getSummaryData()[idx_upe];
+
+      if (seq == row.getSummaryData()[0] && up_e > seq.getStart()
+              && up_s < seq.getEnd())
+      {
+        filteredResponse.add(row);
+      }
+    }
+    // sort according to decreasing length,
+    // increasing start
+    Collections.sort(filteredResponse, new Comparator<FTSData>()
+    {
+
+
+      @Override
+      public int compare(FTSData o1, FTSData o2)
+      {
+        Object[] o1data = o1.getSummaryData();
+        Object[] o2data = o2.getSummaryData();
+        int o1_s = (Integer) o1data[idx_ups];
+        int o1_e = (Integer) o1data[idx_upe];
+        int o1_cat = scoreCategory((String) o1data[idx_mcat]);
+        int o2_s = (Integer) o2data[idx_ups];
+        int o2_e = (Integer) o2data[idx_upe];
+        int o2_cat = scoreCategory((String) o2data[idx_mcat]);
+
+        if (o1_cat == o2_cat)
+        {
+          if (o1_s == o2_s)
+          {
+            int o1_xtent = o1_e - o1_s;
+            int o2_xtent = o2_e - o2_s;
+            if (o1_xtent == o2_xtent)
+            {
+              if (o1_cat == scoreCategory(EXP_CATEGORIES.get(0)))
+              {
+                // experimental structures, so rank on quality
+                double o1_res = (Double) o1data[idx_resol];
+                double o2_res = (Double) o2data[idx_resol];
+                return (o2_res < o1_res) ? 1 : (o2_res == o1_res) ? 0 : -1;
+              }
+              else
+              {
+                // models, so rank on qmean
+                float o1_mq = (Float) o1data[idx_mqual];
+                float o2_mq = (Float) o2data[idx_mqual];
+                return (o2_mq < o1_mq) ? 1 : (o2_mq == o1_mq) ? 0 : -1;
+              }
+            }
+            else
+            {
+              return o1_xtent - o2_xtent;
+            }
+          }
+          else
+          {
+            return o1_s - o2_s;
+          }
+        }
+        else
+        {
+          return o2_cat - o1_cat;
+        }
+      }
+
+      @Override
+      public boolean equals(Object obj)
+      {
+        return super.equals(obj);
+      }
+    });
+    return filteredResponse;
+  }
+
+  /**
+   * return list of structures to be marked as selected for this sequence according to given criteria
+   * @param filteredStructures - sorted, filtered structures from getFilteredResponse
+   * 
+   */
+  public List<FTSData> selectStructures(List<FTSData> filteredStructures)
+  {
+    List<FTSData> selected = new ArrayList<FTSData>();
+    BitSet cover = new BitSet();
+    cover.set(seq.getStart(),seq.getEnd());
+    // walk down the list of structures, selecting some to add to selected
+    for (FTSData structure:filteredStructures)
+    {
+      Object[] odata=structure.getSummaryData();
+      int o1_s = (Integer) odata[idx_ups];
+      int o1_e = (Integer) odata[idx_upe];
+      int o1_cat = scoreCategory((String) odata[idx_mcat]);
+      BitSet scover = new BitSet();
+      // measure intersection
+      scover.set(o1_s,o1_e);
+      scover.and(cover);
+      if (scover.cardinality()>4)
+      {
+        selected.add(structure);
+        // clear the range covered by this structure
+        cover.andNot(scover); 
+      }
+    }
+    // final step is to sort on length - this might help the superposition process
+    Collections.sort(selected,new Comparator<FTSData>()
+    {
+      @Override
+      public int compare(FTSData o1, FTSData o2)
+      {
+        Object[] o1data = o1.getSummaryData();
+        Object[] o2data = o2.getSummaryData();
+        int o1_xt = ((Integer) o1data[idx_upe]) - ((Integer) o1data[idx_ups]);
+        int o1_cat = scoreCategory((String) o1data[idx_mcat]);
+        int o2_xt = ((Integer) o2data[idx_upe]-(Integer) o2data[idx_ups]);
+        int o2_cat = scoreCategory((String) o2data[idx_mcat]);
+        return o2_xt-o1_xt;
+      }
+    });
+    return selected;
+  }
+
+}
index 1c04d3a..e5f96be 100644 (file)
@@ -201,16 +201,6 @@ public class ThreeDBStructureChooserQuerySource
   }
 
   /**
-   * model categories - update as needed. warnings output if unknown types
-   * encountered.
-   * 
-   * Order denotes 'trust'
-   */
-  private static List<String> EXP_CATEGORIES = Arrays
-          .asList(new String[]
-          { "EXPERIMENTALLY DETERMINED", "DEEP LEARNING", "TEMPLATE-BASED" });
-
-  /**
    * FTSRestClient specific query builder to pick top ranked entry from a
    * fetchStructuresMetaData query
    * 
@@ -231,99 +221,16 @@ public class ThreeDBStructureChooserQuerySource
           boolean b) throws Exception
   {
 
-    List<FTSData> filteredResponse = new ArrayList<FTSData>();
-    final int idx_ups = lastTdbRequest.getFieldIndex("Uniprot Start");
-    final int idx_upe = lastTdbRequest.getFieldIndex("Uniprot End");
-    final int idx_mcat = lastTdbRequest.getFieldIndex("Model Category");
-    final int idx_mqual = lastTdbRequest.getFieldIndex("Qmean");
-    final int idx_resol = lastTdbRequest.getFieldIndex("Resolution");
-
-    // ignore anything outside the sequence region
-    for (FTSData row : collectedResults)
-    {
-      int up_s = (Integer) row.getSummaryData()[idx_ups];
-      int up_e = (Integer) row.getSummaryData()[idx_upe];
-
-      if (seq == row.getSummaryData()[0] && up_e > seq.getStart()
-              && up_s < seq.getEnd())
-      {
-        filteredResponse.add(row);
-      }
-    }
-    // sort according to decreasing length,
-    // increasing start
-    Collections.sort(filteredResponse, new Comparator<FTSData>()
-    {
-
-      private final int scoreCategory(String cat)
-      {
-        // TODO: make quicker
-        int idx = EXP_CATEGORIES.indexOf(cat.toUpperCase());
-        if (idx == -1)
-        {
-          System.out.println("Unknown category: '" + cat + "'");
-        }
-        return -EXP_CATEGORIES.size() - idx;
-      }
-
-      @Override
-      public int compare(FTSData o1, FTSData o2)
-      {
-        int o1_s = (Integer) o1.getSummaryData()[idx_ups];
-        int o1_e = (Integer) o1.getSummaryData()[idx_upe];
-        int o1_cat = scoreCategory((String) o1.getSummaryData()[idx_mcat]);
-        int o2_s = (Integer) o2.getSummaryData()[idx_ups];
-        int o2_e = (Integer) o2.getSummaryData()[idx_upe];
-        int o2_cat = scoreCategory((String) o2.getSummaryData()[idx_mcat]);
-
-        if (o1_cat == o2_cat)
-        {
-          if (o1_s == o2_s)
-          {
-            int o1_xtent = o1_e - o1_s;
-            int o2_xtent = o2_e - o2_s;
-            if (o1_xtent == o2_xtent)
-            {
-              if (o1_cat == scoreCategory(EXP_CATEGORIES.get(0)))
-              {
-                // experimental structures, so rank on quality
-                double o1_res = (Double) o1.getSummaryData()[idx_resol];
-                double o2_res = (Double) o2.getSummaryData()[idx_resol];
-                return (o2_res < o1_res) ? 1 : (o2_res == o1_res) ? 0 : -1;
-              }
-              else
-              {
-                // models, so rank on qmean
-                float o1_mq = (Float) o1.getSummaryData()[idx_mqual];
-                float o2_mq = (Float) o2.getSummaryData()[idx_mqual];
-                return (o2_mq < o1_mq) ? 1 : (o2_mq == o1_mq) ? 0 : -1;
-              }
-            }
-            else
-            {
-              return o1_xtent - o2_xtent;
-            }
-          }
-          else
-          {
-            return o1_s - o2_s;
-          }
-        }
-        else
-        {
-          return o2_cat - o1_cat;
-        }
-      }
-
-      @Override
-      public boolean equals(Object obj)
-      {
-        return super.equals(obj);
-      }
-    });
+    TDBResultAnalyser analyser= new TDBResultAnalyser(seq,collectedResults, lastTdbRequest);
+    
+    
     FTSRestResponse resultList = new FTSRestResponse();
-    resultList.setNumberOfItemsFound(filteredResponse.size());
-    resultList.setSearchSummary(filteredResponse);
+
+    List<FTSData> filteredResponse = analyser.getFilteredResponse();
+    
+    List<FTSData> selectedStructures = analyser.selectStructures(filteredResponse);
+    resultList.setNumberOfItemsFound(selectedStructures.size());
+    resultList.setSearchSummary(selectedStructures);
     return resultList;
   }
 
@@ -340,8 +247,30 @@ public class ThreeDBStructureChooserQuerySource
     int typeColumnIndex = restable.getColumn("Provider").getModelIndex();
     int categoryColumnIndex = restable.getColumn("Model Category")
             .getModelIndex();
+    final int up_start_idx = restable.getColumn("Uniprot Start").getModelIndex();
+    final int up_end_idx = restable.getColumn("Uniprot End").getModelIndex();
+    int i=0;
+    
+    // bleugh!
+    Integer[] sellist = new Integer[selectedRows.length];
+    for (Integer row: selectedRows)
+    {
+      sellist[i++] = row;
+    }
+    // Sort rows by coverage
+    Arrays.sort(sellist,new Comparator<Integer>()
+    {
+      @Override
+      public int compare(Integer o1, Integer o2)
+      {
+        int o1_xt = ((Integer)restable.getValueAt(o1, up_end_idx)) - (Integer)restable.getValueAt(o1, up_start_idx);
+        int o2_xt = ((Integer)restable.getValueAt(o2, up_end_idx)) - (Integer)restable.getValueAt(o2, up_start_idx);
+        return o2_xt-o1_xt;
+      }
+    });
 
-    for (int row : selectedRows)
+    
+    for (int row : sellist)
     {
       // unique id - could be a horrible hash