From e36731274aafe1e930805c19a0f60372c4c6392a Mon Sep 17 00:00:00 2001 From: Jim Procter Date: Wed, 8 Sep 2021 16:58:22 +0100 Subject: [PATCH] JAL-3829 refactor and JAL-3391 JAL-2935 first stab at coverage based selection of models for a sequence --- .../gui/structurechooser/TDBResultAnalyser.java | 203 ++++++++++++++++++++ .../ThreeDBStructureChooserQuerySource.java | 135 +++---------- 2 files changed, 235 insertions(+), 103 deletions(-) create mode 100644 src/jalview/gui/structurechooser/TDBResultAnalyser.java diff --git a/src/jalview/gui/structurechooser/TDBResultAnalyser.java b/src/jalview/gui/structurechooser/TDBResultAnalyser.java new file mode 100644 index 0000000..f73f397 --- /dev/null +++ b/src/jalview/gui/structurechooser/TDBResultAnalyser.java @@ -0,0 +1,203 @@ +package jalview.gui.structurechooser; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.BitSet; +import java.util.Collection; +import java.util.Collections; +import java.util.Comparator; +import java.util.List; + +import jalview.datamodel.SequenceI; +import jalview.fts.api.FTSData; +import jalview.fts.core.FTSRestRequest; + +public class TDBResultAnalyser +{ + + /** + * model categories - update as needed. warnings output if unknown types + * encountered. 
+ * + * Order denotes 'trust' + */ + private static List EXP_CATEGORIES = Arrays + .asList(new String[] + { "EXPERIMENTALLY DETERMINED", "DEEP LEARNING", + "TEMPLATE-BASED" }); + + private SequenceI seq; + + private Collection collectedResults; + + private FTSRestRequest lastTdbRequest; + + private int idx_ups; + + private int idx_upe; + + private int idx_mcat; + + private int idx_mqual; + + private int idx_resol; + + public TDBResultAnalyser(SequenceI seq, + Collection collectedResults, + FTSRestRequest lastTdbRequest) + { + this.seq = seq; + this.collectedResults = collectedResults; + this.lastTdbRequest = lastTdbRequest; + idx_ups = lastTdbRequest.getFieldIndex("Uniprot Start"); + idx_upe = lastTdbRequest.getFieldIndex("Uniprot End"); + idx_mcat = lastTdbRequest.getFieldIndex("Model Category"); + idx_mqual = lastTdbRequest.getFieldIndex("Qmean"); + idx_resol = lastTdbRequest.getFieldIndex("Resolution"); + } + private final int scoreCategory(String cat) + { + // TODO: make quicker + int idx = EXP_CATEGORIES.indexOf(cat.toUpperCase()); + if (idx == -1) + { + System.out.println("Unknown category: '" + cat + "'"); + } + return -EXP_CATEGORIES.size() - idx; + } + + /** + * sorts records discovered by 3D beacons and excludes any that don't + * intersect with the sequence's start/end range + * + * @return + */ + public List getFilteredResponse() + { + List filteredResponse = new ArrayList(); + + // ignore anything outside the sequence region + for (FTSData row : collectedResults) + { + int up_s = (Integer) row.getSummaryData()[idx_ups]; + int up_e = (Integer) row.getSummaryData()[idx_upe]; + + if (seq == row.getSummaryData()[0] && up_e > seq.getStart() + && up_s < seq.getEnd()) + { + filteredResponse.add(row); + } + } + // sort according to decreasing length, + // increasing start + Collections.sort(filteredResponse, new Comparator() + { + + + @Override + public int compare(FTSData o1, FTSData o2) + { + Object[] o1data = o1.getSummaryData(); + Object[] o2data = 
o2.getSummaryData(); + int o1_s = (Integer) o1data[idx_ups]; + int o1_e = (Integer) o1data[idx_upe]; + int o1_cat = scoreCategory((String) o1data[idx_mcat]); + int o2_s = (Integer) o2data[idx_ups]; + int o2_e = (Integer) o2data[idx_upe]; + int o2_cat = scoreCategory((String) o2data[idx_mcat]); + + if (o1_cat == o2_cat) + { + if (o1_s == o2_s) + { + int o1_xtent = o1_e - o1_s; + int o2_xtent = o2_e - o2_s; + if (o1_xtent == o2_xtent) + { + if (o1_cat == scoreCategory(EXP_CATEGORIES.get(0))) + { + // experimental structures, so rank on quality + double o1_res = (Double) o1data[idx_resol]; + double o2_res = (Double) o2data[idx_resol]; + return (o2_res < o1_res) ? 1 : (o2_res == o1_res) ? 0 : -1; + } + else + { + // models, so rank on qmean + float o1_mq = (Float) o1data[idx_mqual]; + float o2_mq = (Float) o2data[idx_mqual]; + return (o2_mq < o1_mq) ? 1 : (o2_mq == o1_mq) ? 0 : -1; + } + } + else + { + return o1_xtent - o2_xtent; + } + } + else + { + return o1_s - o2_s; + } + } + else + { + return o2_cat - o1_cat; + } + } + + @Override + public boolean equals(Object obj) + { + return super.equals(obj); + } + }); + return filteredResponse; + } + + /** + * return list of structures to be marked as selected for this sequence according to given criteria + * @param filteredStructures - sorted, filtered structures from getFilteredResponse + * + */ + public List selectStructures(List filteredStructures) + { + List selected = new ArrayList(); + BitSet cover = new BitSet(); + cover.set(seq.getStart(),seq.getEnd()); + // walk down the list of structures, selecting some to add to selected + for (FTSData structure:filteredStructures) + { + Object[] odata=structure.getSummaryData(); + int o1_s = (Integer) odata[idx_ups]; + int o1_e = (Integer) odata[idx_upe]; + int o1_cat = scoreCategory((String) odata[idx_mcat]); + BitSet scover = new BitSet(); + // measure intersection + scover.set(o1_s,o1_e); + scover.and(cover); + if (scover.cardinality()>4) + { + selected.add(structure); + // 
clear the range covered by this structure + cover.andNot(scover); + } + } + // final step is to sort on length - this might help the superposition process + Collections.sort(selected,new Comparator() + { + @Override + public int compare(FTSData o1, FTSData o2) + { + Object[] o1data = o1.getSummaryData(); + Object[] o2data = o2.getSummaryData(); + int o1_xt = ((Integer) o1data[idx_upe]) - ((Integer) o1data[idx_ups]); + int o1_cat = scoreCategory((String) o1data[idx_mcat]); + int o2_xt = ((Integer) o2data[idx_upe]-(Integer) o2data[idx_ups]); + int o2_cat = scoreCategory((String) o2data[idx_mcat]); + return o2_xt-o1_xt; + } + }); + return selected; + } + +} diff --git a/src/jalview/gui/structurechooser/ThreeDBStructureChooserQuerySource.java b/src/jalview/gui/structurechooser/ThreeDBStructureChooserQuerySource.java index 1c04d3a..e5f96be 100644 --- a/src/jalview/gui/structurechooser/ThreeDBStructureChooserQuerySource.java +++ b/src/jalview/gui/structurechooser/ThreeDBStructureChooserQuerySource.java @@ -201,16 +201,6 @@ public class ThreeDBStructureChooserQuerySource } /** - * model categories - update as needed. warnings output if unknown types - * encountered. 
- * - * Order denotes 'trust' - */ - private static List EXP_CATEGORIES = Arrays - .asList(new String[] - { "EXPERIMENTALLY DETERMINED", "DEEP LEARNING", "TEMPLATE-BASED" }); - - /** * FTSRestClient specific query builder to pick top ranked entry from a * fetchStructuresMetaData query * @@ -231,99 +221,16 @@ public class ThreeDBStructureChooserQuerySource boolean b) throws Exception { - List filteredResponse = new ArrayList(); - final int idx_ups = lastTdbRequest.getFieldIndex("Uniprot Start"); - final int idx_upe = lastTdbRequest.getFieldIndex("Uniprot End"); - final int idx_mcat = lastTdbRequest.getFieldIndex("Model Category"); - final int idx_mqual = lastTdbRequest.getFieldIndex("Qmean"); - final int idx_resol = lastTdbRequest.getFieldIndex("Resolution"); - - // ignore anything outside the sequence region - for (FTSData row : collectedResults) - { - int up_s = (Integer) row.getSummaryData()[idx_ups]; - int up_e = (Integer) row.getSummaryData()[idx_upe]; - - if (seq == row.getSummaryData()[0] && up_e > seq.getStart() - && up_s < seq.getEnd()) - { - filteredResponse.add(row); - } - } - // sort according to decreasing length, - // increasing start - Collections.sort(filteredResponse, new Comparator() - { - - private final int scoreCategory(String cat) - { - // TODO: make quicker - int idx = EXP_CATEGORIES.indexOf(cat.toUpperCase()); - if (idx == -1) - { - System.out.println("Unknown category: '" + cat + "'"); - } - return -EXP_CATEGORIES.size() - idx; - } - - @Override - public int compare(FTSData o1, FTSData o2) - { - int o1_s = (Integer) o1.getSummaryData()[idx_ups]; - int o1_e = (Integer) o1.getSummaryData()[idx_upe]; - int o1_cat = scoreCategory((String) o1.getSummaryData()[idx_mcat]); - int o2_s = (Integer) o2.getSummaryData()[idx_ups]; - int o2_e = (Integer) o2.getSummaryData()[idx_upe]; - int o2_cat = scoreCategory((String) o2.getSummaryData()[idx_mcat]); - - if (o1_cat == o2_cat) - { - if (o1_s == o2_s) - { - int o1_xtent = o1_e - o1_s; - int o2_xtent = 
o2_e - o2_s; - if (o1_xtent == o2_xtent) - { - if (o1_cat == scoreCategory(EXP_CATEGORIES.get(0))) - { - // experimental structures, so rank on quality - double o1_res = (Double) o1.getSummaryData()[idx_resol]; - double o2_res = (Double) o2.getSummaryData()[idx_resol]; - return (o2_res < o1_res) ? 1 : (o2_res == o1_res) ? 0 : -1; - } - else - { - // models, so rank on qmean - float o1_mq = (Float) o1.getSummaryData()[idx_mqual]; - float o2_mq = (Float) o2.getSummaryData()[idx_mqual]; - return (o2_mq < o1_mq) ? 1 : (o2_mq == o1_mq) ? 0 : -1; - } - } - else - { - return o1_xtent - o2_xtent; - } - } - else - { - return o1_s - o2_s; - } - } - else - { - return o2_cat - o1_cat; - } - } - - @Override - public boolean equals(Object obj) - { - return super.equals(obj); - } - }); + TDBResultAnalyser analyser= new TDBResultAnalyser(seq,collectedResults, lastTdbRequest); + + FTSRestResponse resultList = new FTSRestResponse(); - resultList.setNumberOfItemsFound(filteredResponse.size()); - resultList.setSearchSummary(filteredResponse); + + List filteredResponse = analyser.getFilteredResponse(); + + List selectedStructures = analyser.selectStructures(filteredResponse); + resultList.setNumberOfItemsFound(selectedStructures.size()); + resultList.setSearchSummary(selectedStructures); return resultList; } @@ -340,8 +247,30 @@ public class ThreeDBStructureChooserQuerySource int typeColumnIndex = restable.getColumn("Provider").getModelIndex(); int categoryColumnIndex = restable.getColumn("Model Category") .getModelIndex(); + final int up_start_idx = restable.getColumn("Uniprot Start").getModelIndex(); + final int up_end_idx = restable.getColumn("Uniprot End").getModelIndex(); + int i=0; + + // bleugh! 
+ Integer[] sellist = new Integer[selectedRows.length]; + for (Integer row: selectedRows) + { + sellist[i++] = row; + } + // Sort rows by coverage + Arrays.sort(sellist,new Comparator() + { + @Override + public int compare(Integer o1, Integer o2) + { + int o1_xt = ((Integer)restable.getValueAt(o1, up_end_idx)) - (Integer)restable.getValueAt(o1, up_start_idx); + int o2_xt = ((Integer)restable.getValueAt(o2, up_end_idx)) - (Integer)restable.getValueAt(o2, up_start_idx); + return o2_xt-o1_xt; + } + }); - for (int row : selectedRows) + + for (int row : sellist) { // unique id - could be a horrible hash -- 1.7.10.2