From c689b58a9aaa704f5e5160e44da5b84e8984d6a0 Mon Sep 17 00:00:00 2001 From: gmungoc Date: Fri, 19 May 2017 11:47:16 +0100 Subject: [PATCH] JAL-2490 performant lookup of features for sorting --- resources/lang/Messages.properties | 1 - resources/lang/Messages_es.properties | 1 - src/jalview/analysis/AlignmentSorter.java | 245 +++++++++++------------- test/jalview/analysis/AlignmentSorterTest.java | 8 +- 4 files changed, 118 insertions(+), 137 deletions(-) diff --git a/resources/lang/Messages.properties b/resources/lang/Messages.properties index f6eeb26..4772d55 100644 --- a/resources/lang/Messages.properties +++ b/resources/lang/Messages.properties @@ -914,7 +914,6 @@ label.as_percentage = As Percentage error.not_implemented = Not implemented error.no_such_method_as_clone1_for = No such method as clone1 for {0} error.null_from_clone1 = Null from clone1! -error.implementation_error_sortbyfeature = Implementation Error - sortByFeature method must be one of FEATURE_SCORE, FEATURE_LABEL or FEATURE_DENSITY. error.not_yet_implemented = Not yet implemented error.unknown_type_dna_or_pep = Unknown Type {0} - dna or pep are the only allowed values. error.implementation_error_dont_know_threshold_annotationcolourgradient = Implementation error: don't know about threshold setting for current AnnotationColourGradient. diff --git a/resources/lang/Messages_es.properties b/resources/lang/Messages_es.properties index ad4d2c4..dd5ba99 100644 --- a/resources/lang/Messages_es.properties +++ b/resources/lang/Messages_es.properties @@ -839,7 +839,6 @@ label.as_percentage = Como Porcentaje error.not_implemented = No implementado error.no_such_method_as_clone1_for = No existe ese método como un clone1 de {0} error.null_from_clone1 = Nulo de clone1! -error.implementation_error_sortbyfeature = Error de implementación - sortByFeature debe ser uno de FEATURE_SCORE, FEATURE_LABEL o FEATURE_DENSITY. error.not_yet_implemented = No se ha implementado todavía error.unknown_type_dna_or_pep = Tipo desconocido {0} - dna o pep son los únicos valores permitidos error.implementation_error_dont_know_threshold_annotationcolourgradient = Error de implementación: no se conoce el valor umbral para el AnnotationColourGradient actual. diff --git a/src/jalview/analysis/AlignmentSorter.java b/src/jalview/analysis/AlignmentSorter.java index cc4c469..681d3b7 100755 --- a/src/jalview/analysis/AlignmentSorter.java +++ b/src/jalview/analysis/AlignmentSorter.java @@ -29,10 +29,11 @@ import jalview.datamodel.SequenceFeature; import jalview.datamodel.SequenceGroup; import jalview.datamodel.SequenceI; import jalview.datamodel.SequenceNode; -import jalview.util.MessageManager; import jalview.util.QuickSort; import java.util.ArrayList; +import java.util.Collections; +import java.util.Iterator; import java.util.List; /** @@ -52,7 +53,7 @@ import java.util.List; */ public class AlignmentSorter { - /** + /* * todo: refactor searches to follow a basic pattern: (search property, last * search state, current sort direction) */ @@ -70,19 +71,18 @@ public class AlignmentSorter static boolean sortTreeAscending = true; - /** - * last Annotation Label used by sortByScore + /* + * last Annotation Label used for sort by Annotation score */ - private static String lastSortByScore; - - private static boolean sortByScoreAscending = true; + private static String lastSortByAnnotation; - /** - * compact representation of last arguments to SortByFeatureScore + /* + * string hash of last arguments to sortByFeature + * (sort order toggles if this is unchanged between sorts) */ - private static String lastSortByFeatureScore; + private static String sortByFeatureCriteria; - private static boolean sortByFeatureScoreAscending = true; + private static boolean sortByFeatureAscending = true; private static boolean sortLengthAscending; @@ -658,9 +658,9 @@ public class AlignmentSorter } jalview.util.QuickSort.sort(scores, seqs); - if (lastSortByScore != scoreLabel) + if (lastSortByAnnotation != scoreLabel) { - lastSortByScore = scoreLabel; + lastSortByAnnotation = scoreLabel; setOrder(alignment, seqs); } else @@ -709,64 +709,34 @@ public class AlignmentSorter * If the sort is repeated for the same combination of types and groups, sort * order is reversed. * - * @param featureLabels + * @param featureTypes * a list of feature types to include (or null for all) - * @param groupLabels + * @param groups * a list of feature groups to include (or null for all) - * @param start + * @param startCol * start column position to include (base zero) - * @param stop + * @param endCol * end column position to include (base zero) * @param alignment * the alignment to be sorted * @param method * either "average_score" or "density" ("text" not yet implemented) */ - public static void sortByFeature(List featureLabels, - List groupLabels, int start, int stop, + public static void sortByFeature(List featureTypes, + List groups, final int startCol, final int endCol, AlignmentI alignment, String method) { if (method != FEATURE_SCORE && method != FEATURE_LABEL && method != FEATURE_DENSITY) { - throw new Error( - MessageManager - .getString("error.implementation_error_sortbyfeature")); + String msg = String + .format("Implementation Error - sortByFeature method must be either '%s' or '%s'", + FEATURE_SCORE, FEATURE_DENSITY); + System.err.println(msg); + return; } - boolean ignoreScore = method != FEATURE_SCORE; - StringBuffer scoreLabel = new StringBuffer(); - scoreLabel.append(start + stop + method); - // This doesn't quite work yet - we'd like to have a canonical ordering that - // can be preserved from call to call - if (featureLabels != null) - { - for (String label : featureLabels) - { - scoreLabel.append(label); - } - } - if (groupLabels != null) - { - for (String label : groupLabels) - { - scoreLabel.append(label); - } - } - - /* - * if resorting the same feature, toggle sort order - */ - if (lastSortByFeatureScore == null - || !scoreLabel.toString().equals(lastSortByFeatureScore)) - { - sortByFeatureScoreAscending = true; - } - else - { - sortByFeatureScoreAscending = !sortByFeatureScoreAscending; - } - lastSortByFeatureScore = scoreLabel.toString(); + flipFeatureSortIfUnchanged(method, featureTypes, groups, startCol, endCol); SequenceI[] seqs = alignment.getSequencesArray(); @@ -775,52 +745,42 @@ public class AlignmentSorter int hasScores = 0; // number of scores present on set double[] scores = new double[seqs.length]; int[] seqScores = new int[seqs.length]; - Object[] feats = new Object[seqs.length]; - double min = 0, max = 0; + Object[][] feats = new Object[seqs.length][]; + double min = 0d; + double max = 0d; + for (int i = 0; i < seqs.length; i++) { - SequenceFeature[] sf = seqs[i].getSequenceFeatures(); - if (sf == null) - { - sf = new SequenceFeature[0]; - } - else - { - SequenceFeature[] tmp = new SequenceFeature[sf.length]; - for (int s = 0; s < tmp.length; s++) - { - tmp[s] = sf[s]; - } - sf = tmp; - } - int sstart = (start == -1) ? start : seqs[i].findPosition(start); - int sstop = (stop == -1) ? stop : seqs[i].findPosition(stop); + /* + * get sequence residues overlapping column region + * and features for residue positions and specified types + */ + // TODO new method findPositions(startCol, endCol)? JAL-2544 + int startResidue = seqs[i].findPosition(startCol); + int endResidue = seqs[i].findPosition(endCol); + String[] types = featureTypes == null ? null : featureTypes + .toArray(new String[featureTypes.size()]); + List sfs = seqs[i].getFeatures().findFeatures( + startResidue, endResidue, types); + seqScores[i] = 0; scores[i] = 0.0; - int n = sf.length; - for (int f = 0; f < sf.length; f++) + + Iterator it = sfs.listIterator(); + while (it.hasNext()) { - // filter for selection criteria - if ( - // ignore features outwith alignment start-stop positions. - (sf[f].end < sstart || sf[f].begin > sstop) || - // or ignore based on selection criteria - (featureLabels != null && !AlignmentSorter - .containsIgnoreCase(sf[f].type, featureLabels)) - || (groupLabels != null - // problem here: we cannot eliminate null feature group features - && (sf[f].getFeatureGroup() != null && !AlignmentSorter - .containsIgnoreCase(sf[f].getFeatureGroup(), - groupLabels)))) + SequenceFeature sf = it.next(); + + String featureGroup = sf.getFeatureGroup(); + if (groups != null && featureGroup != null + && !groups.contains(featureGroup)) { - // forget about this feature - sf[f] = null; - n--; + it.remove(); } else { - // or, also take a look at the scores if necessary. - if (!ignoreScore && !Float.isNaN(sf[f].getScore())) + float score = sf.getScore(); + if (FEATURE_SCORE.equals(method) && !Float.isNaN(score)) { if (seqScores[i] == 0) { @@ -828,33 +788,26 @@ public class AlignmentSorter } seqScores[i]++; hasScore[i] = true; - scores[i] += sf[f].getScore(); // take the first instance of this - // score. + scores[i] += score; + // take the first instance of this score // ?? } } } - SequenceFeature[] fs; - feats[i] = fs = new SequenceFeature[n]; - if (n > 0) + + feats[i] = sfs.toArray(new SequenceFeature[sfs.size()]); + if (!sfs.isEmpty()) { - n = 0; - for (int f = 0; f < sf.length; f++) - { - if (sf[f] != null) - { - ((SequenceFeature[]) feats[i])[n++] = sf[f]; - } - } if (method == FEATURE_LABEL) { - // order the labels by alphabet - String[] labs = new String[fs.length]; - for (int l = 0; l < labs.length; l++) + // order the labels by alphabet (not yet implemented) + String[] labs = new String[sfs.size()]; + for (int l = 0; l < sfs.size(); l++) { - labs[l] = (fs[l].getDescription() != null ? fs[l] - .getDescription() : fs[l].getType()); + SequenceFeature sf = sfs.get(l); + String description = sf.getDescription(); + labs[l] = (description != null ? description : sf.getType()); } - QuickSort.sort(labs, ((Object[]) feats[i])); + QuickSort.sort(labs, feats[i]); } } if (hasScore[i]) @@ -864,23 +817,18 @@ public class AlignmentSorter // update the score bounds. if (hasScores == 1) { - max = min = scores[i]; + min = scores[i]; + max = min; } else { - if (max < scores[i]) - { - max = scores[i]; - } - if (min > scores[i]) - { - min = scores[i]; - } + max = Math.max(max, scores[i]); + min = Math.min(min, scores[i]); } } } - if (method == FEATURE_SCORE) + if (FEATURE_SCORE.equals(method)) { if (hasScores == 0) { @@ -905,9 +853,9 @@ public class AlignmentSorter } } } - QuickSort.sortByDouble(scores, seqs, sortByFeatureScoreAscending); + QuickSort.sortByDouble(scores, seqs, sortByFeatureAscending); } - else if (method == FEATURE_DENSITY) + else if (FEATURE_DENSITY.equals(method)) { for (int i = 0; i < seqs.length; i++) { @@ -917,18 +865,53 @@ public class AlignmentSorter // System.err.println("Sorting on Density: seq "+seqs[i].getName()+ // " Feats: "+featureCount+" Score : "+scores[i]); } - QuickSort.sortByDouble(scores, seqs, sortByFeatureScoreAscending); + QuickSort.sortByDouble(scores, seqs, sortByFeatureAscending); } - else + + setOrder(alignment, seqs); + } + + /** + * Builds a string hash of criteria for sorting, and if unchanged from last + * time, reverse the sort order + * + * @param method + * @param featureTypes + * @param groups + * @param startCol + * @param endCol + */ + protected static void flipFeatureSortIfUnchanged(String method, + List featureTypes, List groups, + final int startCol, final int endCol) + { + StringBuilder sb = new StringBuilder(64); + sb.append(startCol).append(method).append(endCol); + if (featureTypes != null) { - if (method == FEATURE_LABEL) - { - throw new Error( - MessageManager.getString("error.not_yet_implemented")); - } + Collections.sort(featureTypes); + sb.append(featureTypes.toString()); + } + if (groups != null) + { + Collections.sort(groups); + sb.append(groups.toString()); } + String scoreCriteria = sb.toString(); - setOrder(alignment, seqs); + /* + * if resorting on the same criteria, toggle sort order + */ + if (sortByFeatureCriteria == null + || !scoreCriteria.equals(sortByFeatureCriteria)) + { + sortByFeatureAscending = true; + } + else + { + sortByFeatureAscending = !sortByFeatureAscending; + } + sortByFeatureCriteria = scoreCriteria; } } diff --git a/test/jalview/analysis/AlignmentSorterTest.java b/test/jalview/analysis/AlignmentSorterTest.java index 0255f66..3b9be23 100644 --- a/test/jalview/analysis/AlignmentSorterTest.java +++ b/test/jalview/analysis/AlignmentSorterTest.java @@ -31,7 +31,7 @@ public class AlignmentSorterTest /* * sort with no score features does nothing */ - PA.setValue(AlignmentSorter.class, "lastSortByFeatureScore", null); + PA.setValue(AlignmentSorter.class, "sortByFeatureCriteria", null); AlignmentSorter.sortByFeature(null, null, 0, al.getWidth(), al, AlignmentSorter.FEATURE_SCORE); @@ -62,9 +62,9 @@ public class AlignmentSorterTest /* * sort by ascending score, no filter on feature type or group - * NB sort order for the same feature set (none) is toggled so descending + * NB sort order for the same feature set (none) gets toggled, so descending */ - PA.setValue(AlignmentSorter.class, "sortByFeatureScoreAscending", true); + PA.setValue(AlignmentSorter.class, "sortByFeatureAscending", true); AlignmentSorter.sortByFeature(null, null, 0, al.getWidth(), al, AlignmentSorter.FEATURE_SCORE); assertSame(al.getSequenceAt(3), seq3); // -0.5 @@ -114,7 +114,7 @@ public class AlignmentSorterTest * seq1 is now 2.0, seq3 is now -4 */ // fails because seq1.findPosition(4) returns 4 - // although residue 4 is in column 5! + // although residue 4 is in column 5! - JAL-2544 AlignmentSorter.sortByFeature(null, null, 0, 4, al, AlignmentSorter.FEATURE_SCORE); assertSame(al.getSequenceAt(0), seq3); // -4 -- 1.7.10.2