X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=src%2Fjalview%2Fanalysis%2FAlignmentSorter.java;h=81bddc2ebd637e7908687360a56e373447ce83e1;hb=57738a1f3c19b1c3a00bd3ac5108f8cd0af32f99;hp=fe2cfc7c7dfb70d5861f72c2dbc52831a3e3c743;hpb=506d60f0e188723ddc91c26824b41ac7034df3fe;p=jalview.git diff --git a/src/jalview/analysis/AlignmentSorter.java b/src/jalview/analysis/AlignmentSorter.java index fe2cfc7..81bddc2 100755 --- a/src/jalview/analysis/AlignmentSorter.java +++ b/src/jalview/analysis/AlignmentSorter.java @@ -1,27 +1,40 @@ /* - * Jalview - A Sequence Alignment Editor and Viewer (Version 2.4) - * Copyright (C) 2008 AM Waterhouse, J Procter, G Barton, M Clamp, S Searle + * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$) + * Copyright (C) $$Year-Rel$$ The Jalview Authors * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version 2 - * of the License, or (at your option) any later version. + * This file is part of Jalview. * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. + * Jalview is free software: you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, either version 3 + * of the License, or (at your option) any later version. + * + * Jalview is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty + * of MERCHANTABILITY or FITNESS FOR A PARTICULAR + * PURPOSE. See the GNU General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA + * along with Jalview. If not, see . + * The Jalview Authors are detailed in the 'AUTHORS' file. */ package jalview.analysis; -import java.util.*; - -import jalview.datamodel.*; -import jalview.util.*; +import jalview.analysis.scoremodels.PIDModel; +import jalview.analysis.scoremodels.SimilarityParams; +import jalview.datamodel.AlignmentAnnotation; +import jalview.datamodel.AlignmentI; +import jalview.datamodel.AlignmentOrder; +import jalview.datamodel.SequenceFeature; +import jalview.datamodel.SequenceGroup; +import jalview.datamodel.SequenceI; +import jalview.datamodel.SequenceNode; +import jalview.util.QuickSort; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.Iterator; +import java.util.List; /** * Routines for manipulating the order of a multiple sequence alignment TODO: @@ -40,6 +53,10 @@ import jalview.util.*; */ public class AlignmentSorter { + /* + * todo: refactor searches to follow a basic pattern: (search property, last + * search state, current sort direction) + */ static boolean sortIdAscending = true; static int lastGroupHash = 0; @@ -50,38 +67,54 @@ public class AlignmentSorter static boolean sortOrderAscending = true; - static NJTree lastTree = null; + static TreeModel lastTree = null; static boolean sortTreeAscending = true; - private static String lastSortByScore; + /* + * last Annotation Label used for sort by Annotation score + */ + private static String lastSortByAnnotation; + + /* + * string hash of last arguments to sortByFeature + * (sort order toggles if this is unchanged between sorts) + */ + private static String sortByFeatureCriteria; + + private static boolean sortByFeatureAscending = true; + + private static boolean sortLengthAscending; /** - * Sort by Percentage Identity w.r.t. s + * Sorts sequences in the alignment by Percentage Identity with the given + * reference sequence, sorting the highest identity to the top * * @param align - * AlignmentI + * AlignmentI * @param s - * SequenceI - * @param tosort - * sequences from align that are to be sorted. + * SequenceI + * @param end */ - public static void sortByPID(AlignmentI align, SequenceI s, - SequenceI[] tosort) + public static void sortByPID(AlignmentI align, SequenceI s) { int nSeq = align.getHeight(); float[] scores = new float[nSeq]; SequenceI[] seqs = new SequenceI[nSeq]; + String refSeq = s.getSequenceAsString(); + SimilarityParams pidParams = new SimilarityParams(true, true, true, + true); for (int i = 0; i < nSeq; i++) { - scores[i] = Comparison.PID(align.getSequenceAt(i) - .getSequenceAsString(), s.getSequenceAsString()); + scores[i] = (float) PIDModel.computePID( + align.getSequenceAt(i).getSequenceAsString(), refSeq, + pidParams); seqs[i] = align.getSequenceAt(i); } - QuickSort.sort(scores, 0, scores.length - 1, seqs); + QuickSort.sort(scores, seqs); setReverseOrder(align, seqs); } @@ -90,9 +123,9 @@ public class AlignmentSorter * Reverse the order of the sort * * @param align - * DOCUMENT ME! + * DOCUMENT ME! * @param seqs - * DOCUMENT ME! + * DOCUMENT ME! */ private static void setReverseOrder(AlignmentI align, SequenceI[] seqs) { @@ -110,11 +143,15 @@ public class AlignmentSorter } // NOTE: DO NOT USE align.setSequenceAt() here - it will NOT work - for (int i = 0; i < len; i++) + List asq = align.getSequences(); + synchronized (asq) { - // SequenceI tmp = seqs[i]; - align.getSequences().setElementAt(seqs[nSeq - i - 1], i); - align.getSequences().setElementAt(seqs[i], nSeq - i - 1); + for (int i = 0; i < len; i++) + { + // SequenceI tmp = seqs[i]; + asq.set(i, seqs[nSeq - i - 1]); + asq.set(nSeq - i - 1, seqs[i]); + } } } @@ -122,11 +159,11 @@ public class AlignmentSorter * Sets the Alignment object with the given sequences * * @param align - * Alignment object to be updated + * Alignment object to be updated * @param tmp - * sequences as a vector + * sequences as a vector */ - private static void setOrder(AlignmentI align, Vector tmp) + private static void setOrder(AlignmentI align, List tmp) { setOrder(align, vectorSubsetToArray(tmp, align.getSequences())); } @@ -135,38 +172,40 @@ public class AlignmentSorter * Sets the Alignment object with the given sequences * * @param align - * DOCUMENT ME! + * DOCUMENT ME! * @param seqs - * sequences as an array + * sequences as an array */ public static void setOrder(AlignmentI align, SequenceI[] seqs) { // NOTE: DO NOT USE align.setSequenceAt() here - it will NOT work - Vector algn = align.getSequences(); - Vector tmp = new Vector(); - - for (int i = 0; i < seqs.length; i++) + List algn = align.getSequences(); + synchronized (algn) { - if (algn.contains(seqs[i])) + List tmp = new ArrayList<>(); + + for (int i = 0; i < seqs.length; i++) { - tmp.addElement(seqs[i]); + if (algn.contains(seqs[i])) + { + tmp.add(seqs[i]); + } } - } - algn.removeAllElements(); - // User may have hidden seqs, then clicked undo or redo - for (int i = 0; i < tmp.size(); i++) - { - algn.addElement(tmp.elementAt(i)); + algn.clear(); + // User may have hidden seqs, then clicked undo or redo + for (int i = 0; i < tmp.size(); i++) + { + algn.add(tmp.get(i)); + } } - } /** * Sorts by ID. Numbers are sorted before letters. * * @param align - * The alignment object to sort + * The alignment object to sort */ public static void sortByID(AlignmentI align) { @@ -196,18 +235,51 @@ public class AlignmentSorter } /** + * Sorts by sequence length + * + * @param align + * The alignment object to sort + */ + public static void sortByLength(AlignmentI align) + { + int nSeq = align.getHeight(); + + float[] length = new float[nSeq]; + SequenceI[] seqs = new SequenceI[nSeq]; + + for (int i = 0; i < nSeq; i++) + { + seqs[i] = align.getSequenceAt(i); + length[i] = (seqs[i].getEnd() - seqs[i].getStart()); + } + + QuickSort.sort(length, seqs); + + if (sortLengthAscending) + { + setReverseOrder(align, seqs); + } + else + { + setOrder(align, seqs); + } + + sortLengthAscending = !sortLengthAscending; + } + + /** * Sorts the alignment by size of group.
* Maintains the order of sequences in each group by order in given alignment * object. * * @param align - * sorts the given alignment object by group + * sorts the given alignment object by group */ public static void sortByGroup(AlignmentI align) { // MAINTAINS ORIGNAL SEQUENCE ORDER, // ORDERS BY GROUP SIZE - Vector groups = new Vector(); + List groups = new ArrayList<>(); if (groups.hashCode() != lastGroupHash) { @@ -221,17 +293,15 @@ public class AlignmentSorter // SORTS GROUPS BY SIZE // //////////////////// - for (int i = 0; i < align.getGroups().size(); i++) + for (SequenceGroup sg : align.getGroups()) { - SequenceGroup sg = (SequenceGroup) align.getGroups().elementAt(i); - for (int j = 0; j < groups.size(); j++) { - SequenceGroup sg2 = (SequenceGroup) groups.elementAt(j); + SequenceGroup sg2 = groups.get(j); if (sg.getSize() > sg2.getSize()) { - groups.insertElementAt(sg, j); + groups.add(j, sg); break; } @@ -239,22 +309,22 @@ public class AlignmentSorter if (!groups.contains(sg)) { - groups.addElement(sg); + groups.add(sg); } } // NOW ADD SEQUENCES MAINTAINING ALIGNMENT ORDER // ///////////////////////////////////////////// - Vector seqs = new Vector(); + List seqs = new ArrayList<>(); for (int i = 0; i < groups.size(); i++) { - SequenceGroup sg = (SequenceGroup) groups.elementAt(i); + SequenceGroup sg = groups.get(i); SequenceI[] orderedseqs = sg.getSequencesInOrder(align); for (int j = 0; j < orderedseqs.length; j++) { - seqs.addElement(orderedseqs[j]); + seqs.add(orderedseqs[j]); } } @@ -270,39 +340,25 @@ public class AlignmentSorter } /** - * Converts Vector to array. java 1.18 does not have Vector.toArray() + * Select sequences in order from tmp that is present in mask, and any + * remaining sequences in mask not in tmp * * @param tmp - * Vector of SequenceI objects - * - * @return array of Sequence[] - */ - private static SequenceI[] vectorToArray(Vector tmp) - { - SequenceI[] seqs = new SequenceI[tmp.size()]; - - for (int i = 0; i < tmp.size(); i++) - { - seqs[i] = (SequenceI) tmp.elementAt(i); - } - - return seqs; - } - - /** - * DOCUMENT ME! - * - * @param tmp - * DOCUMENT ME! + * thread safe collection of sequences * @param mask - * DOCUMENT ME! + * thread safe collection of sequences * - * @return DOCUMENT ME! + * @return intersect(tmp,mask)+intersect(complement(tmp),mask) */ - private static SequenceI[] vectorSubsetToArray(Vector tmp, Vector mask) + private static SequenceI[] vectorSubsetToArray(List tmp, + List mask) { - Vector seqs = new Vector(); - int i; + // or? + // tmp2 = tmp.retainAll(mask); + // return tmp2.addAll(mask.removeAll(tmp2)) + + ArrayList seqs = new ArrayList<>(); + int i, idx; boolean[] tmask = new boolean[mask.size()]; for (i = 0; i < mask.size(); i++) @@ -312,12 +368,12 @@ public class AlignmentSorter for (i = 0; i < tmp.size(); i++) { - Object sq = tmp.elementAt(i); - - if (mask.contains(sq) && tmask[mask.indexOf(sq)]) + SequenceI sq = tmp.get(i); + idx = mask.indexOf(sq); + if (idx > -1 && tmask[idx]) { - tmask[mask.indexOf(sq)] = false; - seqs.addElement(sq); + tmask[idx] = false; + seqs.add(sq); } } @@ -325,25 +381,25 @@ public class AlignmentSorter { if (tmask[i]) { - seqs.addElement(mask.elementAt(i)); + seqs.add(mask.get(i)); } } - return vectorToArray(seqs); + return seqs.toArray(new SequenceI[seqs.size()]); } /** * Sorts by a given AlignmentOrder object * * @param align - * Alignment to order + * Alignment to order * @param order - * specified order for alignment + * specified order for alignment */ public static void sortBy(AlignmentI align, AlignmentOrder order) { // Get an ordered vector of sequences which may also be present in align - Vector tmp = order.getOrder(); + List tmp = order.getOrder(); if (lastOrder == order) { @@ -360,7 +416,8 @@ public class AlignmentSorter } else { - setReverseOrder(align, vectorSubsetToArray(tmp, align.getSequences())); + setReverseOrder(align, + vectorSubsetToArray(tmp, align.getSequences())); } } @@ -368,17 +425,18 @@ public class AlignmentSorter * DOCUMENT ME! * * @param align - * alignment to order + * alignment to order * @param tree - * tree which has + * tree which has * * @return DOCUMENT ME! */ - private static Vector getOrderByTree(AlignmentI align, NJTree tree) + private static List getOrderByTree(AlignmentI align, + TreeModel tree) { int nSeq = align.getHeight(); - Vector tmp = new Vector(); + List tmp = new ArrayList<>(); tmp = _sortByTree(tree.getTopNode(), tmp, align.getSequences()); @@ -387,15 +445,16 @@ public class AlignmentSorter // TODO: JBPNote - decide if this is always an error // (eg. not when a tree is associated to another alignment which has more // sequences) - if (tmp.size() < nSeq) + if (tmp.size() != nSeq) { addStrays(align, tmp); } if (tmp.size() != nSeq) { - System.err.println("ERROR: tmp.size()=" + tmp.size() + " != nseq=" - + nSeq + " in getOrderByTree"); + System.err.println("WARNING: tmp.size()=" + tmp.size() + " != nseq=" + + nSeq + + " in getOrderByTree - tree contains sequences not in alignment"); } } @@ -406,13 +465,13 @@ public class AlignmentSorter * Sorts the alignment by a given tree * * @param align - * alignment to order + * alignment to order * @param tree - * tree which has + * tree which has */ - public static void sortByTree(AlignmentI align, NJTree tree) + public static void sortByTree(AlignmentI align, TreeModel tree) { - Vector tmp = getOrderByTree(align, tree); + List tmp = getOrderByTree(align, tree); // tmp should properly permute align with tree. if (lastTree != tree) @@ -431,7 +490,8 @@ public class AlignmentSorter } else { - setReverseOrder(align, vectorSubsetToArray(tmp, align.getSequences())); + setReverseOrder(align, + vectorSubsetToArray(tmp, align.getSequences())); } } @@ -439,23 +499,23 @@ public class AlignmentSorter * DOCUMENT ME! * * @param align - * DOCUMENT ME! - * @param seqs - * DOCUMENT ME! + * DOCUMENT ME! + * @param tmp + * DOCUMENT ME! */ - private static void addStrays(AlignmentI align, Vector seqs) + private static void addStrays(AlignmentI align, List tmp) { int nSeq = align.getHeight(); for (int i = 0; i < nSeq; i++) { - if (!seqs.contains(align.getSequenceAt(i))) + if (!tmp.contains(align.getSequenceAt(i))) { - seqs.addElement(align.getSequenceAt(i)); + tmp.add(align.getSequenceAt(i)); } } - if (nSeq != seqs.size()) + if (nSeq != tmp.size()) { System.err .println("ERROR: Size still not right even after addStrays"); @@ -466,16 +526,16 @@ public class AlignmentSorter * DOCUMENT ME! * * @param node - * DOCUMENT ME! + * DOCUMENT ME! * @param tmp - * DOCUMENT ME! + * DOCUMENT ME! * @param seqset - * DOCUMENT ME! + * DOCUMENT ME! * * @return DOCUMENT ME! */ - private static Vector _sortByTree(SequenceNode node, Vector tmp, - Vector seqset) + private static List _sortByTree(SequenceNode node, + List tmp, List seqset) { if (node == null) { @@ -491,9 +551,11 @@ public class AlignmentSorter { if (node.element() instanceof SequenceI) { - if (!tmp.contains(node.element())) + if (!tmp.contains(node.element())) // && (seqset==null || + // seqset.size()==0 || + // seqset.contains(tmp))) { - tmp.addElement((SequenceI) node.element()); + tmp.add((SequenceI) node.element()); } } } @@ -524,7 +586,7 @@ public class AlignmentSorter for (int i = 0; i < alignment.length; i++) { - ids[i] = (new Float(alignment[i].getName().substring(8))) + ids[i] = (Float.valueOf(alignment[i].getName().substring(8))) .floatValue(); } @@ -536,17 +598,17 @@ public class AlignmentSorter * particular scoreLabel. Or reverse if same label was used previously * * @param scoreLabel - * exact label for sequence associated AlignmentAnnotation - * scores to use for sorting. + * exact label for sequence associated AlignmentAnnotation scores to + * use for sorting. * @param alignment - * sequences to be sorted + * sequences to be sorted */ public static void sortByAnnotationScore(String scoreLabel, AlignmentI alignment) { SequenceI[] seqs = alignment.getSequencesArray(); boolean[] hasScore = new boolean[seqs.length]; // per sequence score - // presence + // presence int hasScores = 0; // number of scores present on set double[] scores = new double[seqs.length]; double min = 0, max = 0; @@ -558,7 +620,7 @@ public class AlignmentSorter hasScores++; hasScore[i] = true; scores[i] = scoreAnn[0].getScore(); // take the first instance of this - // score. + // score. if (hasScores == 1) { max = min = scores[i]; @@ -590,15 +652,15 @@ public class AlignmentSorter { if (!hasScore[i]) { - scores[i] = (max + i); + scores[i] = (max + i + 1.0); } } } jalview.util.QuickSort.sort(scores, seqs); - if (lastSortByScore != scoreLabel) + if (lastSortByAnnotation != scoreLabel) { - lastSortByScore = scoreLabel; + lastSortByAnnotation = scoreLabel; setOrder(alignment, seqs); } else @@ -606,4 +668,232 @@ public class AlignmentSorter setReverseOrder(alignment, seqs); } } + + /** + * types of feature ordering: Sort by score : average score - or total score - + * over all features in region Sort by feature label text: (or if null - + * feature type text) - numerical or alphabetical Sort by feature density: + * based on counts - ignoring individual text or scores for each feature + */ + public static String FEATURE_SCORE = "average_score"; + + public static String FEATURE_LABEL = "text"; + + public static String FEATURE_DENSITY = "density"; + + /** + * Sort sequences by feature score or density, optionally restricted by + * feature types, feature groups, or alignment start/end positions. + *

+ * If the sort is repeated for the same combination of types and groups, sort + * order is reversed. + * + * @param featureTypes + * a list of feature types to include (or null for all) + * @param groups + * a list of feature groups to include (or null for all) + * @param startCol + * start column position to include (base zero) + * @param endCol + * end column position to include (base zero) + * @param alignment + * the alignment to be sorted + * @param method + * either "average_score" or "density" ("text" not yet implemented) + */ + public static void sortByFeature(List featureTypes, + List groups, final int startCol, final int endCol, + AlignmentI alignment, String method) + { + if (method != FEATURE_SCORE && method != FEATURE_LABEL + && method != FEATURE_DENSITY) + { + String msg = String.format( + "Implementation Error - sortByFeature method must be either '%s' or '%s'", + FEATURE_SCORE, FEATURE_DENSITY); + System.err.println(msg); + return; + } + + flipFeatureSortIfUnchanged(method, featureTypes, groups, startCol, + endCol); + + SequenceI[] seqs = alignment.getSequencesArray(); + + boolean[] hasScore = new boolean[seqs.length]; // per sequence score + // presence + int hasScores = 0; // number of scores present on set + double[] scores = new double[seqs.length]; + int[] seqScores = new int[seqs.length]; + Object[][] feats = new Object[seqs.length][]; + double min = 0d; + double max = 0d; + + for (int i = 0; i < seqs.length; i++) + { + /* + * get sequence residues overlapping column region + * and features for residue positions and specified types + */ + String[] types = featureTypes == null ? null + : featureTypes.toArray(new String[featureTypes.size()]); + List sfs = seqs[i].findFeatures(startCol + 1, + endCol + 1, types); + + seqScores[i] = 0; + scores[i] = 0.0; + + Iterator it = sfs.listIterator(); + while (it.hasNext()) + { + SequenceFeature sf = it.next(); + + /* + * accept all features with null or empty group, otherwise + * check group is one of the currently visible groups + */ + String featureGroup = sf.getFeatureGroup(); + if (groups != null && featureGroup != null + && !"".equals(featureGroup) + && !groups.contains(featureGroup)) + { + it.remove(); + } + else + { + float score = sf.getScore(); + if (FEATURE_SCORE.equals(method) && !Float.isNaN(score)) + { + if (seqScores[i] == 0) + { + hasScores++; + } + seqScores[i]++; + hasScore[i] = true; + scores[i] += score; + // take the first instance of this score // ?? + } + } + } + + feats[i] = sfs.toArray(new SequenceFeature[sfs.size()]); + if (!sfs.isEmpty()) + { + if (method == FEATURE_LABEL) + { + // order the labels by alphabet (not yet implemented) + String[] labs = new String[sfs.size()]; + for (int l = 0; l < sfs.size(); l++) + { + SequenceFeature sf = sfs.get(l); + String description = sf.getDescription(); + labs[l] = (description != null ? description : sf.getType()); + } + QuickSort.sort(labs, feats[i]); + } + } + if (hasScore[i]) + { + // compute average score + scores[i] /= seqScores[i]; + // update the score bounds. + if (hasScores == 1) + { + min = scores[i]; + max = min; + } + else + { + max = Math.max(max, scores[i]); + min = Math.min(min, scores[i]); + } + } + } + + if (FEATURE_SCORE.equals(method)) + { + if (hasScores == 0) + { + return; // do nothing - no scores present to sort by. + } + // pad score matrix + if (hasScores < seqs.length) + { + for (int i = 0; i < seqs.length; i++) + { + if (!hasScore[i]) + { + scores[i] = (max + 1 + i); + } + else + { + // int nf = (feats[i] == null) ? 0 + // : ((SequenceFeature[]) feats[i]).length; + // // System.err.println("Sorting on Score: seq " + + // seqs[i].getName() + // + " Feats: " + nf + " Score : " + scores[i]); + } + } + } + QuickSort.sortByDouble(scores, seqs, sortByFeatureAscending); + } + else if (FEATURE_DENSITY.equals(method)) + { + for (int i = 0; i < seqs.length; i++) + { + int featureCount = feats[i] == null ? 0 + : ((SequenceFeature[]) feats[i]).length; + scores[i] = featureCount; + // System.err.println("Sorting on Density: seq "+seqs[i].getName()+ + // " Feats: "+featureCount+" Score : "+scores[i]); + } + QuickSort.sortByDouble(scores, seqs, sortByFeatureAscending); + } + + setOrder(alignment, seqs); + } + + /** + * Builds a string hash of criteria for sorting, and if unchanged from last + * time, reverse the sort order + * + * @param method + * @param featureTypes + * @param groups + * @param startCol + * @param endCol + */ + protected static void flipFeatureSortIfUnchanged(String method, + List featureTypes, List groups, + final int startCol, final int endCol) + { + StringBuilder sb = new StringBuilder(64); + sb.append(startCol).append(method).append(endCol); + if (featureTypes != null) + { + Collections.sort(featureTypes); + sb.append(featureTypes.toString()); + } + if (groups != null) + { + Collections.sort(groups); + sb.append(groups.toString()); + } + String scoreCriteria = sb.toString(); + + /* + * if resorting on the same criteria, toggle sort order + */ + if (sortByFeatureCriteria == null + || !scoreCriteria.equals(sortByFeatureCriteria)) + { + sortByFeatureAscending = true; + } + else + { + sortByFeatureAscending = !sortByFeatureAscending; + } + sortByFeatureCriteria = scoreCriteria; + } + }