X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=src%2Fjalview%2Fanalysis%2FAlignmentSorter.java;h=d78b5b39d1a9224e57131b1bbedea07d30c7c4e1;hb=a45774ee31d9f35d4eff46d54d7deab719afb092;hp=fe2cfc7c7dfb70d5861f72c2dbc52831a3e3c743;hpb=506d60f0e188723ddc91c26824b41ac7034df3fe;p=jalview.git diff --git a/src/jalview/analysis/AlignmentSorter.java b/src/jalview/analysis/AlignmentSorter.java index fe2cfc7..d78b5b3 100755 --- a/src/jalview/analysis/AlignmentSorter.java +++ b/src/jalview/analysis/AlignmentSorter.java @@ -1,20 +1,19 @@ /* - * Jalview - A Sequence Alignment Editor and Viewer (Version 2.4) - * Copyright (C) 2008 AM Waterhouse, J Procter, G Barton, M Clamp, S Searle + * Jalview - A Sequence Alignment Editor and Viewer (Version 2.7) + * Copyright (C) 2011 J Procter, AM Waterhouse, G Barton, M Clamp, S Searle * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version 2 - * of the License, or (at your option) any later version. + * This file is part of Jalview. * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. + * Jalview is free software: you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA + * Jalview is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty + * of MERCHANTABILITY or FITNESS FOR A PARTICULAR + * PURPOSE. See the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along with Jalview. If not, see . */ package jalview.analysis; @@ -40,6 +39,10 @@ import jalview.util.*; */ public class AlignmentSorter { + /** + * todo: refactor searches to follow a basic pattern: (search property, last + * search state, current sort direction) + */ static boolean sortIdAscending = true; static int lastGroupHash = 0; @@ -54,21 +57,54 @@ public class AlignmentSorter static boolean sortTreeAscending = true; + /** + * last Annotation Label used by sortByScore + */ private static String lastSortByScore; + private static boolean sortByScoreAscending = true; + + /** + * compact representation of last arguments to SortByFeatureScore + */ + private static String lastSortByFeatureScore; + + private static boolean sortByFeatureScoreAscending = true; + + private static boolean sortLengthAscending; + /** * Sort by Percentage Identity w.r.t. s * * @param align - * AlignmentI + * AlignmentI * @param s - * SequenceI + * SequenceI * @param tosort - * sequences from align that are to be sorted. + * sequences from align that are to be sorted. */ public static void sortByPID(AlignmentI align, SequenceI s, SequenceI[] tosort) { + sortByPID(align, s, tosort, 0, -1); + } + + /** + * Sort by Percentage Identity w.r.t. s + * + * @param align + * AlignmentI + * @param s + * SequenceI + * @param tosort + * sequences from align that are to be sorted. + * @param start + * start column (0 for beginning + * @param end + */ + public static void sortByPID(AlignmentI align, SequenceI s, + SequenceI[] tosort, int start, int end) + { int nSeq = align.getHeight(); float[] scores = new float[nSeq]; @@ -90,9 +126,9 @@ public class AlignmentSorter * Reverse the order of the sort * * @param align - * DOCUMENT ME! + * DOCUMENT ME! * @param seqs - * DOCUMENT ME! + * DOCUMENT ME! */ private static void setReverseOrder(AlignmentI align, SequenceI[] seqs) { @@ -122,9 +158,9 @@ public class AlignmentSorter * Sets the Alignment object with the given sequences * * @param align - * Alignment object to be updated + * Alignment object to be updated * @param tmp - * sequences as a vector + * sequences as a vector */ private static void setOrder(AlignmentI align, Vector tmp) { @@ -135,9 +171,9 @@ public class AlignmentSorter * Sets the Alignment object with the given sequences * * @param align - * DOCUMENT ME! + * DOCUMENT ME! * @param seqs - * sequences as an array + * sequences as an array */ public static void setOrder(AlignmentI align, SequenceI[] seqs) { @@ -166,7 +202,7 @@ public class AlignmentSorter * Sorts by ID. Numbers are sorted before letters. * * @param align - * The alignment object to sort + * The alignment object to sort */ public static void sortByID(AlignmentI align) { @@ -196,12 +232,45 @@ public class AlignmentSorter } /** + * Sorts by sequence length + * + * @param align + * The alignment object to sort + */ + public static void sortByLength(AlignmentI align) + { + int nSeq = align.getHeight(); + + float[] length = new float[nSeq]; + SequenceI[] seqs = new SequenceI[nSeq]; + + for (int i = 0; i < nSeq; i++) + { + seqs[i] = align.getSequenceAt(i); + length[i] = (float) (seqs[i].getEnd() - seqs[i].getStart()); + } + + QuickSort.sort(length, seqs); + + if (sortLengthAscending) + { + setReverseOrder(align, seqs); + } + else + { + setOrder(align, seqs); + } + + sortLengthAscending = !sortLengthAscending; + } + + /** * Sorts the alignment by size of group.
* Maintains the order of sequences in each group by order in given alignment * object. * * @param align - * sorts the given alignment object by group + * sorts the given alignment object by group */ public static void sortByGroup(AlignmentI align) { @@ -273,7 +342,7 @@ public class AlignmentSorter * Converts Vector to array. java 1.18 does not have Vector.toArray() * * @param tmp - * Vector of SequenceI objects + * Vector of SequenceI objects * * @return array of Sequence[] */ @@ -293,16 +362,16 @@ public class AlignmentSorter * DOCUMENT ME! * * @param tmp - * DOCUMENT ME! + * DOCUMENT ME! * @param mask - * DOCUMENT ME! + * DOCUMENT ME! * * @return DOCUMENT ME! */ private static SequenceI[] vectorSubsetToArray(Vector tmp, Vector mask) { Vector seqs = new Vector(); - int i; + int i, idx; boolean[] tmask = new boolean[mask.size()]; for (i = 0; i < mask.size(); i++) @@ -313,10 +382,10 @@ public class AlignmentSorter for (i = 0; i < tmp.size(); i++) { Object sq = tmp.elementAt(i); - - if (mask.contains(sq) && tmask[mask.indexOf(sq)]) + idx = mask.indexOf(sq); + if (idx > -1 && tmask[idx]) { - tmask[mask.indexOf(sq)] = false; + tmask[idx] = false; seqs.addElement(sq); } } @@ -336,9 +405,9 @@ public class AlignmentSorter * Sorts by a given AlignmentOrder object * * @param align - * Alignment to order + * Alignment to order * @param order - * specified order for alignment + * specified order for alignment */ public static void sortBy(AlignmentI align, AlignmentOrder order) { @@ -368,9 +437,9 @@ public class AlignmentSorter * DOCUMENT ME! * * @param align - * alignment to order + * alignment to order * @param tree - * tree which has + * tree which has * * @return DOCUMENT ME! */ @@ -387,15 +456,15 @@ public class AlignmentSorter // TODO: JBPNote - decide if this is always an error // (eg. not when a tree is associated to another alignment which has more // sequences) - if (tmp.size() < nSeq) + if (tmp.size() != nSeq) { addStrays(align, tmp); } if (tmp.size() != nSeq) { - System.err.println("ERROR: tmp.size()=" + tmp.size() + " != nseq=" - + nSeq + " in getOrderByTree"); + System.err.println("WARNING: tmp.size()=" + tmp.size() + " != nseq=" + + nSeq + " in getOrderByTree - tree contains sequences not in alignment"); } } @@ -406,9 +475,9 @@ public class AlignmentSorter * Sorts the alignment by a given tree * * @param align - * alignment to order + * alignment to order * @param tree - * tree which has + * tree which has */ public static void sortByTree(AlignmentI align, NJTree tree) { @@ -439,9 +508,9 @@ public class AlignmentSorter * DOCUMENT ME! * * @param align - * DOCUMENT ME! + * DOCUMENT ME! * @param seqs - * DOCUMENT ME! + * DOCUMENT ME! */ private static void addStrays(AlignmentI align, Vector seqs) { @@ -466,11 +535,11 @@ public class AlignmentSorter * DOCUMENT ME! * * @param node - * DOCUMENT ME! + * DOCUMENT ME! * @param tmp - * DOCUMENT ME! + * DOCUMENT ME! * @param seqset - * DOCUMENT ME! + * DOCUMENT ME! * * @return DOCUMENT ME! */ @@ -491,7 +560,7 @@ public class AlignmentSorter { if (node.element() instanceof SequenceI) { - if (!tmp.contains(node.element())) + if (!tmp.contains(node.element())) // && (seqset==null || seqset.size()==0 || seqset.contains(tmp))) { tmp.addElement((SequenceI) node.element()); } @@ -536,17 +605,17 @@ public class AlignmentSorter * particular scoreLabel. Or reverse if same label was used previously * * @param scoreLabel - * exact label for sequence associated AlignmentAnnotation - * scores to use for sorting. + * exact label for sequence associated AlignmentAnnotation scores to + * use for sorting. * @param alignment - * sequences to be sorted + * sequences to be sorted */ public static void sortByAnnotationScore(String scoreLabel, AlignmentI alignment) { SequenceI[] seqs = alignment.getSequencesArray(); boolean[] hasScore = new boolean[seqs.length]; // per sequence score - // presence + // presence int hasScores = 0; // number of scores present on set double[] scores = new double[seqs.length]; double min = 0, max = 0; @@ -558,7 +627,7 @@ public class AlignmentSorter hasScores++; hasScore[i] = true; scores[i] = scoreAnn[0].getScore(); // take the first instance of this - // score. + // score. if (hasScores == 1) { max = min = scores[i]; @@ -590,7 +659,7 @@ public class AlignmentSorter { if (!hasScore[i]) { - scores[i] = (max + i); + scores[i] = (max + i + 1.0); } } } @@ -606,4 +675,275 @@ public class AlignmentSorter setReverseOrder(alignment, seqs); } } + + /** + * types of feature ordering: Sort by score : average score - or total score - + * over all features in region Sort by feature label text: (or if null - + * feature type text) - numerical or alphabetical Sort by feature density: + * based on counts - ignoring individual text or scores for each feature + */ + public static String FEATURE_SCORE = "average_score"; + + public static String FEATURE_LABEL = "text"; + + public static String FEATURE_DENSITY = "density"; + + /** + * sort the alignment using the features on each sequence found between start + * and stop with the given featureLabel (and optional group qualifier) + * + * @param featureLabel + * (may not be null) + * @param groupLabel + * (may be null) + * @param start + * (-1 to include non-positional features) + * @param stop + * (-1 to only sort on non-positional features) + * @param alignment + * - aligned sequences containing features + * @param method + * - one of the string constants FEATURE_SCORE, FEATURE_LABEL, + * FEATURE_DENSITY + */ + public static void sortByFeature(String featureLabel, String groupLabel, + int start, int stop, AlignmentI alignment, String method) + { + sortByFeature(featureLabel == null ? null : new String[] + { featureLabel }, groupLabel == null ? null : new String[] + { groupLabel }, start, stop, alignment, method); + } + + private static boolean containsIgnoreCase(final String lab, + final String[] labs) + { + if (labs == null) + { + return true; + } + if (lab == null) + { + return false; + } + for (int q = 0; q < labs.length; q++) + { + if (labs[q] != null && lab.equalsIgnoreCase(labs[q])) + { + return true; + } + } + return false; + } + + public static void sortByFeature(String[] featureLabels, + String[] groupLabels, int start, int stop, AlignmentI alignment, + String method) + { + if (method != FEATURE_SCORE && method != FEATURE_LABEL + && method != FEATURE_DENSITY) + { + throw new Error( + "Implementation Error - sortByFeature method must be one of FEATURE_SCORE, FEATURE_LABEL or FEATURE_DENSITY."); + } + boolean ignoreScore = method != FEATURE_SCORE; + StringBuffer scoreLabel = new StringBuffer(); + scoreLabel.append(start + stop + method); + // This doesn't quite work yet - we'd like to have a canonical ordering that + // can be preserved from call to call + for (int i = 0; featureLabels != null && i < featureLabels.length; i++) + { + scoreLabel.append(featureLabels[i] == null ? "null" + : featureLabels[i]); + } + for (int i = 0; groupLabels != null && i < groupLabels.length; i++) + { + scoreLabel.append(groupLabels[i] == null ? "null" : groupLabels[i]); + } + SequenceI[] seqs = alignment.getSequencesArray(); + + boolean[] hasScore = new boolean[seqs.length]; // per sequence score + // presence + int hasScores = 0; // number of scores present on set + double[] scores = new double[seqs.length]; + int[] seqScores = new int[seqs.length]; + Object[] feats = new Object[seqs.length]; + double min = 0, max = 0; + for (int i = 0; i < seqs.length; i++) + { + SequenceFeature[] sf = seqs[i].getSequenceFeatures(); + if (sf == null && seqs[i].getDatasetSequence() != null) + { + sf = seqs[i].getDatasetSequence().getSequenceFeatures(); + } + if (sf == null) + { + sf = new SequenceFeature[0]; + } + else + { + SequenceFeature[] tmp = new SequenceFeature[sf.length]; + for (int s = 0; s < tmp.length; s++) + { + tmp[s] = sf[s]; + } + sf = tmp; + } + int sstart = (start == -1) ? start : seqs[i].findPosition(start); + int sstop = (stop == -1) ? stop : seqs[i].findPosition(stop); + seqScores[i] = 0; + scores[i] = 0.0; + int n = sf.length; + for (int f = 0; f < sf.length; f++) + { + // filter for selection criteria + if ( + // ignore features outwith alignment start-stop positions. + (sf[f].end < sstart || sf[f].begin > sstop) || + // or ignore based on selection criteria + (featureLabels != null && !AlignmentSorter + .containsIgnoreCase(sf[f].type, featureLabels)) + || (groupLabels != null + // problem here: we cannot eliminate null feature group features + && (sf[f].getFeatureGroup() != null && !AlignmentSorter + .containsIgnoreCase(sf[f].getFeatureGroup(), + groupLabels)))) + { + // forget about this feature + sf[f] = null; + n--; + } + else + { + // or, also take a look at the scores if necessary. + if (!ignoreScore && sf[f].getScore() != Float.NaN) + { + if (seqScores[i] == 0) + { + hasScores++; + } + seqScores[i]++; + hasScore[i] = true; + scores[i] += sf[f].getScore(); // take the first instance of this + // score. + } + } + } + SequenceFeature[] fs; + feats[i] = fs = new SequenceFeature[n]; + if (n > 0) + { + n = 0; + for (int f = 0; f < sf.length; f++) + { + if (sf[f] != null) + { + ((SequenceFeature[]) feats[i])[n++] = sf[f]; + } + } + if (method == FEATURE_LABEL) + { + // order the labels by alphabet + String[] labs = new String[fs.length]; + for (int l = 0; l < labs.length; l++) + { + labs[l] = (fs[l].getDescription() != null ? fs[l] + .getDescription() : fs[l].getType()); + } + jalview.util.QuickSort.sort(labs, ((Object[]) feats[i])); + } + } + if (hasScore[i]) + { + // compute average score + scores[i] /= seqScores[i]; + // update the score bounds. + if (hasScores == 1) + { + max = min = scores[i]; + } + else + { + if (max < scores[i]) + { + max = scores[i]; + } + if (min > scores[i]) + { + min = scores[i]; + } + } + } + } + + if (method == FEATURE_SCORE) + { + if (hasScores == 0) + { + return; // do nothing - no scores present to sort by. + } + // pad score matrix + if (hasScores < seqs.length) + { + for (int i = 0; i < seqs.length; i++) + { + if (!hasScore[i]) + { + scores[i] = (max + 1 + i); + } + else + { + int nf = (feats[i] == null) ? 0 + : ((SequenceFeature[]) feats[i]).length; + // System.err.println("Sorting on Score: seq "+seqs[i].getName()+ + // " Feats: "+nf+" Score : "+scores[i]); + } + } + } + + jalview.util.QuickSort.sort(scores, seqs); + } + else if (method == FEATURE_DENSITY) + { + + // break ties between equivalent numbers for adjacent sequences by adding + // 1/Nseq*i on the original order + double fr = 0.9 / (1.0 * seqs.length); + for (int i = 0; i < seqs.length; i++) + { + double nf; + scores[i] = (0.05 + fr * i) + + (nf = ((feats[i] == null) ? 0.0 + : 1.0 * ((SequenceFeature[]) feats[i]).length)); + // System.err.println("Sorting on Density: seq "+seqs[i].getName()+ + // " Feats: "+nf+" Score : "+scores[i]); + } + jalview.util.QuickSort.sort(scores, seqs); + } + else + { + if (method == FEATURE_LABEL) + { + throw new Error("Not yet implemented."); + } + } + if (lastSortByFeatureScore == null + || !scoreLabel.toString().equals(lastSortByFeatureScore)) + { + sortByFeatureScoreAscending = true; + } + else + { + sortByFeatureScoreAscending = !sortByFeatureScoreAscending; + } + if (sortByFeatureScoreAscending) + { + setOrder(alignment, seqs); + } + else + { + setReverseOrder(alignment, seqs); + } + lastSortByFeatureScore = scoreLabel.toString(); + } + }