X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=src%2Fjalview%2Fanalysis%2FGrouping.java;h=165a3df6c8b19f505f1b137fe0a3f5dabf77ec00;hb=db93a1adcbe0a4eaaf06e0a70ade0d6c5c1961c3;hp=bba668db9e68a827b95320c8a99b887513ac6942;hpb=87deef0fa44f3aa7d7d86ac7241a9d8c93b2408f;p=jalview.git diff --git a/src/jalview/analysis/Grouping.java b/src/jalview/analysis/Grouping.java index bba668d..165a3df 100644 --- a/src/jalview/analysis/Grouping.java +++ b/src/jalview/analysis/Grouping.java @@ -1,273 +1,294 @@ +/* + * Jalview - A Sequence Alignment Editor and Viewer (Version 2.9.0b2) + * Copyright (C) 2015 The Jalview Authors + * + * This file is part of Jalview. + * + * Jalview is free software: you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, either version 3 + * of the License, or (at your option) any later version. + * + * Jalview is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty + * of MERCHANTABILITY or FITNESS FOR A PARTICULAR + * PURPOSE. See the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Jalview. If not, see . + * The Jalview Authors are detailed in the 'AUTHORS' file. + */ package jalview.analysis; -import jalview.datamodel.AlignmentI; -import jalview.datamodel.SequenceFeature; +import jalview.datamodel.ColumnSelection; import jalview.datamodel.SequenceGroup; import jalview.datamodel.SequenceI; -import java.util.Enumeration; -import java.util.Hashtable; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; import java.util.Vector; /** - * various methods for defining groups on an alignment based on some other properties + * various methods for defining groups on an alignment based on some other + * properties + * * @author JimP - * + * */ public class Grouping { /** - * Divide the given sequences based on the equivalence of their corresponding selectedChars string. If exgroups is provided, existing groups will be subdivided. + * Divide the given sequences based on the equivalence of their corresponding + * selectedChars string. If exgroups is provided, existing groups will be + * subdivided. + * * @param sequences * @param selectedChars - * @param exgroups + * @param list * @return */ - public static SequenceGroup[] makeGroupsFrom(SequenceI[] sequences, String[] selectedChars, Vector exgroups) + public static SequenceGroup[] makeGroupsFrom(SequenceI[] sequences, + String[] selectedChars, List list) { - // TODO: determine how to get/recover input data for group generation - Hashtable gps = new Hashtable(); - int width = 0,i; - Hashtable pgroup = new Hashtable(); - if (exgroups!=null) + // TODO: determine how to get/recover input data for group generation + Map> gps = new HashMap>(); + int width = 0, i; + Map pgroup = new HashMap(); + if (list != null) { - SequenceGroup sg; - for (Enumeration g=exgroups.elements(); g.hasMoreElements(); ) + for (SequenceGroup sg : list) { - sg = (SequenceGroup) g.nextElement(); - for (Enumeration sq = sg.getSequences(null).elements(); sq.hasMoreElements(); ) - pgroup.put(sq.nextElement().toString(), sg); + for (SequenceI sq : sg.getSequences(null)) + { + pgroup.put(sq.toString(), sg); + } } } for (i = 0; i < sequences.length; i++) { String schar = selectedChars[i]; - SequenceGroup pgp = (SequenceGroup) pgroup.get(((Object) sequences[i]).toString()); - if (pgp!=null) + SequenceGroup pgp = pgroup.get(((Object) sequences[i]).toString()); + if (pgp != null) { - schar = pgp.getName()+":"+schar; + schar = pgp.getName() + ":" + schar; } - Vector svec = (Vector) gps.get(schar); + List svec = gps.get(schar); if (svec == null) { - svec = new Vector(); + svec = new ArrayList(); gps.put(schar, svec); } - if (width list) { - // TODO implement divideByFeature - /* - if (method!=AlignmentSorter.FEATURE_SCORE && method!=AlignmentSorter.FEATURE_LABEL && method!=AlignmentSorter.FEATURE_DENSITY) - { - throw new Error("Implementation Error - sortByFeature method must be one of FEATURE_SCORE, FEATURE_LABEL or FEATURE_DENSITY."); - } - boolean ignoreScore=method!=AlignmentSorter.FEATURE_SCORE; - StringBuffer scoreLabel = new StringBuffer(); - scoreLabel.append(start+stop+method); - // This doesn't work yet - we'd like to have a canonical ordering that can be preserved from call to call - for (int i=0;featureLabels!=null && i> gps = new HashMap>(); + Map pgroup = new HashMap(); + if (list != null) { - SequenceFeature[] sf = seqs[i].getSequenceFeatures(); - if (sf==null && seqs[i].getDatasetSequence()!=null) + for (SequenceGroup sg : list) { - sf = seqs[i].getDatasetSequence().getSequenceFeatures(); - } - if (sf==null) - { - sf = new SequenceFeature[0]; - } else { - SequenceFeature[] tmp = new SequenceFeature[sf.length]; - for (int s=0; s sstop) - || - // or ignore based on selection criteria - (featureLabels != null && !AlignmentSorter.containsIgnoreCase(sf[f].type, featureLabels)) - || (groupLabels != null - // problem here: we cannot eliminate null feature group features - && (sf[f].getFeatureGroup() != null - && !AlignmentSorter.containsIgnoreCase(sf[f].getFeatureGroup(), groupLabels)))) - { - // forget about this feature - sf[f] = null; - n--; - } else { - // or, also take a look at the scores if necessary. - if (!ignoreScore && sf[f].getScore()!=Float.NaN) - { - if (seqScores[i]==0) - { - hasScores++; - } - seqScores[i]++; - hasScore[i] = true; - scores[i] += sf[f].getScore(); // take the first instance of this - // score. - } - } - } - SequenceFeature[] fs; - feats[i] = fs = new SequenceFeature[n]; - if (n>0) - { - n=0; - for (int f=0;f scores[i]) - { - min = scores[i]; - } + pgroup.put(sq.toString(), sg); } } } - - if (method==FEATURE_SCORE) - { - if (hasScores == 0) + + /* + * get selected columns (in the order they were selected); + * note this could include right-to-left ranges + */ + int[] spos = new int[cs.getSelected().size()]; + int width = -1; + int i = 0; + for (Integer pos : cs.getSelected()) { - return; // do nothing - no scores present to sort by. + spos[i++] = pos.intValue(); } - // pad score matrix - if (hasScores < seqs.length) + + for (i = 0; i < sequences.length; i++) { - for (int i = 0; i < seqs.length; i++) + int slen = sequences[i].getLength(); + if (width < slen) { - if (!hasScore[i]) - { - scores[i] = (max + i); - } else { - int nf=(feats[i]==null) ? 0 :((SequenceFeature[]) feats[i]).length; - System.err.println("Sorting on Score: seq "+seqs[i].getName()+ " Feats: "+nf+" Score : "+scores[i]); - } + width = slen; } - } - jalview.util.QuickSort.sort(scores, seqs); - } - else - if (method==FEATURE_DENSITY) + SequenceGroup pgp = pgroup.get(((Object) sequences[i]).toString()); + StringBuilder schar = new StringBuilder(); + if (pgp != null) + { + schar.append(pgp.getName() + ":"); + } + for (int p : spos) { - - // break ties between equivalent numbers for adjacent sequences by adding 1/Nseq*i on the original order - double fr = 0.9/(1.0*seqs.length); - for (int i=0;i= slen) { - double nf; - scores[i] = (0.05+fr*i)+(nf=((feats[i]==null) ? 0.0 :1.0*((SequenceFeature[]) feats[i]).length)); - System.err.println("Sorting on Density: seq "+seqs[i].getName()+ " Feats: "+nf+" Score : "+scores[i]); + schar.append("~"); } - jalview.util.QuickSort.sort(scores, seqs); - } - else { - if (method==FEATURE_LABEL) + else { - throw new Error("Not yet implemented."); + schar.append(sequences[i].getCharAt(p)); } } - if (lastSortByFeatureScore ==null || scoreLabel.equals(lastSortByFeatureScore)) - { - setOrder(alignment, seqs); + List svec = gps.get(schar.toString()); + if (svec == null) + { + svec = new ArrayList(); + gps.put(schar.toString(), svec); + } + svec.add(sequences[i]); } - else + // make some groups + SequenceGroup[] groups = new SequenceGroup[gps.size()]; + i = 0; + for (String key : gps.keySet()) { - setReverseOrder(alignment, seqs); + SequenceGroup group = new SequenceGroup(gps.get(key), "Subseq: " + + key, null, true, true, false, 0, width - 1); + + groups[i++] = group; } - lastSortByFeatureScore = scoreLabel.toString(); */ + gps.clear(); + pgroup.clear(); + return groups; } + /** + * subdivide the given sequences based on the distribution of features + * + * @param featureLabels + * - null or one or more feature types to filter on. + * @param groupLabels + * - null or set of groups to filter features on + * @param start + * - range for feature filter + * @param stop + * - range for feature filter + * @param sequences + * - sequences to be divided + * @param exgroups + * - existing groups to be subdivided + * @param method + * - density, description, score + */ + public static void divideByFeature(String[] featureLabels, + String[] groupLabels, int start, int stop, SequenceI[] sequences, + Vector exgroups, String method) + { + // TODO implement divideByFeature + /* + * if (method!=AlignmentSorter.FEATURE_SCORE && + * method!=AlignmentSorter.FEATURE_LABEL && + * method!=AlignmentSorter.FEATURE_DENSITY) { throw newError( + * "Implementation Error - sortByFeature method must be one of FEATURE_SCORE, FEATURE_LABEL or FEATURE_DENSITY." + * ); } boolean ignoreScore=method!=AlignmentSorter.FEATURE_SCORE; + * StringBuffer scoreLabel = new StringBuffer(); + * scoreLabel.append(start+stop+method); // This doesn't work yet - we'd + * like to have a canonical ordering that can be preserved from call to call + * for (int i=0;featureLabels!=null && i sstop) || // or ignore based on + * selection criteria (featureLabels != null && + * !AlignmentSorter.containsIgnoreCase(sf[f].type, featureLabels)) || + * (groupLabels != null // problem here: we cannot eliminate null feature + * group features && (sf[f].getFeatureGroup() != null && + * !AlignmentSorter.containsIgnoreCase(sf[f].getFeatureGroup(), + * groupLabels)))) { // forget about this feature sf[f] = null; n--; } else + * { // or, also take a look at the scores if necessary. if (!ignoreScore && + * sf[f].getScore()!=Float.NaN) { if (seqScores[i]==0) { hasScores++; } + * seqScores[i]++; hasScore[i] = true; scores[i] += sf[f].getScore(); // + * take the first instance of this // score. } } } SequenceFeature[] fs; + * feats[i] = fs = new SequenceFeature[n]; if (n>0) { n=0; for (int + * f=0;f scores[i]) { + * min = scores[i]; } } } } + * + * if (method==FEATURE_SCORE) { if (hasScores == 0) { return; // do nothing + * - no scores present to sort by. } // pad score matrix if (hasScores < + * seqs.length) { for (int i = 0; i < seqs.length; i++) { if (!hasScore[i]) + * { scores[i] = (max + i); } else { int nf=(feats[i]==null) ? 0 + * :((SequenceFeature[]) feats[i]).length; + * System.err.println("Sorting on Score: seq "+seqs[i].getName()+ + * " Feats: "+nf+" Score : "+scores[i]); } } } + * + * jalview.util.QuickSort.sort(scores, seqs); } else if + * (method==FEATURE_DENSITY) { + * + * // break ties between equivalent numbers for adjacent sequences by adding + * 1/Nseq*i on the original order double fr = 0.9/(1.0*seqs.length); for + * (int i=0;i