2 * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
3 * Copyright (C) $$Year-Rel$$ The Jalview Authors
5 * This file is part of Jalview.
7 * Jalview is free software: you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation, either version 3
10 * of the License, or (at your option) any later version.
12 * Jalview is distributed in the hope that it will be useful, but
13 * WITHOUT ANY WARRANTY; without even the implied warranty
14 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15 * PURPOSE. See the GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
19 * The Jalview Authors are detailed in the 'AUTHORS' file.
21 package jalview.analysis;
23 import jalview.datamodel.ColumnSelection;
24 import jalview.datamodel.HiddenColumns;
25 import jalview.datamodel.SequenceGroup;
26 import jalview.datamodel.SequenceI;
28 import java.util.ArrayList;
29 import java.util.HashMap;
30 import java.util.List;
32 import java.util.Vector;
35 * various methods for defining groups on an alignment based on some other
44 * Divide the given sequences based on the equivalence of their corresponding
45 * selectedChars string. If list is provided, existing groups will be
49 * @param selectedChars
// Partitions the given sequences into new SequenceGroups: sequences whose
// entry in selectedChars (same index) is equal end up in the same group.
// Sequences found in one of the groups in `list` get that group's name
// prefixed to their key, which subdivides the existing groups.
// NOTE(review): this listing omits some lines of the method (declarations of
// `i` and `width`, several guards, the array fill and the return), so the
// comments below describe only the statements that are visible here.
53 public static SequenceGroup[] makeGroupsFrom(SequenceI[] sequences,
54 String[] selectedChars, List<SequenceGroup> list)
56 // TODO: determine how to get/recover input data for group generation
// grouping key -> sequences that share that key
57 Map<String, List<SequenceI>> gps = new HashMap<String, List<SequenceI>>();
// sequence.toString() -> group from `list` that contains the sequence
59 Map<String, SequenceGroup> pgroup = new HashMap<String, SequenceGroup>();
62 for (SequenceGroup sg : list)
64 for (SequenceI sq : sg.getSequences(null))
// keyed on toString(): sequences with equal string forms share one entry,
// and a later group overwrites an earlier one for the same key
66 pgroup.put(sq.toString(), sg);
70 for (i = 0; i < sequences.length; i++)
// selectedChars is indexed in parallel with sequences
72 String schar = selectedChars[i];
73 SequenceGroup pgp = pgroup.get(((Object) sequences[i]).toString());
// prefix the key with the existing group's name
// (presumably guarded by a pgp != null check on a line not shown - confirm)
76 schar = pgp.getName() + ":" + schar;
78 List<SequenceI> svec = gps.get(schar);
// first sequence for this key: start a new bucket
// (presumably registered in gps on a line not shown - confirm)
81 svec = new ArrayList<SequenceI>();
// track the longest sequence length seen across all inputs
84 if (width < sequences[i].getLength())
86 width = sequences[i].getLength();
88 svec.add(sequences[i]);
// one output group per distinct key
91 SequenceGroup[] groups = new SequenceGroup[gps.size()];
// gps is a HashMap, so the iteration order of keys (and hence the order of
// the created groups) is unspecified.
// Each group is named "Subseq: " + key; the last two constructor arguments
// are 0 and width - 1 (presumably start/end columns - confirm against the
// SequenceGroup constructor).
93 for (String key : gps.keySet())
95 SequenceGroup group = new SequenceGroup(gps.get(key),
96 "Subseq: " + key, null, true, true, false, 0, width - 1);
106 * Divide the given sequences based on the equivalence of characters at
107 * selected columns. If list is provided, existing groups will be
111 * @param cs
113 * @param hiddenColumns
// Partitions the given sequences into new SequenceGroups according to the
// characters they hold at the selected columns in `cs`: each sequence gets a
// string key built from those characters, and sequences with equal keys are
// grouped together. Sequences found in one of the groups in `list` get that
// group's name prefixed to their key.
// NOTE(review): this listing omits some lines of the method (declarations of
// `i` and `width`, the loop that defines `p`, several guards, the array fill
// and the return), so the comments below describe only what is visible here.
116 public static SequenceGroup[] makeGroupsFromCols(SequenceI[] sequences,
117 ColumnSelection cs, List<SequenceGroup> list,
118 HiddenColumns hiddenColumns)
120 // TODO: determine how to get/recover input data for group generation
// grouping key -> sequences that share that key
121 Map<String, List<SequenceI>> gps = new HashMap<String, List<SequenceI>>();
// sequence.toString() -> group from `list` that contains the sequence
122 Map<String, SequenceGroup> pgroup = new HashMap<String, SequenceGroup>();
125 for (SequenceGroup sg : list)
127 for (SequenceI sq : sg.getSequences(null))
// keyed on toString(): sequences with equal string forms share one entry
129 pgroup.put(sq.toString(), sg);
135 * get selected columns (in the order they were selected);
136 * note this could include right-to-left ranges
// spos is sized for every selected column, but only the visible ones are
// copied in below, so trailing slots may stay unused
138 int[] spos = new int[cs.getSelected().size()];
141 for (Integer pos : cs.getSelected())
// with no HiddenColumns supplied, every selected column is kept
143 if (hiddenColumns == null || hiddenColumns.isVisible(pos.intValue()))
145 spos[i++] = pos.intValue();
150 // mark end of visible column position
153 // actual number of visible columns
154 for (i = 0; i < sequences.length; i++)
156 int slen = sequences[i].getLength();
162 SequenceGroup pgp = pgroup.get(((Object) sequences[i]).toString());
// key for this sequence, built up piece by piece
163 StringBuilder schar = new StringBuilder();
// prefix the key with the existing group's name
// (presumably guarded by a pgp != null check on a line not shown - confirm)
166 schar.append(pgp.getName() + ":");
// append the residue at column p; p comes from a loop whose header is not
// shown (presumably iterating over spos - confirm)
180 schar.append(sequences[i].getCharAt(p));
183 List<SequenceI> svec = gps.get(schar.toString());
// first sequence for this key: create and register a new bucket
186 svec = new ArrayList<SequenceI>();
187 gps.put(schar.toString(), svec);
189 svec.add(sequences[i]);
// one output group per distinct key
192 SequenceGroup[] groups = new SequenceGroup[gps.size()];
// gps is a HashMap, so the iteration order of keys (and hence the order of
// the created groups) is unspecified.
// Each group is named "Subseq: " + key; the last two constructor arguments
// are 0 and width - 1 (presumably start/end columns - confirm). The lines
// that compute `width` are not visible in this listing.
194 for (String key : gps.keySet())
196 SequenceGroup group = new SequenceGroup(gps.get(key),
197 "Subseq: " + key, null, true, true, false, 0, width - 1);
207 * subdivide the given sequences based on the distribution of features
209 * @param featureLabels
210 * - null or one or more feature types to filter on.
212 * - null or set of groups to filter features on
214 * - range for feature filter
216 * - range for feature filter
218 * - sequences to be divided
220 * - existing groups to be subdivided
222 * - density, description, score
224 public static void divideByFeature(String[] featureLabels,
225 String[] groupLabels, int start, int stop, SequenceI[] sequences,
226 Vector exgroups, String method)
228 // TODO implement divideByFeature
230 * if (method!=AlignmentSorter.FEATURE_SCORE &&
231 * method!=AlignmentSorter.FEATURE_LABEL &&
232 * method!=AlignmentSorter.FEATURE_DENSITY) { throw newError(
233 * "Implementation Error - sortByFeature method must be one of FEATURE_SCORE, FEATURE_LABEL or FEATURE_DENSITY."
234 * ); } boolean ignoreScore=method!=AlignmentSorter.FEATURE_SCORE;
235 * StringBuffer scoreLabel = new StringBuffer();
236 * scoreLabel.append(start+stop+method); // This doesn't work yet - we'd
237 * like to have a canonical ordering that can be preserved from call to call
238 * for (int i=0;featureLabels!=null && i<featureLabels.length; i++) {
239 * scoreLabel.append(featureLabels[i]==null ? "null" : featureLabels[i]); }
240 * for (int i=0;groupLabels!=null && i<groupLabels.length; i++) {
241 * scoreLabel.append(groupLabels[i]==null ? "null" : groupLabels[i]); }
242 * SequenceI[] seqs = alignment.getSequencesArray();
244 * boolean[] hasScore = new boolean[seqs.length]; // per sequence score //
245 * presence int hasScores = 0; // number of scores present on set double[]
246 * scores = new double[seqs.length]; int[] seqScores = new int[seqs.length];
247 * Object[] feats = new Object[seqs.length]; double min = 0, max = 0; for
248 * (int i = 0; i < seqs.length; i++) { SequenceFeature[] sf =
249 * seqs[i].getSequenceFeatures(); if (sf==null &&
250 * seqs[i].getDatasetSequence()!=null) { sf =
251 * seqs[i].getDatasetSequence().getSequenceFeatures(); } if (sf==null) { sf
252 * = new SequenceFeature[0]; } else { SequenceFeature[] tmp = new
253 * SequenceFeature[sf.length]; for (int s=0; s<tmp.length;s++) { tmp[s] =
254 * sf[s]; } sf = tmp; } int sstart = (start==-1) ? start :
255 * seqs[i].findPosition(start); int sstop = (stop==-1) ? stop :
256 * seqs[i].findPosition(stop); seqScores[i]=0; scores[i]=0.0; int
257 * n=sf.length; for (int f=0;f<sf.length;f++) { // filter for selection
258 * criteria if ( // ignore features outwith alignment start-stop positions.
259 * (sf[f].end < sstart || sf[f].begin > sstop) || // or ignore based on
260 * selection criteria (featureLabels != null &&
261 * !AlignmentSorter.containsIgnoreCase(sf[f].type, featureLabels)) ||
262 * (groupLabels != null // problem here: we cannot eliminate null feature
263 * group features && (sf[f].getFeatureGroup() != null &&
264 * !AlignmentSorter.containsIgnoreCase(sf[f].getFeatureGroup(),
265 * groupLabels)))) { // forget about this feature sf[f] = null; n--; } else
266 * { // or, also take a look at the scores if necessary. if (!ignoreScore &&
267 * sf[f].getScore()!=Float.NaN) { if (seqScores[i]==0) { hasScores++; }
268 * seqScores[i]++; hasScore[i] = true; scores[i] += sf[f].getScore(); //
269 * take the first instance of this // score. } } } SequenceFeature[] fs;
270 * feats[i] = fs = new SequenceFeature[n]; if (n>0) { n=0; for (int
271 * f=0;f<sf.length;f++) { if (sf[f]!=null) { ((SequenceFeature[])
272 * feats[i])[n++] = sf[f]; } } if (method==FEATURE_LABEL) { // order the
273 * labels by alphabet String[] labs = new String[fs.length]; for (int
274 * l=0;l<labs.length; l++) { labs[l] = (fs[l].getDescription()!=null ?
275 * fs[l].getDescription() : fs[l].getType()); }
276 * jalview.util.QuickSort.sort(labs, ((Object[]) feats[i])); } } if
277 * (hasScore[i]) { // compute average score scores[i]/=seqScores[i]; //
278 * update the score bounds. if (hasScores == 1) { max = min = scores[i]; }
279 * else { if (max < scores[i]) { max = scores[i]; } if (min > scores[i]) {
280 * min = scores[i]; } } } }
282 * if (method==FEATURE_SCORE) { if (hasScores == 0) { return; // do nothing
283 * - no scores present to sort by. } // pad score matrix if (hasScores <
284 * seqs.length) { for (int i = 0; i < seqs.length; i++) { if (!hasScore[i])
285 * { scores[i] = (max + i); } else { int nf=(feats[i]==null) ? 0
286 * :((SequenceFeature[]) feats[i]).length;
287 * System.err.println("Sorting on Score: seq "+seqs[i].getName()+
288 * " Feats: "+nf+" Score : "+scores[i]); } } }
290 * jalview.util.QuickSort.sort(scores, seqs); } else if
291 * (method==FEATURE_DENSITY) {
293 * // break ties between equivalent numbers for adjacent sequences by adding
294 * 1/Nseq*i on the original order double fr = 0.9/(1.0*seqs.length); for
295 * (int i=0;i<seqs.length; i++) { double nf; scores[i] =
296 * (0.05+fr*i)+(nf=((feats[i]==null) ? 0.0 :1.0*((SequenceFeature[])
297 * feats[i]).length));
298 * System.err.println("Sorting on Density: seq "+seqs[i].getName()+
299 * " Feats: "+nf+" Score : "+scores[i]); }
300 * jalview.util.QuickSort.sort(scores, seqs); } else { if
301 * (method==FEATURE_LABEL) { throw new Error("Not yet implemented."); } } if
302 * (lastSortByFeatureScore ==null ||
303 * scoreLabel.equals(lastSortByFeatureScore)) { setOrder(alignment, seqs); }
304 * else { setReverseOrder(alignment, seqs); } lastSortByFeatureScore =
305 * scoreLabel.toString();