/*
* Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
* Copyright (C) $$Year-Rel$$ The Jalview Authors
*
* This file is part of Jalview.
*
* Jalview is free software: you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation, either version 3
* of the License, or (at your option) any later version.
*
* Jalview is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty
* of MERCHANTABILITY or FITNESS FOR A PARTICULAR
* PURPOSE. See the GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with Jalview. If not, see .
* The Jalview Authors are detailed in the 'AUTHORS' file.
*/
package jalview.analysis;
import jalview.datamodel.ColumnSelection;
import jalview.datamodel.SequenceGroup;
import jalview.datamodel.SequenceI;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Vector;
/**
* various methods for defining groups on an alignment based on some other
* properties
*
* @author JimP
*
*/
public class Grouping
{
/**
* Divide the given sequences based on the equivalence of their corresponding
* selectedChars string. If exgroups is provided, existing groups will be
* subdivided.
*
* @param sequences
* @param selectedChars
* @param list
* @return
*/
public static SequenceGroup[] makeGroupsFrom(SequenceI[] sequences,
String[] selectedChars, List list)
{
// TODO: determine how to get/recover input data for group generation
Map> gps = new HashMap>();
int width = 0, i;
Map pgroup = new HashMap();
if (list != null)
{
for (SequenceGroup sg : list)
{
for (SequenceI sq : sg.getSequences(null))
{
pgroup.put(sq.toString(), sg);
}
}
}
for (i = 0; i < sequences.length; i++)
{
String schar = selectedChars[i];
SequenceGroup pgp = pgroup.get(((Object) sequences[i]).toString());
if (pgp != null)
{
schar = pgp.getName() + ":" + schar;
}
List svec = gps.get(schar);
if (svec == null)
{
svec = new ArrayList();
gps.put(schar, svec);
}
if (width < sequences[i].getLength())
{
width = sequences[i].getLength();
}
svec.add(sequences[i]);
}
// make some groups
SequenceGroup[] groups = new SequenceGroup[gps.size()];
i = 0;
for (String key : gps.keySet())
{
SequenceGroup group = new SequenceGroup(gps.get(key),
"Subseq: " + key, null, true, true, false, 0, width - 1);
groups[i++] = group;
}
gps.clear();
pgroup.clear();
return groups;
}
/**
* Divide the given sequences based on the equivalence of characters at
* selected columns If exgroups is provided, existing groups will be
* subdivided.
*
* @param sequences
* @param columnSelection
* @param list
* @return
*/
public static SequenceGroup[] makeGroupsFromCols(SequenceI[] sequences,
ColumnSelection cs, List list)
{
// TODO: determine how to get/recover input data for group generation
Map> gps = new HashMap>();
Map pgroup = new HashMap();
if (list != null)
{
for (SequenceGroup sg : list)
{
for (SequenceI sq : sg.getSequences(null))
{
pgroup.put(sq.toString(), sg);
}
}
}
int[] spos = new int[cs.getSelected().size()];
int width = -1;
int i = 0;
for (Integer pos : cs.getSelected())
{
spos[i++] = pos.intValue();
}
;
for (i = 0; i < sequences.length; i++)
{
int slen = sequences[i].getLength();
if (width < slen)
{
width = slen;
}
SequenceGroup pgp = pgroup.get(((Object) sequences[i]).toString());
StringBuilder schar = new StringBuilder();
if (pgp != null)
{
schar.append(pgp.getName() + ":");
}
for (int p : spos)
{
if (p >= slen)
{
schar.append("~");
}
else
{
schar.append(sequences[i].getCharAt(p));
}
}
List svec = gps.get(schar.toString());
if (svec == null)
{
svec = new ArrayList();
gps.put(schar.toString(), svec);
}
svec.add(sequences[i]);
}
// make some groups
SequenceGroup[] groups = new SequenceGroup[gps.size()];
i = 0;
for (String key : gps.keySet())
{
SequenceGroup group = new SequenceGroup(gps.get(key), "Subseq: "
+ key, null, true, true, false, 0, width - 1);
groups[i++] = group;
}
gps.clear();
pgroup.clear();
return groups;
}
/**
* subdivide the given sequences based on the distribution of features
*
* @param featureLabels
* - null or one or more feature types to filter on.
* @param groupLabels
* - null or set of groups to filter features on
* @param start
* - range for feature filter
* @param stop
* - range for feature filter
* @param sequences
* - sequences to be divided
* @param exgroups
* - existing groups to be subdivided
* @param method
* - density, description, score
*/
public static void divideByFeature(String[] featureLabels,
String[] groupLabels, int start, int stop, SequenceI[] sequences,
Vector exgroups, String method)
{
// TODO implement divideByFeature
/*
* if (method!=AlignmentSorter.FEATURE_SCORE &&
* method!=AlignmentSorter.FEATURE_LABEL &&
* method!=AlignmentSorter.FEATURE_DENSITY) { throw newError(
* "Implementation Error - sortByFeature method must be one of FEATURE_SCORE, FEATURE_LABEL or FEATURE_DENSITY."
* ); } boolean ignoreScore=method!=AlignmentSorter.FEATURE_SCORE;
* StringBuffer scoreLabel = new StringBuffer();
* scoreLabel.append(start+stop+method); // This doesn't work yet - we'd
* like to have a canonical ordering that can be preserved from call to call
* for (int i=0;featureLabels!=null && i sstop) || // or ignore based on
* selection criteria (featureLabels != null &&
* !AlignmentSorter.containsIgnoreCase(sf[f].type, featureLabels)) ||
* (groupLabels != null // problem here: we cannot eliminate null feature
* group features && (sf[f].getFeatureGroup() != null &&
* !AlignmentSorter.containsIgnoreCase(sf[f].getFeatureGroup(),
* groupLabels)))) { // forget about this feature sf[f] = null; n--; } else
* { // or, also take a look at the scores if necessary. if (!ignoreScore &&
* sf[f].getScore()!=Float.NaN) { if (seqScores[i]==0) { hasScores++; }
* seqScores[i]++; hasScore[i] = true; scores[i] += sf[f].getScore(); //
* take the first instance of this // score. } } } SequenceFeature[] fs;
* feats[i] = fs = new SequenceFeature[n]; if (n>0) { n=0; for (int
* f=0;f scores[i]) {
* min = scores[i]; } } } }
*
* if (method==FEATURE_SCORE) { if (hasScores == 0) { return; // do nothing
* - no scores present to sort by. } // pad score matrix if (hasScores <
* seqs.length) { for (int i = 0; i < seqs.length; i++) { if (!hasScore[i])
* { scores[i] = (max + i); } else { int nf=(feats[i]==null) ? 0
* :((SequenceFeature[]) feats[i]).length;
* System.err.println("Sorting on Score: seq "+seqs[i].getName()+
* " Feats: "+nf+" Score : "+scores[i]); } } }
*
* jalview.util.QuickSort.sort(scores, seqs); } else if
* (method==FEATURE_DENSITY) {
*
* // break ties between equivalent numbers for adjacent sequences by adding
* 1/Nseq*i on the original order double fr = 0.9/(1.0*seqs.length); for
* (int i=0;i