/*
* Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
* Copyright (C) $$Year-Rel$$ The Jalview Authors
*
* This file is part of Jalview.
*
* Jalview is free software: you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation, either version 3
* of the License, or (at your option) any later version.
*
* Jalview is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty
* of MERCHANTABILITY or FITNESS FOR A PARTICULAR
* PURPOSE. See the GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with Jalview. If not, see .
* The Jalview Authors are detailed in the 'AUTHORS' file.
*/
package jalview.analysis.scoremodels;
import jalview.api.AlignmentViewPanel;
import jalview.api.FeatureRenderer;
import jalview.api.analysis.ScoreModelI;
import jalview.api.analysis.SimilarityParamsI;
import jalview.datamodel.AlignmentView;
import jalview.datamodel.SeqCigar;
import jalview.datamodel.SequenceFeature;
import jalview.math.Matrix;
import jalview.math.MatrixI;
import jalview.util.SetUtils;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
public class FeatureDistanceModel extends DistanceScoreModel
{
private static final String NAME = "Sequence Feature Similarity";
private String description;
FeatureRenderer fr;
/**
* Constructor
*/
public FeatureDistanceModel()
{
}
@Override
public ScoreModelI getInstance(AlignmentViewPanel view)
{
FeatureDistanceModel instance;
try
{
instance = this.getClass().getDeclaredConstructor().newInstance();
instance.configureFromAlignmentView(view);
return instance;
} catch (InstantiationException | IllegalAccessException e)
{
System.err.println("Error in " + getClass().getName()
+ ".getInstance(): " + e.getMessage());
return null;
} catch (ReflectiveOperationException roe)
{
return null;
}
}
boolean configureFromAlignmentView(AlignmentViewPanel view)
{
fr = view.cloneFeatureRenderer();
return true;
}
/**
* Calculates a distance measure [i][j] between each pair of sequences as the
* average number of features they have but do not share. That is, find the
* features each sequence pair has at each column, ignore feature types they
* have in common, and count the rest. The totals are normalised by the number
* of columns processed.
*
* The parameters argument provides settings for treatment of gap-residue
* aligned positions, and whether the score is over the longer or shorter of
* each pair of sequences
*
* @param seqData
* @param params
*/
@Override
public MatrixI findDistances(AlignmentView seqData,
SimilarityParamsI params)
{
SeqCigar[] seqs = seqData.getSequences();
int noseqs = seqs.length;
int cpwidth = 0;// = seqData.getWidth();
double[][] distances = new double[noseqs][noseqs];
List dft = null;
if (fr != null)
{
dft = fr.getDisplayedFeatureTypes();
}
if (dft == null || dft.isEmpty())
{
return new Matrix(distances);
}
// need to get real position for view position
int[] viscont = seqData.getVisibleContigs();
/*
* scan each column, compute and add to each distance[i, j]
* the number of feature types that seqi and seqj do not share
*/
for (int vc = 0; vc < viscont.length; vc += 2)
{
for (int cpos = viscont[vc]; cpos <= viscont[vc + 1]; cpos++)
{
cpwidth++;
/*
* first record feature types in this column for each sequence
*/
Map> sfap = findFeatureTypesAtColumn(seqs,
cpos);
/*
* count feature types on either i'th or j'th sequence but not both
* and add this 'distance' measure to the total for [i, j] for j > i
*/
for (int i = 0; i < (noseqs - 1); i++)
{
for (int j = i + 1; j < noseqs; j++)
{
SeqCigar sc1 = seqs[i];
SeqCigar sc2 = seqs[j];
Set set1 = sfap.get(sc1);
Set set2 = sfap.get(sc2);
boolean gap1 = set1 == null;
boolean gap2 = set2 == null;
/*
* gap-gap always scores zero
* residue-residue is always scored
* include gap-residue score if params say to do so
*/
if ((!gap1 && !gap2) || params.includeGaps())
{
int seqDistance = SetUtils.countDisjunction(set1, set2);
distances[i][j] += seqDistance;
}
}
}
}
}
/*
* normalise the distance scores (summed over columns) by the
* number of visible columns used in the calculation
* and fill in the bottom half of the matrix
*/
// TODO JAL-2424 cpwidth may be out by 1 - affects scores but not tree shape
for (int i = 0; i < noseqs; i++)
{
for (int j = i + 1; j < noseqs; j++)
{
distances[i][j] /= cpwidth;
distances[j][i] = distances[i][j];
}
}
return new Matrix(distances);
}
/**
* Builds and returns a map containing a (possibly empty) list (one per
* SeqCigar) of visible feature types at the given column position. The map
* does not include entries for features which straddle a gapped column
* positions.
*
* @param seqs
* @param columnPosition
* (0..)
* @return
*/
protected Map> findFeatureTypesAtColumn(
SeqCigar[] seqs, int columnPosition)
{
Map> sfap = new HashMap<>();
for (SeqCigar seq : seqs)
{
int spos = seq.findPosition(columnPosition);
if (spos != -1)
{
/*
* position is not a gap
*/
Set types = new HashSet<>();
List sfs = fr
.findFeaturesAtResidue(seq.getRefSeq(), spos, spos);
for (SequenceFeature sf : sfs)
{
types.add(sf.getType());
}
sfap.put(seq, types);
}
}
return sfap;
}
@Override
public String getName()
{
return NAME;
}
@Override
public String getDescription()
{
return description;
}
@Override
public boolean isDNA()
{
return true;
}
@Override
public boolean isProtein()
{
return true;
}
@Override
public String toString()
{
return "Score between sequences based on hamming distance between binary vectors marking features displayed at each column";
}
}