/*
* Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
* Copyright (C) $$Year-Rel$$ The Jalview Authors
*
* This file is part of Jalview.
*
* Jalview is free software: you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation, either version 3
* of the License, or (at your option) any later version.
*
* Jalview is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty
* of MERCHANTABILITY or FITNESS FOR A PARTICULAR
* PURPOSE. See the GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with Jalview. If not, see <http://www.gnu.org/licenses/>.
* The Jalview Authors are detailed in the 'AUTHORS' file.
*/
package jalview.analysis.scoremodels;
import jalview.api.analysis.ScoreModelI;
import jalview.api.analysis.ViewBasedAnalysisI;
import jalview.datamodel.AlignmentView;
import jalview.datamodel.SeqCigar;
import jalview.datamodel.SequenceFeature;
import jalview.util.SetUtils;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
public class FeatureScoreModel implements ScoreModelI, ViewBasedAnalysisI
{
jalview.api.FeatureRenderer fr;
@Override
public boolean configureFromAlignmentView(
jalview.api.AlignmentViewPanel view)
{
fr = view.cloneFeatureRenderer();
return true;
}
/**
* Calculates a distance measure [i][j] between each pair of sequences as the
* average number of features they have but do not share. That is, find the
* features each sequence pair has at each column, ignore feature types they
* have in common, and count the rest. The totals are normalised by the number
* of columns processed.
*/
@Override
public float[][] findDistances(AlignmentView seqData)
{
List dft = fr.getDisplayedFeatureTypes();
SeqCigar[] seqs = seqData.getSequences();
int noseqs = seqs.length;
int cpwidth = 0;// = seqData.getWidth();
float[][] distance = new float[noseqs][noseqs];
if (dft.isEmpty())
{
return distance;
}
// need to get real position for view position
int[] viscont = seqData.getVisibleContigs();
/*
* scan each column, compute and add to each distance[i, j]
* the number of feature types that seqi and seqj do not share
*/
for (int vc = 0; vc < viscont.length; vc += 2)
{
for (int cpos = viscont[vc]; cpos <= viscont[vc + 1]; cpos++)
{
cpwidth++;
/*
* first pass: record features types in column for each sequence
*/
Map> sfap = findFeatureTypesAtColumn(
seqs, cpos);
/*
* count feature types on either i'th or j'th sequence but not both
* and add this 'distance' measure to the total for [i, j] for j > i
*/
for (int i = 0; i < (noseqs - 1); i++)
{
for (int j = i + 1; j < noseqs; j++)
{
int seqDistance = SetUtils.countDisjunction(sfap.get(seqs[i]),
sfap.get(seqs[j]));
distance[i][j] += seqDistance;
}
}
}
}
/*
* normalise the distance scores (summed over columns) by the
* number of visible columns used in the calculation
*/
for (int i = 0; i < noseqs; i++)
{
for (int j = i + 1; j < noseqs; j++)
{
distance[i][j] /= cpwidth;
distance[j][i] = distance[i][j];
}
}
return distance;
}
/**
* Builds and returns a list (one per SeqCigar) of visible feature types at
* the given column position
*
* @param seqs
* @param columnPosition
* @return
*/
protected Map> findFeatureTypesAtColumn(
SeqCigar[] seqs, int columnPosition)
{
Map> sfap = new HashMap>();
for (SeqCigar seq : seqs)
{
Set types = new HashSet();
int spos = seq.findPosition(columnPosition);
if (spos != -1)
{
List sfs = fr.findFeaturesAtRes(seq.getRefSeq(),
spos);
for (SequenceFeature sf : sfs)
{
types.add(sf.getType());
}
}
sfap.put(seq, types);
}
return sfap;
}
@Override
public String getName()
{
return "Sequence Feature Similarity";
}
@Override
public boolean isDNA()
{
return true;
}
@Override
public boolean isProtein()
{
return true;
}
@Override
public String toString()
{
return "Score between sequences based on hamming distance between binary vectors marking features displayed at each column";
}
}