--- /dev/null
+/*
+ * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
+ * Copyright (C) $$Year-Rel$$ The Jalview Authors
+ *
+ * This file is part of Jalview.
+ *
+ * Jalview is free software: you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, either version 3
+ * of the License, or (at your option) any later version.
+ *
+ * Jalview is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty
+ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
+ * PURPOSE. See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
+ * The Jalview Authors are detailed in the 'AUTHORS' file.
+ */
+package jalview.analysis.scoremodels;
+
+import jalview.api.analysis.DistanceModelI;
+import jalview.api.analysis.ViewBasedAnalysisI;
+import jalview.datamodel.AlignmentView;
+import jalview.datamodel.SeqCigar;
+import jalview.datamodel.SequenceFeature;
+import jalview.util.SetUtils;
+
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+
+/**
+ * A distance model that scores each pair of sequences by the feature types
+ * they do not have in common, column by column, averaged over the columns
+ * examined.
+ * <p>
+ * The feature renderer used to look up features is only assigned in
+ * {@link #configureFromAlignmentView(jalview.api.AlignmentViewPanel)}, so that
+ * method must be called before {@link #findDistances(AlignmentView)}.
+ */
+public class FeatureDistanceModel implements DistanceModelI, ViewBasedAnalysisI
+{
+  /*
+   * feature renderer cloned from the view this model was configured with;
+   * null until configureFromAlignmentView has been called
+   */
+  jalview.api.FeatureRenderer fr;
+
+  /**
+   * Takes a clone of the given view's feature renderer for use in subsequent
+   * distance calculations.
+   *
+   * @param view
+   *          the alignment view panel to take feature settings from
+   * @return always true
+   */
+  @Override
+  public boolean configureFromAlignmentView(
+          jalview.api.AlignmentViewPanel view)
+  {
+    fr = view.cloneFeatureRenderer();
+    return true;
+  }
+
+  /**
+   * Calculates a distance measure [i][j] between each pair of sequences as the
+   * average number of features they have but do not share. That is, find the
+   * features each sequence pair has at each column, ignore feature types they
+   * have in common, and count the rest. The totals are normalised by the number
+   * of columns processed.
+   * <p>
+   * If no feature types are displayed, or there are no visible columns to
+   * process, every distance is zero.
+   *
+   * @param seqData
+   *          the alignment view supplying the sequences and visible column
+   *          ranges
+   * @return a symmetric matrix of pairwise distances with a zero diagonal
+   */
+  @Override
+  public float[][] findDistances(AlignmentView seqData)
+  {
+    List<String> dft = fr.getDisplayedFeatureTypes();
+    SeqCigar[] seqs = seqData.getSequences();
+    int noseqs = seqs.length;
+    int cpwidth = 0;
+    float[][] distance = new float[noseqs][noseqs];
+    if (dft.isEmpty())
+    {
+      return distance;
+    }
+
+    // visible columns, held as consecutive [start, end] (inclusive) pairs
+    int[] viscont = seqData.getVisibleContigs();
+
+    /*
+     * scan each column, compute and add to each distance[i, j]
+     * the number of feature types that seqi and seqj do not share
+     */
+    for (int vc = 0; vc < viscont.length; vc += 2)
+    {
+      for (int cpos = viscont[vc]; cpos <= viscont[vc + 1]; cpos++)
+      {
+        cpwidth++;
+
+        /*
+         * first pass: record features types in column for each sequence
+         */
+        Map<SeqCigar, Set<String>> sfap = findFeatureTypesAtColumn(seqs,
+                cpos);
+
+        /*
+         * count feature types on either i'th or j'th sequence but not both
+         * and add this 'distance' measure to the total for [i, j] for j > i
+         */
+        for (int i = 0; i < (noseqs - 1); i++)
+        {
+          for (int j = i + 1; j < noseqs; j++)
+          {
+            int seqDistance = SetUtils.countDisjunction(sfap.get(seqs[i]),
+                    sfap.get(seqs[j]));
+            distance[i][j] += seqDistance;
+          }
+        }
+      }
+    }
+
+    /*
+     * no visible columns were processed: all totals are still zero, and
+     * dividing by a zero column count would turn them into NaN
+     */
+    if (cpwidth == 0)
+    {
+      return distance;
+    }
+
+    /*
+     * normalise the distance scores (summed over columns) by the
+     * number of visible columns used in the calculation, and mirror
+     * the upper triangle into the lower one
+     */
+    for (int i = 0; i < noseqs; i++)
+    {
+      for (int j = i + 1; j < noseqs; j++)
+      {
+        distance[i][j] /= cpwidth;
+        distance[j][i] = distance[i][j];
+      }
+    }
+    return distance;
+  }
+
+  /**
+   * Builds and returns a map, with one entry per SeqCigar, of the types of the
+   * features the feature renderer reports at the given column position. A
+   * sequence for which no residue position is found at the column (position
+   * -1) is mapped to an empty set.
+   *
+   * @param seqs
+   *          the sequences to inspect
+   * @param columnPosition
+   *          the alignment column to look up in each sequence
+   * @return a map from each sequence to its (possibly empty) set of feature
+   *         types at the column
+   */
+  protected Map<SeqCigar, Set<String>> findFeatureTypesAtColumn(
+          SeqCigar[] seqs, int columnPosition)
+  {
+    Map<SeqCigar, Set<String>> sfap = new HashMap<SeqCigar, Set<String>>();
+    for (SeqCigar seq : seqs)
+    {
+      Set<String> types = new HashSet<String>();
+      int spos = seq.findPosition(columnPosition);
+      if (spos != -1)
+      {
+        List<SequenceFeature> sfs = fr.findFeaturesAtRes(seq.getRefSeq(),
+                spos);
+        for (SequenceFeature sf : sfs)
+        {
+          types.add(sf.getType());
+        }
+      }
+      sfap.put(seq, types);
+    }
+    return sfap;
+  }
+
+  /**
+   * @return the display name of this model
+   */
+  @Override
+  public String getName()
+  {
+    return "Sequence Feature Similarity";
+  }
+
+  /**
+   * @return true, as this model reports itself applicable to nucleotide data
+   */
+  @Override
+  public boolean isDNA()
+  {
+    return true;
+  }
+
+  /**
+   * @return true, as this model reports itself applicable to protein data
+   */
+  @Override
+  public boolean isProtein()
+  {
+    return true;
+  }
+
+  /**
+   * @return a one-line description of how the score is computed
+   */
+  @Override
+  public String toString()
+  {
+    return "Score between sequences based on hamming distance between binary vectors marking features displayed at each column";
+  }
+}