2 * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
3 * Copyright (C) $$Year-Rel$$ The Jalview Authors
5 * This file is part of Jalview.
7 * Jalview is free software: you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation, either version 3
10 * of the License, or (at your option) any later version.
12 * Jalview is distributed in the hope that it will be useful, but
13 * WITHOUT ANY WARRANTY; without even the implied warranty
14 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15 * PURPOSE. See the GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
19 * The Jalview Authors are detailed in the 'AUTHORS' file.
21 package jalview.analysis.scoremodels;
23 import jalview.api.AlignmentViewPanel;
24 import jalview.api.FeatureRenderer;
25 import jalview.api.analysis.ScoreModelI;
26 import jalview.api.analysis.SimilarityParamsI;
27 import jalview.datamodel.AlignmentView;
28 import jalview.datamodel.SeqCigar;
29 import jalview.datamodel.SequenceFeature;
30 import jalview.math.Matrix;
31 import jalview.math.MatrixI;
32 import jalview.util.SetUtils;
34 import java.util.HashMap;
35 import java.util.HashSet;
36 import java.util.List;
// Distance score model (extends DistanceScoreModel) that compares sequences by
// the sequence features found on them.
// NOTE(review): this extract omits some original lines (braces, annotations,
// short statements), so only the declarations visible here are described.
40 public class FeatureDistanceModel extends DistanceScoreModel
// Display name of the model; presumably what getName() returns - confirm.
42 private static final String NAME = "Sequence Feature Similarity";
// Description text; where it is assigned is not visible in this extract.
44 private String description;
// No-argument constructor. getInstance() looks it up reflectively with
// getDeclaredConstructor(), so the runtime class must keep declaring one.
51 public FeatureDistanceModel()
// Creates a new model of the same runtime class as this one and configures it
// from the supplied alignment view panel.
// NOTE(review): the try keyword, the braces and every return statement are
// missing from this extract - confirm what is returned on success and on failure.
56 public ScoreModelI getInstance(AlignmentViewPanel view)
58 FeatureDistanceModel instance;
// this.getClass() is used instead of a direct constructor call, so a subclass
// yields an instance of its own type (it needs its own no-argument constructor)
61 instance = this.getClass().getDeclaredConstructor().newInstance();
// copies the view's feature renderer into the new instance
62 instance.configureFromAlignmentView(view);
// instantiation or access failures are reported on standard error
64 } catch (InstantiationException | IllegalAccessException e)
66 System.err.println("Error in " + getClass().getName()
67 + ".getInstance(): " + e.getMessage());
// any other reflective failure lands here; the handler body is not visible in
// this extract, so whether it logs anything should be confirmed
69 } catch (ReflectiveOperationException roe)
// Prepares this model for use with the given view by storing the feature
// renderer that the view hands back from cloneFeatureRenderer() in the field fr.
// NOTE(review): the boolean return statement is not visible in this extract.
75 boolean configureFromAlignmentView(AlignmentViewPanel view)
78 fr = view.cloneFeatureRenderer();
83 * Calculates a distance measure [i][j] between each pair of sequences as the
84 * average number of features they have but do not share. That is, find the
85 * features each sequence pair has at each column, ignore feature types they
86 * have in common, and count the rest. The totals are normalised by the number
87 * of columns processed.
89 * The parameters argument provides settings for treatment of gap-residue
90 * aligned positions, and whether the score is over the longer or shorter of
91 * each pair of sequences.
// Builds a symmetric matrix of pairwise feature distances between the sequences
// of seqData: per visible column, the count of feature types found on one
// sequence of a pair but not the other is summed, then divided by cpwidth.
// Returns an all-zero matrix when no feature types are displayed.
// NOTE(review): some original lines (braces, a null guard around fr, the update
// of cpwidth, the tail of the findFeatureTypesAtColumn call) are missing from
// this extract; comments below describe only what is visible.
97 public MatrixI findDistances(AlignmentView seqData,
98 SimilarityParamsI params)
100 SeqCigar[] seqs = seqData.getSequences();
101 int noseqs = seqs.length;
// divisor used for the final normalisation; starts at zero and its update is
// not visible in this extract (presumably incremented once per column - confirm)
102 int cpwidth = 0;// = seqData.getWidth();
// square matrix, one row and one column per sequence, initially all zero
103 double[][] distances = new double[noseqs][noseqs];
104 List<String> dft = null;
107 dft = fr.getDisplayedFeatureTypes();
// nothing displayed means nothing to compare: return the zero matrix as is
// (in the lines visible here dft is used for this check only)
109 if (dft == null || dft.isEmpty())
111 return new Matrix(distances);
114 // need to get real position for view position
// read below as consecutive [start, end] pairs, both ends inclusive
115 int[] viscont = seqData.getVisibleContigs();
118 * scan each column, compute and add to each distance[i, j]
119 * the number of feature types that seqi and seqj do not share
// step by two: even index is a contig start, the following index its end
121 for (int vc = 0; vc < viscont.length; vc += 2)
123 for (int cpos = viscont[vc]; cpos <= viscont[vc + 1]; cpos++)
128 * first record feature types in this column for each sequence
130 Map<SeqCigar, Set<String>> sfap = findFeatureTypesAtColumn(seqs,
134 * count feature types on either i'th or j'th sequence but not both
135 * and add this 'distance' measure to the total for [i, j] for j > i
// only the upper triangle (j > i) is accumulated here; it is mirrored later
137 for (int i = 0; i < (noseqs - 1); i++)
139 for (int j = i + 1; j < noseqs; j++)
141 SeqCigar sc1 = seqs[i];
142 SeqCigar sc2 = seqs[j];
143 Set<String> set1 = sfap.get(sc1);
144 Set<String> set2 = sfap.get(sc2);
// a sequence with no entry in the column map is treated as gapped here
145 boolean gap1 = set1 == null;
146 boolean gap2 = set2 == null;
149 * gap-gap always scores zero
150 * residue-residue is always scored
151 * include gap-residue score if params say to do so
// NOTE(review): with includeGaps() true this branch is also entered when one or
// both sets are null, so SetUtils.countDisjunction presumably tolerates null
// arguments and returns 0 for null/null - confirm against SetUtils
153 if ((!gap1 && !gap2) || params.includeGaps())
155 int seqDistance = SetUtils.countDisjunction(set1, set2);
156 distances[i][j] += seqDistance;
164 * normalise the distance scores (summed over columns) by the
165 * number of visible columns used in the calculation
166 * and fill in the bottom half of the matrix
168 // TODO JAL-2424 cpwidth may be out by 1 - affects scores but not tree shape
169 for (int i = 0; i < noseqs; i++)
171 for (int j = i + 1; j < noseqs; j++)
// divide the column total by cpwidth, then copy it across the diagonal
173 distances[i][j] /= cpwidth;
174 distances[j][i] = distances[i][j];
177 return new Matrix(distances);
181 * Builds and returns a map containing a (possibly empty) set (one per
182 * SeqCigar) of visible feature types at the given column position. The map
183 * has no entry for a sequence that is gapped at that column.
187 * @param columnPosition
// Collects, for each sequence, the set of feature types that the feature
// renderer fr reports at the residue aligned to columnPosition, keyed by the
// sequence's SeqCigar.
// NOTE(review): the guard around the non-gap branch and the return statement
// are missing from this extract; presumably the map is returned and gapped
// sequences get no entry (findDistances treats a missing entry as a gap).
191 protected Map<SeqCigar, Set<String>> findFeatureTypesAtColumn(
192 SeqCigar[] seqs, int columnPosition)
194 Map<SeqCigar, Set<String>> sfap = new HashMap<>();
195 for (SeqCigar seq : seqs)
// map the alignment column to a position on this sequence
197 int spos = seq.findPosition(columnPosition);
201 * position is not a gap
// a set is used, so a type occurring on several features is recorded once
203 Set<String> types = new HashSet<>();
// query the single-residue range [spos, spos] on the reference sequence
204 List<SequenceFeature> sfs = fr
205 .findFeaturesAtResidue(seq.getRefSeq(), spos, spos);
206 for (SequenceFeature sf : sfs)
208 types.add(sf.getType());
// stored even when empty: a residue with no features still gets an entry
210 sfap.put(seq, types);
// Accessor for the model's name. The body is not visible in this extract;
// presumably it returns the NAME constant - confirm.
217 public String getName()
// Accessor for the model's description. The body is not visible in this
// extract; presumably it returns the description field - confirm.
223 public String getDescription()
// Reports whether this model applies to nucleotide data. The returned value is
// not visible in this extract - confirm before relying on it.
229 public boolean isDNA()
// Reports whether this model applies to protein data. The returned value is
// not visible in this extract - confirm before relying on it.
235 public boolean isProtein()
// Returns a fixed one-sentence summary of how this model scores sequences;
// the text does not depend on any instance state.
241 public String toString()
243 return "Score between sequences based on hamming distance between binary vectors marking features displayed at each column";