2 * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
3 * Copyright (C) $$Year-Rel$$ The Jalview Authors
5 * This file is part of Jalview.
7 * Jalview is free software: you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation, either version 3
10 * of the License, or (at your option) any later version.
12 * Jalview is distributed in the hope that it will be useful, but
13 * WITHOUT ANY WARRANTY; without even the implied warranty
14 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15 * PURPOSE. See the GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
19 * The Jalview Authors are detailed in the 'AUTHORS' file.
21 package jalview.analysis.scoremodels;
23 import jalview.api.analysis.DistanceModelI;
24 import jalview.api.analysis.ViewBasedAnalysisI;
25 import jalview.datamodel.AlignmentView;
26 import jalview.datamodel.SeqCigar;
27 import jalview.datamodel.SequenceFeature;
28 import jalview.util.SetUtils;
30 import java.util.HashMap;
31 import java.util.HashSet;
32 import java.util.List;
// Distance model driven by sequence features; declared to implement both
// DistanceModelI and ViewBasedAnalysisI.
36 public class FeatureDistanceModel implements DistanceModelI, ViewBasedAnalysisI
// Feature renderer used for all feature lookups in this class. It is assigned
// in configureFromAlignmentView and has no explicit access modifier here.
38 jalview.api.FeatureRenderer fr;
// Configures this model from an alignment view panel: asks the view for a
// cloned feature renderer and stores it in the field fr.
// NOTE(review): the return statement is not visible in this listing, so the
// boolean result cannot be confirmed from here.
41 public boolean configureFromAlignmentView(
42 jalview.api.AlignmentViewPanel view)
44 fr = view.cloneFeatureRenderer();
49 * Calculates a distance measure [i][j] between each pair of sequences as the
50 * average number of features they have but do not share. That is, find the
51 * features each sequence pair has at each column, ignore feature types they
52 * have in common, and count the rest. The totals are normalised by the number
53 * of columns processed.
// Builds a noseqs x noseqs matrix of pairwise distances for the sequences in
// seqData. For each scanned column, the count returned by
// SetUtils.countDisjunction for a pair (i, j), j > i, is added to
// distance[i][j]; the totals are then divided by cpwidth and mirrored into
// distance[j][i].
// NOTE(review): several lines of this method are missing from this listing
// (truncated call arguments, the return statement); confirm against the full
// file before changing anything.
56 public float[][] findDistances(AlignmentView seqData)
// NOTE(review): dft is not read by any line visible here - confirm how it is
// used (possibly an early exit when no feature types are displayed).
58 List<String> dft = fr.getDisplayedFeatureTypes();
59 SeqCigar[] seqs = seqData.getSequences();
60 int noseqs = seqs.length;
// Divisor for the normalisation step below. It starts at 0 and no increment is
// visible in this listing - presumably it is bumped once per scanned column.
61 int cpwidth = 0;// = seqData.getWidth();
// Java zero-initialises the array, so the diagonal and any untouched cells are 0.
62 float[][] distance = new float[noseqs][noseqs];
68 // need to get real position for view position
// viscont is consumed below as consecutive (start, end) pairs.
69 int[] viscont = seqData.getVisibleContigs();
72 * scan each column, compute and add to each distance[i, j]
73 * the number of feature types that seqi and seqj do not share
// vc advances by 2: viscont[vc] is a range start and viscont[vc + 1] its end.
75 for (int vc = 0; vc < viscont.length; vc += 2)
// The end bound is inclusive (<=).
77 for (int cpos = viscont[vc]; cpos <= viscont[vc + 1]; cpos++)
82 * first pass: record features types in column for each sequence
// The argument list continues on a line missing from this listing; the
// two-parameter signature (SeqCigar[], int) suggests (seqs, cpos) - confirm.
84 Map<SeqCigar, Set<String>> sfap = findFeatureTypesAtColumn(
88 * count feature types on either i'th or j'th sequence but not both
89 * and add this 'distance' measure to the total for [i, j] for j > i
// Only the upper triangle (j > i) is accumulated in this pass.
91 for (int i = 0; i < (noseqs - 1); i++)
93 for (int j = i + 1; j < noseqs; j++)
// The second argument is on a line missing from this listing - presumably
// sfap.get(seqs[j]); confirm.
95 int seqDistance = SetUtils.countDisjunction(sfap.get(seqs[i]),
97 distance[i][j] += seqDistance;
104 * normalise the distance scores (summed over columns) by the
105 * number of visible columns used in the calculation
107 for (int i = 0; i < noseqs; i++)
109 for (int j = i + 1; j < noseqs; j++)
// NOTE(review): float division by an int cpwidth of 0 yields NaN (0/0) or
// Infinity (x/0) rather than throwing - confirm cpwidth cannot be 0 here,
// e.g. when there are no visible contigs.
111 distance[i][j] /= cpwidth;
// Mirror the upper-triangle value so the matrix is symmetric.
112 distance[j][i] = distance[i][j];
119 * Builds and returns a map, keyed by SeqCigar, of the set of visible feature
120 * types found for that sequence at the given column position
123 * @param columnPosition
// Collects, for each SeqCigar in seqs, the set of feature type names obtained
// from the feature renderer fr for the given column, and stores each set in a
// map keyed by the SeqCigar.
// NOTE(review): the return statement and at least one call argument are
// missing from this listing; confirm against the full file.
126 protected Map<SeqCigar, Set<String>> findFeatureTypesAtColumn(
127 SeqCigar[] seqs, int columnPosition)
129 Map<SeqCigar, Set<String>> sfap = new HashMap<SeqCigar, Set<String>>();
130 for (SeqCigar seq : seqs)
// A fresh set per sequence; using a Set means each type name is held once.
132 Set<String> types = new HashSet<String>();
// NOTE(review): no check on spos is visible between this line and the lookup
// below - confirm how a column with no residue in this sequence is handled.
133 int spos = seq.findPosition(columnPosition);
// The second argument is on a line missing from this listing - presumably
// spos; confirm.
136 List<SequenceFeature> sfs = fr.findFeaturesAtRes(seq.getRefSeq(),
138 for (SequenceFeature sf : sfs)
140 types.add(sf.getType());
// Record the types collected for this sequence.
143 sfap.put(seq, types);
// Returns the fixed name of this model as a string literal.
149 public String getName()
151 return "Sequence Feature Similarity";
// NOTE(review): the body of this method is not visible in this listing, so its
// return value cannot be confirmed from here.
155 public boolean isDNA()
// NOTE(review): the body of this method is not visible in this listing, so its
// return value cannot be confirmed from here.
161 public boolean isProtein()
// Returns a fixed one-sentence description of the scoring scheme.
167 public String toString()
169 return "Score between sequences based on hamming distance between binary vectors marking features displayed at each column";