types. Sequences with similar distributions of features of the
same type will be grouped together in trees computed with this
metric. <em>This measure was introduced in Jalview 2.9</em></li>
+
+ <li><strong>Secondary Structure Similarity</strong><br>Trees are
+ generated using a distance matrix, which is constructed from Jaccard
+ distances that specifically consider the secondary structure features
+ observed at each column of the alignment.
+ <ul>
+ <li>For secondary structure similarity analysis, the number of unique
+ secondary structures observed at any given column <em>i</em> for the
+ pair of sequences being compared ranges from 0 to 2, reflecting the
+ presence of helices, sheets, coils and gaps.
+ <br>The similarity at column <em>i</em> = Total
+ number of unique secondary structures (which can range from 0 to 2)
+ &minus; Sum of the number of secondary structures in common at column
+ <em>i</em> (which can be either 0 or 1).<br>The similarity scores are
+ summed across all columns and then divided by the total number of
+ columns to calculate an average similarity score.
+ </li>
+ </ul>
+ Distance calculations are based on the secondary structures
+ currently displayed. Sequences with similar distributions of secondary
+ structures will be grouped together in trees.<br>
+ <em>The distance between two sequences is maximal when one
+ sequence has a defined secondary structure annotation track and the
+ other does not, indicating complete dissimilarity between them.
+ Conversely, the distance between two sequences is minimal when
+ neither sequence in the comparison has a defined secondary
+ structure annotation track.</em>
+ </li>
</ul>
<p>
<strong>Tree Construction Methods</strong>
import jalview.math.MatrixI;
import jalview.util.SetUtils;
+import java.util.HashMap;
import java.util.HashSet;
+import java.util.Map;
import java.util.Set;
/* This class contains methods to calculate distance score between
private static final String NAME = "Secondary Structure Similarity";
private static final String SS_ANNOTATION_LABEL = "Secondary Structure";
+
+ private static final String SS_ANNOTATION_FROM_JPRED_LABEL = "jnetpred";
private String description;
@Override
public MatrixI findDistances(AlignmentView seqData,
SimilarityParamsI params)
- {
+ {
+
SeqCigar[] seqs = seqData.getSequences();
int noseqs = seqs.length; //no of sequences
int cpwidth = 0; // = seqData.getWidth();
// need to get real position for view position
int[] viscont = seqData.getVisibleContigs();
+ Map<String, HashSet<String>> calcIdMapInAlignmentAnnotation = new HashMap<String,HashSet<String>>();
+
+ AlignmentAnnotation[] alignAnnotList = fr.getViewport().getAlignment().getAlignmentAnnotation();
+ if(alignAnnotList.length > 0) {
+
+ for (AlignmentAnnotation aa: alignAnnotList) {
+ if (SS_ANNOTATION_LABEL.equals(aa.label) || SS_ANNOTATION_FROM_JPRED_LABEL.equals(aa.label)) {
+ calcIdMapInAlignmentAnnotation.computeIfAbsent(aa.getCalcId(), k -> new HashSet<>()).add(aa.description);
+ }
+
+ }
+ }
+
- Set<SeqCigar> seqsWithUndefinedSS = findSeqsWithUndefinedSS(seqs);
+ Set<SeqCigar> seqsWithUndefinedSS = findSeqsWithUndefinedSS(seqs, calcIdMapInAlignmentAnnotation);
/*
{
SeqCigar sc1 = seqs[i];
SeqCigar sc2 = seqs[j];
+
//check if ss is defined
boolean undefinedSS1 = seqsWithUndefinedSS.contains(sc1);
/*
* gap-gap always scores zero
- * residue-residue is always scored
- * include gap-residue score if params say to do so
+ * ss-ss is always scored
+ * include gap-ss scores 1 if params say to do so
*/
if ((!gap1 && !gap2) || params.includeGaps())
{
* (0..)
* @return
*/
- protected Set<SeqCigar> findSeqsWithUndefinedSS(
- SeqCigar[] seqs)
- {
- Set<SeqCigar> seqsWithUndefinedSS = new HashSet<>();
- for (SeqCigar seq : seqs)
- {
-
- AlignmentAnnotation[] aa = seq.getRefSeq().getAnnotation(SS_ANNOTATION_LABEL);
- if (aa == null) {
- /*
- * secondary structure is undefined for the seq
- * Add seq in the set
- */
- seqsWithUndefinedSS.add(seq);
+ // Annotation labels that isSSUndefinedOrNotAdded looks up, in this order,
+ // when checking a sequence for secondary structure annotation.
+ private static final String[] SS_ANNOTATION_LABELS = {
+ SS_ANNOTATION_LABEL,
+ SS_ANNOTATION_FROM_JPRED_LABEL
+ };
+
+ /**
+  * Collects the sequences for which isSSUndefinedOrNotAdded answers true.
+  *
+  * @param seqs
+  *          the sequences to check
+  * @param calcIdMapInAlignmentAnnotation
+  *          map passed through unchanged to isSSUndefinedOrNotAdded for
+  *          each sequence
+  * @return the (possibly empty) set of sequences that were flagged
+  */
+ protected Set<SeqCigar> findSeqsWithUndefinedSS(SeqCigar[] seqs, Map<String, HashSet<String>> calcIdMapInAlignmentAnnotation) {
+ Set<SeqCigar> seqsWithUndefinedSS = new HashSet<>();
+ for (SeqCigar seq : seqs) {
+ if (isSSUndefinedOrNotAdded(seq, calcIdMapInAlignmentAnnotation)) {
+ seqsWithUndefinedSS.add(seq);
+ }
}
- }
- return seqsWithUndefinedSS;
+ return seqsWithUndefinedSS;
+ }
+
+ /**
+  * Answers true if the sequence has no annotation under any label in
+  * SS_ANNOTATION_LABELS, or if none of its annotations is recorded (by
+  * calcId and description) in the supplied map.
+  *
+  * @param seq
+  *          the sequence to check
+  * @param calcIdMapInAlignmentAnnotation
+  *          sets of annotation descriptions, keyed by annotation calcId
+  * @return false if at least one of the sequence's annotations is found in
+  *         the map, otherwise true
+  */
+ private boolean isSSUndefinedOrNotAdded(SeqCigar seq, Map<String, HashSet<String>> calcIdMapInAlignmentAnnotation) {
+   for (String label : SS_ANNOTATION_LABELS) {
+     AlignmentAnnotation[] annotations = seq.getRefSeq().getAnnotation(label);
+     if (annotations != null) {
+       for (AlignmentAnnotation annotation : annotations) {
+         HashSet<String> descriptionList = calcIdMapInAlignmentAnnotation.get(annotation.getCalcId());
+         // The map has no entry when this annotation's calcId was never
+         // recorded; treat that as "not added" rather than dereferencing null.
+         if (descriptionList != null && descriptionList.contains(annotation.description)) {
+           // Secondary structure annotation is present and added to the track, no need to add seq
+           return false;
+         }
+       }
+     }
+   }
+   // Either annotations are undefined or not added to the track
+   return true;
}
+
/**
* Finds secondary structure annotation for a given sequence (SeqCigar)
* and column position corresponding to the sequence.
//fetch the position in sequence for the column and finds the
//corresponding secondary structure annotation
+ //TO DO - consider based on priority
int seqPosition = seq.findPosition(columnPosition);
AlignmentAnnotation[] aa = seq.getRefSeq().getAnnotation(SS_ANNOTATION_LABEL);
+
+ if(aa == null) {
+ aa = seq.getRefSeq().getAnnotation(SS_ANNOTATION_FROM_JPRED_LABEL);
+ }
+
if (aa != null) {
if (aa[0].getAnnotationForPosition(seqPosition) != null) {
Annotation a = aa[0].getAnnotationForPosition(seqPosition);
ss = a.secondaryStructure;
//There is no representation for coil and it can be either ' ' or null.
- if (ss == ' ') {
+ if (ss == ' ' || ss == '-') {
ss = COIL;
}
}