import jalview.math.MatrixI;
import jalview.util.SetUtils;
+import java.util.HashMap;
import java.util.HashSet;
+import java.util.Map;
import java.util.Set;
/* This class contains methods to calculate distance score between
public class SecondaryStructureDistanceModel extends DistanceScoreModel
{
private static final String NAME = "Secondary Structure Similarity";
+
+ private static final String SS_ANNOTATION_LABEL = "Secondary Structure";
+
+ private static final String SS_ANNOTATION_FROM_JPRED_LABEL = "jnetpred";
private String description;
+ //maximum distance score is defined as 2 as the possible number of unique ss is 2.
+ private static final int MAX_SCORE = 2;
+
+ //minimum distance score is 0; it is also the score added when neither sequence has secondary structure defined.
+ private static final int MIN_SCORE = 0;
+
+ private static final char COIL = 'C';
+
FeatureRenderer fr;
/**
@Override
public MatrixI findDistances(AlignmentView seqData,
SimilarityParamsI params)
- {
+ {
+
SeqCigar[] seqs = seqData.getSequences();
int noseqs = seqs.length; //no of sequences
int cpwidth = 0; // = seqData.getWidth();
// need to get real position for view position
int[] viscont = seqData.getVisibleContigs();
+ Map<String, HashSet<String>> calcIdMapInAlignmentAnnotation = new HashMap<String,HashSet<String>>();
+
+ AlignmentAnnotation[] alignAnnotList = fr.getViewport().getAlignment().getAlignmentAnnotation();
+ if(alignAnnotList.length > 0) {
+
+ for (AlignmentAnnotation aa: alignAnnotList) {
+ if (SS_ANNOTATION_LABEL.equals(aa.label) || SS_ANNOTATION_FROM_JPRED_LABEL.equals(aa.label)) {
+ calcIdMapInAlignmentAnnotation.computeIfAbsent(aa.getCalcId(), k -> new HashSet<>()).add(aa.description);
+ }
+
+ }
+ }
+
+
+ Set<SeqCigar> seqsWithUndefinedSS = findSeqsWithUndefinedSS(seqs, calcIdMapInAlignmentAnnotation);
+
/*
* scan each column, compute and add to each distance[i, j]
{
SeqCigar sc1 = seqs[i];
SeqCigar sc2 = seqs[j];
+
+
+ //check if ss is defined
+ boolean undefinedSS1 = seqsWithUndefinedSS.contains(sc1);
+ boolean undefinedSS2 = seqsWithUndefinedSS.contains(sc2);
+
+ // Set distance to 0 if both SS are not defined
+ if (undefinedSS1 && undefinedSS2) {
+ distances[i][j] += MIN_SCORE;
+ continue;
+ }
+
+ // Set distance to maximum score if either one SS is not defined
+ else if(undefinedSS1 || undefinedSS2) {
+ distances[i][j] += MAX_SCORE;
+ continue;
+ }
+
+ //check if the sequence contains gap in the current column
boolean gap1 = !seqsWithoutGapAtCol.contains(sc1);
- boolean gap2 = !seqsWithoutGapAtCol.contains(sc2);
+ boolean gap2 = !seqsWithoutGapAtCol.contains(sc2);
//Variable to store secondary structure at the current column
Set<String> secondaryStructure1 = new HashSet<String>();
//secondary structure is fetched only if the current column is not
//gap for the sequence
- if(!gap1) {
+ if(!gap1 && !undefinedSS1) {
secondaryStructure1.addAll(
findSSAnnotationForGivenSeqAndCol(seqs[i], cpos));
}
- if(!gap2) {
+ if(!gap2 && !undefinedSS2) {
secondaryStructure2.addAll(
findSSAnnotationForGivenSeqAndCol(seqs[j], cpos));
- }
+ }
/*
* gap-gap always scores zero
- * residue-residue is always scored
- * include gap-residue score if params say to do so
+ * ss-ss is always scored
+ * include gap-ss scores 1 if params say to do so
*/
if ((!gap1 && !gap2) || params.includeGaps())
{
}
/**
+   * Annotation labels that are looked up on a sequence when checking whether
+   * it has secondary structure annotation.
+   */
+  private static final String[] SS_ANNOTATION_LABELS = {
+      SS_ANNOTATION_LABEL,
+      SS_ANNOTATION_FROM_JPRED_LABEL
+  };
+
+  /**
+   * Builds and returns a set containing the sequences (SeqCigar) for which
+   * {@code isSSUndefinedOrNotAdded} reports true, i.e. sequences that have
+   * no secondary structure annotation matching an entry of the supplied map.
+   *
+   * @param seqs
+   *          the sequences to examine
+   * @param calcIdMapInAlignmentAnnotation
+   *          map from annotation calcId to the set of annotation
+   *          descriptions recorded under that calcId
+   * @return a new, possibly empty, set holding every element of
+   *         {@code seqs} that failed the check
+   */
+  protected Set<SeqCigar> findSeqsWithUndefinedSS(SeqCigar[] seqs,
+          Map<String, HashSet<String>> calcIdMapInAlignmentAnnotation)
+  {
+    Set<SeqCigar> seqsWithUndefinedSS = new HashSet<>();
+    for (SeqCigar seq : seqs)
+    {
+      if (isSSUndefinedOrNotAdded(seq, calcIdMapInAlignmentAnnotation))
+      {
+        seqsWithUndefinedSS.add(seq);
+      }
+    }
+    return seqsWithUndefinedSS;
+  }
+
+  /**
+   * Reports whether a sequence lacks usable secondary structure annotation.
+   * For each label in SS_ANNOTATION_LABELS the annotations of the sequence's
+   * reference sequence are fetched; the sequence counts as defined as soon as
+   * one of them has a calcId present in the map and a description contained
+   * in the set stored under that calcId.
+   *
+   * @param seq
+   *          the sequence to check
+   * @param calcIdMapInAlignmentAnnotation
+   *          map from annotation calcId to the set of annotation
+   *          descriptions recorded under that calcId
+   * @return false if a matching annotation was found, true otherwise
+   */
+  private boolean isSSUndefinedOrNotAdded(SeqCigar seq,
+          Map<String, HashSet<String>> calcIdMapInAlignmentAnnotation)
+  {
+    for (String label : SS_ANNOTATION_LABELS)
+    {
+      AlignmentAnnotation[] annotations = seq.getRefSeq().getAnnotation(label);
+      if (annotations == null)
+      {
+        continue;
+      }
+      for (AlignmentAnnotation annotation : annotations)
+      {
+        // get() returns null when this calcId was never recorded in the map;
+        // that is the "not added" case and must not throw
+        Set<String> descriptions = calcIdMapInAlignmentAnnotation
+                .get(annotation.getCalcId());
+        if (descriptions != null
+                && descriptions.contains(annotation.description))
+        {
+          // Secondary structure annotation is present and added to the track
+          return false;
+        }
+      }
+    }
+    // Either annotations are undefined or not added to the track
+    return true;
+  }
+
+
+ /**
* Finds secondary structure annotation for a given sequence (SeqCigar)
* and column position corresponding to the sequence.
*
{
Set<String> secondaryStructure = new HashSet<String>();
- char ss = '\0'; //default null character
+ char ss;
//fetch the position in sequence for the column and finds the
//corresponding secondary structure annotation
+ //TO DO - consider based on priority
int seqPosition = seq.findPosition(columnPosition);
- AlignmentAnnotation[] aa = seq.getRefSeq().getAnnotation("Secondary Structure");
+ AlignmentAnnotation[] aa = seq.getRefSeq().getAnnotation(SS_ANNOTATION_LABEL);
+
+ if(aa == null) {
+ aa = seq.getRefSeq().getAnnotation(SS_ANNOTATION_FROM_JPRED_LABEL);
+ }
+
if (aa != null) {
+ if (aa[0].getAnnotationForPosition(seqPosition) != null) {
Annotation a = aa[0].getAnnotationForPosition(seqPosition);
ss = a.secondaryStructure;
- if (ss == ' ') {
- ss = 'C'; // In JalView, 'C' is represented as ' '
- }
- if (ss != '\0') { // Check if ss is not the default null character
- secondaryStructure.add(String.valueOf(ss));
+
+ //There is no representation for coil and it can be either ' ' or null.
+ if (ss == ' ' || ss == '-') {
+ ss = COIL;
}
+ }
+ else {
+ ss = COIL;
+ }
+ secondaryStructure.add(String.valueOf(ss));
}
+
return secondaryStructure;
}
@Override
public boolean isDNA()
{
- return false;
+ return false; // unconditional: this model never reports itself as a DNA model
}
@Override
@Override
public String toString()
{
- return "Score between sequences based on hamming distance between binary vectors marking features displayed at each column";
+ return "Score between sequences based on hamming distance between binary vectors marking secondary structure displayed at each column"; // fixed description text; no instance state is read
}
}
\ No newline at end of file