--- /dev/null
+/*
+ * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
+ * Copyright (C) $$Year-Rel$$ The Jalview Authors
+ *
+ * This file is part of Jalview.
+ *
+ * Jalview is free software: you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, either version 3
+ * of the License, or (at your option) any later version.
+ *
+ * Jalview is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty
+ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
+ * PURPOSE. See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
+ * The Jalview Authors are detailed in the 'AUTHORS' file.
+ */
+package jalview.analysis.scoremodels;
+
+import jalview.analysis.AlignmentUtils;
+import jalview.api.AlignmentViewPanel;
+import jalview.api.FeatureRenderer;
+import jalview.api.analysis.ScoreModelI;
+import jalview.api.analysis.SimilarityParamsI;
+import jalview.datamodel.AlignmentAnnotation;
+import jalview.datamodel.AlignmentView;
+import jalview.datamodel.Annotation;
+import jalview.datamodel.SeqCigar;
+import jalview.math.Matrix;
+import jalview.math.MatrixI;
+import jalview.util.Constants;
+import jalview.util.SetUtils;
+
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Map;
+import java.util.Set;
+
+/* This class contains methods to calculate distance score between
+ * secondary structure annotations of the sequences.
+ */
+public class SecondaryStructureDistanceModel extends DistanceScoreModel
+{
+ private static final String NAME = "Secondary Structure Similarity";
+
+ private ScoreMatrix ssRateMatrix;
+
+ private String description;
+
+ FeatureRenderer fr;
+
+
+ /**
+ * Constructor
+ */
+ public SecondaryStructureDistanceModel()
+ {
+
+ }
+
+ @Override
+ public ScoreModelI getInstance(AlignmentViewPanel view)
+ {
+ SecondaryStructureDistanceModel instance;
+ try
+ {
+ instance = this.getClass().getDeclaredConstructor().newInstance();
+ instance.configureFromAlignmentView(view);
+ return instance;
+ } catch (InstantiationException | IllegalAccessException e)
+ {
+ jalview.bin.Console.errPrintln("Error in " + getClass().getName()
+ + ".getInstance(): " + e.getMessage());
+ return null;
+ } catch (ReflectiveOperationException roe)
+ {
+ return null;
+ }
+ }
+
+ boolean configureFromAlignmentView(AlignmentViewPanel view)
+
+ {
+ fr = view.cloneFeatureRenderer();
+ return true;
+ }
+
+ /**
+ * Calculates distance score [i][j] between each pair of protein sequences
+ * based on their secondary structure annotations (H, E, C).
+ * The final score is normalised by the number of
+ * alignment columns processed, providing an average similarity score.
+ * <p>
+ * The parameters argument can include settings for handling gap-residue aligned
+ * positions and may determine if the score calculation is based on the longer or shorter
+ * sequence in each pair. This can be important for handling partial alignments or
+ * sequences of significantly different lengths.
+ *
+ * @param seqData The aligned sequence data including secondary structure annotations.
+ * @param params Additional parameters for customising the scoring process, such as gap
+ * handling and sequence length consideration.
+ */
+ @Override
+ public MatrixI findDistances(AlignmentView seqData,
+ SimilarityParamsI params)
+ {
+
+ SeqCigar[] seqs = seqData.getSequences();
+ int noseqs = seqs.length; //no of sequences
+ int cpwidth = 0; // = seqData.getWidth();
+ double[][] similarities = new double[noseqs][noseqs]; //matrix to store similarity score
+ //secondary structure source parameter selected by the user from the drop down.
+ String ssSource = params.getSecondaryStructureSource();
+ ssRateMatrix = ScoreModels.getInstance().getSecondaryStructureMatrix();
+
+ //defining the default value for secondary structure source as 3d structures
+ //or JPred if user selected JPred
+ String selectedSSSource = Constants.SS_ANNOTATION_LABEL;
+ if(ssSource.equals(Constants.SECONDARY_STRUCTURE_LABELS.get(Constants.SS_ANNOTATION_FROM_JPRED_LABEL)))
+ {
+ selectedSSSource = Constants.SS_ANNOTATION_FROM_JPRED_LABEL;
+ }
+
+ // need to get real position for view position
+ int[] viscont = seqData.getVisibleContigs();
+
+ /*
+ * Add secondary structure annotations that are added to the annotation track
+ * to the map
+ */
+ Map<String, HashSet<String>> ssAlignmentAnnotationForSequences
+ = new HashMap<String,HashSet<String>>();
+
+ AlignmentAnnotation[] alignAnnotList = fr.getViewport().getAlignment()
+ .getAlignmentAnnotation();
+
+ if(alignAnnotList.length > 0) {
+ for (AlignmentAnnotation aa: alignAnnotList) {
+ if (selectedSSSource.equals(aa.label)) {
+ ssAlignmentAnnotationForSequences.computeIfAbsent(
+ aa.sequenceRef.getName(), k -> new HashSet<>())
+ .add(aa.description);
+ }
+ }
+ }
+
+ /*
+ * Get the set of sequences which are not considered for the calculation.
+ * Following sequences are added:
+ * 1. Sequences without a defined secondary structure from the selected
+ * source.
+ * 2. Sequences whose secondary structure annotations are not added to
+ * the annotation track
+ */
+ Set<SeqCigar> seqsWithUndefinedSS
+ = findSeqsWithUndefinedSS(seqs, ssAlignmentAnnotationForSequences);
+
+ /*
+ * scan each column, compute and add to each similarity[i, j]
+ * the number of secondary structure annotation that seqi
+ * and seqj do not share
+ */
+ for (int vc = 0; vc < viscont.length; vc += 2)
+ {
+ //Iterates for each column position
+ for (int cpos = viscont[vc]; cpos <= viscont[vc + 1]; cpos++)
+ {
+ cpwidth++; //used to normalise the similarity score
+
+ /*
+ * get set of sequences without gap in the current column
+ */
+ Set<SeqCigar> seqsWithoutGapAtCol = findSeqsWithoutGapAtColumn(seqs, cpos);
+
+ /*
+ * calculate similarity score for each secondary structure annotation on i'th and j'th
+ * sequence and add this measure to the similarities matrix
+ * for [i, j] for j > i
+ */
+ for (int i = 0; i < (noseqs - 1); i++)
+ {
+ //Iterates for each sequences
+ for (int j = i + 1; j < noseqs; j++)
+ {
+ SeqCigar sc1 = seqs[i];
+ SeqCigar sc2 = seqs[j];
+
+
+ //check if ss is defined
+ boolean undefinedSS1 = seqsWithUndefinedSS.contains(sc1);
+ boolean undefinedSS2 = seqsWithUndefinedSS.contains(sc2);
+
+ // Set similarity to max score if both SS are not defined
+ if (undefinedSS1 && undefinedSS2) {
+ similarities[i][j] += ssRateMatrix.getMaximumScore();
+ continue;
+ }
+
+ // Set similarity to minimum score if either one SS is not defined
+ else if(undefinedSS1 || undefinedSS2) {
+ similarities[i][j] += ssRateMatrix.getMinimumScore();
+ continue;
+ }
+
+ //check if the sequence contains gap in the current column
+ boolean gap1 = !seqsWithoutGapAtCol.contains(sc1);
+ boolean gap2 = !seqsWithoutGapAtCol.contains(sc2);
+
+ //Variable to store secondary structure at the current column
+ char ss1 = '*';
+ char ss2 = '*';
+
+ //secondary structure is fetched only if the current column is not
+ //gap for the sequence
+ if(!gap1 && !undefinedSS1) {
+ //fetch the position in sequence for the column and finds the
+ //corresponding secondary structure annotation
+ //TO DO - consider based on priority and displayed
+ int seqPosition = seqs[i].findPosition(cpos);
+ AlignmentAnnotation[] aa = seqs[i].getRefSeq().getAnnotation(selectedSSSource);
+ if(aa!=null)
+ ss1 =
+ AlignmentUtils.findSSAnnotationForGivenSeqposition(aa[0], seqPosition);
+ }
+
+ if(!gap2 && !undefinedSS2) {
+ int seqPosition = seqs[j].findPosition(cpos);
+ AlignmentAnnotation[] aa = seqs[j].getRefSeq().getAnnotation(selectedSSSource);
+ if(aa!=null)
+ ss2 =
+ AlignmentUtils.findSSAnnotationForGivenSeqposition(aa[0], seqPosition);
+ }
+
+ if ((!gap1 && !gap2) || params.includeGaps())
+ {
+ // Calculate similarity score based on the substitution matrix
+ double similarityScore = ssRateMatrix.getPairwiseScore(ss1, ss2);
+ similarities[i][j] += similarityScore;
+ }
+ }
+ }
+ }
+ }
+
+ /*
+ * normalise the similarity scores (summed over columns) by the
+ * number of visible columns used in the calculation
+ * and fill in the bottom half of the matrix
+ */
+ // TODO JAL-2424 cpwidth may be out by 1 - affects scores but not tree shape
+
+ for (int i = 0; i < noseqs; i++)
+ {
+ for (int j = i + 1; j < noseqs; j++)
+ {
+ similarities[i][j] /= cpwidth;
+ similarities[j][i] = similarities[i][j];
+ }
+ }
+ return ssRateMatrix.similarityToDistance(new Matrix(similarities));
+
+ }
+
+ /**
+ * Builds and returns a set containing sequences (SeqCigar) which do not
+ * have a gap at the given column position.
+ *
+ * @param seqs
+ * @param columnPosition
+ * (0..)
+ * @return
+ */
+ private Set<SeqCigar> findSeqsWithoutGapAtColumn(
+ SeqCigar[] seqs, int columnPosition)
+ {
+ Set<SeqCigar> seqsWithoutGapAtCol = new HashSet<>();
+ for (SeqCigar seq : seqs)
+ {
+ int spos = seq.findPosition(columnPosition);
+ if (spos != -1)
+ {
+ /*
+ * position is not a gap
+ */
+ seqsWithoutGapAtCol.add(seq);
+ }
+ }
+ return seqsWithoutGapAtCol;
+ }
+
+
+ /**
+ * Builds and returns a set containing sequences (SeqCigar) which
+ * are not considered for the similarity calculation.
+ * Following sequences are added:
+ * 1. Sequences without a defined secondary structure from the selected
+ * source.
+ * 2. Sequences whose secondary structure annotations are not added to
+ * the annotation track
+ * @param seqs
+ * @param ssAlignmentAnnotationForSequences
+ * @return
+ */
+ private Set<SeqCigar> findSeqsWithUndefinedSS(SeqCigar[] seqs,
+ Map<String, HashSet<String>> ssAlignmentAnnotationForSequences) {
+ Set<SeqCigar> seqsWithUndefinedSS = new HashSet<>();
+ for (SeqCigar seq : seqs) {
+ if (isSSUndefinedOrNotAdded(seq, ssAlignmentAnnotationForSequences)) {
+ seqsWithUndefinedSS.add(seq);
+ }
+ }
+ return seqsWithUndefinedSS;
+ }
+
+
+ /**
+ * Returns true if a sequence (SeqCigar) should not be
+ * considered for the similarity calculation.
+ * Following conditions are checked:
+ * 1. Sequence without a defined secondary structure from the selected
+ * source.
+ * 2. Sequences whose secondary structure annotations are not added to
+ * the annotation track
+ * @param seq
+ * @param ssAlignmentAnnotationForSequences
+ * @return
+ */
+ private boolean isSSUndefinedOrNotAdded(SeqCigar seq,
+ Map<String, HashSet<String>> ssAlignmentAnnotationForSequences) {
+ for (String label : Constants.SECONDARY_STRUCTURE_LABELS.keySet()) {
+ AlignmentAnnotation[] annotations = seq.getRefSeq().getAnnotation(label);
+ if (annotations != null) {
+ for (AlignmentAnnotation annotation : annotations) {
+ HashSet<String> descriptionSet = ssAlignmentAnnotationForSequences
+ .get(annotation.sequenceRef.getName());
+ if (descriptionSet != null)
+ {
+ if (descriptionSet.contains(annotation.description)) {
+ // Secondary structure annotation is present and
+ //added to the track, no need to add seq
+ return false;
+ }
+ }
+ }
+ }
+ }
+ // Either annotations are undefined or not added to the track
+ return true;
+ }
+
+ @Override
+ public String getName()
+ {
+ return NAME;
+ }
+
+ @Override
+ public String getDescription()
+ {
+ return description;
+ }
+
+ @Override
+ public boolean isDNA()
+ {
+ return false;
+ }
+
+ @Override
+ public boolean isProtein()
+ {
+ return false;
+ }
+
+ @Override
+ public boolean isSecondaryStructure()
+ {
+ return true;
+ }
+
+ @Override
+ public String toString()
+ {
+ return "Score between sequences based on similarity between binary "
+ + "vectors marking secondary structure displayed at each column";
+ }
+}
\ No newline at end of file