--- /dev/null
+/*
+ * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
+ * Copyright (C) $$Year-Rel$$ The Jalview Authors
+ *
+ * This file is part of Jalview.
+ *
+ * Jalview is free software: you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, either version 3
+ * of the License, or (at your option) any later version.
+ *
+ * Jalview is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty
+ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
+ * PURPOSE. See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
+ * The Jalview Authors are detailed in the 'AUTHORS' file.
+ */
+package jalview.analysis.scoremodels;
+
+import jalview.api.AlignmentViewPanel;
+import jalview.api.FeatureRenderer;
+import jalview.api.analysis.ScoreModelI;
+import jalview.api.analysis.SimilarityParamsI;
+import jalview.datamodel.AlignmentAnnotation;
+import jalview.datamodel.AlignmentView;
+import jalview.datamodel.Annotation;
+import jalview.datamodel.SeqCigar;
+import jalview.math.Matrix;
+import jalview.math.MatrixI;
+import jalview.util.SetUtils;
+
+import java.util.HashSet;
+import java.util.Set;
+
+/**
+ * Calculates a distance score between sequences from their secondary
+ * structure annotations. A similarity score can later be derived from the
+ * distance score by inverting it.
+ */
+public class SecondaryStructureDistanceModel extends DistanceScoreModel
+{
+ /** Name of this score model, as returned by {@link #getName()}. */
+ private static final String NAME = "Secondary Structure Similarity";
+
+ /**
+ * Description returned by {@link #getDescription()}. Nothing in this class
+ * assigns it, so it keeps its default value of null.
+ */
+ private String description;
+
+ /**
+ * Feature renderer obtained from the alignment view panel in
+ * configureFromAlignmentView. It is not read anywhere else in this class.
+ */
+ FeatureRenderer fr;
+
+ /**
+ * Constructor. Performs no initialisation; the feature renderer is set
+ * separately by configureFromAlignmentView when getInstance is called.
+ */
+ public SecondaryStructureDistanceModel()
+ {
+
+ }
+
+  /**
+   * Creates a new instance of this model's runtime class using its
+   * no-argument constructor and configures it from the given view panel.
+   * <p>
+   * Any reflective failure (no accessible no-argument constructor, the class
+   * cannot be instantiated, or the constructor itself throws) is reported on
+   * the error console and null is returned.
+   *
+   * @param view
+   *          the alignment view panel the new instance is configured from
+   * @return a newly configured instance, or null if it could not be created
+   */
+  @Override
+  public ScoreModelI getInstance(AlignmentViewPanel view)
+  {
+    try
+    {
+      SecondaryStructureDistanceModel instance = this.getClass()
+              .getDeclaredConstructor().newInstance();
+      instance.configureFromAlignmentView(view);
+      return instance;
+    } catch (ReflectiveOperationException e)
+    {
+      /*
+       * covers InstantiationException, IllegalAccessException,
+       * NoSuchMethodException and InvocationTargetException; for the last of
+       * these the wrapped exception is the informative one
+       */
+      Throwable cause = e.getCause() == null ? e : e.getCause();
+      jalview.bin.Console.errPrintln("Error in " + getClass().getName()
+              + ".getInstance(): " + cause);
+      return null;
+    }
+  }
+
+  /**
+   * Stores the feature renderer returned by the view panel's
+   * cloneFeatureRenderer() in this model.
+   *
+   * @param view
+   *          the alignment view panel supplying the feature renderer
+   * @return always true
+   */
+  boolean configureFromAlignmentView(AlignmentViewPanel view)
+  {
+    this.fr = view.cloneFeatureRenderer();
+    return true;
+  }
+
+  /**
+   * Calculates a distance score [i][j] between each pair of sequences based on
+   * their secondary structure annotations. For every visible alignment column
+   * the secondary structure annotation of each sequence is looked up, and the
+   * pair's score is increased by the result of
+   * {@link SetUtils#countDisjunction} on the two annotation sets (presumably
+   * the number of annotations held by only one of the two sequences). The
+   * summed scores are then divided by the number of columns processed, giving
+   * an average distance per column, and the matrix is made symmetric.
+   * <p>
+   * A sequence that is gapped at a column contributes an empty annotation set.
+   * Columns where both sequences have a residue are always scored; columns
+   * where at least one of the pair is gapped are scored only if
+   * {@code params.includeGaps()} is true. That is the only setting read from
+   * the parameters.
+   *
+   * @param seqData
+   *          the aligned sequences and the visible column ranges to score
+   * @param params
+   *          scoring options; only {@code includeGaps()} is consulted
+   * @return a symmetric matrix of distances with zeros on the diagonal
+   */
+  @Override
+  public MatrixI findDistances(AlignmentView seqData,
+          SimilarityParamsI params)
+  {
+    SeqCigar[] seqs = seqData.getSequences();
+    int noseqs = seqs.length;
+    int cpwidth = 0; // number of columns processed, used to normalise
+    double[][] distances = new double[noseqs][noseqs];
+
+    // visible regions, read below as inclusive [start, end] column pairs
+    int[] viscont = seqData.getVisibleContigs();
+
+    for (int vc = 0; vc < viscont.length; vc += 2)
+    {
+      for (int cpos = viscont[vc]; cpos <= viscont[vc + 1]; cpos++)
+      {
+        cpwidth++;
+
+        Set<SeqCigar> seqsWithoutGapAtCol = findSeqsWithoutGapAtColumn(
+                seqs, cpos);
+
+        /*
+         * look up gap status and annotation once per sequence for this
+         * column, instead of repeating the lookup for every pair; a gapped
+         * sequence gets an empty annotation set
+         */
+        boolean[] gapped = new boolean[noseqs];
+        // generic array creation is unchecked; only Set<String> is stored
+        @SuppressWarnings("unchecked")
+        Set<String>[] structures = new Set[noseqs];
+        if (noseqs > 1)
+        {
+          for (int k = 0; k < noseqs; k++)
+          {
+            gapped[k] = !seqsWithoutGapAtCol.contains(seqs[k]);
+            if (gapped[k])
+            {
+              structures[k] = new HashSet<>();
+            }
+            else
+            {
+              structures[k] = findSSAnnotationForGivenSeqAndCol(seqs[k],
+                      cpos);
+            }
+          }
+        }
+
+        /*
+         * add this column's score to the total for [i, j], j > i:
+         * residue-residue is always scored
+         * pairs involving a gap are scored only if params say to do so
+         * (gap-gap then compares two empty sets)
+         */
+        for (int i = 0; i < noseqs - 1; i++)
+        {
+          for (int j = i + 1; j < noseqs; j++)
+          {
+            if ((!gapped[i] && !gapped[j]) || params.includeGaps())
+            {
+              distances[i][j] += SetUtils.countDisjunction(structures[i],
+                      structures[j]);
+            }
+          }
+        }
+      }
+    }
+
+    /*
+     * normalise the distance scores (summed over columns) by the
+     * number of visible columns used in the calculation
+     * and fill in the bottom half of the matrix
+     */
+    // TODO JAL-2424 cpwidth may be out by 1 - affects scores but not tree shape
+    for (int i = 0; i < noseqs; i++)
+    {
+      for (int j = i + 1; j < noseqs; j++)
+      {
+        // with no visible columns the totals are all zero; avoid 0/0 = NaN
+        if (cpwidth > 0)
+        {
+          distances[i][j] /= cpwidth;
+        }
+        distances[j][i] = distances[i][j];
+      }
+    }
+    return new Matrix(distances);
+  }
+
+  /**
+   * Collects the sequences that have a residue, rather than a gap, at the
+   * given alignment column. A sequence is treated as gapped when its
+   * findPosition for the column is -1.
+   *
+   * @param seqs
+   *          the sequences to inspect
+   * @param columnPosition
+   *          (0..)
+   * @return a new set holding every sequence that is not gapped at the column
+   */
+  protected Set<SeqCigar> findSeqsWithoutGapAtColumn(
+          SeqCigar[] seqs, int columnPosition)
+  {
+    Set<SeqCigar> ungapped = new HashSet<>();
+    for (int idx = 0; idx < seqs.length; idx++)
+    {
+      SeqCigar candidate = seqs[idx];
+      if (candidate.findPosition(columnPosition) == -1)
+      {
+        // gap at this column
+        continue;
+      }
+      ungapped.add(candidate);
+    }
+    return ungapped;
+  }
+
+  /**
+   * Finds the secondary structure annotation of a sequence (SeqCigar) at the
+   * given alignment column. The column is converted to a sequence position,
+   * which is looked up in the first "Secondary Structure" annotation row of
+   * the reference sequence.
+   * <p>
+   * A blank (' ') secondary structure character is reported as "C". An empty
+   * set is returned when the reference sequence has no such annotation row,
+   * when there is no annotation at the position, or when the annotation's
+   * secondary structure character is the null character.
+   *
+   * @param seq
+   *          the sequence to look up
+   * @param columnPosition
+   *          (0..)
+   * @return a new set holding at most one single-character string
+   */
+  private Set<String> findSSAnnotationForGivenSeqAndCol(
+          SeqCigar seq, int columnPosition)
+  {
+    Set<String> secondaryStructure = new HashSet<>();
+
+    int seqPosition = seq.findPosition(columnPosition);
+    AlignmentAnnotation[] aa = seq.getRefSeq()
+            .getAnnotation("Secondary Structure");
+    if (aa == null || aa.length == 0 || aa[0] == null)
+    {
+      // sequence carries no secondary structure annotation row
+      return secondaryStructure;
+    }
+
+    Annotation a = aa[0].getAnnotationForPosition(seqPosition);
+    if (a == null)
+    {
+      // nothing annotated at this position
+      return secondaryStructure;
+    }
+
+    char ss = a.secondaryStructure;
+    if (ss == ' ')
+    {
+      ss = 'C'; // In JalView, 'C' is represented as ' '
+    }
+    if (ss != '\0')
+    {
+      secondaryStructure.add(String.valueOf(ss));
+    }
+    return secondaryStructure;
+  }
+
+
+ /**
+ * Returns the name of this score model, the constant NAME.
+ */
+ @Override
+ public String getName()
+ {
+ return NAME;
+ }
+
+ /**
+ * Returns the description field. NOTE(review): nothing in this class assigns
+ * that field, so this currently returns null - confirm callers accept that.
+ */
+ @Override
+ public String getDescription()
+ {
+ return description;
+ }
+
+ /**
+ * Always returns false for this model.
+ */
+ @Override
+ public boolean isDNA()
+ {
+ return false;
+ }
+
+ /**
+ * Always returns false for this model; isSecondaryStructure() is the flag
+ * that returns true.
+ */
+ @Override
+ public boolean isProtein()
+ {
+ return false;
+ }
+
+ /**
+ * Always returns true for this model.
+ */
+ @Override
+ public boolean isSecondaryStructure()
+ {
+ return true;
+ }
+
+  /**
+   * Returns a one-line description of what this model scores. The text
+   * refers to secondary structure annotations, which is what findDistances
+   * compares, rather than to displayed sequence features.
+   */
+  @Override
+  public String toString()
+  {
+    return "Score between sequences based on the secondary structure annotations that differ between them at each column";
+  }
+}
\ No newline at end of file
--- /dev/null
+/*
+ * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
+ * Copyright (C) $$Year-Rel$$ The Jalview Authors
+ *
+ * This file is part of Jalview.
+ *
+ * Jalview is free software: you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, either version 3
+ * of the License, or (at your option) any later version.
+ *
+ * Jalview is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty
+ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
+ * PURPOSE. See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
+ * The Jalview Authors are detailed in the 'AUTHORS' file.
+ */
+package jalview.analysis.scoremodels;
+
+import static org.testng.Assert.assertEquals;
+import static org.testng.Assert.assertTrue;
+
+import jalview.api.analysis.ScoreModelI;
+import jalview.api.analysis.SimilarityParamsI;
+import jalview.datamodel.Alignment;
+import jalview.datamodel.AlignmentAnnotation;
+import jalview.datamodel.AlignmentI;
+import jalview.datamodel.AlignmentView;
+import jalview.datamodel.Annotation;
+import jalview.datamodel.Sequence;
+import jalview.datamodel.SequenceFeature;
+import jalview.datamodel.SequenceI;
+import jalview.gui.AlignFrame;
+import jalview.gui.AlignViewport;
+import jalview.gui.JvOptionPane;
+import jalview.io.DataSourceType;
+import jalview.io.FileLoader;
+import jalview.math.MatrixI;
+
+import org.testng.Assert;
+import org.testng.annotations.BeforeClass;
+import org.testng.annotations.Test;
+
+// This class tests methods in Class SecondaryStructureDistanceModel
+public class SecondaryStructureDistanceModelTest
+{
+
+ /**
+ * Class-level set up: switches JvOptionPane to non-interactive mode and
+ * makes it answer with CANCEL_OPTION, so that no dialog waits for user
+ * input while the tests run.
+ */
+ @BeforeClass(alwaysRun = true)
+ public void setUpJvOptionPane()
+ {
+ JvOptionPane.setInteractiveMode(false);
+ JvOptionPane.setMockResponse(JvOptionPane.CANCEL_OPTION);
+ }
+
+ // Four five-residue sequences, loaded as pasted text by getTestAlignmentFrame
+ public static String alntestFile = "FER1_MESCR/72-76 DVYIL\nFER1_SPIOL/71-75 DVYIL\nFER3_RAPSA/21-25 DVYVL\nFER1_MAIZE/73-77 DVYIL\n";
+
+ // Feature positions read by getTestAlignmentFrame: two values per sequence
+ // (presumably feature start and end), in alignment order. A first value
+ // that is not positive means no "sf1" feature is added to that sequence.
+ int[] sf1 = new int[] { 74, 74, 73, 73, 23, 23, -1, -1 };
+
+ // As sf1, for feature type "sf2"
+ int[] sf2 = new int[] { -1, -1, 74, 75, -1, -1, 76, 77 };
+
+ // As sf1, for feature type "sf3"
+ int[] sf3 = new int[] { -1, -1, -1, -1, -1, -1, 76, 77 };
+
+ /**
+ * <pre>
+ * Load test alignment and add features to sequences:
+ * FER1_MESCR FER1_SPIOL FER3_RAPSA FER1_MAIZE
+ * sf1 X X X
+ * sf2 X X
+ * sf3 X
+ * </pre>
+ *
+ * @return
+ */
+ public AlignFrame getTestAlignmentFrame()
+ {
+ AlignFrame alf = new FileLoader(false)
+ .LoadFileWaitTillLoaded(alntestFile, DataSourceType.PASTE);
+ AlignmentI al = alf.getViewport().getAlignment();
+ Assert.assertEquals(al.getHeight(), 4);
+ Assert.assertEquals(al.getWidth(), 5);
+ for (int i = 0; i < 4; i++)
+ {
+ SequenceI ds = al.getSequenceAt(i).getDatasetSequence();
+ if (sf1[i * 2] > 0)
+ {
+ ds.addSequenceFeature(new SequenceFeature("sf1", "sf1", sf1[i * 2],
+ sf1[i * 2 + 1], "sf1"));
+ }
+ if (sf2[i * 2] > 0)
+ {
+ ds.addSequenceFeature(new SequenceFeature("sf2", "sf2", sf2[i * 2],
+ sf2[i * 2 + 1], "sf2"));
+ }
+ if (sf3[i * 2] > 0)
+ {
+ ds.addSequenceFeature(new SequenceFeature("sf3", "sf3", sf3[i * 2],
+ sf3[i * 2 + 1], "sf3"));
+ }
+ }
+ alf.setShowSeqFeatures(true);
+ alf.getFeatureRenderer().setVisible("sf1");
+ alf.getFeatureRenderer().setVisible("sf2");
+ alf.getFeatureRenderer().setVisible("sf3");
+ alf.getFeatureRenderer().findAllFeatures(true);
+ Assert.assertEquals(
+ alf.getFeatureRenderer().getDisplayedFeatureTypes().size(), 3,
+ "Number of feature types");
+ assertTrue(alf.getCurrentView().areFeaturesDisplayed());
+ return alf;
+ }
+
+
+
+  /**
+   * Verify that two sequences whose secondary structure annotations agree in
+   * every column have a distance of zero
+   */
+  @Test(groups = "Functional")
+  public void testFindDistances_AllSimilar()
+  {
+    AlignFrame frame = setupAlignmentView("All Similar");
+    AlignmentView view = frame.getViewport().getAlignmentView(false);
+
+    String modelName = new SecondaryStructureDistanceModel().getName();
+    ScoreModelI model = ScoreModels.getInstance().getScoreModel(modelName,
+            frame.alignPanel);
+
+    /*
+     * no column differs: score = 0 + 0 + 0 + 0 = 0/4
+     */
+    SimilarityParamsI params = new SimilarityParams(false, true, true, true);
+    MatrixI distances = model.findDistances(view, params);
+
+    // diagonal
+    assertEquals(distances.getValue(0, 0), 0d);
+    assertEquals(distances.getValue(1, 1), 0d);
+    // the pair, in both directions
+    assertEquals(distances.getValue(0, 1), 0d / 4);
+    assertEquals(distances.getValue(1, 0), 0d / 4);
+  }
+
+  /**
+   * Verify computed distances of sequences whose secondary structure
+   * annotations agree in some columns and differ in others
+   */
+  @Test(groups = "Functional")
+  public void testFindDistances_PartiallySimilar()
+  {
+    AlignFrame frame = setupAlignmentView("Partially Similar");
+    AlignmentView view = frame.getViewport().getAlignmentView(false);
+
+    String modelName = new SecondaryStructureDistanceModel().getName();
+    ScoreModelI model = ScoreModels.getInstance().getScoreModel(modelName,
+            frame.alignPanel);
+
+    /*
+     * columns 1 and 2 agree, columns 3 and 4 differ:
+     * score = 0 + 0 + 2 + 2 = 4/4
+     */
+    SimilarityParamsI params = new SimilarityParams(false, true, true, true);
+    MatrixI distances = model.findDistances(view, params);
+
+    // diagonal
+    assertEquals(distances.getValue(0, 0), 0d);
+    assertEquals(distances.getValue(1, 1), 0d);
+    // the pair, in both directions: 4/4
+    assertEquals(distances.getValue(0, 1), 1d);
+    assertEquals(distances.getValue(1, 0), 1d);
+  }
+
+  /**
+   * Verify computed distances of sequences whose secondary structure
+   * annotations differ in every column
+   */
+  @Test(groups = "Functional")
+  public void testFindDistances_notSimilar()
+  {
+    AlignFrame frame = setupAlignmentView("Not Similar");
+    AlignmentView view = frame.getViewport().getAlignmentView(false);
+
+    String modelName = new SecondaryStructureDistanceModel().getName();
+    ScoreModelI model = ScoreModels.getInstance().getScoreModel(modelName,
+            frame.alignPanel);
+
+    /*
+     * every column differs: score = 2 + 2 + 2 + 2 = 8/4
+     */
+    SimilarityParamsI params = new SimilarityParams(false, true, true, true);
+    MatrixI distances = model.findDistances(view, params);
+
+    // diagonal
+    assertEquals(distances.getValue(0, 0), 0d);
+    assertEquals(distances.getValue(1, 1), 0d);
+    // the pair, in both directions: 8/4
+    assertEquals(distances.getValue(0, 1), 2d);
+    assertEquals(distances.getValue(1, 0), 2d);
+  }
+
+  /**
+   * Verify computed distances when one of the sequences has a gap, both with
+   * and without gapped positions being included in the score
+   */
+  @Test(groups = "Functional")
+  public void testFindDistances_withGap()
+  {
+    AlignFrame frame = setupAlignmentViewWithGap("Not Similar");
+    AlignmentView view = frame.getViewport().getAlignmentView(false);
+
+    String modelName = new SecondaryStructureDistanceModel().getName();
+    ScoreModelI model = ScoreModels.getInstance().getScoreModel(modelName,
+            frame.alignPanel);
+
+    /*
+     * the two parameter sets below differ only in their third constructor
+     * argument, which switches scoring of gapped positions on and off
+     */
+
+    /*
+     * include gaps
+     * score = 0 + 0 + 1 + 0 = 1/4
+     */
+    SimilarityParamsI withGaps = new SimilarityParams(false, true, true,
+            true);
+    MatrixI gapsIncluded = model.findDistances(view, withGaps);
+    assertEquals(gapsIncluded.getValue(0, 0), 0d);
+    assertEquals(gapsIncluded.getValue(1, 1), 0d);
+    assertEquals(gapsIncluded.getValue(0, 1), 1d / 4);
+    assertEquals(gapsIncluded.getValue(1, 0), 1d / 4);
+
+    /*
+     * exclude gaps
+     * score = 0 + 0 + 0 + 0 = 0/4
+     */
+    SimilarityParamsI withoutGaps = new SimilarityParams(false, true, false,
+            true);
+    MatrixI gapsExcluded = model.findDistances(view, withoutGaps);
+    assertEquals(gapsExcluded.getValue(0, 1), 0d);
+    assertEquals(gapsExcluded.getValue(1, 0), 0d);
+  }
+
+
+
+  /**
+   * Builds an alignment frame holding two ungapped four-residue sequences,
+   * each with a "Secondary Structure" annotation row chosen by the scenario
+   * name. In the tables below, S stands for the annotation whose secondary
+   * structure character is a blank (' ').
+   *
+   * <pre>
+   * column        1 2 3 4
+   * seq s1        F R K S
+   * seq s2        F S J L
+   *
+   * "All Similar"
+   *   ss of s1    E H S E
+   *   ss of s2    E H S E
+   *
+   * "Not Similar"
+   *   ss of s1    E E S E
+   *   ss of s2    H H E S
+   *
+   * anything else (partially similar)
+   *   ss of s1    H E S E
+   *   ss of s2    H E E S
+   * </pre>
+   *
+   * @param similar
+   *          scenario name: "All Similar", "Not Similar", or any other value
+   *          (including null) for the partially similar set up
+   * @return the alignment frame for the two annotated sequences
+   */
+  protected AlignFrame setupAlignmentView(String similar)
+  {
+    /*
+     * sequences without gaps
+     */
+    SequenceI s1 = new Sequence("s1", "FRKS");
+    SequenceI s2 = new Sequence("s2", "FSJL");
+
+    s1.addSequenceFeature(
+            new SequenceFeature("chain", null, 1, 4, 0f, null));
+    s1.addSequenceFeature(
+            new SequenceFeature("domain", null, 1, 4, 0f, null));
+    s2.addSequenceFeature(
+            new SequenceFeature("chain", null, 1, 4, 0f, null));
+    s2.addSequenceFeature(
+            new SequenceFeature("metal", null, 1, 4, 0f, null));
+    s2.addSequenceFeature(
+            new SequenceFeature("Pfam", null, 1, 4, 0f, null));
+
+    /*
+     * Set up secondary structure annotations
+     */
+    Annotation ssE = new Annotation("", "", 'E', 0);
+    Annotation ssH = new Annotation("", "", 'H', 0);
+    Annotation ssS = new Annotation(".", "", ' ', 0);
+
+    Annotation[] anns1;
+    Annotation[] anns2;
+
+    /*
+     * compare scenario names by value, not by reference: == on strings only
+     * works while both sides happen to be the same interned literal
+     */
+    if ("All Similar".equals(similar))
+    {
+      // every column carries the same annotation on both sequences
+      anns1 = new Annotation[] { ssE, ssH, ssS, ssE };
+      anns2 = new Annotation[] { ssE, ssH, ssS, ssE };
+    }
+    else if ("Not Similar".equals(similar))
+    {
+      // every column carries a different annotation on the two sequences
+      anns1 = new Annotation[] { ssE, ssE, ssS, ssE };
+      anns2 = new Annotation[] { ssH, ssH, ssE, ssS };
+    }
+    else
+    {
+      // columns 1 and 2 agree, columns 3 and 4 differ
+      anns1 = new Annotation[] { ssH, ssE, ssS, ssE };
+      anns2 = new Annotation[] { ssH, ssE, ssE, ssS };
+    }
+
+    AlignmentAnnotation ann1 = new AlignmentAnnotation(
+            "Secondary Structure", "Secondary Structure", anns1);
+    AlignmentAnnotation ann2 = new AlignmentAnnotation(
+            "Secondary Structure", "Secondary Structure", anns2);
+
+    s1.addAlignmentAnnotation(ann1);
+    s2.addAlignmentAnnotation(ann2);
+
+    AlignmentI al = new Alignment(new SequenceI[] { s1, s2 });
+    AlignFrame af = new AlignFrame(al, 300, 300);
+    af.setShowSeqFeatures(true);
+    af.getFeatureRenderer().findAllFeatures(true);
+    return af;
+  }
+
+
+  /**
+   * Builds an alignment frame holding two sequences, the first of which has
+   * a gap in column 3, each with a "Secondary Structure" annotation row. In
+   * the table, S stands for the annotation whose secondary structure
+   * character is a blank (' ').
+   *
+   * <pre>
+   * column        1 2 3 4
+   * seq s1        F R   S
+   * ss of s1      H E S      (three annotations only)
+   *
+   * seq s2        F S J L
+   * ss of s2      H E E S
+   * </pre>
+   *
+   * @param similar
+   *          not used by this method
+   * @return the alignment frame for the two annotated sequences
+   */
+  protected AlignFrame setupAlignmentViewWithGap(String similar)
+  {
+    SequenceI gappedSeq = new Sequence("s1", "FR S");
+    SequenceI fullSeq = new Sequence("s2", "FSJL");
+
+    gappedSeq.addSequenceFeature(
+            new SequenceFeature("chain", null, 1, 3, 0f, null));
+    gappedSeq.addSequenceFeature(
+            new SequenceFeature("domain", null, 1, 3, 0f, null));
+    fullSeq.addSequenceFeature(
+            new SequenceFeature("chain", null, 1, 4, 0f, null));
+    fullSeq.addSequenceFeature(
+            new SequenceFeature("metal", null, 1, 4, 0f, null));
+    fullSeq.addSequenceFeature(
+            new SequenceFeature("Pfam", null, 1, 4, 0f, null));
+
+    Annotation annE = new Annotation("", "", 'E', 0);
+    Annotation annH = new Annotation("", "", 'H', 0);
+    Annotation annBlank = new Annotation(".", "", ' ', 0);
+
+    Annotation[] gappedSeqSS = new Annotation[] { annH, annE, annBlank };
+    Annotation[] fullSeqSS = new Annotation[] { annH, annE, annE,
+        annBlank };
+
+    AlignmentAnnotation gappedSeqRow = new AlignmentAnnotation(
+            "Secondary Structure", "Secondary Structure", gappedSeqSS);
+    AlignmentAnnotation fullSeqRow = new AlignmentAnnotation(
+            "Secondary Structure", "Secondary Structure", fullSeqSS);
+
+    gappedSeq.addAlignmentAnnotation(gappedSeqRow);
+    fullSeq.addAlignmentAnnotation(fullSeqRow);
+
+    AlignmentI alignment = new Alignment(
+            new SequenceI[]
+            { gappedSeq, fullSeq });
+    AlignFrame frame = new AlignFrame(alignment, 300, 300);
+    frame.setShowSeqFeatures(true);
+    frame.getFeatureRenderer().findAllFeatures(true);
+    return frame;
+  }
+
+}