From 4708e769a97588501d41e43df8f756c176b9421d Mon Sep 17 00:00:00 2001 From: Renia Correya Date: Tue, 30 Apr 2024 15:34:18 +0100 Subject: [PATCH] JAL-4386 Comparing secondary structure similarity directly with a basic substitution matrix. Some code refactoring. --- src/jalview/analysis/AlignmentUtils.java | 66 +++++++++++++++++++ .../analysis/scoremodels/DistanceScoreModel.java | 1 + .../analysis/scoremodels/FeatureDistanceModel.java | 9 +-- src/jalview/analysis/scoremodels/PIDModel.java | 6 -- src/jalview/analysis/scoremodels/ScoreMatrix.java | 6 -- .../SecondaryStructureDistanceModel.java | 68 ++++++++++++++------ .../analysis/scoremodels/SmithWatermanModel.java | 6 -- src/jalview/api/analysis/ScoreModelI.java | 6 +- src/jalview/gui/CalculationChooser.java | 62 +++++++----------- .../SecondaryStructureDistanceModelTest.java | 9 +++ 10 files changed, 157 insertions(+), 82 deletions(-) diff --git a/src/jalview/analysis/AlignmentUtils.java b/src/jalview/analysis/AlignmentUtils.java index be5133f..d88950c 100644 --- a/src/jalview/analysis/AlignmentUtils.java +++ b/src/jalview/analysis/AlignmentUtils.java @@ -37,6 +37,7 @@ import java.util.Set; import java.util.SortedMap; import java.util.TreeMap; +import jalview.api.AlignCalcWorkerI; import jalview.bin.Console; import jalview.commands.RemoveGapColCommand; import jalview.datamodel.AlignedCodon; @@ -55,6 +56,7 @@ import jalview.datamodel.SequenceFeature; import jalview.datamodel.SequenceGroup; import jalview.datamodel.SequenceI; import jalview.datamodel.features.SequenceFeatures; +import jalview.gui.AlignmentPanel; import jalview.io.gff.SequenceOntologyI; import jalview.schemes.ResidueProperties; import jalview.util.Comparison; @@ -62,6 +64,7 @@ import jalview.util.DBRefUtils; import jalview.util.IntRangeComparator; import jalview.util.MapList; import jalview.util.MappingUtils; +import jalview.workers.SecondaryStructureConsensusThread; /** * grab bag of useful alignment manipulation operations Expect these to be @@ -76,6 +79,17 @@ public class AlignmentUtils private static final String SEQUENCE_VARIANT = "sequence_variant:"; + + private static final Map SECONDARY_STRUCTURE_LABELS = new HashMap<>(); + + static { + SECONDARY_STRUCTURE_LABELS.put("Secondary Structure", "3D Structures"); + SECONDARY_STRUCTURE_LABELS.put("jnetpred", "JPred"); + // Add other secondary structure labels here if needed + } + + private static final String SS_ANNOTATION_LABEL = "Secondary Structure"; + /* * the 'id' attribute is provided for variant features fetched from * Ensembl using its REST service with JSON format @@ -1532,6 +1546,21 @@ public class AlignmentUtils } } } + + + public static boolean isSSAnnotationPresent( Map> annotations) { + + for (SequenceI seq : annotations.keySet()) + { + for (AlignmentAnnotation ann : annotations.get(seq)) + { + if(ann.getDescription(false).startsWith(SS_ANNOTATION_LABEL)) { + return true; + } + } + } + return false; + } /** * Make a copy of a reference annotation {@code ann} and add it to an @@ -2830,4 +2859,41 @@ public class AlignmentUtils } return true; } + + + public static List getSecondaryStructureSources(AlignmentAnnotation[] annotations) { + + List ssSources = new ArrayList<>(); + Set addedLabels = new HashSet<>(); // to keep track of added labels + + for (AlignmentAnnotation annotation : annotations) { + String label = annotation.label; + if (SECONDARY_STRUCTURE_LABELS.containsKey(label) && !addedLabels.contains(label)) { + ssSources.add(SECONDARY_STRUCTURE_LABELS.get(label)); + addedLabels.add(label); // Add the label to the set + } + } + + return ssSources; + } + + public static boolean isSecondaryStructurePresent(AlignmentAnnotation[] annotations) + { + boolean ssPresent = false; + + for (AlignmentAnnotation aa : annotations) + { + if(ssPresent) { + break; + } + + if (SECONDARY_STRUCTURE_LABELS.containsKey(aa.label)) { + ssPresent = true; + break; + } + } + + return ssPresent; + + } } diff --git a/src/jalview/analysis/scoremodels/DistanceScoreModel.java b/src/jalview/analysis/scoremodels/DistanceScoreModel.java index 3521757..55510bd 100644 --- a/src/jalview/analysis/scoremodels/DistanceScoreModel.java +++ b/src/jalview/analysis/scoremodels/DistanceScoreModel.java @@ -57,4 +57,5 @@ public abstract class DistanceScoreModel implements ScoreModelI return similarities; } + } diff --git a/src/jalview/analysis/scoremodels/FeatureDistanceModel.java b/src/jalview/analysis/scoremodels/FeatureDistanceModel.java index 604893c..df45a72 100644 --- a/src/jalview/analysis/scoremodels/FeatureDistanceModel.java +++ b/src/jalview/analysis/scoremodels/FeatureDistanceModel.java @@ -235,14 +235,7 @@ public class FeatureDistanceModel extends DistanceScoreModel public boolean isProtein() { return true; - } - - @Override - public boolean isSecondaryStructure() - { - return false; - } - + } @Override public String toString() diff --git a/src/jalview/analysis/scoremodels/PIDModel.java b/src/jalview/analysis/scoremodels/PIDModel.java index 267ef89..ddfe5e4 100644 --- a/src/jalview/analysis/scoremodels/PIDModel.java +++ b/src/jalview/analysis/scoremodels/PIDModel.java @@ -73,12 +73,6 @@ public class PIDModel extends SimilarityScoreModel { return true; } - - @Override - public boolean isSecondaryStructure() - { - return false; - } /** * Answers 1 if c and d are the same residue (ignoring case), and not gap diff --git a/src/jalview/analysis/scoremodels/ScoreMatrix.java b/src/jalview/analysis/scoremodels/ScoreMatrix.java index a1ea35a..aa841ac 100644 --- a/src/jalview/analysis/scoremodels/ScoreMatrix.java +++ b/src/jalview/analysis/scoremodels/ScoreMatrix.java @@ -280,12 +280,6 @@ public class ScoreMatrix extends SimilarityScoreModel { return peptide; } - - @Override - public boolean isSecondaryStructure() - { - return false; - } /** * Returns a copy of the score matrix as used in getPairwiseScore. If using diff --git a/src/jalview/analysis/scoremodels/SecondaryStructureDistanceModel.java b/src/jalview/analysis/scoremodels/SecondaryStructureDistanceModel.java index cd09805..1dcf297 100644 --- a/src/jalview/analysis/scoremodels/SecondaryStructureDistanceModel.java +++ b/src/jalview/analysis/scoremodels/SecondaryStructureDistanceModel.java @@ -151,6 +151,7 @@ public class SecondaryStructureDistanceModel extends DistanceScoreModel int noseqs = seqs.length; //no of sequences int cpwidth = 0; // = seqData.getWidth(); double[][] distances = new double[noseqs][noseqs]; //matrix to store distance score + double[][] substitutionMatrix = getSubstitutionMatrix(); //secondary structure source parameter selected by the user from the drop down. String ssSource = params.getSecondaryStructureSource(); @@ -246,30 +247,30 @@ public class SecondaryStructureDistanceModel extends DistanceScoreModel boolean gap2 = !seqsWithoutGapAtCol.contains(sc2); //Variable to store secondary structure at the current column - Set secondaryStructure1 = new HashSet(); - Set secondaryStructure2 = new HashSet(); + char ss1 = 'G', ss2 = 'G'; //secondary structure is fetched only if the current column is not //gap for the sequence if(!gap1 && !undefinedSS1) { - secondaryStructure1.addAll( - findSSAnnotationForGivenSeqAndCol(seqs[i], cpos)); + ss1 = + findSSAnnotationForGivenSeqAndCol(seqs[i], cpos); } if(!gap2 && !undefinedSS2) { - secondaryStructure2.addAll( - findSSAnnotationForGivenSeqAndCol(seqs[j], cpos)); + ss2 = + findSSAnnotationForGivenSeqAndCol(seqs[j], cpos); } /* - * gap-gap always scores zero - * ss-ss is always scored - * include gap-ss scores 1 if params say to do so + * gap-gap scores zero + * similar ss-ss scores zero + * different ss-ss scores 1 + * gap-ss scores 1 if params say to do so */ if ((!gap1 && !gap2) || params.includeGaps()) { - int seqDistance = SetUtils.countDisjunction( - secondaryStructure1, secondaryStructure2); + // Calculate distance score based on the substitution matrix + double seqDistance = substitutionMatrix[getSubstitutionMatrixIndex(ss1)][getSubstitutionMatrixIndex(ss2)]; distances[i][j] += seqDistance; } } @@ -389,12 +390,10 @@ public class SecondaryStructureDistanceModel extends DistanceScoreModel * (0..) * @return */ - private Set findSSAnnotationForGivenSeqAndCol( + private char findSSAnnotationForGivenSeqAndCol( SeqCigar seq, int columnPosition) - { - Set secondaryStructure = new HashSet(); - - char ss; + { + char ss = 'G'; //fetch the position in sequence for the column and finds the //corresponding secondary structure annotation @@ -419,12 +418,45 @@ public class SecondaryStructureDistanceModel extends DistanceScoreModel else { ss = COIL; } - secondaryStructure.add(String.valueOf(ss)); + } - return secondaryStructure; + return ss; + } + + /** + * Retrieve the substitution matrix. + * + * @return The substitution matrix. + */ + private double[][] getSubstitutionMatrix() { + // Defining the substitution matrix + // This matrix map distance scores between secondary structure symbols + + return new double[][]{ + // C E H G + {0.0, 1.0, 1.0, 1.0}, // C - COIL + {1.0, 0.0, 1.0, 1.0}, // E - SHEET + {1.0, 1.0, 0.0, 1.0}, // H - HELIX + {1.0, 1.0, 1.0, 0.0} // G - GAP + + }; } + private int getSubstitutionMatrixIndex(char ss) { + switch (ss) { + case 'C': + return 0; + case 'E': + return 1; + case 'H': + return 2; + case 'G': + return 3; + default: + throw new IllegalArgumentException("Invalid secondary structure character: " + ss); + } + } @Override public String getName() diff --git a/src/jalview/analysis/scoremodels/SmithWatermanModel.java b/src/jalview/analysis/scoremodels/SmithWatermanModel.java index 926781a..ca6d279 100644 --- a/src/jalview/analysis/scoremodels/SmithWatermanModel.java +++ b/src/jalview/analysis/scoremodels/SmithWatermanModel.java @@ -96,12 +96,6 @@ public class SmithWatermanModel extends SimilarityScoreModel { return true; } - - @Override - public boolean isSecondaryStructure() - { - return false; - } @Override public String getDescription() diff --git a/src/jalview/api/analysis/ScoreModelI.java b/src/jalview/api/analysis/ScoreModelI.java index 97920e5..a243c0c 100644 --- a/src/jalview/api/analysis/ScoreModelI.java +++ b/src/jalview/api/analysis/ScoreModelI.java @@ -63,9 +63,13 @@ public interface ScoreModelI // TODO getName, isDNA, isProtein can be static methods in Java 8 - boolean isSecondaryStructure(); + default public boolean isSecondaryStructure() + { + return false; + } /** + * Answers false by default * Answers true if the data has secondary structure (so should be * shown in menus in that context) * diff --git a/src/jalview/gui/CalculationChooser.java b/src/jalview/gui/CalculationChooser.java index d126cc3..7747a6b 100644 --- a/src/jalview/gui/CalculationChooser.java +++ b/src/jalview/gui/CalculationChooser.java @@ -52,6 +52,7 @@ import javax.swing.JRadioButton; import javax.swing.event.InternalFrameAdapter; import javax.swing.event.InternalFrameEvent; +import jalview.analysis.AlignmentUtils; import jalview.analysis.TreeBuilder; import jalview.analysis.scoremodels.ScoreModels; import jalview.analysis.scoremodels.SimilarityParams; @@ -81,9 +82,20 @@ public class CalculationChooser extends JPanel private static final int MIN_PCA_SELECTION = 4; - private static final String SS_ANNOTATION_LABEL = "Secondary Structure"; + private String secondaryStructureModelName; - private static final String SS_ANNOTATION_FROM_JPRED_LABEL = "jnetpred"; + private void getSecondaryStructureModelName() { + + ScoreModels scoreModels = ScoreModels.getInstance(); + for (ScoreModelI sm : scoreModels.getModels()) + { + if (sm.isSecondaryStructure()) + { + secondaryStructureModelName = sm.getName(); + } + } + + } AlignFrame af; @@ -128,6 +140,7 @@ public class CalculationChooser extends JPanel this.af = alignFrame; init(); af.alignPanel.setCalculationDialog(this); + } /** @@ -135,6 +148,7 @@ public class CalculationChooser extends JPanel */ void init() { + getSecondaryStructureModelName(); setLayout(new BorderLayout()); frame = new JInternalFrame(); frame.setFrameIcon(null); @@ -225,12 +239,14 @@ public class CalculationChooser extends JPanel * score models drop-down - with added tooltips! */ modelNames = buildModelOptionsList(); + // Step 3: Show or Hide Dropdown Based on Selection modelNames.addActionListener(new ActionListener() { @Override public void actionPerformed(ActionEvent e) { String selectedModel = modelNames.getSelectedItem().toString(); - if (selectedModel.equals("Secondary Structure Similarity")) { + + if (selectedModel.equals(secondaryStructureModelName)) { ssSourceDropdown.setVisible(true); } else { ssSourceDropdown.setVisible(false); @@ -238,6 +254,7 @@ public class CalculationChooser extends JPanel } }); + JPanel scoreModelPanel = new JPanel(new FlowLayout(FlowLayout.CENTER)); scoreModelPanel.setOpaque(false); scoreModelPanel.add(modelNames); @@ -486,24 +503,9 @@ public class CalculationChooser extends JPanel * select the score models applicable to the alignment type */ boolean nucleotide = af.getViewport().getAlignment().isNucleotide(); - AlignmentAnnotation[] alignmentAnnotation = af.getViewport().getAlignment().getAlignmentAnnotation(); - - boolean ssPresent = false; + AlignmentAnnotation[] alignmentAnnotations = af.getViewport().getAlignment().getAlignmentAnnotation(); - for (AlignmentAnnotation aa : alignmentAnnotation) - - { - if(ssPresent) { - break; - } - - if (aa.label.equals("Secondary Structure") || aa.label.equals("jnetpred")) - - { - ssPresent = true; - break; - } - } + boolean ssPresent = AlignmentUtils.isSecondaryStructurePresent(alignmentAnnotations); List models = getApplicableScoreModels(nucleotide, pca.isSelected(), ssPresent); @@ -541,6 +543,7 @@ public class CalculationChooser extends JPanel } // finally, update the model comboBox.setModel(model); + } /** @@ -588,24 +591,9 @@ public class CalculationChooser extends JPanel protected List getApplicableSecondaryStructureSources() { - List ssSources = new ArrayList<>(); - AlignmentAnnotation[] annotations = af.getViewport().getAlignment().getAlignmentAnnotation(); - boolean has3DStructure = false, hasJPred = false; - if(annotations.length > 0) { - - for (AlignmentAnnotation annotation : annotations) { - has3DStructure |= SS_ANNOTATION_LABEL.equals(annotation.label); - hasJPred |= SS_ANNOTATION_FROM_JPRED_LABEL.equals(annotation.label); - - if (has3DStructure && hasJPred) - break; - } - } - if(has3DStructure) - ssSources.add("3D Structures"); - if(hasJPred) - ssSources.add("JPred"); + + List ssSources = AlignmentUtils.getSecondaryStructureSources(annotations); return ssSources; } diff --git a/test/jalview/analysis/scoremodels/SecondaryStructureDistanceModelTest.java b/test/jalview/analysis/scoremodels/SecondaryStructureDistanceModelTest.java index 772f4b0..68c740c 100644 --- a/test/jalview/analysis/scoremodels/SecondaryStructureDistanceModelTest.java +++ b/test/jalview/analysis/scoremodels/SecondaryStructureDistanceModelTest.java @@ -133,6 +133,7 @@ public class SecondaryStructureDistanceModelTest */ SimilarityParamsI params = new SimilarityParams(false, true, true, true); + params.setSecondaryStructureSource("3D Structures"); MatrixI distances = sm.findDistances(view, params); assertEquals(distances.getValue(0, 0), 0d); assertEquals(distances.getValue(1, 1), 0d); @@ -158,6 +159,7 @@ public class SecondaryStructureDistanceModelTest * score = 0 + 0 + 2 + 2 = 4/4 */ SimilarityParamsI params = new SimilarityParams(false, true, true, true); + params.setSecondaryStructureSource("3D Structures"); MatrixI distances = sm.findDistances(view, params); assertEquals(distances.getValue(0, 0), 0d); assertEquals(distances.getValue(1, 1), 0d); @@ -183,6 +185,7 @@ public class SecondaryStructureDistanceModelTest * score = 2 + 2 + 2 + 2 = 8/4 */ SimilarityParamsI params = new SimilarityParams(false, true, true, true); + params.setSecondaryStructureSource("3D Structures"); MatrixI distances = sm.findDistances(view, params); assertEquals(distances.getValue(0, 0), 0d); assertEquals(distances.getValue(1, 1), 0d); @@ -209,6 +212,7 @@ public class SecondaryStructureDistanceModelTest * score = 2 + 2 + 2 + 2 = 8/4 */ SimilarityParamsI params = new SimilarityParams(false, true, true, true); + params.setSecondaryStructureSource("3D Structures"); MatrixI distances = sm.findDistances(view, params); assertEquals(distances.getValue(0, 0), 0d); assertEquals(distances.getValue(1, 1), 0d); @@ -241,6 +245,7 @@ public class SecondaryStructureDistanceModelTest * score = 0 + 0 + 1 + 0 = 1/4 */ SimilarityParamsI params = new SimilarityParams(false, true, true, true); + params.setSecondaryStructureSource("3D Structures"); MatrixI distances = sm.findDistances(view, params); assertEquals(distances.getValue(0, 0), 0d); assertEquals(distances.getValue(1, 1), 0d); @@ -284,6 +289,7 @@ public class SecondaryStructureDistanceModelTest * score = 0 + 0 + 2 + 2 = 2/4 */ SimilarityParamsI params = new SimilarityParams(false, true, true, true); + params.setSecondaryStructureSource("3D Structures"); MatrixI distances = sm.findDistances(view, params); assertEquals(distances.getValue(0, 0), 0d); assertEquals(distances.getValue(1, 1), 0d); @@ -296,6 +302,7 @@ public class SecondaryStructureDistanceModelTest */ SimilarityParamsI params2 = new SimilarityParams(false, true, false, true); + params2.setSecondaryStructureSource("3D Structures"); MatrixI distances2 = sm.findDistances(view, params2); assertEquals(distances2.getValue(0, 1), 2d); assertEquals(distances2.getValue(1, 0), 2d); @@ -327,6 +334,7 @@ public class SecondaryStructureDistanceModelTest * score = 0 + 0 + 2 + 2 = 2/4 */ SimilarityParamsI params = new SimilarityParams(false, true, true, true); + params.setSecondaryStructureSource("3D Structures"); MatrixI distances = sm.findDistances(view, params); assertEquals(distances.getValue(0, 0), 0d); assertEquals(distances.getValue(1, 1), 0d); @@ -339,6 +347,7 @@ public class SecondaryStructureDistanceModelTest */ SimilarityParamsI params2 = new SimilarityParams(false, true, false, true); + params2.setSecondaryStructureSource("3D Structures"); MatrixI distances2 = sm.findDistances(view, params2); assertEquals(distances2.getValue(0, 1), 0d); assertEquals(distances2.getValue(1, 0), 0d); -- 1.7.10.2