From 32915e0cc33a6386b148db7e1f78ee4c1369fd7b Mon Sep 17 00:00:00 2001 From: Renia Correya Date: Wed, 21 Aug 2024 19:06:56 +0530 Subject: [PATCH] JAL-4386 Merge multiple leaves for ss source --- src/jalview/analysis/AlignmentUtils.java | 59 ++++--- src/jalview/analysis/TreeBuilder.java | 14 ++ .../SecondaryStructureDistanceModel.java | 179 ++++++++++++++------ src/jalview/api/analysis/ScoreModelI.java | 14 ++ src/jalview/datamodel/BinaryNode.java | 32 ++++ src/jalview/datamodel/PDBEntry.java | 3 +- src/jalview/ext/jmol/JalviewJmolBinding.java | 11 ++ src/jalview/gui/TreeCanvas.java | 50 +++++- .../structure/StructureSelectionManager.java | 56 +++--- 9 files changed, 307 insertions(+), 111 deletions(-) diff --git a/src/jalview/analysis/AlignmentUtils.java b/src/jalview/analysis/AlignmentUtils.java index d78bcdc..7f5d12f 100644 --- a/src/jalview/analysis/AlignmentUtils.java +++ b/src/jalview/analysis/AlignmentUtils.java @@ -3154,38 +3154,49 @@ public class AlignmentUtils continue; } - for (String label : Constants.SECONDARY_STRUCTURE_LABELS.keySet()) + + if (isSecondaryStructureFrom(selectedSSSource, aa)) { + ssAlignmentAnnotationForSequences + .computeIfAbsent(aa.sequenceRef.getDatasetSequence(), + k -> new ArrayList<>()) + .add(aa); + } + } - if (label.equals(aa.label)) - { + return ssAlignmentAnnotationForSequences; - if (Constants.SS_ALL_PROVIDERS.equals(selectedSSSource)) - { - ssAlignmentAnnotationForSequences - .computeIfAbsent(aa.sequenceRef.getDatasetSequence(), - k -> new ArrayList<>()) - .add(aa); - break; - } + } + + /** + * + * @param selectedSSSource + * @param aa + * @return true if aa is from a provider or all providers as specified by selectedSSSource + */ + public static boolean isSecondaryStructureFrom(String selectedSSSource, + AlignmentAnnotation aa) + { - String ssSource = AlignmentUtils - .extractSSSourceFromAnnotationDescription(aa); - if (ssSource != null && ssSource.equals(selectedSSSource)) - { + for (String label : Constants.SECONDARY_STRUCTURE_LABELS.keySet()) + { - ssAlignmentAnnotationForSequences - .computeIfAbsent(aa.sequenceRef.getDatasetSequence(), - k -> new ArrayList<>()) - .add(aa); - break; - } + if (label.equals(aa.label)) + { + + if (selectedSSSource.equals(Constants.SS_ALL_PROVIDERS)) + { + return true; + } + String ssSource = AlignmentUtils + .extractSSSourceFromAnnotationDescription(aa); + if (ssSource != null && ssSource.equals(selectedSSSource)) + { + return true; } } } - - return ssAlignmentAnnotationForSequences; - + return false; } } diff --git a/src/jalview/analysis/TreeBuilder.java b/src/jalview/analysis/TreeBuilder.java index 61f65ff..d70031a 100644 --- a/src/jalview/analysis/TreeBuilder.java +++ b/src/jalview/analysis/TreeBuilder.java @@ -30,7 +30,9 @@ import jalview.datamodel.SequenceI; import jalview.datamodel.SequenceNode; import jalview.viewmodel.AlignmentViewport; +import java.util.ArrayList; import java.util.BitSet; +import java.util.List; import java.util.Vector; public abstract class TreeBuilder extends TreeEngine @@ -40,6 +42,7 @@ public abstract class TreeBuilder extends TreeEngine public static final String NEIGHBOUR_JOINING = "NJ"; protected SequenceI[] sequences; + protected List labels; public AlignmentView seqData; @@ -121,6 +124,10 @@ public abstract class TreeBuilder extends TreeEngine */ protected void computeTree(ScoreModelI sm, SimilarityParamsI scoreOptions) { + labels = new ArrayList(); + sequences = sm.expandSeqData(sequences, seqData, scoreOptions, labels); + noseqs = sequences.length; + distances = sm.findDistances(seqData, scoreOptions); makeLeaves(); @@ -177,7 +184,14 @@ public abstract class TreeBuilder extends TreeEngine SequenceNode sn = new SequenceNode(); sn.setElement(sequences[i]); + + if (labels.size()==noseqs) + { + sn.setLabel(labels.get(i)); + } + sn.setName(sequences[i].getName()); + node.addElement(sn); BitSet bs = new BitSet(); bs.set(i); diff --git a/src/jalview/analysis/scoremodels/SecondaryStructureDistanceModel.java b/src/jalview/analysis/scoremodels/SecondaryStructureDistanceModel.java index d7859f1..c639866 100644 --- a/src/jalview/analysis/scoremodels/SecondaryStructureDistanceModel.java +++ b/src/jalview/analysis/scoremodels/SecondaryStructureDistanceModel.java @@ -32,10 +32,10 @@ import jalview.datamodel.SequenceI; import jalview.math.Matrix; import jalview.math.MatrixI; import jalview.util.Constants; -import jalview.util.MessageManager; import java.util.ArrayList; import java.util.HashSet; +import java.util.List; import java.util.Map; import java.util.Set; @@ -88,6 +88,72 @@ public class SecondaryStructureDistanceModel extends DistanceScoreModel return true; } + ArrayList ssForSeqs = null; + + @Override + public SequenceI[] expandSeqData(SequenceI[] sequences, + AlignmentView seqData, SimilarityParamsI scoreParams,List labels) + { + ssForSeqs = new ArrayList(); + List newSequences = new ArrayList(); + List newCigs = new ArrayList(); + int sq = 0; + + + + AlignmentAnnotation[] alignAnnotList = fr.getViewport().getAlignment() + .getAlignmentAnnotation(); + + String ssSource = scoreParams.getSecondaryStructureSource(); + if(ssSource == null || ssSource == "") { + ssSource = Constants.SS_ALL_PROVIDERS; + } + + /* + * Add secondary structure annotations that are added to the annotation track + * to the map + */ + Map> ssAlignmentAnnotationForSequences + = AlignmentUtils.getSequenceAssociatedAlignmentAnnotations(alignAnnotList, ssSource); + + for (SeqCigar scig : seqData.getSequences()) + { + // get the next sequence that should be bound to this scig: may be null + SequenceI alSeq = sequences[sq++]; + List ssec = ssAlignmentAnnotationForSequences.get(scig.getRefSeq()); + if (ssec == null) + { + // not defined + newSequences.add(alSeq); + if (alSeq!=null) { + labels.add("No Secondary Structure"); + } + SeqCigar newSeqCigar = scig; //new SeqCigar(scig); + newCigs.add(newSeqCigar); + ssForSeqs.add(null); + } else { + for (int i = 0; i < ssec.size(); i++) + { + if (alSeq != null) + { + labels.add(AlignmentUtils + .extractSSSourceFromAnnotationDescription(ssec.get(i))); + } + //newSequences.add(seq); + newSequences.add(alSeq); + SeqCigar newSeqCigar = scig; // new SeqCigar(scig); + newCigs.add(newSeqCigar); + ssForSeqs.add(ssec.get(i)); + } + } + } + + seqData.setSequences(newCigs.toArray(new SeqCigar[0])); + return newSequences.toArray(new SequenceI[0]); + + } + + /** * Calculates distance score [i][j] between each pair of protein sequences * based on their secondary structure annotations (H, E, C). @@ -106,8 +172,14 @@ public class SecondaryStructureDistanceModel extends DistanceScoreModel @Override public MatrixI findDistances(AlignmentView seqData, SimilarityParamsI params) - { - + { + if (ssForSeqs==null || ssForSeqs.size()!=seqData.getSequences().length) + { + // expandSeqData needs to be called to initialise the hash + SequenceI[] sequences = new SequenceI[seqData.getSequences().length]; + // we throw away the new labels in this case.. + expandSeqData(sequences, seqData, params, new ArrayList()); + } SeqCigar[] seqs = seqData.getSequences(); int noseqs = seqs.length; //no of sequences int cpwidth = 0; @@ -122,17 +194,7 @@ public class SecondaryStructureDistanceModel extends DistanceScoreModel // need to get real position for view position int[] viscont = seqData.getVisibleContigs(); - - AlignmentAnnotation[] alignAnnotList = fr.getViewport().getAlignment() - .getAlignmentAnnotation(); - - - /* - * Add secondary structure annotations that are added to the annotation track - * to the map - */ - Map> ssAlignmentAnnotationForSequences - = AlignmentUtils.getSequenceAssociatedAlignmentAnnotations(alignAnnotList, ssSource); + /* * scan each column, compute and add to each similarity[i, j] @@ -158,59 +220,64 @@ public class SecondaryStructureDistanceModel extends DistanceScoreModel */ for (int i = 0; i < (noseqs - 1); i++) { - //Iterates for each sequences + AlignmentAnnotation aa_i = ssForSeqs.get(i); + boolean undefinedSS1 = aa_i == null; + // check if the sequence contains gap in the current column + boolean gap1 = !seqsWithoutGapAtCol.contains(seqs[i]); + // secondary structure is fetched only if the current column is not + // gap for the sequence + char ss1 = '*'; + if (!gap1 && !undefinedSS1) + { + // fetch the position in sequence for the column and finds the + // corresponding secondary structure annotation + // TO DO - consider based on priority and displayed + int seqPosition_i = seqs[i].findPosition(cpos); + if (aa_i != null) + ss1 = AlignmentUtils.findSSAnnotationForGivenSeqposition(aa_i, + seqPosition_i); + } + // Iterates for each sequences for (int j = i + 1; j < noseqs; j++) { - - //check if ss is defined - boolean undefinedSS1 = ssAlignmentAnnotationForSequences.get(seqs[i].getRefSeq()) == null; - boolean undefinedSS2 = ssAlignmentAnnotationForSequences.get(seqs[j].getRefSeq()) == null; + + // check if ss is defined + AlignmentAnnotation aa_j = ssForSeqs.get(j); + boolean undefinedSS2 = aa_j == null; // Set similarity to max score if both SS are not defined - if (undefinedSS1 && undefinedSS2) { - similarities[i][j] += ssRateMatrix.getMaximumScore(); - continue; - } - + if (undefinedSS1 && undefinedSS2) + { + similarities[i][j] += ssRateMatrix.getMaximumScore(); + continue; + } + // Set similarity to minimum score if either one SS is not defined - else if(undefinedSS1 || undefinedSS2) { - similarities[i][j] += ssRateMatrix.getMinimumScore(); - continue; + else if (undefinedSS1 || undefinedSS2) + { + similarities[i][j] += ssRateMatrix.getMinimumScore(); + continue; } - - //check if the sequence contains gap in the current column - boolean gap1 = !seqsWithoutGapAtCol.contains(seqs[i]); - boolean gap2 = !seqsWithoutGapAtCol.contains(seqs[j]); - - //Variable to store secondary structure at the current column - char ss1 = '*'; + + boolean gap2 = !seqsWithoutGapAtCol.contains(seqs[j]); + + // Variable to store secondary structure at the current column char ss2 = '*'; - - //secondary structure is fetched only if the current column is not - //gap for the sequence - if(!gap1 && !undefinedSS1) { - //fetch the position in sequence for the column and finds the - //corresponding secondary structure annotation - //TO DO - consider based on priority and displayed - int seqPosition = seqs[i].findPosition(cpos); - AlignmentAnnotation aa = ssAlignmentAnnotationForSequences.get(seqs[i].getRefSeq()).get(0); - if(aa!=null) - ss1 = - AlignmentUtils.findSSAnnotationForGivenSeqposition(aa, seqPosition); - } - - if(!gap2 && !undefinedSS2) { + + if (!gap2 && !undefinedSS2) + { int seqPosition = seqs[j].findPosition(cpos); - AlignmentAnnotation aa = ssAlignmentAnnotationForSequences.get(seqs[j].getRefSeq()).get(0); - if(aa!=null) - ss2 = - AlignmentUtils.findSSAnnotationForGivenSeqposition(aa, seqPosition); - } + + if (aa_j != null) + ss2 = AlignmentUtils.findSSAnnotationForGivenSeqposition( + aa_j, seqPosition); + } if ((!gap1 && !gap2) || params.includeGaps()) { // Calculate similarity score based on the substitution matrix - double similarityScore = ssRateMatrix.getPairwiseScore(ss1, ss2); + double similarityScore = ssRateMatrix.getPairwiseScore(ss1, + ss2); similarities[i][j] += similarityScore; } } @@ -233,7 +300,7 @@ public class SecondaryStructureDistanceModel extends DistanceScoreModel similarities[j][i] = similarities[i][j]; } } - return ssRateMatrix.similarityToDistance(new Matrix(similarities)); + return SimilarityScoreModel.similarityToDistance(new Matrix(similarities)); } diff --git a/src/jalview/api/analysis/ScoreModelI.java b/src/jalview/api/analysis/ScoreModelI.java index a243c0c..e45f6d7 100644 --- a/src/jalview/api/analysis/ScoreModelI.java +++ b/src/jalview/api/analysis/ScoreModelI.java @@ -20,8 +20,11 @@ */ package jalview.api.analysis; +import java.util.List; + import jalview.api.AlignmentViewPanel; import jalview.datamodel.AlignmentView; +import jalview.datamodel.SequenceI; import jalview.math.MatrixI; public interface ScoreModelI @@ -112,4 +115,15 @@ public interface ScoreModelI * @return */ ScoreModelI getInstance(AlignmentViewPanel avp); + + /** + * Score models may create multiple leaves for a single sequence - implement this method if you do + * @param sequences - sequences to be filtered/expanded set of leaves + * @param seqData - origin + * @param labels - strings to show instead of the SequenceI.getName() for each element of sequences attached to leaves + * @return filtered/expanded set of leaves to be analysed + */ + default SequenceI[] expandSeqData(SequenceI[] sequences, AlignmentView seqData, SimilarityParamsI scoreParams, List labels) { + return sequences; + }; } diff --git a/src/jalview/datamodel/BinaryNode.java b/src/jalview/datamodel/BinaryNode.java index 624c2b9..2b5894a 100755 --- a/src/jalview/datamodel/BinaryNode.java +++ b/src/jalview/datamodel/BinaryNode.java @@ -34,6 +34,8 @@ public class BinaryNode String name; + String label=null; + BinaryNode left; BinaryNode right; @@ -364,6 +366,23 @@ public class BinaryNode return oldstate; } + /** + * check if there's a label to show + * @return true if non-empty/null string + */ + public boolean hasLabel() + { + return label!=null && !label.isEmpty(); + } + public String getLabel() + { + return label; + } + + public void setLabel(String label) + { + this.label = label; + } /** * ascends the tree but doesn't stop until a non-dummy node is discovered. @@ -380,4 +399,17 @@ public class BinaryNode return c; } + + public String getDisplayName() + { + if (name!=null && !name.isBlank()) + { + + if (hasLabel()) { + return getName()+"|"+label; + } + return name; + } + return hasLabel() ? label:""; + } } diff --git a/src/jalview/datamodel/PDBEntry.java b/src/jalview/datamodel/PDBEntry.java index c3906f9..fb9e96a 100755 --- a/src/jalview/datamodel/PDBEntry.java +++ b/src/jalview/datamodel/PDBEntry.java @@ -515,7 +515,8 @@ public class PDBEntry { if (_hasProperty(AUTHORITATIVE_ID)) { - return ((Boolean) getProperty(AUTHORITATIVE_ID)); + Object authId = getProperty(AUTHORITATIVE_ID); + return (authId instanceof Boolean) ? (Boolean) authId : Boolean.valueOf(authId.toString()); } return false; } diff --git a/src/jalview/ext/jmol/JalviewJmolBinding.java b/src/jalview/ext/jmol/JalviewJmolBinding.java index 93a5633..1c19854 100644 --- a/src/jalview/ext/jmol/JalviewJmolBinding.java +++ b/src/jalview/ext/jmol/JalviewJmolBinding.java @@ -785,6 +785,17 @@ public abstract class JalviewJmolBinding extends AAStructureBindingModel } else { + PDBEntry ppe=getPdbEntry(pe); + if (ppe==null) { + Console.warn("Please report under JAL-4440: Unexpected null entry for PDBEntry for a structure (for structure "+fileName+")"); + + continue; + } + if (ppe.getFile()==null) + { + Console.warn("Please report under JAL-4440: Unexpected null entry for file that we just tried to load into Jmol: pdbEntry: "+ppe.toString()+"(for structure "+fileName+")"); + continue; + } File fl = new File(getPdbEntry(pe).getFile()); matches = fl.equals(new File(fileName)); if (matches) diff --git a/src/jalview/gui/TreeCanvas.java b/src/jalview/gui/TreeCanvas.java index ba2417e..588a4f9 100755 --- a/src/jalview/gui/TreeCanvas.java +++ b/src/jalview/gui/TreeCanvas.java @@ -40,6 +40,7 @@ import java.util.ArrayList; import java.util.BitSet; import java.util.HashMap; import java.util.Hashtable; +import java.util.LinkedHashSet; import java.util.List; import java.util.Map; import java.util.Map.Entry; @@ -116,7 +117,10 @@ public class TreeCanvas extends JPanel implements MouseListener, Runnable, int labelLength = -1; - Map nameHash = new Hashtable<>(); + /** + * TODO - these rectangle-hash lookups should be optimised for big trees... + */ + Map nameHash = new Hashtable<>(); Map nodeHash = new Hashtable<>(); @@ -240,11 +244,11 @@ public class TreeCanvas extends JPanel implements MouseListener, Runnable, has_placeholders = true; } - if (longestName.length() < ((Sequence) lf.element()).getName() + if (longestName.length() < lf.getDisplayName() .length()) { longestName = TreeCanvas.PLACEHOLDER - + ((Sequence) lf.element()).getName(); + + lf.getDisplayName(); } if (tp.isColumnWise() && cm != null) { @@ -345,8 +349,8 @@ public class TreeCanvas extends JPanel implements MouseListener, Runnable, String name = (markPlaceholders && ((node instanceof SequenceNode && ((SequenceNode) node).isPlaceholder()))) - ? (PLACEHOLDER + node.getName()) - : node.getName(); + ? (PLACEHOLDER + node.getDisplayName()) + : node.getDisplayName(); int charWidth = fm.stringWidth(name) + 3; int charHeight = font.getSize(); @@ -354,7 +358,7 @@ public class TreeCanvas extends JPanel implements MouseListener, Runnable, Rectangle rect = new Rectangle(xend + 10, ypos - charHeight / 2, charWidth, charHeight); - nameHash.put(node.element(), rect); + nameHash.put(node, rect); // Colour selected leaves differently boolean isSelected = false; @@ -437,6 +441,10 @@ public class TreeCanvas extends JPanel implements MouseListener, Runnable, nodeLabel = nodeLabel + String.valueOf(node.bootstrap); } + if (node.hasLabel()) + { + nodeLabel = node.getLabel()+" "+nodeLabel; + } if (!nodeLabel.equals("")) { @@ -457,14 +465,14 @@ public class TreeCanvas extends JPanel implements MouseListener, Runnable, */ public Object findElement(int x, int y) { - for (Entry entry : nameHash.entrySet()) + for (Entry entry : nameHash.entrySet()) { Rectangle rect = entry.getValue(); if ((x >= rect.x) && (x <= (rect.x + rect.width)) && (y >= rect.y) && (y <= (rect.y + rect.height))) { - return entry.getKey(); + return entry.getKey().element(); } } @@ -1066,9 +1074,10 @@ public class TreeCanvas extends JPanel implements MouseListener, Runnable, setColor(groups.get(i), col.brighter()); Vector l = tree.findLeaves(groups.get(i)); + gatherLabelsTo(groups.get(i),l); if (!tp.isColumnWise()) { - createSeqGroupFor(aps, l, col); + createSeqGroupFor(aps, l, col); } else { @@ -1111,6 +1120,29 @@ public class TreeCanvas extends JPanel implements MouseListener, Runnable, } } + private void gatherLabelsTo(BinaryNode binaryNode, Vector l) + { + LinkedHashSet labelsForNode = new LinkedHashSet(); + for (BinaryNode leaf:l) + { + if (leaf.hasLabel()) + { + labelsForNode.add(leaf.getLabel()); + } + } + StringBuilder sb = new StringBuilder(); + boolean first=true; + for (String label:labelsForNode) + { + if (!first) { + sb.append(" | "); + } + first=false; + sb.append(label); + } + binaryNode.setLabel(sb.toString()); + } + private int parseColumnNode(BinaryNode bn) throws NumberFormatException { return Integer.parseInt( diff --git a/src/jalview/structure/StructureSelectionManager.java b/src/jalview/structure/StructureSelectionManager.java index 7248e47..73c4c0f 100644 --- a/src/jalview/structure/StructureSelectionManager.java +++ b/src/jalview/structure/StructureSelectionManager.java @@ -708,29 +708,43 @@ public class StructureSelectionManager ssAnnotDescriptionInPDB = ssAnnotationsInPDB[0].description; } - //Match the PDB entry using file path in the pdb data model and get the provider - if(seq.getDatasetSequence() != null) { - Vector pdbEntries = seq.getDatasetSequence().getAllPDBEntries(); - if(pdbEntries != null) { - for(PDBEntry pdbEntry : pdbEntries) { - if(pdbEntry.getFile()!=null) { - if(ssStructFilePathNameInPDB.startsWith(pdbEntry.getFile())) { - provider = pdbEntry.getProvider(); - break; - } + // Match the PDB entry using file path in the pdb data model and get the + // provider + if (ssStructFilePathNameInPDB != null + && seq.getDatasetSequence() != null) + { + Vector pdbEntries = seq.getDatasetSequence() + .getAllPDBEntries(); + if (pdbEntries != null) + { + for (PDBEntry pdbEntry : pdbEntries) + { + if (pdbEntry.getFile() != null && ssStructFilePathNameInPDB + .startsWith(pdbEntry.getFile())) + { + provider = pdbEntry.getProvider(); + break; } } - - - //Add provider value as property to the ss annotation - if(provider != null) { - AlignmentAnnotation[] ssAnnotList = ds.getAnnotation(Constants.SS_ANNOTATION_LABEL); - if(ssAnnotList != null) { - for(AlignmentAnnotation ssAnnot : ssAnnotList) { - //Match the annotation description with the annotation in pdb data object - if(ssAnnot.getProperty(Constants.SS_PROVIDER_PROPERTY) == null - && ssAnnot.description.equals(ssAnnotDescriptionInPDB)) { - ssAnnot.setProperty(Constants.SS_PROVIDER_PROPERTY, provider); + // Add provider value as property to the ss annotation + if (provider != null) + { + // TODO - JAL-2880 JAL-4441 this should be applied to all structure derived annotations, not just secondary structure! + AlignmentAnnotation[] ssAnnotList = ds + .getAnnotation(Constants.SS_ANNOTATION_LABEL); + if (ssAnnotList != null) + { + for (AlignmentAnnotation ssAnnot : ssAnnotList) + { + // Match the annotation description with the annotation in pdb + // data object + if (ssAnnot + .getProperty(Constants.SS_PROVIDER_PROPERTY) == null + && ssAnnot.description + .equals(ssAnnotDescriptionInPDB)) + { + ssAnnot.setProperty(Constants.SS_PROVIDER_PROPERTY, + provider); } } } -- 1.7.10.2