JAL-4386 Merge multiple leaves for ss source
author Renia Correya <rcorreya001@dundee.ac.uk>
Wed, 21 Aug 2024 13:36:56 +0000 (19:06 +0530)
committer Renia Correya <rcorreya001@dundee.ac.uk>
Wed, 21 Aug 2024 13:36:56 +0000 (19:06 +0530)
src/jalview/analysis/AlignmentUtils.java
src/jalview/analysis/TreeBuilder.java
src/jalview/analysis/scoremodels/SecondaryStructureDistanceModel.java
src/jalview/api/analysis/ScoreModelI.java
src/jalview/datamodel/BinaryNode.java
src/jalview/datamodel/PDBEntry.java
src/jalview/ext/jmol/JalviewJmolBinding.java
src/jalview/gui/TreeCanvas.java
src/jalview/structure/StructureSelectionManager.java

index d78bcdc..7f5d12f 100644 (file)
@@ -3154,38 +3154,49 @@ public class AlignmentUtils
         continue;
       }
       
-      for (String label : Constants.SECONDARY_STRUCTURE_LABELS.keySet())
+
+      if (isSecondaryStructureFrom(selectedSSSource, aa))
       {
+        ssAlignmentAnnotationForSequences
+                .computeIfAbsent(aa.sequenceRef.getDatasetSequence(),
+                        k -> new ArrayList<>())
+                .add(aa);
+      }
+    }
 
-        if (label.equals(aa.label))
-        {
+    return ssAlignmentAnnotationForSequences;
 
-          if (Constants.SS_ALL_PROVIDERS.equals(selectedSSSource))
-          {
-            ssAlignmentAnnotationForSequences
-                    .computeIfAbsent(aa.sequenceRef.getDatasetSequence(),
-                            k -> new ArrayList<>())
-                    .add(aa);
-            break;
-          }
+  }
+  
+  /**
+   * Tests whether an annotation row is a secondary structure row from the selected source.
+   * @param selectedSSSource provider name to match, or Constants.SS_ALL_PROVIDERS for any provider
+   * @param aa annotation row to test
+   * @return true if aa is from a provider or all providers as specified by selectedSSSource
+   */
+  public static boolean isSecondaryStructureFrom(String selectedSSSource,
+          AlignmentAnnotation aa)
+  {
 
-          String ssSource = AlignmentUtils
-                  .extractSSSourceFromAnnotationDescription(aa);
-          if (ssSource != null && ssSource.equals(selectedSSSource))
-          {
+    for (String label : Constants.SECONDARY_STRUCTURE_LABELS.keySet())
+    {
 
-            ssAlignmentAnnotationForSequences
-                    .computeIfAbsent(aa.sequenceRef.getDatasetSequence(),
-                            k -> new ArrayList<>())
-                    .add(aa);
-            break;
-          }
+      if (label.equals(aa.label))
+      {
+        // constant-first equals keeps a null selectedSSSource from throwing, as the replaced code did
+        if (Constants.SS_ALL_PROVIDERS.equals(selectedSSSource))
+        {
+          return true;
+        }
+        String ssSource = AlignmentUtils
+                .extractSSSourceFromAnnotationDescription(aa);
+        if (ssSource != null && ssSource.equals(selectedSSSource))
+        {
+          return true;
         }
       }
     }
-
-    return ssAlignmentAnnotationForSequences;
-
+    return false;
   }
   
 }
index 61f65ff..d70031a 100644 (file)
@@ -30,7 +30,9 @@ import jalview.datamodel.SequenceI;
 import jalview.datamodel.SequenceNode;
 import jalview.viewmodel.AlignmentViewport;
 
+import java.util.ArrayList;
 import java.util.BitSet;
+import java.util.List;
 import java.util.Vector;
 
 public abstract class TreeBuilder extends TreeEngine
@@ -40,6 +42,7 @@ public abstract class TreeBuilder extends TreeEngine
   public static final String NEIGHBOUR_JOINING = "NJ";
 
   protected SequenceI[] sequences;
+  protected List<String> labels;
 
   public AlignmentView seqData;
 
@@ -121,6 +124,10 @@ public abstract class TreeBuilder extends TreeEngine
    */
   protected void computeTree(ScoreModelI sm, SimilarityParamsI scoreOptions)
   {
+    labels = new ArrayList<String>();
+    sequences = sm.expandSeqData(sequences, seqData, scoreOptions, labels);
+    noseqs = sequences.length;
+    
     distances = sm.findDistances(seqData, scoreOptions);
 
     makeLeaves();
@@ -177,7 +184,14 @@ public abstract class TreeBuilder extends TreeEngine
       SequenceNode sn = new SequenceNode();
 
       sn.setElement(sequences[i]);
+      
+      if (labels.size()==noseqs)
+      {
+        sn.setLabel(labels.get(i));
+      }
+      
       sn.setName(sequences[i].getName());
+      
       node.addElement(sn);
       BitSet bs = new BitSet();
       bs.set(i);
index d7859f1..c639866 100644 (file)
@@ -32,10 +32,10 @@ import jalview.datamodel.SequenceI;
 import jalview.math.Matrix;
 import jalview.math.MatrixI;
 import jalview.util.Constants;
-import jalview.util.MessageManager;
 
 import java.util.ArrayList;
 import java.util.HashSet;
+import java.util.List;
 import java.util.Map;
 import java.util.Set;
 
@@ -88,6 +88,72 @@ public class SecondaryStructureDistanceModel extends DistanceScoreModel
     return true;
   }
   
+  ArrayList<AlignmentAnnotation> ssForSeqs = null;
+
+  /**
+   * Expands sequences and seqData to one entry per secondary structure annotation from the
+   * selected source (one entry when there is none); each entry's annotation (or null) goes in ssForSeqs.
+   * @param sequences sequences in seqData order; elements may be null
+   * @param seqData view whose SeqCigars are replaced with the expanded set
+   * @param scoreParams supplies the secondary structure source; null or empty selects all providers
+   * @param labels receives one label per non-null expanded sequence
+   * @return the expanded sequences, parallel to the new SeqCigars in seqData
+   */
+  @Override
+  public SequenceI[] expandSeqData(SequenceI[] sequences,
+          AlignmentView seqData, SimilarityParamsI scoreParams, List<String> labels)
+  {
+    ssForSeqs = new ArrayList<AlignmentAnnotation>();
+    List<SequenceI> newSequences = new ArrayList<SequenceI>();
+    List<SeqCigar> newCigs = new ArrayList<SeqCigar>();
+    int sq = 0;
+    AlignmentAnnotation[] alignAnnotList = fr.getViewport().getAlignment()
+            .getAlignmentAnnotation();
+    String ssSource = scoreParams.getSecondaryStructureSource();
+    // compare by content: == on a String only tests object identity
+    if (ssSource == null || ssSource.isEmpty())
+    {
+      ssSource = Constants.SS_ALL_PROVIDERS;
+    }
+    // secondary structure annotation rows from the chosen source, keyed by dataset sequence
+    Map<SequenceI, ArrayList<AlignmentAnnotation>> ssAlignmentAnnotationForSequences
+      = AlignmentUtils.getSequenceAssociatedAlignmentAnnotations(alignAnnotList, ssSource);
+
+    for (SeqCigar scig : seqData.getSequences())
+    {
+      // get the next sequence that should be bound to this scig: may be null
+      SequenceI alSeq = sequences[sq++];
+      List<AlignmentAnnotation> ssec = ssAlignmentAnnotationForSequences.get(scig.getRefSeq());
+      if (ssec == null)
+      {
+        // not defined: keep a single entry with no annotation
+        newSequences.add(alSeq);
+        if (alSeq != null)
+        {
+          labels.add("No Secondary Structure");
+        }
+        newCigs.add(scig);
+        ssForSeqs.add(null);
+        continue;
+      }
+      // one entry per annotation, all sharing this sequence and SeqCigar
+      for (AlignmentAnnotation ssAnnot : ssec)
+      {
+        if (alSeq != null)
+        {
+          labels.add(AlignmentUtils
+                  .extractSSSourceFromAnnotationDescription(ssAnnot));
+        }
+        newSequences.add(alSeq);
+        newCigs.add(scig);
+        ssForSeqs.add(ssAnnot);
+      }
+    }
+    seqData.setSequences(newCigs.toArray(new SeqCigar[0]));
+    return newSequences.toArray(new SequenceI[0]);
+  }
+
+  
   /**
    * Calculates distance score [i][j] between each pair of protein sequences 
    * based on their secondary structure annotations (H, E, C). 
@@ -106,8 +172,14 @@ public class SecondaryStructureDistanceModel extends DistanceScoreModel
   @Override
   public MatrixI findDistances(AlignmentView seqData,
           SimilarityParamsI params)
-  {   
-    
+  { 
+    if (ssForSeqs==null || ssForSeqs.size()!=seqData.getSequences().length)
+    {
+      // expandSeqData needs to be called to initialise the hash
+      SequenceI[] sequences = new SequenceI[seqData.getSequences().length];
+      // we throw away the new labels in this case..
+      expandSeqData(sequences, seqData, params, new ArrayList<String>());
+    }
     SeqCigar[] seqs = seqData.getSequences();
     int noseqs = seqs.length; //no of sequences
     int cpwidth = 0; 
@@ -122,17 +194,7 @@ public class SecondaryStructureDistanceModel extends DistanceScoreModel
     // need to get real position for view position
     int[] viscont = seqData.getVisibleContigs();
     
-    
-    AlignmentAnnotation[] alignAnnotList = fr.getViewport().getAlignment()
-            .getAlignmentAnnotation();   
-    
-
-    /*
-     * Add secondary structure annotations that are added to the annotation track
-     * to the map
-     */
-    Map<SequenceI, ArrayList<AlignmentAnnotation>> ssAlignmentAnnotationForSequences 
-      = AlignmentUtils.getSequenceAssociatedAlignmentAnnotations(alignAnnotList, ssSource); 
+       
 
     /*
      * scan each column, compute and add to each similarity[i, j]
@@ -158,59 +220,64 @@ public class SecondaryStructureDistanceModel extends DistanceScoreModel
          */
         for (int i = 0; i < (noseqs - 1); i++)
         {
-          //Iterates for each sequences
+          AlignmentAnnotation aa_i = ssForSeqs.get(i);
+          boolean undefinedSS1 = aa_i == null;
+          // check if the sequence contains gap in the current column
+          boolean gap1 = !seqsWithoutGapAtCol.contains(seqs[i]);
+          // secondary structure is fetched only if the current column is not
+          // gap for the sequence
+          char ss1 = '*';
+          if (!gap1 && !undefinedSS1)
+          {
+            // fetch the position in sequence for the column and finds the
+            // corresponding secondary structure annotation
+            // TO DO - consider based on priority and displayed
+            int seqPosition_i = seqs[i].findPosition(cpos);
+            if (aa_i != null)
+              ss1 = AlignmentUtils.findSSAnnotationForGivenSeqposition(aa_i,
+                      seqPosition_i);
+          }
+          // Iterates for each sequences
           for (int j = i + 1; j < noseqs; j++)
           {
-                         
-            //check if ss is defined
-            boolean undefinedSS1 = ssAlignmentAnnotationForSequences.get(seqs[i].getRefSeq()) == null;
-            boolean undefinedSS2 = ssAlignmentAnnotationForSequences.get(seqs[j].getRefSeq()) == null;
+
+            // check if ss is defined
+            AlignmentAnnotation aa_j = ssForSeqs.get(j);
+            boolean undefinedSS2 = aa_j == null;
 
             // Set similarity to max score if both SS are not defined
-            if (undefinedSS1 && undefinedSS2) {
-                similarities[i][j] += ssRateMatrix.getMaximumScore();
-                continue;
-            } 
-            
+            if (undefinedSS1 && undefinedSS2)
+            {
+              similarities[i][j] += ssRateMatrix.getMaximumScore();
+              continue;
+            }
+
             // Set similarity to minimum score if either one SS is not defined
-            else if(undefinedSS1 || undefinedSS2) {
-                similarities[i][j] += ssRateMatrix.getMinimumScore();
-                continue;
+            else if (undefinedSS1 || undefinedSS2)
+            {
+              similarities[i][j] += ssRateMatrix.getMinimumScore();
+              continue;
             }
-            
-            //check if the sequence contains gap in the current column
-            boolean gap1 = !seqsWithoutGapAtCol.contains(seqs[i]);
-            boolean gap2 = !seqsWithoutGapAtCol.contains(seqs[j]);            
-            
-            //Variable to store secondary structure at the current column
-            char ss1 = '*';
+
+            boolean gap2 = !seqsWithoutGapAtCol.contains(seqs[j]);
+
+            // Variable to store secondary structure at the current column
             char ss2 = '*';
-            
-            //secondary structure is fetched only if the current column is not 
-            //gap for the sequence
-            if(!gap1 && !undefinedSS1) {  
-              //fetch the position in sequence for the column and finds the
-              //corresponding secondary structure annotation
-              //TO DO - consider based on priority and displayed
-              int seqPosition = seqs[i].findPosition(cpos);
-              AlignmentAnnotation aa = ssAlignmentAnnotationForSequences.get(seqs[i].getRefSeq()).get(0);
-              if(aa!=null)
-              ss1 = 
-                  AlignmentUtils.findSSAnnotationForGivenSeqposition(aa, seqPosition);              
-            }
-            
-            if(!gap2 && !undefinedSS2) {              
+
+            if (!gap2 && !undefinedSS2)
+            {
               int seqPosition = seqs[j].findPosition(cpos);
-              AlignmentAnnotation aa = ssAlignmentAnnotationForSequences.get(seqs[j].getRefSeq()).get(0);
-              if(aa!=null)
-                ss2 = 
-                  AlignmentUtils.findSSAnnotationForGivenSeqposition(aa, seqPosition);               
-            }           
+
+              if (aa_j != null)
+                ss2 = AlignmentUtils.findSSAnnotationForGivenSeqposition(
+                        aa_j, seqPosition);
+            }
 
             if ((!gap1 && !gap2) || params.includeGaps())
             {
               // Calculate similarity score based on the substitution matrix
-              double similarityScore = ssRateMatrix.getPairwiseScore(ss1, ss2);
+              double similarityScore = ssRateMatrix.getPairwiseScore(ss1,
+                      ss2);
               similarities[i][j] += similarityScore;
             }
           }
@@ -233,7 +300,7 @@ public class SecondaryStructureDistanceModel extends DistanceScoreModel
         similarities[j][i] = similarities[i][j];
       }
     }
-    return ssRateMatrix.similarityToDistance(new Matrix(similarities));
+    return SimilarityScoreModel.similarityToDistance(new Matrix(similarities));
     
   }
 
index a243c0c..e45f6d7 100644 (file)
  */
 package jalview.api.analysis;
 
+import java.util.List;
+
 import jalview.api.AlignmentViewPanel;
 import jalview.datamodel.AlignmentView;
+import jalview.datamodel.SequenceI;
 import jalview.math.MatrixI;
 
 public interface ScoreModelI
@@ -112,4 +115,15 @@ public interface ScoreModelI
    * @return
    */
   ScoreModelI getInstance(AlignmentViewPanel avp);
+  
+  /**
+   * Score models that create multiple leaves for a single sequence override this; the default returns sequences unchanged and does not use seqData, scoreParams or labels
+   * @param sequences - sequences to be filtered/expanded into the set of leaves
+   * @param seqData - origin
+   * @param labels - strings to show instead of the SequenceI.getName() for each element of sequences attached to leaves
+   * @return filtered/expanded set of leaves to be analysed
+   */
+  default SequenceI[] expandSeqData(SequenceI[] sequences, AlignmentView seqData, SimilarityParamsI scoreParams, List<String> labels) { 
+    return sequences; 
+  };
 }
index 624c2b9..2b5894a 100755 (executable)
@@ -34,6 +34,8 @@ public class BinaryNode<T>
 
   String name;
 
+  String label=null;
+
   BinaryNode<T> left;
 
   BinaryNode<T> right;
@@ -364,6 +366,23 @@ public class BinaryNode<T>
 
     return oldstate;
   }
+  /** @return true if this node has a non-null, non-empty label to show */
+  public boolean hasLabel()
+  {
+    return !(label == null || label.isEmpty());
+  }
+
+  /** @return the label, or null if none has been set */
+  public String getLabel()
+  {
+    return label;
+  }
+
+  /** @param label the label to show for this node; may be null */
+  public void setLabel(String label)
+  {
+    this.label = label;
+  }
 
   /**
    * ascends the tree but doesn't stop until a non-dummy node is discovered.
@@ -380,4 +399,17 @@ public class BinaryNode<T>
 
     return c;
   }
+
+  /**
+   * @return "name|label" if both are present, else the name or the label if present, else ""
+   */
+  public String getDisplayName()
+  {
+    boolean named = name != null && !name.isBlank();
+    if (!named)
+    {
+      return hasLabel() ? label : "";
+    }
+    return hasLabel() ? getName() + "|" + label : name;
+  }
 }
index c3906f9..fb9e96a 100755 (executable)
@@ -515,7 +515,8 @@ public class PDBEntry
   {
     if (_hasProperty(AUTHORITATIVE_ID))
     {
-      return ((Boolean) getProperty(AUTHORITATIVE_ID));
+      Object authId = getProperty(AUTHORITATIVE_ID);
+      return (authId instanceof Boolean) ? (Boolean) authId : Boolean.valueOf(authId.toString());
     }
     return false;
   }
index 93a5633..1c19854 100644 (file)
@@ -785,6 +785,17 @@ public abstract class JalviewJmolBinding extends AAStructureBindingModel
         }
         else
         {
+          PDBEntry ppe=getPdbEntry(pe);
+          if (ppe==null) {
+            Console.warn("Please report under JAL-4440: Unexpected null entry for PDBEntry for a structure (for structure "+fileName+")");
+            
+            continue;
+          }
+          if (ppe.getFile()==null)
+          {
+            Console.warn("Please report under JAL-4440: Unexpected null entry for file that we just tried to load into Jmol: pdbEntry: "+ppe.toString()+"(for structure "+fileName+")");
+            continue;
+          }
           File fl = new File(getPdbEntry(pe).getFile());
           matches = fl.equals(new File(fileName));
           if (matches)
index ba2417e..588a4f9 100755 (executable)
@@ -40,6 +40,7 @@ import java.util.ArrayList;
 import java.util.BitSet;
 import java.util.HashMap;
 import java.util.Hashtable;
+import java.util.LinkedHashSet;
 import java.util.List;
 import java.util.Map;
 import java.util.Map.Entry;
@@ -116,7 +117,10 @@ public class TreeCanvas extends JPanel implements MouseListener, Runnable,
 
   int labelLength = -1;
 
-  Map<Object, Rectangle> nameHash = new Hashtable<>();
+  /**
+   * TODO - these rectangle-hash lookups should be optimised for big trees...
+   */
+  Map<BinaryNode, Rectangle> nameHash = new Hashtable<>();
 
   Map<BinaryNode, Rectangle> nodeHash = new Hashtable<>();
 
@@ -240,11 +244,11 @@ public class TreeCanvas extends JPanel implements MouseListener, Runnable,
         has_placeholders = true;
       }
 
-      if (longestName.length() < ((Sequence) lf.element()).getName()
+      if (longestName.length() < lf.getDisplayName()
               .length())
       {
         longestName = TreeCanvas.PLACEHOLDER
-                + ((Sequence) lf.element()).getName();
+                + lf.getDisplayName();
       }
       if (tp.isColumnWise() && cm != null)
       {
@@ -345,8 +349,8 @@ public class TreeCanvas extends JPanel implements MouseListener, Runnable,
 
       String name = (markPlaceholders && ((node instanceof SequenceNode
               && ((SequenceNode) node).isPlaceholder())))
-                      ? (PLACEHOLDER + node.getName())
-                      : node.getName();
+                      ? (PLACEHOLDER + node.getDisplayName())
+                      : node.getDisplayName();
 
       int charWidth = fm.stringWidth(name) + 3;
       int charHeight = font.getSize();
@@ -354,7 +358,7 @@ public class TreeCanvas extends JPanel implements MouseListener, Runnable,
       Rectangle rect = new Rectangle(xend + 10, ypos - charHeight / 2,
               charWidth, charHeight);
 
-      nameHash.put(node.element(), rect);
+      nameHash.put(node, rect);
 
       // Colour selected leaves differently
       boolean isSelected = false;
@@ -437,6 +441,10 @@ public class TreeCanvas extends JPanel implements MouseListener, Runnable,
 
         nodeLabel = nodeLabel + String.valueOf(node.bootstrap);
       }
+      if (node.hasLabel())
+      {
+        nodeLabel = node.getLabel()+" "+nodeLabel;
+      }
 
       if (!nodeLabel.equals(""))
       {
@@ -457,14 +465,14 @@ public class TreeCanvas extends JPanel implements MouseListener, Runnable,
    */
   public Object findElement(int x, int y)
   {
-    for (Entry<Object, Rectangle> entry : nameHash.entrySet())
+    for (Entry<BinaryNode, Rectangle> entry : nameHash.entrySet())
     {
       Rectangle rect = entry.getValue();
 
       if ((x >= rect.x) && (x <= (rect.x + rect.width)) && (y >= rect.y)
               && (y <= (rect.y + rect.height)))
       {
-        return entry.getKey();
+        return entry.getKey().element();
       }
     }
 
@@ -1066,9 +1074,10 @@ public class TreeCanvas extends JPanel implements MouseListener, Runnable,
       setColor(groups.get(i), col.brighter());
 
       Vector<BinaryNode> l = tree.findLeaves(groups.get(i));
+      gatherLabelsTo(groups.get(i),l);
       if (!tp.isColumnWise())
       {
-        createSeqGroupFor(aps, l, col);
+        createSeqGroupFor(aps, l, col);        
       }
       else
       {
@@ -1111,6 +1120,29 @@ public class TreeCanvas extends JPanel implements MouseListener, Runnable,
     }
   }
 
+  /**
+   * Labels the given node with the distinct labels of the supplied leaves,
+   * in first-seen order, separated by " | ". Leaves without a label are
+   * skipped; the node's label becomes empty when no leaf has one.
+   * @param binaryNode
+   *          node whose label is replaced
+   * @param l
+   *          leaves to collect labels from
+   */
+  private void gatherLabelsTo(BinaryNode binaryNode, Vector<BinaryNode> l)
+  {
+    // LinkedHashSet drops repeats while keeping first-seen order
+    LinkedHashSet<String> distinctLabels = new LinkedHashSet<String>();
+    for (BinaryNode leaf : l)
+    {
+      if (leaf.hasLabel())
+      {
+        distinctLabels.add(leaf.getLabel());
+      }
+    }
+    binaryNode.setLabel(String.join(" | ", distinctLabels));
+  }
+
   private int parseColumnNode(BinaryNode bn) throws NumberFormatException
   {
     return Integer.parseInt(
index 7248e47..73c4c0f 100644 (file)
@@ -708,29 +708,43 @@ public class StructureSelectionManager
         ssAnnotDescriptionInPDB = ssAnnotationsInPDB[0].description;
       }
       
-      //Match the PDB entry using file path in the pdb data model and get the provider
-      if(seq.getDatasetSequence() != null) {
-        Vector<PDBEntry> pdbEntries = seq.getDatasetSequence().getAllPDBEntries();
-        if(pdbEntries != null) {
-          for(PDBEntry pdbEntry : pdbEntries) {
-            if(pdbEntry.getFile()!=null) {
-              if(ssStructFilePathNameInPDB.startsWith(pdbEntry.getFile())) {
-                provider = pdbEntry.getProvider();
-                break;
-              }        
+      // Match the PDB entry using file path in the pdb data model and get the
+      // provider
+      if (ssStructFilePathNameInPDB != null
+              && seq.getDatasetSequence() != null)
+      {
+        Vector<PDBEntry> pdbEntries = seq.getDatasetSequence()
+                .getAllPDBEntries();
+        if (pdbEntries != null)
+        {
+          for (PDBEntry pdbEntry : pdbEntries)
+          {
+            if (pdbEntry.getFile() != null && ssStructFilePathNameInPDB
+                    .startsWith(pdbEntry.getFile()))
+            {
+              provider = pdbEntry.getProvider();
+              break;
             }
           }
-          
-          
-          //Add provider value as property to the ss annotation 
-          if(provider != null) {
-            AlignmentAnnotation[] ssAnnotList = ds.getAnnotation(Constants.SS_ANNOTATION_LABEL);
-            if(ssAnnotList != null) {
-              for(AlignmentAnnotation ssAnnot : ssAnnotList) {
-                //Match the annotation description with the annotation in pdb data object  
-                if(ssAnnot.getProperty(Constants.SS_PROVIDER_PROPERTY) == null 
-                        && ssAnnot.description.equals(ssAnnotDescriptionInPDB)) {
-                  ssAnnot.setProperty(Constants.SS_PROVIDER_PROPERTY, provider);
+          // Add provider value as property to the ss annotation
+          if (provider != null)
+          {
+            // TODO - JAL-2880 JAL-4441 this should be applied to all structure derived annotations, not just secondary structure!
+            AlignmentAnnotation[] ssAnnotList = ds
+                    .getAnnotation(Constants.SS_ANNOTATION_LABEL);
+            if (ssAnnotList != null)
+            {
+              for (AlignmentAnnotation ssAnnot : ssAnnotList)
+              {
+                // Match the annotation description with the annotation in pdb
+                // data object
+                if (ssAnnot
+                        .getProperty(Constants.SS_PROVIDER_PROPERTY) == null
+                        && ssAnnot.description
+                                .equals(ssAnnotDescriptionInPDB))
+                {
+                  ssAnnot.setProperty(Constants.SS_PROVIDER_PROPERTY,
+                          provider);
                 }
               }
             }