From cc3341f9d1465b0226d184be4216d022a62e5cee Mon Sep 17 00:00:00 2001 From: James Procter Date: Thu, 18 May 2023 17:12:01 +0100 Subject: [PATCH] JAL-4134 column grouping model and methods moved to their own object held by by ContactMatrixI implementors --- schemas/vamsas.xsd | 2 +- src/jalview/datamodel/ContactMatrix.java | 47 +---- src/jalview/datamodel/ContactMatrixI.java | 62 ++++++- src/jalview/datamodel/GroupSet.java | 191 ++++++++++++++++++++ src/jalview/datamodel/GroupSetI.java | 34 ++++ .../datamodel/SeqDistanceContactMatrix.java | 36 ---- src/jalview/gui/AnnotationLabels.java | 2 +- src/jalview/project/Jalview2XML.java | 51 ++++-- .../datamodel/alphafold/MappableContactMatrix.java | 17 ++ .../ws/datamodel/alphafold/PAEContactMatrix.java | 133 +------------- src/jalview/ws/dbsources/EBIAlfaFold.java | 5 +- test/jalview/project/Jalview2xmlTests.java | 31 +++- 12 files changed, 384 insertions(+), 227 deletions(-) create mode 100644 src/jalview/datamodel/GroupSet.java create mode 100644 src/jalview/datamodel/GroupSetI.java diff --git a/schemas/vamsas.xsd b/schemas/vamsas.xsd index b72e3c7..fedbae3 100755 --- a/schemas/vamsas.xsd +++ b/schemas/vamsas.xsd @@ -318,7 +318,7 @@ - Comma separated series of BigIntegers formed from + Comma separated series of longs formed from bitsets defining partitions on the rows/columns of the matrix diff --git a/src/jalview/datamodel/ContactMatrix.java b/src/jalview/datamodel/ContactMatrix.java index 65fd01c..8f0afdf 100644 --- a/src/jalview/datamodel/ContactMatrix.java +++ b/src/jalview/datamodel/ContactMatrix.java @@ -160,53 +160,18 @@ public abstract class ContactMatrix implements ContactMatrixI { return "Contact Matrix"; } - List groups=null; + GroupSet grps = new GroupSet(); @Override - public void updateGroups(List colGroups) + public GroupSetI getGroupSet() { - groups = colGroups; - colorMap=new HashMap<>(); + return grps; } @Override - public boolean hasGroups() + public void setGroupSet(GroupSet makeGroups) { - return groups!=null && groups.size()>0; - } - @Override - public List getGroups() - { - return groups; - } - @Override - public BitSet getGroupsFor(int column) - { - for (BitSet gp:groups) { - if (gp.get(column)) - { - return gp; - } - } - return ContactMatrixI.super.getGroupsFor(column); - } - HashMap colorMap = new HashMap<>(); - @Override - public Color getColourForGroup(BitSet bs) - { - if (bs==null) { - return Color.white; - } - Color groupCol=colorMap.get(bs); - if (groupCol==null) - { - return Color.white; - } - return groupCol; - } - @Override - public void setColorForGroup(BitSet bs,Color color) - { - colorMap.put(bs,color); + grps = makeGroups; } + public static String contactToFloatString(ContactMatrixI cm) { StringBuilder sb = new StringBuilder(); diff --git a/src/jalview/datamodel/ContactMatrixI.java b/src/jalview/datamodel/ContactMatrixI.java index ba2ee48..256d43d 100644 --- a/src/jalview/datamodel/ContactMatrixI.java +++ b/src/jalview/datamodel/ContactMatrixI.java @@ -27,21 +27,41 @@ public interface ContactMatrixI int getWidth(); int getHeight(); + public GroupSetI getGroupSet(); + + /// proxy methods to simplify use of the interface + /// Mappable contact matrices can override these to perform mapping + + default public boolean hasGroupSet() + { + return getGroupSet()!=null; + } + default boolean hasGroups() { - return false; + return hasGroupSet() && getGroupSet().hasGroups(); } + + default BitSet getGroupsFor(int column) { + if (!hasGroupSet()) + { BitSet colbitset = new BitSet(); colbitset.set(column); return colbitset; + } + return getGroupSet().getGroupsFor(column); } default List getGroups() { + if (!hasGroupSet()) + { return Arrays.asList(); + } + return getGroupSet().getGroups(); } default boolean hasTree() { - return false; + return hasGroupSet() ? getGroupSet().hasTree() : false; } /** @@ -49,25 +69,49 @@ public interface ContactMatrixI * @return null unless hasTree is true */ default String getNewick() { - return null; + return hasGroupSet() ? getGroupSet().getNewick(): null; } default String getTreeMethod() { - return null; + return hasGroupSet() ? getGroupSet().getTreeMethod() :null; } default boolean hasCutHeight() { - return false; + return hasGroupSet() ? getGroupSet().hasCutHeight() : false; } default double getCutHeight() { - return 0; + return hasGroupSet() ? getGroupSet().getCutHeight():0; } - void updateGroups(List colGroups); + default void updateGroups(List colGroups) + { + if (hasGroupSet()) + { + getGroupSet().updateGroups(colGroups); + } + } - void setColorForGroup(BitSet bs, Color color); + default void setColorForGroup(BitSet bs, Color color) + { + if (hasGroupSet()) + { + getGroupSet().setColorForGroup(bs, color); + } + } + + default Color getColourForGroup(BitSet bs) + { + if (hasGroupSet()) + { + return getGroupSet().getColourForGroup(bs); + } + else + { + return Color.white; + } + } - default Color getColourForGroup(BitSet bs) { return Color.white;} + void setGroupSet(GroupSet makeGroups); } diff --git a/src/jalview/datamodel/GroupSet.java b/src/jalview/datamodel/GroupSet.java new file mode 100644 index 0000000..b4d3e64 --- /dev/null +++ b/src/jalview/datamodel/GroupSet.java @@ -0,0 +1,191 @@ +package jalview.datamodel; + +import java.awt.Color; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.BitSet; +import java.util.HashMap; +import java.util.List; + +import jalview.analysis.AverageDistanceEngine; +import jalview.bin.Console; + +public class GroupSet implements GroupSetI +{ + List groups = Arrays.asList(); + + public GroupSet(GroupSet grps) + { + abs=grps.abs; + colorMap=new HashMap(grps.colorMap); + groups=new ArrayList(grps.groups); + newick=grps.newick; + thresh=grps.thresh; + treeType=grps.treeType; + } + + public GroupSet() + { + // TODO Auto-generated constructor stub + } + + public GroupSet(boolean abs2, float thresh2, List groups2, + String treeType2, String newick2) + { + abs = abs2; + thresh = thresh2; + groups = groups2; + treeType = treeType2; + newick=newick2; + } + + @Override + public boolean hasGroups() + { + return groups != null; + } + + String newick = null; + + @Override + public String getNewick() + { + return newick; + } + + @Override + public boolean hasTree() + { + return newick != null && newick.length() > 0; + } + + boolean abs=false; + + double thresh=0; + + String treeType = null; + + @Override + public void updateGroups(List colGroups) + { + if (colGroups != null) + { + groups = colGroups; + } + } + + @Override + public BitSet getGroupsFor(int column) + { + if (groups != null) + { + for (BitSet gp : groups) + { + if (gp.get(column)) + { + return gp; + } + } + } + // return singleton set; + BitSet bs = new BitSet(); + bs.set(column); + return bs; + } + + HashMap colorMap = new HashMap<>(); + + @Override + public Color getColourForGroup(BitSet bs) + { + if (bs == null) + { + return Color.white; + } + Color groupCol = colorMap.get(bs); + if (groupCol == null) + { + return Color.white; + } + return groupCol; + } + + @Override + public void setColorForGroup(BitSet bs, Color color) + { + colorMap.put(bs, color); + } + + @Override + public void restoreGroups(List newgroups, String treeMethod, + String tree, double thresh2) + { + treeType = treeMethod; + groups = newgroups; + thresh = thresh2; + newick = tree; + + } + + @Override + public boolean hasCutHeight() + { + return groups != null && thresh != 0; + } + + @Override + public double getCutHeight() + { + return thresh; + } + + @Override + public String getTreeMethod() + { + return treeType; + } + + public static GroupSet makeGroups(ContactMatrixI matrix, float thresh, + boolean abs) + { + AverageDistanceEngine clusterer = new AverageDistanceEngine(null, null, + matrix); + double height = clusterer.findHeight(clusterer.getTopNode()); + String newick = new jalview.io.NewickFile(clusterer.getTopNode(), false, + true).print(); + String treeType = "UPGMA"; + Console.trace("Newick string\n" + newick); + + List nodegroups; + if (abs ? height > thresh : 0 < thresh && thresh < 1) + { + float cut = abs ? (float) (thresh / height) : thresh; + Console.debug("Threshold " + cut + " for height=" + height); + + nodegroups = clusterer.groupNodes(cut); + } + else + { + nodegroups = new ArrayList(); + nodegroups.add(clusterer.getTopNode()); + } + List groups = new ArrayList<>(); + for (BinaryNode root : nodegroups) + { + BitSet gpset = new BitSet(); + for (BinaryNode leaf : clusterer.findLeaves(root)) + { + gpset.set((Integer) leaf.element()); + } + groups.add(gpset); + } + GroupSet grps = new GroupSet(abs,thresh,groups,treeType, newick); + return grps; + } + + @Override + public List getGroups() + { + return groups; + } +} diff --git a/src/jalview/datamodel/GroupSetI.java b/src/jalview/datamodel/GroupSetI.java new file mode 100644 index 0000000..7c086a5 --- /dev/null +++ b/src/jalview/datamodel/GroupSetI.java @@ -0,0 +1,34 @@ +package jalview.datamodel; + +import java.awt.Color; +import java.util.BitSet; +import java.util.List; + +public interface GroupSetI +{ + boolean hasGroups(); + + String getNewick(); + + boolean hasTree(); + + void updateGroups(List colGroups); + + BitSet getGroupsFor(int column); + + Color getColourForGroup(BitSet bs); + + void setColorForGroup(BitSet bs, Color color); + + void restoreGroups(List newgroups, String treeMethod, String tree, + double thresh2); + + boolean hasCutHeight(); + + double getCutHeight(); + + String getTreeMethod(); + + List getGroups(); + +} diff --git a/src/jalview/datamodel/SeqDistanceContactMatrix.java b/src/jalview/datamodel/SeqDistanceContactMatrix.java index f6377b1..ec19b25 100644 --- a/src/jalview/datamodel/SeqDistanceContactMatrix.java +++ b/src/jalview/datamodel/SeqDistanceContactMatrix.java @@ -111,42 +111,6 @@ public class SeqDistanceContactMatrix extends MappableContactMatrix groups=null; - @Override - public void updateGroups(List colGroups) - { - groups = colGroups; - } - @Override - public boolean hasGroups() - { - return groups!=null; - } - @Override - public List getGroups() - { - return groups; - } - - HashMap colorMap = new HashMap<>(); - @Override - public Color getColourForGroup(BitSet bs) - { - if (bs==null) { - return Color.white; - } - Color groupCol=colorMap.get(bs); - if (groupCol==null) - { - return Color.white; - } - return groupCol; - } - @Override - public void setColorForGroup(BitSet bs,Color color) - { - colorMap.put(bs,color); - } @Override protected double getElementAt(int _column, int i) { diff --git a/src/jalview/gui/AnnotationLabels.java b/src/jalview/gui/AnnotationLabels.java index 52a6066..d781ebb 100755 --- a/src/jalview/gui/AnnotationLabels.java +++ b/src/jalview/gui/AnnotationLabels.java @@ -428,7 +428,7 @@ public class AnnotationLabels extends JPanel { final PAEContactMatrix cm = (PAEContactMatrix) av .getContactMatrix(aa[selectedRow]); - if (cm!=null && cm.getNewick()!=null && cm.getNewick().length()>0) + if (cm!=null && cm.hasTree()) { item = new JMenuItem("Show Tree for Matrix"); item.addActionListener(new ActionListener() diff --git a/src/jalview/project/Jalview2XML.java b/src/jalview/project/Jalview2XML.java index 333c62b..a2ded9f 100644 --- a/src/jalview/project/Jalview2XML.java +++ b/src/jalview/project/Jalview2XML.java @@ -96,6 +96,7 @@ import jalview.datamodel.ContactMatrixI; import jalview.datamodel.DBRefEntry; import jalview.datamodel.GeneLocus; import jalview.datamodel.GraphLine; +import jalview.datamodel.GroupSet; import jalview.datamodel.PDBEntry; import jalview.datamodel.Point; import jalview.datamodel.RnaViewerModel; @@ -2323,8 +2324,7 @@ public class Jalview2XML { for (BitSet gp: cm.getGroups()) { - BigInteger val = new BigInteger(gp.toByteArray()); - xmlmat.getGroups().add(val.toString()); + xmlmat.getGroups().add(stringifyBitset(gp)); } } if (cm.hasTree()) @@ -2478,6 +2478,38 @@ public class Jalview2XML } + private String stringifyBitset(BitSet gp) { + StringBuilder sb = new StringBuilder(); + for (long val:gp.toLongArray()) + { + if (sb.length()>0) { + sb.append(","); + } + sb.append(val); + } + return sb.toString(); + } + private BitSet deStringifyBitset(String stringified) { + if ("".equals(stringified) || stringified==null) + { + return new BitSet(); + } + String[] longvals = stringified.split(","); + long[] newlongvals = new long[longvals.length]; + for (int lv=0;lv newgroups=new ArrayList(); if (xmlmat.getGroups().size()>0) { for (String sgroup:xmlmat.getGroups()) { - try { - BigInteger group = new BigInteger(sgroup); - newgroups.add(BitSet.valueOf(group.toByteArray())); - } catch (NumberFormatException nfe) - { - Console.error("Problem parsing groups for a contact matrix (\""+sgroup+"\"",nfe); - } + newgroups.add(deStringifyBitset(sgroup)); } } String nwk=xmlmat.getNewick().size()>0 ? xmlmat.getNewick().get(0):null; @@ -4055,7 +4079,10 @@ public class Jalview2XML String treeMethod = xmlmat.getTreeMethod(); double thresh = xmlmat.getCutHeight()!=null ? xmlmat.getCutHeight() : 0; - newpae.restoreGroups(newgroups, treeMethod, nwk, thresh); + GroupSet grpset = new GroupSet(); + grpset.restoreGroups(newgroups, treeMethod, nwk, thresh); + PAEContactMatrix newpae = new PAEContactMatrix( + jaa.sequenceRef, mapping, elements,grpset); jaa.sequenceRef.addContactListFor(jaa, newpae); } } diff --git a/src/jalview/ws/datamodel/alphafold/MappableContactMatrix.java b/src/jalview/ws/datamodel/alphafold/MappableContactMatrix.java index 55a136a..27665d4 100644 --- a/src/jalview/ws/datamodel/alphafold/MappableContactMatrix.java +++ b/src/jalview/ws/datamodel/alphafold/MappableContactMatrix.java @@ -5,6 +5,8 @@ import java.util.ArrayList; import jalview.datamodel.ContactListI; import jalview.datamodel.ContactListImpl; import jalview.datamodel.ContactListProviderI; +import jalview.datamodel.GroupSet; +import jalview.datamodel.GroupSetI; import jalview.datamodel.Mapping; import jalview.datamodel.SequenceI; import jalview.util.MapList; @@ -32,6 +34,21 @@ public abstract class MappableContactMatrix> { return refSeq; } + /** + * container for groups - defined on matrix columns + */ + GroupSet grps=new GroupSet(); + @Override + public GroupSetI getGroupSet() + { + return grps; + }; + @Override + public void setGroupSet(GroupSet makeGroups) + { + grps = makeGroups; + } + @Override public MapList getMapFor(SequenceI mapSeq) { diff --git a/src/jalview/ws/datamodel/alphafold/PAEContactMatrix.java b/src/jalview/ws/datamodel/alphafold/PAEContactMatrix.java index d48d539..0ba9584 100644 --- a/src/jalview/ws/datamodel/alphafold/PAEContactMatrix.java +++ b/src/jalview/ws/datamodel/alphafold/PAEContactMatrix.java @@ -23,6 +23,8 @@ import jalview.datamodel.ContactListI; import jalview.datamodel.ContactListImpl; import jalview.datamodel.ContactListProviderI; import jalview.datamodel.ContactMatrixI; +import jalview.datamodel.GroupSet; +import jalview.datamodel.GroupSetI; import jalview.datamodel.Mapping; import jalview.datamodel.SequenceDummy; import jalview.datamodel.SequenceI; @@ -94,12 +96,14 @@ public class PAEContactMatrix extends MappableContactMatrix im * @param newRefSeq * @param newFromMapList * @param elements2 + * @param grps2 */ public PAEContactMatrix(SequenceI newRefSeq, - MapList newFromMapList, float[][] elements2) + MapList newFromMapList, float[][] elements2, GroupSet grps2) { this(newRefSeq,elements2); toSeq = newFromMapList; + grps= grps2; } /** @@ -307,127 +311,7 @@ public class PAEContactMatrix extends MappableContactMatrix im { return length; } - List groups=null; - @Override - public boolean hasGroups() - { - return groups!=null; - } - String newick=null; - @Override - public String getNewick() - { - return newick; - } - @Override - public boolean hasTree() - { - return newick!=null && newick.length()>0; - } - boolean abs; - double thresh; - String treeType=null; - public void makeGroups(float thresh,boolean abs) - { - AverageDistanceEngine clusterer = new AverageDistanceEngine(null, null, this); - double height = clusterer.findHeight(clusterer.getTopNode()); - newick = new jalview.io.NewickFile(clusterer.getTopNode(),false,true).print(); - treeType = "UPGMA"; - Console.trace("Newick string\n"+newick); - - List nodegroups; - if (abs ? height > thresh : 0 < thresh && thresh < 1) - { - float cut = abs ? (float) (thresh / height) : thresh; - Console.debug("Threshold "+cut+" for height="+height); - - nodegroups = clusterer.groupNodes(cut); - } - else - { - nodegroups = new ArrayList(); - nodegroups.add(clusterer.getTopNode()); - } - this.abs=abs; - this.thresh=thresh; - groups = new ArrayList<>(); - for (BinaryNode root:nodegroups) - { - BitSet gpset=new BitSet(); - for (BinaryNode leaf:clusterer.findLeaves(root)) - { - gpset.set((Integer)leaf.element()); - } - groups.add(gpset); - } - } - @Override - public void updateGroups(List colGroups) - { - if (colGroups!=null) - { - groups=colGroups; - } - } - @Override - public BitSet getGroupsFor(int column) - { - if (groups != null) - { - for (BitSet gp : groups) - { - if (gp.get(column)) - { - return gp; - } - } - } - return super.getGroupsFor(column); - } - - HashMap colorMap = new HashMap<>(); - @Override - public Color getColourForGroup(BitSet bs) - { - if (bs==null) { - return Color.white; - } - Color groupCol=colorMap.get(bs); - if (groupCol==null) - { - return Color.white; - } - return groupCol; - } - @Override - public void setColorForGroup(BitSet bs,Color color) - { - colorMap.put(bs,color); - } - public void restoreGroups(List newgroups, String treeMethod, - String tree, double thresh2) - { - treeType=treeMethod; - groups = newgroups; - thresh=thresh2; - newick =tree; - - } - @Override - public boolean hasCutHeight() { - return groups!=null && thresh!=0; - } - @Override - public double getCutHeight() - { - return thresh; - } - @Override - public String getTreeMethod() - { - return treeType; - } - + public static void validateContactMatrixFile(String fileName) throws FileFormatException,IOException { FileInputStream infile=null; @@ -458,7 +342,8 @@ public class PAEContactMatrix extends MappableContactMatrix im protected PAEContactMatrix newMappableContactMatrix( SequenceI newRefSeq, MapList newFromMapList) { - return new PAEContactMatrix(newRefSeq, newFromMapList, - elements); + PAEContactMatrix pae=new PAEContactMatrix(newRefSeq, newFromMapList, + elements, new GroupSet(grps)); + return pae; } } diff --git a/src/jalview/ws/dbsources/EBIAlfaFold.java b/src/jalview/ws/dbsources/EBIAlfaFold.java index d9cbbd9..5d88e89 100644 --- a/src/jalview/ws/dbsources/EBIAlfaFold.java +++ b/src/jalview/ws/dbsources/EBIAlfaFold.java @@ -44,6 +44,7 @@ import jalview.datamodel.AlignmentAnnotation; import jalview.datamodel.AlignmentI; import jalview.datamodel.ContactMatrixI; import jalview.datamodel.DBRefEntry; +import jalview.datamodel.GroupSet; import jalview.datamodel.PDBEntry; import jalview.datamodel.SequenceFeature; import jalview.datamodel.SequenceI; @@ -442,7 +443,7 @@ public class EBIAlfaFold extends EbiFileRetrievedProxy } ContactMatrixI matrix = new PAEContactMatrix(sequence, (Map) paeDict); - ((PAEContactMatrix) matrix).makeGroups(5f, true); + matrix.setGroupSet(GroupSet.makeGroups(matrix, 5f, true)); AlignmentAnnotation cmannot = sequence.addContactList(matrix); pdbAlignment.addAnnotation(cmannot); @@ -496,7 +497,7 @@ public class EBIAlfaFold extends EbiFileRetrievedProxy ContactMatrixI matrix = new PAEContactMatrix(sm.getSequence(), (Map) pae_obj); - ((PAEContactMatrix) matrix).makeGroups(5f, true); + matrix.setGroupSet(GroupSet.makeGroups(matrix,5f, true)); AlignmentAnnotation cmannot = sm.getSequence().addContactList(matrix); sm.getSequence().addAlignmentAnnotation(cmannot); diff --git a/test/jalview/project/Jalview2xmlTests.java b/test/jalview/project/Jalview2xmlTests.java index 343df05..d038c78 100644 --- a/test/jalview/project/Jalview2xmlTests.java +++ b/test/jalview/project/Jalview2xmlTests.java @@ -32,7 +32,9 @@ import java.awt.Color; import java.awt.Rectangle; import java.io.File; import java.io.IOException; +import java.math.BigInteger; import java.util.ArrayList; +import java.util.BitSet; import java.util.HashMap; import java.util.List; import java.util.Locale; @@ -59,6 +61,7 @@ import jalview.datamodel.ContactMatrix; import jalview.datamodel.ContactMatrixI; import jalview.datamodel.DBRefEntry; import jalview.datamodel.GeneLocus; +import jalview.datamodel.GroupSet; import jalview.datamodel.HiddenSequences; import jalview.datamodel.Mapping; import jalview.datamodel.PDBEntry; @@ -1568,10 +1571,36 @@ public class Jalview2xmlTests extends Jalview2xmlBase Assert.assertTrue(content.contains("\t1.")); // at least one element must be 1 float[][] vals = ContactMatrix.fromFloatStringToContacts(content, sq.getLength(), sq.getLength()); assertEquals(vals[3][4],paevals[3][4]); - dummyMat.makeGroups(0.5f, false); + dummyMat.setGroupSet(GroupSet.makeGroups(dummyMat,0.5f, false)); Assert.assertNotSame(dummyMat.getNewick(), ""); AlignmentAnnotation paeCm = sq.addContactList(dummyMat); al.addAnnotation(paeCm); + // verify store/restore of group bitsets + for (BitSet gp:dummyMat.getGroups()) + { + StringBuilder sb = new StringBuilder(); + for (long val:gp.toLongArray()) + { + if (sb.length()>0) { + sb.append(","); + } + sb.append(val); + } + String[] longvals = sb.toString().split(","); + long[] newlongvals = new long[longvals.length]; + for (int lv=0;lv