package jalview.ws.datamodel.alphafold;
+import java.util.ArrayList;
+import java.util.BitSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
+import jalview.analysis.AverageDistanceEngine;
+import jalview.bin.Console;
+import jalview.datamodel.BinaryNode;
import jalview.datamodel.ContactListI;
import jalview.datamodel.ContactListImpl;
import jalview.datamodel.ContactListProviderI;
import jalview.datamodel.ContactMatrixI;
-import jalview.datamodel.ContactRange;
import jalview.datamodel.SequenceI;
+import jalview.util.MapUtils;
public class PAEContactMatrix implements ContactMatrixI
{
- SequenceI refSeq=null;
- int maxrow=0,maxcol=0;
- int[] indices1,indices2;
+ // reference sequence for this matrix; setRefSeq stores the sequence found
+ // at the end of the getDatasetSequence() chain
+ SequenceI refSeq = null;
+
+ /**
+ * the length that refSeq is expected to be (excluding gaps, of course)
+ */
+ int length;
+
+ // row / column extents recorded by whichever constructor or parser filled
+ // the matrix
+ int maxrow = 0, maxcol = 0;
+
+ // NOTE(review): indices1/indices2 are not assigned or read in the code
+ // visible here - confirm whether they are still needed
+ int[] indices1, indices2;
+
+ // the PAE scores, filled by the parsers as elements[row][column]
float[][] elements;
+
+ // largest score: read from the JSON by the parsers, derived from the data
+ // by the float[][] constructor
float maxscore;
-
- @SuppressWarnings("unchecked")
- public PAEContactMatrix(SequenceI _refSeq, Map<String, Object> pae_obj) throws Exception
+
+ /**
+ * Record the reference sequence for this matrix. refSeq is set to the
+ * sequence reached by following getDatasetSequence() until it returns null,
+ * and length is computed from the start/end of the sequence that was passed
+ * in (not from the dataset sequence).
+ *
+ * @param _refSeq
+ * sequence the matrix is associated with; must not be null
+ */
+ private void setRefSeq(SequenceI _refSeq)
{
refSeq = _refSeq;
- while (refSeq.getDatasetSequence()!=null)
+ while (refSeq.getDatasetSequence() != null)
{
- refSeq=refSeq.getDatasetSequence();
+ refSeq = refSeq.getDatasetSequence();
}
+ length = _refSeq.getEnd() - _refSeq.getStart() + 1;
+ }
+
+ /**
+ * Construct a sequence associated PAE matrix from a parsed PAE JSON object.
+ * When MapUtils.containsAKey reports that pae_obj has neither a
+ * "predicted_aligned_error" nor a "pae" entry the object is handed to the
+ * version 1 parser, otherwise to the version 2 parser.
+ *
+ * @param _refSeq
+ * sequence the matrix is associated with
+ * @param pae_obj
+ * the PAE data as a map of JSON keys to values
+ */
+ @SuppressWarnings("unchecked")
+ public PAEContactMatrix(SequenceI _refSeq, Map<String, Object> pae_obj)
+ {
+ setRefSeq(_refSeq);
// convert the lists to primitive arrays and store
- int length = _refSeq.getEnd()-_refSeq.getStart()+1;
-
- // assume indices are with respect to range defined by _refSeq on the dataset refSeq
- Iterator<Long> rows = ((List<Long>)pae_obj.get("residue1")).iterator();
- Iterator<Long> cols = ((List<Long>)pae_obj.get("residue2")).iterator();
- Iterator<Double> scores = ((List<Double>)pae_obj.get("distance")).iterator();
-
- elements=new float[length][length];
- while (scores.hasNext()) {
- float escore=scores.next().floatValue();
- int row=rows.next().intValue();
- int col=cols.next().intValue();
- if (maxrow<row)
+
+ // the version 2 layout is recognised by the key holding its score rows
+ if (!MapUtils.containsAKey(pae_obj, "predicted_aligned_error", "pae"))
+ {
+ parse_version_1_pAE(pae_obj);
+ return;
+ }
+ else
+ {
+ parse_version_2_pAE(pae_obj);
+ }
+ }
+
+ /**
+  * Construct a sequence associated PAE matrix directly from a float array.
+  * The array is stored as given (not copied); maxrow is the number of rows,
+  * maxcol the length of the longest row and maxscore the largest value found
+  * anywhere in the matrix (left at 0 if the matrix holds no values).
+  *
+  * @param _refSeq
+  *          sequence the matrix is associated with
+  * @param matrix
+  *          the scores, one array per row; rows may be empty
+  */
+ public PAEContactMatrix(SequenceI _refSeq, float[][] matrix)
+ {
+   setRefSeq(_refSeq);
+   maxcol = 0;
+   // keep one running maximum for the whole matrix: seeding it from row[0]
+   // on every row would discard earlier rows and fail on an empty row
+   boolean seenScore = false;
+   for (float[] row : matrix)
+   {
+     if (row.length > maxcol)
      {
-       maxrow=row;
+       maxcol = row.length;
      }
-     if (maxcol<col)
+     for (float f : row)
      {
-       maxcol=col;
+       if (!seenScore || maxscore < f)
+       {
+         maxscore = f;
+         seenScore = true;
+       }
      }
-     elements[row-1][col-1]=escore;
    }
-
-   maxscore = ((Double) pae_obj.get("max_predicted_aligned_error")).floatValue();
+   maxrow = matrix.length;
+   elements = matrix;
+
+ }
+
+ /**
+  * Parse the current JSON representation of the pAE, in which the scores
+  * are given as a list of rows under "predicted_aligned_error" or "pae" and
+  * the maximum score under "max_predicted_aligned_error" or "max_pae".
+  * <p>
+  * elements is allocated as a square matrix with one row per score row;
+  * maxrow and maxcol are set from the reference sequence length.
+  *
+  * @param pae_obj
+  *          the PAE data as a map of JSON keys to values
+  */
+ @SuppressWarnings("unchecked")
+ private void parse_version_2_pAE(Map<String, Object> pae_obj)
+ {
+   // JSON numbers can arrive as either Long or Double, so read every value
+   // through Number rather than casting to one concrete type
+   maxscore = ((Number) MapUtils.getFirst(pae_obj,
+           "max_predicted_aligned_error", "max_pae")).floatValue();
+   List<List<Number>> scoreRows = (List<List<Number>>) MapUtils
+           .getFirst(pae_obj, "predicted_aligned_error", "pae");
+   elements = new float[scoreRows.size()][scoreRows.size()];
+   int row = 0;
+   for (List<Number> scoreRow : scoreRows)
+   {
+     int col = 0;
+     for (Number score : scoreRow)
+     {
+       // floatValue() keeps fractional scores instead of truncating them
+       elements[row][col++] = score.floatValue();
+     }
+     row++;
+   }
+   maxcol = length;
+   maxrow = length;
+ }
+
+ /**
+ * v1 format got ditched 28th July 2022 see
+ * https://alphafold.ebi.ac.uk/faq#:~:text=We%20updated%20the%20PAE%20JSON%20file%20format%20on%2028th%20July%202022
+ *
+ * @param pae_obj
+ */
+ @SuppressWarnings("unchecked")
+ private void parse_version_1_pAE(Map<String, Object> pae_obj)
+ {
+ // assume indices are with respect to range defined by _refSeq on the
+ // dataset refSeq
+ Iterator<Long> rows = ((List<Long>) pae_obj.get("residue1")).iterator();
+ Iterator<Long> cols = ((List<Long>) pae_obj.get("residue2")).iterator();
+ Iterator<Double> scores = ((List<Double>) pae_obj.get("distance"))
+ .iterator();
+ // assume square matrix
+ elements = new float[length][length];
+ while (scores.hasNext())
+ {
+ float escore = scores.next().floatValue();
+ int row = rows.next().intValue();
+ int col = cols.next().intValue();
+ if (maxrow < row)
+ {
+ maxrow = row;
+ }
+ if (maxcol < col)
+ {
+ maxcol = col;
+ }
+ elements[row - 1][col - 1] = escore;
+ }
+
+ maxscore = ((Double) MapUtils.getFirst(pae_obj,
+ "max_predicted_aligned_error", "max_pae")).floatValue();
}
@Override
public ContactListI getContactList(final int _column)
{
-
- return new ContactListImpl(new ContactListProviderI()
+ if (_column < 0 || _column >= elements.length)
+ {
+ return null;
+ }
+
+ return new ContactListImpl(new ContactListProviderI()
{
@Override
+ public int getPosition()
+ {
+ return _column;
+ }
+
+ @Override
public int getContactHeight()
{
- return maxcol-1;
+ return maxcol - 1;
}
-
+
@Override
public double getContactAt(int column)
{
- if (column<0 || column>=elements[_column].length)
+ if (column < 0 || column >= elements[_column].length)
{
return -1;
}
- // TODO Auto-generated method stub
return elements[_column][column];
}
});
+ /**
+ * @return true if a reference sequence has been set for this matrix
+ */
@Override
public boolean hasReferenceSeq()
{
- return (refSeq!=null);
+ return (refSeq != null);
}
@Override
return refSeq;
}
+ @Override
+ public String getAnnotDescr()
+ {
+ return "Predicted Alignment Error"+((refSeq==null) ? "" : (" for " + refSeq.getName()));
+ }
+
+ /**
+  * @return the fixed label "PAE Matrix"; the reference sequence's id is not
+  *         appended to it
+  */
+ @Override
+ public String getAnnotLabel()
+ {
+   return "PAE Matrix";
+ }
+
+ /**
+  * type name reported by {@link #getType()}
+  */
+ public static final String PAEMATRIX = "PAE_MATRIX";
+
+ /**
+  * @return {@link #PAEMATRIX}
+  */
+ @Override
+ public String getType()
+ {
+   return PAEContactMatrix.PAEMATRIX;
+ }
+
+ /**
+  * @return the length recorded for the reference sequence
+  */
+ @Override
+ public int getWidth()
+ {
+   return this.length;
+ }
+
+ /**
+  * @return the length recorded for the reference sequence (same value as
+  *         getWidth)
+  */
+ @Override
+ public int getHeight()
+ {
+   return this.length;
+ }
+ /**
+  * column groups, one BitSet per group; null until makeGroups or
+  * restoreGroups has supplied a grouping
+  */
+ List<BitSet> groups = null;
+
+ /**
+  * @return true if a grouping is present
+  */
+ @Override
+ public boolean hasGroups()
+ {
+   return this.groups != null;
+ }
+ /**
+  * newick string for the clustering tree; null until makeGroups or
+  * restoreGroups has set it
+  */
+ String newick = null;
+
+ /**
+  * @return the stored newick string, or null if none has been set
+  */
+ @Override
+ public String getNewick()
+ {
+   return this.newick;
+ }
+ /** the abs flag last passed to makeGroups */
+ boolean abs;
+
+ /** threshold last passed to makeGroups or restoreGroups */
+ double thresh;
+
+ /** name of the method that produced the tree; null until one is set */
+ String treeType = null;
+
+ /**
+  * Cluster this matrix with an AverageDistanceEngine, store the resulting
+  * tree as a newick string and partition the tree's leaves into groups.
+  * <p>
+  * The tree is cut when abs is true and the tree height exceeds thresh, or
+  * when abs is false and thresh lies strictly between 0 and 1. Otherwise
+  * every leaf under the top node goes into a single group.
+  *
+  * @param thresh
+  *          cut threshold; when abs is true it is divided by the tree height
+  *          before being given to the clusterer, otherwise it is passed on
+  *          unchanged
+  * @param abs
+  *          true if thresh is to be compared against the tree height
+  */
+ public void makeGroups(float thresh, boolean abs)
+ {
+   AverageDistanceEngine engine = new AverageDistanceEngine(null, null,
+           this);
+   double treeHeight = engine.findHeight(engine.getTopNode());
+   newick = new jalview.io.NewickFile(engine.getTopNode(), false, true)
+           .print();
+   treeType = "UPGMA";
+   Console.trace("Newick string\n" + newick);
+
+   final boolean cutTree;
+   if (abs)
+   {
+     cutTree = treeHeight > thresh;
+   }
+   else
+   {
+     cutTree = 0 < thresh && thresh < 1;
+   }
+
+   List<BinaryNode> roots;
+   if (cutTree)
+   {
+     float cut = thresh;
+     if (abs)
+     {
+       cut = (float) (thresh / treeHeight);
+     }
+     Console.debug("Threshold " + cut + " for height=" + treeHeight);
+
+     roots = engine.groupNodes(cut);
+   }
+   else
+   {
+     roots = new ArrayList<>();
+     roots.add(engine.getTopNode());
+   }
+   this.abs = abs;
+   this.thresh = thresh;
+   groups = new ArrayList<>();
+   for (BinaryNode root : roots)
+   {
+     BitSet members = new BitSet();
+     for (BinaryNode leaf : engine.findLeaves(root))
+     {
+       members.set((Integer) leaf.element());
+     }
+     groups.add(members);
+   }
+ }
+
+ /**
+  * Find the group that contains the given column.
+  *
+  * @param column
+  *          column index to look up
+  * @return the first group whose bit for column is set; when no grouping is
+  *         present, or no group contains the column, whatever the
+  *         ContactMatrixI default implementation returns
+  */
+ @Override
+ public BitSet getGroupsFor(int column)
+ {
+   // groups is null until makeGroups or restoreGroups has supplied a
+   // grouping, so guard the scan and fall through to the default
+   if (groups != null)
+   {
+     for (BitSet gp : groups)
+     {
+       if (gp.get(column))
+       {
+         return gp;
+       }
+     }
+   }
+   return ContactMatrixI.super.getGroupsFor(column);
+ }
+
+ /**
+  * Install a previously computed grouping together with the tree it was
+  * derived from. The abs flag is not modified.
+  *
+  * @param newgroups
+  *          the groups to store
+  * @param treeMethod
+  *          name of the method that produced the tree
+  * @param tree
+  *          newick string for the tree
+  * @param thresh2
+  *          threshold to store as the cut height
+  */
+ public void restoreGroups(List<BitSet> newgroups, String treeMethod,
+         String tree, double thresh2)
+ {
+   this.groups = newgroups;
+   this.newick = tree;
+   this.treeType = treeMethod;
+   this.thresh = thresh2;
+ }
+ @Override
+ public boolean hasCutHeight() {
+ return groups!=null && thresh!=0;
+ }
+ /**
+  * @return the threshold stored by makeGroups or restoreGroups
+  */
+ @Override
+ public double getCutHeight()
+ {
+   return this.thresh;
+ }
+ /**
+  * @return the tree method name stored by makeGroups or restoreGroups, or
+  *         null if neither has been called
+  */
+ @Override
+ public String getTreeMethod()
+ {
+   return this.treeType;
+ }
}