package jalview.ws.datamodel.alphafold;
-import java.awt.Color;
-import java.util.ArrayList;
-import java.util.BitSet;
-import java.util.HashMap;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.IOException;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
-import jalview.analysis.AverageDistanceEngine;
-import jalview.bin.Console;
-import jalview.datamodel.BinaryNode;
+import org.json.simple.JSONObject;
+
import jalview.datamodel.ContactListI;
import jalview.datamodel.ContactListImpl;
import jalview.datamodel.ContactListProviderI;
import jalview.datamodel.ContactMatrixI;
+import jalview.datamodel.FloatContactMatrix;
+import jalview.datamodel.GroupSet;
+import jalview.datamodel.SequenceDummy;
import jalview.datamodel.SequenceI;
+import jalview.io.FileFormatException;
+import jalview.util.MapList;
import jalview.util.MapUtils;
-
-public class PAEContactMatrix implements ContactMatrixI
+import jalview.ws.dbsources.EBIAlfaFold;
+
+/**
+ * routines and class for holding predicted alignment error matrices as produced
+ * by alphafold et al.
+ *
+ * getContactList(column) returns the vector of predicted alignment errors for
+ * the reference position given by column; getElementAt(column, i) returns the
+ * predicted superposition error for the ith position when column is used as
+ * the reference.
+ *
+ * Many thanks to Ora Schueler Furman for noticing that earlier development
+ * versions did not show the PAE oriented correctly
+ *
+ * @author jprocter
+ *
+ */
+public class PAEContactMatrix extends
+ MappableContactMatrix<PAEContactMatrix> implements ContactMatrixI
{
- SequenceI refSeq = null;
-
- /**
- * the length that refSeq is expected to be (excluding gaps, of course)
- */
- int length;
-
- int maxrow = 0, maxcol = 0;
-
- int[] indices1, indices2;
-
- float[][] elements;
-
- float maxscore;
-
- private void setRefSeq(SequenceI _refSeq)
- {
- refSeq = _refSeq;
- while (refSeq.getDatasetSequence() != null)
- {
- refSeq = refSeq.getDatasetSequence();
- }
- length = _refSeq.getEnd() - _refSeq.getStart() + 1;
- }
@SuppressWarnings("unchecked")
public PAEContactMatrix(SequenceI _refSeq, Map<String, Object> pae_obj)
+ throws FileFormatException
{
setRefSeq(_refSeq);
// convert the lists to primitive arrays and store
*/
public PAEContactMatrix(SequenceI _refSeq, float[][] matrix)
{
+ mappedMatrix=new FloatContactMatrix(matrix);
setRefSeq(_refSeq);
- maxcol = 0;
- for (float[] row : matrix)
- {
- if (row.length > maxcol)
- {
- maxcol = row.length;
- }
- maxscore = row[0];
- for (float f : row)
- {
- if (maxscore < f)
- {
- maxscore = f;
- }
- }
- }
- maxrow = matrix.length;
- elements = matrix;
+ }
+
+ /**
+ * new matrix with specific mapping to a reference sequence
+ *
+ * @param newRefSeq
+ * sequence handed on to the (SequenceI, ContactMatrixI) constructor
+ * @param newFromMapList
+ * mapping stored in toSeq once the delegated constructor has run
+ * @param elements2
+ * matrix values, wrapped in a new FloatContactMatrix
+ * @param grps2
+ * group set passed to the FloatContactMatrix together with elements2
+ */
+ public PAEContactMatrix(SequenceI newRefSeq, MapList newFromMapList,
+ float[][] elements2, GroupSet grps2)
+ {
+ this(newRefSeq, new FloatContactMatrix(elements2,grps2));
+ toSeq = newFromMapList;
+ }
+ /**
+ * new matrix for a reference sequence, backed by an existing contact matrix
+ *
+ * @param _refSeq
+ * sequence passed to setRefSeq
+ * @param floatContactMatrix
+ * matrix assigned directly (not copied) to mappedMatrix
+ */
+ public PAEContactMatrix(SequenceI _refSeq,
+ ContactMatrixI floatContactMatrix)
+ {
+ mappedMatrix = floatContactMatrix;
+ setRefSeq(_refSeq);
+ }
+ /**
+ * new matrix for a reference sequence with an explicit mapping, backed by an
+ * existing contact matrix
+ *
+ * @param _refSeq
+ * sequence passed to setRefSeq
+ * @param newFromMapList
+ * mapping stored in toSeq after setRefSeq has been called
+ * @param floatContactMatrix
+ * matrix assigned directly (not copied) to mappedMatrix
+ */
+ public PAEContactMatrix(SequenceI _refSeq, MapList newFromMapList,
+ ContactMatrixI floatContactMatrix)
+ {
+ mappedMatrix = floatContactMatrix;
+ setRefSeq(_refSeq);
+ toSeq = newFromMapList;
+ }
+
+ /**
+ * Creates a PAEContactMatrix for the given reference sequence and mapping
+ * that wraps this instance's mappedMatrix object (the matrix is passed on,
+ * not copied).
+ *
+ * @param newRefSeq
+ * reference sequence for the new matrix
+ * @param newFromMapList
+ * mapping for the new matrix
+ * @return the newly constructed PAEContactMatrix
+ */
+ @Override
+ protected PAEContactMatrix newMappableContactMatrix(SequenceI newRefSeq,
+ MapList newFromMapList)
+ {
+ return new PAEContactMatrix(newRefSeq, newFromMapList, mappedMatrix);
}
/**
- * parse a sane JSON representation of the pAE
+ * parse a sane JSON representation of the pAE and update the mappedMatrix
*
* @param pae_obj
*/
@SuppressWarnings("unchecked")
private void parse_version_2_pAE(Map<String, Object> pae_obj)
{
- // this is never going to be reached by the integer rounding.. or is it ?
- maxscore = ((Double) MapUtils.getFirst(pae_obj,
- "max_predicted_aligned_error", "max_pae")).floatValue();
+ float maxscore = -1;
+ // look for a maxscore element - if there is one...
+ try
+ {
+ // this is never going to be reached by the integer rounding.. or is it ?
+ maxscore = ((Double) MapUtils.getFirst(pae_obj,
+ "max_predicted_aligned_error", "max_pae")).floatValue();
+ } catch (Throwable t)
+ {
+ // ignore if a key is not found.
+ }
List<List<Long>> scoreRows = ((List<List<Long>>) MapUtils
.getFirst(pae_obj, "predicted_aligned_error", "pae"));
- elements = new float[scoreRows.size()][scoreRows.size()];
+ float[][] elements = new float[scoreRows.size()][scoreRows.size()];
int row = 0, col = 0;
for (List<Long> scoreRow : scoreRows)
{
{
Object d = scores.next();
if (d instanceof Double)
- elements[row][col++] = ((Double) d).longValue();
+ {
+ elements[col][row] = ((Double) d).longValue();
+ }
else
- elements[row][col++] = (float) ((Long) d).longValue();
+ {
+ elements[col][row] = (float) ((Long) d).longValue();
+ }
+
+ if (maxscore < elements[col][row])
+ {
+ maxscore = elements[col][row];
+ }
+ col++;
}
row++;
col = 0;
}
- maxcol = length;
- maxrow = length;
+ mappedMatrix=new FloatContactMatrix(elements);
}
/**
// dataset refSeq
Iterator<Long> rows = ((List<Long>) pae_obj.get("residue1")).iterator();
Iterator<Long> cols = ((List<Long>) pae_obj.get("residue2")).iterator();
- Iterator<Double> scores = ((List<Double>) pae_obj.get("distance"))
- .iterator();
- // assume square matrix
- elements = new float[length][length];
- while (scores.hasNext())
+ // two pass - to allocate the elements array
+
+ int maxrow=-1,maxcol=-1;
+ while (rows.hasNext())
{
- float escore = scores.next().floatValue();
int row = rows.next().intValue();
int col = cols.next().intValue();
if (maxrow < row)
{
maxcol = col;
}
- elements[row - 1][col - 1] = escore;
- }
-
- maxscore = ((Double) MapUtils.getFirst(pae_obj,
- "max_predicted_aligned_error", "max_pae")).floatValue();
- }
- @Override
- public ContactListI getContactList(final int _column)
- {
- if (_column < 0 || _column >= elements.length)
- {
- return null;
}
-
- return new ContactListImpl(new ContactListProviderI()
+ rows = ((List<Long>) pae_obj.get("residue1")).iterator();
+ cols = ((List<Long>) pae_obj.get("residue2")).iterator();
+ Iterator<Double> scores = ((List<Double>) pae_obj.get("distance"))
+ .iterator();
+ float[][] elements = new float[maxcol][maxrow];
+ while (scores.hasNext())
{
- @Override
- public int getPosition()
- {
- return _column;
- }
-
- @Override
- public int getContactHeight()
+ float escore = scores.next().floatValue();
+ int row = rows.next().intValue();
+ int col = cols.next().intValue();
+ if (maxrow < row)
{
- return maxcol - 1;
+ maxrow = row;
}
-
- @Override
- public double getContactAt(int column)
+ if (maxcol < col)
{
- if (column < 0 || column >= elements[_column].length)
- {
- return -1;
- }
- return elements[_column][column];
+ maxcol = col;
}
- });
- }
-
- @Override
- public float getMin()
- {
- return 0;
- }
-
- @Override
- public float getMax()
- {
- return maxscore;
- }
-
- @Override
- public boolean hasReferenceSeq()
- {
- return (refSeq != null);
- }
+ elements[col - 1][row-1] = escore;
+ }
- @Override
- public SequenceI getReferenceSeq()
- {
- return refSeq;
+ mappedMatrix=new FloatContactMatrix(elements);
}
+ /**
+ * @return "Predicted Alignment Error", followed by " for " and the name of
+ * the reference sequence when refSeq is not null
+ */
@Override
public String getAnnotDescr()
{
- return "Predicted Alignment Error"+((refSeq==null) ? "" : (" for " + refSeq.getName()));
+ return "Predicted Alignment Error"
+ + ((refSeq == null) ? "" : (" for " + refSeq.getName()));
}
+ /**
+ * @return the fixed label "PAE Matrix"; the code that would append the
+ * reference sequence's display id is commented out
+ */
@Override
public String getAnnotLabel()
{
StringBuilder label = new StringBuilder("PAE Matrix");
- //if (this.getReferenceSeq() != null)
- //{
- // label.append(":").append(this.getReferenceSeq().getDisplayId(false));
- //}
+ // if (this.getReferenceSeq() != null)
+ // {
+ // label.append(":").append(this.getReferenceSeq().getDisplayId(false));
+ // }
return label.toString();
}
return PAEMATRIX;
}
- @Override
- public int getWidth()
- {
- return length;
- }
- @Override
- public int getHeight()
+ /**
+ * Checks that the named file can be opened, parsed as PAE JSON and turned
+ * into a PAEContactMatrix with a positive width.
+ *
+ * @param fileName
+ * path of the file to validate
+ * @throws FileFormatException
+ * if the file cannot be parsed, or the resulting matrix has a
+ * width of zero or less
+ * @throws IOException
+ * if the file cannot be opened
+ */
+ public static void validateContactMatrixFile(String fileName)
+ throws FileFormatException, IOException
{
- return length;
- }
- List<BitSet> groups=null;
- @Override
- public boolean hasGroups()
- {
- return groups!=null;
- }
- String newick=null;
- @Override
- public String getNewick()
- {
- return newick;
- }
- @Override
- public boolean hasTree()
- {
- return newick!=null && newick.length()>0;
- }
- boolean abs;
- double thresh;
- String treeType=null;
- public void makeGroups(float thresh,boolean abs)
- {
- AverageDistanceEngine clusterer = new AverageDistanceEngine(null, null, this);
- double height = clusterer.findHeight(clusterer.getTopNode());
- newick = new jalview.io.NewickFile(clusterer.getTopNode(),false,true).print();
- treeType = "UPGMA";
- Console.trace("Newick string\n"+newick);
-
- List<BinaryNode> nodegroups;
- if (abs ? height > thresh : 0 < thresh && thresh < 1)
+ FileInputStream infile = null;
+ try
{
- float cut = abs ? (float) (thresh / height) : thresh;
- Console.debug("Threshold "+cut+" for height="+height);
-
- nodegroups = clusterer.groupNodes(cut);
- }
- else
+ infile = new FileInputStream(new File(fileName));
+ } catch (Throwable t)
{
- nodegroups = new ArrayList<BinaryNode>();
- nodegroups.add(clusterer.getTopNode());
+ // the exception must be thrown, not just constructed, otherwise a null
+ // stream is handed to the parser below
+ throw new IOException("Couldn't open " + fileName, t);
}
- this.abs=abs;
- this.thresh=thresh;
- groups = new ArrayList<>();
- for (BinaryNode root:nodegroups)
+ JSONObject paeDict = null;
+ try
{
- BitSet gpset=new BitSet();
- for (BinaryNode leaf:clusterer.findLeaves(root))
- {
- gpset.set((Integer)leaf.element());
- }
- groups.add(gpset);
- }
- }
- @Override
- public void updateGroups(List<BitSet> colGroups)
- {
- if (colGroups!=null)
+ paeDict = EBIAlfaFold.parseJSONtoPAEContactMatrix(infile);
+ } catch (Throwable t)
{
- groups=colGroups;
- }
- }
- @Override
- public BitSet getGroupsFor(int column)
- {
- for (BitSet gp:groups) {
- if (gp.get(column))
- {
- return gp;
- }
+ // report the parse failure to the caller instead of discarding it
+ throw new FileFormatException("Couldn't parse " + fileName
+ + " as a JSON dict or array containing a dict");
}
- return ContactMatrixI.super.getGroupsFor(column);
- }
- HashMap<BitSet,Color> colorMap = new HashMap<>();
- @Override
- public Color getColourForGroup(BitSet bs)
- {
- if (bs==null) {
- return Color.white;
- }
- Color groupCol=colorMap.get(bs);
- if (groupCol==null)
+ finally
+ {
+ // infile is never null here: a failed open has already thrown above
+ try
+ {
+ infile.close();
+ } catch (IOException ignored)
+ {
+ // a failure to close does not change the validation outcome
+ }
+ }
+ PAEContactMatrix matrix = new PAEContactMatrix(
+ new SequenceDummy("Predicted"), (Map<String, Object>) paeDict);
+ if (matrix.getWidth() <= 0)
{
- return Color.white;
+ throw new FileFormatException(
+ "No data in PAE matrix read from '" + fileName + "'");
}
- return groupCol;
- }
- @Override
- public void setColorForGroup(BitSet bs,Color color)
- {
- colorMap.put(bs,color);
- }
- public void restoreGroups(List<BitSet> newgroups, String treeMethod,
- String tree, double thresh2)
- {
- treeType=treeMethod;
- groups = newgroups;
- thresh=thresh2;
- newick =tree;
-
- }
- @Override
- public boolean hasCutHeight() {
- return groups!=null && thresh!=0;
}
+ /**
+ * Delegates to the superclass implementation; no additional state is
+ * compared here.
+ */
@Override
- public double getCutHeight()
+ public boolean equals(Object obj)
{
- return thresh;
+ return super.equals(obj);
}
+ /**
+ * Delegates to the superclass implementation, matching equals(Object) which
+ * also defers to the superclass.
+ */
@Override
- public String getTreeMethod()
+ public int hashCode()
{
- return treeType;
+ return super.hashCode();
}
}