package jalview.ws.datamodel.alphafold;
-import java.awt.Color;
-import java.io.BufferedInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
-import java.util.ArrayList;
-import java.util.BitSet;
-import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
-import java.util.Map.Entry;
import org.json.simple.JSONObject;
-import jalview.analysis.AverageDistanceEngine;
-import jalview.bin.Console;
-import jalview.datamodel.Annotation;
-import jalview.datamodel.BinaryNode;
import jalview.datamodel.ContactListI;
import jalview.datamodel.ContactListImpl;
import jalview.datamodel.ContactListProviderI;
import jalview.datamodel.ContactMatrixI;
-import jalview.datamodel.Mapping;
+import jalview.datamodel.FloatContactMatrix;
+import jalview.datamodel.GroupSet;
import jalview.datamodel.SequenceDummy;
import jalview.datamodel.SequenceI;
-import jalview.io.DataSourceType;
import jalview.io.FileFormatException;
-import jalview.io.FileParse;
import jalview.util.MapList;
import jalview.util.MapUtils;
import jalview.ws.dbsources.EBIAlfaFold;
-public class PAEContactMatrix extends MappableContactMatrix<PAEContactMatrix> implements ContactMatrixI
+/**
+ * routines and class for holding predicted alignment error matrices as produced
+ * by alphafold et al.
+ *
+ * getContactList(column) returns the vector of predicted alignment errors for
+ * the reference position given by column. getElementAt(column, i) returns the
+ * predicted superposition error for the ith position when column is used as
+ * the reference.
+ *
+ * Many thanks to Ora Schueler Furman for noticing that earlier development
+ * versions did not show the PAE oriented correctly
+ *
+ * @author jprocter
+ *
+ */
+public class PAEContactMatrix extends
+ MappableContactMatrix<PAEContactMatrix> implements ContactMatrixI
{
- int maxrow = 0, maxcol = 0;
-
- float[][] elements;
-
- float maxscore;
@SuppressWarnings("unchecked")
- public PAEContactMatrix(SequenceI _refSeq, Map<String, Object> pae_obj) throws FileFormatException
+ public PAEContactMatrix(SequenceI _refSeq, Map<String, Object> pae_obj)
+ throws FileFormatException
{
setRefSeq(_refSeq);
// convert the lists to primitive arrays and store
*/
public PAEContactMatrix(SequenceI _refSeq, float[][] matrix)
{
+ // wrap the raw array in a FloatContactMatrix and keep it as the mapped
+ // matrix before the reference sequence is set
+ mappedMatrix=new FloatContactMatrix(matrix);
setRefSeq(_refSeq);
- maxcol = 0;
- for (float[] row : matrix)
- {
- if (row.length > maxcol)
- {
- maxcol = row.length;
- }
- maxscore = row[0];
- for (float f : row)
- {
- if (maxscore < f)
- {
- maxscore = f;
- }
- }
- }
- maxrow = matrix.length;
- elements = matrix;
-
}
/**
* new matrix with specific mapping to a reference sequence
+ *
* @param newRefSeq
+ * reference sequence, handed on to the (SequenceI, ContactMatrixI)
+ * constructor
* @param newFromMapList
+ * mapping stored in toSeq once the delegated constructor returns
* @param elements2
+ * raw float matrix, wrapped together with grps2 in a new
+ * FloatContactMatrix
+ * @param grps2
+ * group set passed to the FloatContactMatrix constructor alongside
+ * elements2
*/
- public PAEContactMatrix(SequenceI newRefSeq,
- MapList newFromMapList, float[][] elements2)
+ public PAEContactMatrix(SequenceI newRefSeq, MapList newFromMapList,
+ float[][] elements2, GroupSet grps2)
{
- this(newRefSeq,elements2);
+ this(newRefSeq, new FloatContactMatrix(elements2,grps2));
toSeq = newFromMapList;
}
+ /**
+ * new matrix for a reference sequence, backed by an existing contact matrix
+ *
+ * @param _refSeq
+ * reference sequence, passed to setRefSeq
+ * @param floatContactMatrix
+ * matrix assigned to mappedMatrix as given (no copy is made here)
+ */
+ public PAEContactMatrix(SequenceI _refSeq,
+ ContactMatrixI floatContactMatrix)
+ {
+ mappedMatrix = floatContactMatrix;
+ setRefSeq(_refSeq);
+ }
+ /**
+ * new matrix for a reference sequence with an explicit mapping, backed by an
+ * existing contact matrix
+ *
+ * @param _refSeq
+ * reference sequence, passed to setRefSeq
+ * @param newFromMapList
+ * mapping assigned to toSeq after the reference sequence is set
+ * @param floatContactMatrix
+ * matrix assigned to mappedMatrix as given (no copy is made here)
+ */
+ public PAEContactMatrix(SequenceI _refSeq, MapList newFromMapList,
+ ContactMatrixI floatContactMatrix)
+ {
+ mappedMatrix = floatContactMatrix;
+ setRefSeq(_refSeq);
+ toSeq = newFromMapList;
+ }
+
+ /**
+ * create a PAEContactMatrix for another reference sequence and mapping; this
+ * instance's mappedMatrix object is passed on to the new matrix
+ *
+ * @param newRefSeq
+ * reference sequence for the new matrix
+ * @param newFromMapList
+ * mapping for the new matrix
+ * @return the newly constructed PAEContactMatrix
+ */
+ @Override
+ protected PAEContactMatrix newMappableContactMatrix(SequenceI newRefSeq,
+ MapList newFromMapList)
+ {
+ return new PAEContactMatrix(newRefSeq, newFromMapList, mappedMatrix);
+ }
+
/**
- * parse a sane JSON representation of the pAE
+ * parse a sane JSON representation of the pAE and update the mappedMatrix
*
* @param pae_obj
*/
@SuppressWarnings("unchecked")
private void parse_version_2_pAE(Map<String, Object> pae_obj)
{
- maxscore = -1;
+ float maxscore = -1;
// look for a maxscore element - if there is one...
try
{
}
List<List<Long>> scoreRows = ((List<List<Long>>) MapUtils
.getFirst(pae_obj, "predicted_aligned_error", "pae"));
- elements = new float[scoreRows.size()][scoreRows.size()];
+ float[][] elements = new float[scoreRows.size()][scoreRows.size()];
int row = 0, col = 0;
for (List<Long> scoreRow : scoreRows)
{
while (scores.hasNext())
{
Object d = scores.next();
-
if (d instanceof Double)
{
- elements[row][col++] = ((Double) d).longValue();
+ elements[col][row] = ((Double) d).longValue();
}
else
{
- elements[row][col++] = (float) ((Long) d).longValue();
+ elements[col][row] = (float) ((Long) d).longValue();
}
-
- if (maxscore < elements[row][col - 1])
+
+ if (maxscore < elements[col][row])
{
- maxscore = elements[row][col - 1];
+ maxscore = elements[col][row];
}
+ col++;
}
row++;
col = 0;
}
- maxcol = length;
- maxrow = length;
+ mappedMatrix=new FloatContactMatrix(elements);
}
/**
Iterator<Long> rows = ((List<Long>) pae_obj.get("residue1")).iterator();
Iterator<Long> cols = ((List<Long>) pae_obj.get("residue2")).iterator();
// two pass - to allocate the elements array
+
+ int maxrow=-1,maxcol=-1;
while (rows.hasNext())
{
int row = rows.next().intValue();
cols = ((List<Long>) pae_obj.get("residue2")).iterator();
Iterator<Double> scores = ((List<Double>) pae_obj.get("distance"))
.iterator();
- elements = new float[maxrow][maxcol];
+ float[][] elements = new float[maxcol][maxrow];
while (scores.hasNext())
{
float escore = scores.next().floatValue();
{
maxcol = col;
}
- elements[row - 1][col - 1] = escore;
- }
-
- maxscore = ((Double) MapUtils.getFirst(pae_obj,
- "max_predicted_aligned_error", "max_pae")).floatValue();
- }
-
- @Override
- public ContactListI getContactList(final int column)
- {
-// final int _column;
-// if (toSeq != null)
-// {
-// int[] word = toSeq.locateInTo(column, column);
-// if (word == null)
-// {
-// return null;
-// }
-// _column = word[0];
-// }
-// else
-// {
-// _column = column;
-// }
- if (column < 0 || column >= elements.length)
- {
- return null;
+ elements[col - 1][row-1] = escore;
}
- return new ContactListImpl(new ContactListProviderI()
- {
- @Override
- public int getPosition()
- {
- return column;
- }
-
- @Override
- public int getContactHeight()
- {
- return maxcol - 1;
- }
-
- @Override
- public double getContactAt(int mcolumn)
- {
- if (mcolumn < 0 || mcolumn >= elements[column].length)
- {
- return -1;
- }
- return elements[column][mcolumn];
- }
- });
- }
-
- @Override
- protected double getElementAt(int _column, int i)
- {
- return elements[_column][i];
- }
- @Override
- public float getMin()
- {
- return 0;
- }
-
- @Override
- public float getMax()
- {
- return maxscore;
+ mappedMatrix=new FloatContactMatrix(elements);
}
@Override
public String getAnnotDescr()
{
- return "Predicted Alignment Error"+((refSeq==null) ? "" : (" for " + refSeq.getName()));
+ return "Predicted Alignment Error"
+ + ((refSeq == null) ? "" : (" for " + refSeq.getName()));
}
+ /**
+ * @return the fixed label "PAE Matrix"; the commented-out code below would
+ * have appended the reference sequence's display id
+ */
@Override
public String getAnnotLabel()
{
StringBuilder label = new StringBuilder("PAE Matrix");
- //if (this.getReferenceSeq() != null)
- //{
- // label.append(":").append(this.getReferenceSeq().getDisplayId(false));
- //}
+ // if (this.getReferenceSeq() != null)
+ // {
+ // label.append(":").append(this.getReferenceSeq().getDisplayId(false));
+ // }
return label.toString();
}
return PAEMATRIX;
}
- @Override
- public int getWidth()
- {
- return length;
- }
- @Override
- public int getHeight()
- {
- return length;
- }
- List<BitSet> groups=null;
- @Override
- public boolean hasGroups()
+ /**
+ * check that the given file can be opened, parsed by
+ * EBIAlfaFold.parseJSONtoPAEContactMatrix and used to build a
+ * PAEContactMatrix of non-zero width
+ *
+ * @param fileName
+ * path of the file to validate
+ * @throws FileFormatException
+ * if the file cannot be parsed, or the resulting matrix has no data
+ * @throws IOException
+ * if the file cannot be opened
+ */
+ public static void validateContactMatrixFile(String fileName)
+ throws FileFormatException, IOException
{
- return groups!=null;
- }
- String newick=null;
- @Override
- public String getNewick()
- {
- return newick;
- }
- @Override
- public boolean hasTree()
- {
- return newick!=null && newick.length()>0;
- }
- boolean abs;
- double thresh;
- String treeType=null;
- public void makeGroups(float thresh,boolean abs)
- {
- AverageDistanceEngine clusterer = new AverageDistanceEngine(null, null, this);
- double height = clusterer.findHeight(clusterer.getTopNode());
- newick = new jalview.io.NewickFile(clusterer.getTopNode(),false,true).print();
- treeType = "UPGMA";
- Console.trace("Newick string\n"+newick);
-
- List<BinaryNode> nodegroups;
- if (abs ? height > thresh : 0 < thresh && thresh < 1)
- {
- float cut = abs ? (float) (thresh / height) : thresh;
- Console.debug("Threshold "+cut+" for height="+height);
-
- nodegroups = clusterer.groupNodes(cut);
- }
- else
+ FileInputStream infile = null;
+ try
{
- nodegroups = new ArrayList<BinaryNode>();
- nodegroups.add(clusterer.getTopNode());
- }
- this.abs=abs;
- this.thresh=thresh;
- groups = new ArrayList<>();
- for (BinaryNode root:nodegroups)
+ infile = new FileInputStream(new File(fileName));
+ } catch (Throwable t)
{
- BitSet gpset=new BitSet();
- for (BinaryNode leaf:clusterer.findLeaves(root))
- {
- gpset.set((Integer)leaf.element());
- }
- groups.add(gpset);
+ // the exception must be thrown, not just constructed, or the failure
+ // is silently dropped and parsing continues with a null stream
+ throw new IOException("Couldn't open " + fileName, t);
}
- }
- @Override
- public void updateGroups(List<BitSet> colGroups)
- {
- if (colGroups!=null)
+ JSONObject paeDict = null;
+ try
{
- groups=colGroups;
- }
- }
- @Override
- public BitSet getGroupsFor(int column)
- {
- if (groups != null)
+ paeDict = EBIAlfaFold.parseJSONtoPAEContactMatrix(infile);
+ } catch (Throwable t)
{
- for (BitSet gp : groups)
- {
- if (gp.get(column))
- {
- return gp;
- }
- }
+ // as above: throw, so a parse failure is reported to the caller
+ throw new FileFormatException("Couldn't parse " + fileName
+ + " as a JSON dict or array containing a dict");
}
+ finally
+ {
+ // release the file handle whether or not parsing succeeded
+ try
+ {
+ infile.close();
+ } catch (IOException ignored)
+ {
+ // a failed close of a read-only stream does not affect validation
+ }
+ }
- return ContactMatrixI.super.getGroupsFor(column);
- }
- HashMap<BitSet,Color> colorMap = new HashMap<>();
- @Override
- public Color getColourForGroup(BitSet bs)
- {
- if (bs==null) {
- return Color.white;
- }
- Color groupCol=colorMap.get(bs);
- if (groupCol==null)
+ PAEContactMatrix matrix = new PAEContactMatrix(
+ new SequenceDummy("Predicted"), (Map<String, Object>) paeDict);
+ if (matrix.getWidth() <= 0)
{
- return Color.white;
+ throw new FileFormatException(
+ "No data in PAE matrix read from '" + fileName + "'");
}
- return groupCol;
- }
- @Override
- public void setColorForGroup(BitSet bs,Color color)
- {
- colorMap.put(bs,color);
- }
- public void restoreGroups(List<BitSet> newgroups, String treeMethod,
- String tree, double thresh2)
- {
- treeType=treeMethod;
- groups = newgroups;
- thresh=thresh2;
- newick =tree;
-
- }
- @Override
- public boolean hasCutHeight() {
- return groups!=null && thresh!=0;
}
+ /**
+ * delegates to the superclass implementation of equals
+ */
@Override
- public double getCutHeight()
+ public boolean equals(Object obj)
{
- return thresh;
+ return super.equals(obj);
}
+ /**
+ * delegates to the superclass implementation of hashCode
+ */
@Override
- public String getTreeMethod()
+ public int hashCode()
{
- return treeType;
- }
-
- public static void validateContactMatrixFile(String fileName) throws FileFormatException,IOException
- {
- FileInputStream infile=null;
- try {
- infile = new FileInputStream(new File(fileName));
- } catch (Throwable t)
- {
- new IOException("Couldn't open "+fileName,t);
- }
-
-
- JSONObject paeDict=null;
- try {
- paeDict = EBIAlfaFold.parseJSONtoPAEContactMatrix(infile);
- } catch (Throwable t)
- {
- new FileFormatException("Couldn't parse "+fileName+" as a JSON dict or array containing a dict");
- }
-
- PAEContactMatrix matrix = new PAEContactMatrix(new SequenceDummy("Predicted"), (Map<String,Object>)paeDict);
- if (matrix.getWidth()<=0)
- {
- throw new FileFormatException("No data in PAE matrix read from '"+fileName+"'");
- }
+ return super.hashCode();
}
-
- @Override
- protected PAEContactMatrix newMappableContactMatrix(
- SequenceI newRefSeq, MapList newFromMapList)
- {
- return new PAEContactMatrix(newRefSeq, newFromMapList,
- elements);
- }
}