package jalview.ws.datamodel.alphafold;
-import java.awt.Color;
-import java.io.BufferedInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
-import java.util.ArrayList;
-import java.util.BitSet;
-import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
-import java.util.Map.Entry;
import org.json.simple.JSONObject;
-import jalview.analysis.AverageDistanceEngine;
-import jalview.bin.Console;
-import jalview.datamodel.Annotation;
-import jalview.datamodel.BinaryNode;
import jalview.datamodel.ContactListI;
import jalview.datamodel.ContactListImpl;
import jalview.datamodel.ContactListProviderI;
import jalview.datamodel.ContactMatrixI;
-import jalview.datamodel.Mapping;
+import jalview.datamodel.GroupSet;
import jalview.datamodel.SequenceDummy;
import jalview.datamodel.SequenceI;
-import jalview.io.DataSourceType;
import jalview.io.FileFormatException;
-import jalview.io.FileParse;
import jalview.util.MapList;
import jalview.util.MapUtils;
import jalview.ws.dbsources.EBIAlfaFold;
-public class PAEContactMatrix extends MappableContactMatrix<PAEContactMatrix> implements ContactMatrixI
+/**
+ * Routines and class for holding predicted alignment error (PAE) matrices as
+ * produced by AlphaFold et al.
+ *
+ * getContactList(column) returns the vector of predicted alignment errors for
+ * the reference position given by column.
+ *
+ * getElementAt(column, i) returns the predicted superposition error for the
+ * ith position when column is used as the reference.
+ *
+ * Many thanks to Ora Schueler Furman for noticing that earlier development
+ * versions did not show the PAE oriented correctly.
+ *
+ * @author jprocter
+ *
+ */
+public class PAEContactMatrix extends
+ MappableContactMatrix<PAEContactMatrix> implements ContactMatrixI
{
+
+
int maxrow = 0, maxcol = 0;
+
float[][] elements;
float maxscore;
@SuppressWarnings("unchecked")
- public PAEContactMatrix(SequenceI _refSeq, Map<String, Object> pae_obj) throws FileFormatException
+ public PAEContactMatrix(SequenceI _refSeq, Map<String, Object> pae_obj)
+ throws FileFormatException
{
setRefSeq(_refSeq);
// convert the lists to primitive arrays and store
/**
* new matrix with specific mapping to a reference sequence
+ *
* @param newRefSeq
* @param newFromMapList
* @param elements2
+ * @param grps2 group set stored in this matrix's grps field
*/
- public PAEContactMatrix(SequenceI newRefSeq,
- MapList newFromMapList, float[][] elements2)
+ public PAEContactMatrix(SequenceI newRefSeq, MapList newFromMapList,
+ float[][] elements2, GroupSet grps2)
{
- this(newRefSeq,elements2);
+ this(newRefSeq, elements2);
toSeq = newFromMapList;
+ grps = grps2;
}
/**
while (scores.hasNext())
{
Object d = scores.next();
-
if (d instanceof Double)
{
- elements[row][col++] = ((Double) d).longValue();
+ elements[col][row] = ((Double) d).longValue();
}
else
{
- elements[row][col++] = (float) ((Long) d).longValue();
+ elements[col][row] = (float) ((Long) d).longValue();
}
-
- if (maxscore < elements[row][col - 1])
+
+ if (maxscore < elements[col][row])
{
- maxscore = elements[row][col - 1];
+ maxscore = elements[col][row];
}
+ col++;
}
row++;
col = 0;
cols = ((List<Long>) pae_obj.get("residue2")).iterator();
Iterator<Double> scores = ((List<Double>) pae_obj.get("distance"))
.iterator();
- elements = new float[maxrow][maxcol];
+ elements = new float[maxcol][maxrow];
while (scores.hasNext())
{
float escore = scores.next().floatValue();
{
maxcol = col;
}
- elements[row - 1][col - 1] = escore;
+ elements[col - 1][row-1] = escore;
}
maxscore = ((Double) MapUtils.getFirst(pae_obj,
"max_predicted_aligned_error", "max_pae")).floatValue();
}
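+ /**
+ * Returns the vector of predicted alignment errors for the reference
+ * position given by column.
+ *
+ * @param column index of the reference position in the matrix
+ * @return the contact list for column, or null when column is negative or
+ * not less than elements.length
+ */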
@Override
public ContactListI getContactList(final int column)
{
- final int _column;
- if (toSeq != null)
- {
- int[] word = toSeq.locateInTo(column, column);
- if (word == null)
- {
- return null;
- }
- _column = word[0];
- }
- else
- {
- _column = column;
- }
- if (_column < 0 || _column >= elements.length)
+ if (column < 0 || column >= elements.length)
{
return null;
}
@Override
public int getPosition()
{
- return _column;
+ return column;
}
@Override
@Override
public double getContactAt(int mcolumn)
{
- int[] column=(toSeq==null) ? new int[] {mcolumn} : toSeq.locateInTo(mcolumn,mcolumn);
- if (column==null || column[0] < 0 || column[0] >= elements[_column].length)
+ if (mcolumn < 0 || mcolumn >= elements[column].length)
{
return -1;
}
- return elements[_column][column[0]];
+ return elements[column][mcolumn];
}
});
}
+ /**
+ * Returns the predicted superposition error for the ith position when
+ * _column is used as the reference, read from elements[_column][i].
+ *
+ * @param _column index of the reference position
+ * @param i index of the position whose error is wanted
+ * @return the stored value; no bounds checking is performed here
+ */
@Override
protected double getElementAt(int _column, int i)
{
return elements[_column][i];
}
+
@Override
public float getMin()
{
+ /**
+ * Builds the annotation description: the fixed text, followed by " for " and
+ * the reference sequence's name when a reference sequence is set.
+ */
@Override
public String getAnnotDescr()
{
- return "Predicted Alignment Error"+((refSeq==null) ? "" : (" for " + refSeq.getName()));
+ String descr = "Predicted Alignment Error";
+ if (refSeq != null)
+ {
+ descr = descr + " for " + refSeq.getName();
+ }
+ return descr;
}
+ /**
+ * Returns the fixed label "PAE Matrix". The code that would append the
+ * reference sequence's display id is commented out below.
+ */
@Override
public String getAnnotLabel()
{
StringBuilder label = new StringBuilder("PAE Matrix");
- //if (this.getReferenceSeq() != null)
- //{
- // label.append(":").append(this.getReferenceSeq().getDisplayId(false));
- //}
+ // if (this.getReferenceSeq() != null)
+ // {
+ // label.append(":").append(this.getReferenceSeq().getDisplayId(false));
+ // }
return label.toString();
}
+ /**
+ * @return the value of maxcol (this replaces the earlier use of length)
+ */
@Override
public int getWidth()
{
- return length;
+ return maxcol;
}
+ /**
+ * @return the value of maxrow (this replaces the earlier use of length)
+ */
@Override
public int getHeight()
{
- return length;
- }
- List<BitSet> groups=null;
- @Override
- public boolean hasGroups()
- {
- return groups!=null;
- }
- String newick=null;
- @Override
- public String getNewick()
- {
- return newick;
- }
- @Override
- public boolean hasTree()
- {
- return newick!=null && newick.length()>0;
- }
- boolean abs;
- double thresh;
- String treeType=null;
- public void makeGroups(float thresh,boolean abs)
- {
- AverageDistanceEngine clusterer = new AverageDistanceEngine(null, null, this);
- double height = clusterer.findHeight(clusterer.getTopNode());
- newick = new jalview.io.NewickFile(clusterer.getTopNode(),false,true).print();
- treeType = "UPGMA";
- Console.trace("Newick string\n"+newick);
-
- List<BinaryNode> nodegroups;
- if (abs ? height > thresh : 0 < thresh && thresh < 1)
- {
- float cut = abs ? (float) (thresh / height) : thresh;
- Console.debug("Threshold "+cut+" for height="+height);
-
- nodegroups = clusterer.groupNodes(cut);
- }
- else
- {
- nodegroups = new ArrayList<BinaryNode>();
- nodegroups.add(clusterer.getTopNode());
- }
- this.abs=abs;
- this.thresh=thresh;
- groups = new ArrayList<>();
- for (BinaryNode root:nodegroups)
- {
- BitSet gpset=new BitSet();
- for (BinaryNode leaf:clusterer.findLeaves(root))
- {
- gpset.set((Integer)leaf.element());
- }
- groups.add(gpset);
- }
- }
- @Override
- public void updateGroups(List<BitSet> colGroups)
- {
- if (colGroups!=null)
- {
- groups=colGroups;
- }
+ return maxrow;
}
- @Override
- public BitSet getGroupsFor(int column)
+ /**
+ * Checks that the given file can be opened, parsed with
+ * EBIAlfaFold.parseJSONtoPAEContactMatrix and turned into a PAEContactMatrix
+ * whose width is greater than zero.
+ *
+ * @param fileName path of the file to check
+ * @throws IOException if the file cannot be opened
+ * @throws FileFormatException if the content cannot be parsed, or the
+ * resulting matrix has no data
+ */
+ public static void validateContactMatrixFile(String fileName)
+ throws FileFormatException, IOException
{
- if (groups != null)
- {
- for (BitSet gp : groups)
- {
- if (gp.get(column))
- {
- return gp;
- }
- }
- }
- return ContactMatrixI.super.getGroupsFor(column);
- }
-
- HashMap<BitSet,Color> colorMap = new HashMap<>();
- @Override
- public Color getColourForGroup(BitSet bs)
- {
- if (bs==null) {
- return Color.white;
- }
- Color groupCol=colorMap.get(bs);
- if (groupCol==null)
+ FileInputStream infile = null;
+ try
{
- return Color.white;
- }
- return groupCol;
- }
- @Override
- public void setColorForGroup(BitSet bs,Color color)
- {
- colorMap.put(bs,color);
- }
- public void restoreGroups(List<BitSet> newgroups, String treeMethod,
- String tree, double thresh2)
- {
- treeType=treeMethod;
- groups = newgroups;
- thresh=thresh2;
- newick =tree;
-
- }
- @Override
- public boolean hasCutHeight() {
- return groups!=null && thresh!=0;
- }
- @Override
- public double getCutHeight()
- {
- return thresh;
- }
- @Override
- public String getTreeMethod()
- {
- return treeType;
- }
-
- public static void validateContactMatrixFile(String fileName) throws FileFormatException,IOException
- {
- FileInputStream infile=null;
- try {
infile = new FileInputStream(new File(fileName));
} catch (Throwable t)
{
- new IOException("Couldn't open "+fileName,t);
+ // the exception was previously constructed but never thrown, so a
+ // missing file went on to the parser with a null stream
+ throw new IOException("Couldn't open " + fileName, t);
}
-
-
- JSONObject paeDict=null;
- try {
+ JSONObject paeDict = null;
+ try
+ {
paeDict = EBIAlfaFold.parseJSONtoPAEContactMatrix(infile);
} catch (Throwable t)
{
- new FileFormatException("Couldn't parse "+fileName+" as a JSON dict or array containing a dict");
+ // as above: report the parse failure instead of discarding it
+ throw new FileFormatException("Couldn't parse " + fileName
+ + " as a JSON dict or array containing a dict");
}
+ finally
+ {
+ try
+ {
+ // release the file handle whether or not parsing succeeded
+ infile.close();
+ } catch (IOException ignored)
+ {
+ // best effort: the parse outcome is what the caller needs to see
+ }
+ }
-
- PAEContactMatrix matrix = new PAEContactMatrix(new SequenceDummy("Predicted"), (Map<String,Object>)paeDict);
- if (matrix.getWidth()<=0)
+
+ PAEContactMatrix matrix = new PAEContactMatrix(
+ new SequenceDummy("Predicted"), (Map<String, Object>) paeDict);
+ if (matrix.getWidth() <= 0)
{
- throw new FileFormatException("No data in PAE matrix read from '"+fileName+"'");
+ throw new FileFormatException(
+ "No data in PAE matrix read from '" + fileName + "'");
}
}
-
+ /**
+ * Creates a PAEContactMatrix for newRefSeq with the given mapping, passing
+ * this matrix's elements array and a new GroupSet built from grps.
+ *
+ * @param newRefSeq reference sequence for the new matrix
+ * @param newFromMapList mapping handed to the new matrix
+ * @return the newly constructed matrix
+ */
@Override
- protected PAEContactMatrix newMappableContactMatrix(
- SequenceI newRefSeq, MapList newFromMapList)
+ protected PAEContactMatrix newMappableContactMatrix(SequenceI newRefSeq,
+ MapList newFromMapList)
{
- return new PAEContactMatrix(newRefSeq, newFromMapList,
- elements);
- }
+ GroupSet copiedGroups = new GroupSet(grps);
+ return new PAEContactMatrix(newRefSeq, newFromMapList, elements,
+ copiedGroups);
+ }
}