package jalview.ws.datamodel.alphafold;

import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.util.Iterator;
import java.util.List;
import java.util.Map;

import org.json.simple.JSONObject;

import jalview.datamodel.ContactListI;
import jalview.datamodel.ContactListImpl;
import jalview.datamodel.ContactListProviderI;
import jalview.datamodel.ContactMatrixI;
import jalview.datamodel.FloatContactMatrix;
import jalview.datamodel.GroupSet;
import jalview.datamodel.SequenceDummy;
import jalview.datamodel.SequenceI;
import jalview.io.FileFormatException;
import jalview.util.MapList;
import jalview.util.MapUtils;
import jalview.ws.dbsources.EBIAlfaFold;

/**
 * Routines and class for holding predicted alignment error (PAE) matrices as
 * produced by AlphaFold et al.
 * <ul>
 * <li>getContactList(column) returns the vector of predicted alignment errors
 * for the reference position given by column</li>
 * <li>getElementAt(column, i) returns the predicted superposition error for
 * the ith position when column is used as reference</li>
 * </ul>
 *
 * Many thanks to Ora Schueler Furman for noticing that earlier development
 * versions did not show the PAE oriented correctly
 *
 * @author jprocter
 *
 */
public class PAEContactMatrix extends MappableContactMatrix
        implements ContactMatrixI
{
  /**
   * Type string returned by {@link #getType()}
   */
  public static final String PAEMATRIX = "PAE_MATRIX";

  /**
   * Construct a PAE matrix for a reference sequence from a parsed PAE JSON
   * dictionary. The current format is used when the dictionary has a
   * "predicted_aligned_error" or "pae" key; otherwise the dictionary is read
   * as the legacy (version 1) format.
   *
   * @param _refSeq
   *          sequence the matrix is associated with
   * @param pae_obj
   *          parsed JSON dictionary holding the PAE data
   * @throws FileFormatException
   *           if the dictionary has neither the current nor the legacy keys
   */
  public PAEContactMatrix(SequenceI _refSeq, Map<String, Object> pae_obj)
          throws FileFormatException
  {
    setRefSeq(_refSeq);
    // convert the lists to primitive arrays and store
    if (MapUtils.containsAKey(pae_obj, "predicted_aligned_error", "pae"))
    {
      parse_version_2_pAE(pae_obj);
    }
    else
    {
      parse_version_1_pAE(pae_obj);
    }
  }

  /**
   * construct a sequence associated PAE matrix directly from a float array
   *
   * @param _refSeq
   * @param matrix
   */
  public PAEContactMatrix(SequenceI _refSeq, float[][] matrix)
  {
    mappedMatrix = new FloatContactMatrix(matrix);
    setRefSeq(_refSeq);
  }

  /**
   * new matrix with specific mapping to a reference sequence
   *
   * @param newRefSeq
   * @param newFromMapList
   * @param elements2
   * @param grps2
   */
  public PAEContactMatrix(SequenceI newRefSeq, MapList newFromMapList,
          float[][] elements2, GroupSet grps2)
  {
    this(newRefSeq, new FloatContactMatrix(elements2, grps2));
    toSeq = newFromMapList;
  }

  /**
   * construct a sequence associated PAE matrix that wraps an existing contact
   * matrix
   *
   * @param _refSeq
   * @param floatContactMatrix
   */
  public PAEContactMatrix(SequenceI _refSeq,
          ContactMatrixI floatContactMatrix)
  {
    mappedMatrix = floatContactMatrix;
    setRefSeq(_refSeq);
  }

  /**
   * construct a sequence associated PAE matrix that wraps an existing contact
   * matrix and uses the given mapping to the reference sequence
   *
   * @param _refSeq
   * @param newFromMapList
   * @param floatContactMatrix
   */
  public PAEContactMatrix(SequenceI _refSeq, MapList newFromMapList,
          ContactMatrixI floatContactMatrix)
  {
    mappedMatrix = floatContactMatrix;
    setRefSeq(_refSeq);
    toSeq = newFromMapList;
  }

  /**
   * @return a new PAEContactMatrix that shares this instance's mappedMatrix
   *         but has the given reference sequence and mapping
   */
  @Override
  protected PAEContactMatrix newMappableContactMatrix(SequenceI newRefSeq,
          MapList newFromMapList)
  {
    return new PAEContactMatrix(newRefSeq, newFromMapList, mappedMatrix);
  }

  /**
   * parse a sane JSON representation of the pAE (a list of rows of numbers
   * under "predicted_aligned_error" or "pae") and update the mappedMatrix
   *
   * @param pae_obj
   */
  @SuppressWarnings("unchecked")
  private void parse_version_2_pAE(Map<String, Object> pae_obj)
  {
    List<List<Number>> scoreRows = (List<List<Number>>) MapUtils
            .getFirst(pae_obj, "predicted_aligned_error", "pae");
    int size = scoreRows.size();
    float[][] elements = new float[size][size];
    int row = 0;
    for (List<Number> scoreRow : scoreRows)
    {
      int col = 0;
      for (Number score : scoreRow)
      {
        // JSON numbers may arrive as Long or Double: floatValue() handles
        // both and keeps any fractional part of the score.
        // The [col][row] order is deliberate - see the class comment about
        // PAE orientation.
        elements[col][row] = score.floatValue();
        col++;
      }
      row++;
    }
    mappedMatrix = new FloatContactMatrix(elements);
  }

  /**
   * v1 format got ditched 28th July 2022 see
   * https://alphafold.ebi.ac.uk/faq#:~:text=We%20updated%20the%20PAE%20JSON%20file%20format%20on%2028th%20July%202022
   *
   * Reads three parallel lists - "residue1", "residue2" and "distance" - and
   * updates the mappedMatrix.
   *
   * @param pae_obj
   * @throws FileFormatException
   *           if any of the three lists is missing
   */
  @SuppressWarnings("unchecked")
  private void parse_version_1_pAE(Map<String, Object> pae_obj)
          throws FileFormatException
  {
    List<Number> residue1 = (List<Number>) pae_obj.get("residue1");
    List<Number> residue2 = (List<Number>) pae_obj.get("residue2");
    List<Number> distance = (List<Number>) pae_obj.get("distance");
    if (residue1 == null || residue2 == null || distance == null)
    {
      // report unrecognised input as a format problem rather than letting a
      // NullPointerException escape
      throw new FileFormatException(
              "PAE data has neither a 'predicted_aligned_error' or 'pae' matrix, nor 'residue1', 'residue2' and 'distance' lists");
    }

    // assume indices are with respect to range defined by _refSeq on the
    // dataset refSeq

    // first pass - find the largest indices in order to allocate the
    // elements array
    int maxrow = -1, maxcol = -1;
    Iterator<Number> rows = residue1.iterator();
    Iterator<Number> cols = residue2.iterator();
    while (rows.hasNext())
    {
      maxrow = Math.max(maxrow, rows.next().intValue());
      maxcol = Math.max(maxcol, cols.next().intValue());
    }

    // second pass - store each score; residue indices are 1-based
    float[][] elements = new float[maxcol][maxrow];
    rows = residue1.iterator();
    cols = residue2.iterator();
    Iterator<Number> scores = distance.iterator();
    while (scores.hasNext())
    {
      float escore = scores.next().floatValue();
      int row = rows.next().intValue();
      int col = cols.next().intValue();
      elements[col - 1][row - 1] = escore;
    }

    mappedMatrix = new FloatContactMatrix(elements);
  }

  /**
   * @return "Predicted Alignment Error", followed by " for " and the
   *         reference sequence's name when there is a reference sequence
   */
  @Override
  public String getAnnotDescr()
  {
    return "Predicted Alignment Error"
            + ((refSeq == null) ? "" : (" for " + refSeq.getName()));
  }

  /**
   * @return the fixed label "PAE Matrix" (the reference sequence's display id
   *         is currently not appended)
   */
  @Override
  public String getAnnotLabel()
  {
    return "PAE Matrix";
  }

  /**
   * @return {@link #PAEMATRIX}
   */
  @Override
  public String getType()
  {
    return PAEMATRIX;
  }

  /**
   * Check that a file can be opened, parsed as PAE JSON and turned into a
   * matrix with a positive width.
   *
   * @param fileName
   *          path of the file to check
   * @throws FileFormatException
   *           if the file cannot be parsed, or yields no PAE data
   * @throws IOException
   *           if the file cannot be opened
   */
  @SuppressWarnings("unchecked")
  public static void validateContactMatrixFile(String fileName)
          throws FileFormatException, IOException
  {
    FileInputStream infile;
    try
    {
      infile = new FileInputStream(new File(fileName));
    } catch (Exception e)
    {
      throw new IOException("Couldn't open " + fileName, e);
    }

    String parseFailure = "Couldn't parse " + fileName
            + " as a JSON dict or array containing a dict";
    JSONObject paeDict;
    // try-with-resources makes sure the stream is closed whether or not
    // parsing succeeds
    try (FileInputStream in = infile)
    {
      paeDict = EBIAlfaFold.parseJSONtoPAEContactMatrix(in);
    } catch (Exception e)
    {
      FileFormatException parseError = new FileFormatException(
              parseFailure);
      parseError.initCause(e);
      throw parseError;
    }
    if (paeDict == null)
    {
      throw new FileFormatException(parseFailure);
    }

    PAEContactMatrix matrix = new PAEContactMatrix(
            new SequenceDummy("Predicted"), (Map<String, Object>) paeDict);
    if (matrix.getWidth() <= 0)
    {
      throw new FileFormatException(
              "No data in PAE matrix read from '" + fileName + "'");
    }
  }

  @Override
  public boolean equals(Object obj)
  {
    return super.equals(obj);
  }

  @Override
  public int hashCode()
  {
    return super.hashCode();
  }
}