1 package jalview.ws.datamodel.alphafold;
4 import java.io.FileInputStream;
5 import java.io.IOException;
6 import java.util.Iterator;
10 import org.json.simple.JSONObject;
12 import jalview.datamodel.ContactListI;
13 import jalview.datamodel.ContactListImpl;
14 import jalview.datamodel.ContactListProviderI;
15 import jalview.datamodel.ContactMatrixI;
16 import jalview.datamodel.GroupSet;
17 import jalview.datamodel.SequenceDummy;
18 import jalview.datamodel.SequenceI;
19 import jalview.io.FileFormatException;
20 import jalview.util.MapList;
21 import jalview.util.MapUtils;
22 import jalview.ws.dbsources.EBIAlfaFold;
25 * routines and class for holding predicted alignment error matrices as produced
28 * getContactList(column) returns the vector of predicted alignment errors for
29 * the reference position given by column. getElementAt(column, i) returns the
30 * predicted superposition error for the ith position when column is used as
33 * Many thanks to Ora Schueler Furman for noticing that earlier development
34 * versions did not show the PAE oriented correctly
39 public class PAEContactMatrix extends
40 MappableContactMatrix<PAEContactMatrix> implements ContactMatrixI
// Matrix extents, both starting at zero. The float[][] constructor sets
// maxrow to matrix.length and compares each row's length against maxcol;
// parse_version_1_pAE uses both to size the array (new float[maxcol][maxrow]).
44 int maxrow = 0, maxcol = 0;
// Builds a PAE matrix for a reference sequence from a parsed JSON dictionary.
// The dictionary is routed to one of two parsers: when
// MapUtils.containsAKey finds neither "predicted_aligned_error" nor "pae",
// parse_version_1_pAE is called; parse_version_2_pAE handles the other case.
// Declared to throw FileFormatException.
// NOTE(review): the statement that stores _refSeq, and the else/return that
// separates the two parser calls, are not visible in this listing - confirm.
52 @SuppressWarnings("unchecked")
53 public PAEContactMatrix(SequenceI _refSeq, Map<String, Object> pae_obj)
54 throws FileFormatException
57 // convert the lists to primitive arrays and store
59 if (!MapUtils.containsAKey(pae_obj, "predicted_aligned_error", "pae"))
61 parse_version_1_pAE(pae_obj);
66 parse_version_2_pAE(pae_obj);
71 * construct a sequence associated PAE matrix directly from a float array
// Walks every row of the supplied matrix and compares its length with
// maxcol, then records the number of rows in maxrow.
// NOTE(review): presumably maxcol ends up as the widest row and the matrix
// is stored in elements; those statements are not visible here - confirm.
76 public PAEContactMatrix(SequenceI _refSeq, float[][] matrix)
80 for (float[] row : matrix)
82 if (row.length > maxcol)
95 maxrow = matrix.length;
101 * new matrix with specific mapping to a reference sequence
104 * @param newFromMapList
// Delegates to the (SequenceI, float[][]) constructor with newRefSeq and
// elements2, then stores newFromMapList in toSeq.
// NOTE(review): what happens to grps2 is not visible in this listing;
// presumably it is assigned to a group field - confirm.
108 public PAEContactMatrix(SequenceI newRefSeq, MapList newFromMapList,
109 float[][] elements2, GroupSet grps2)
111 this(newRefSeq, elements2);
112 toSeq = newFromMapList;
117 * parse a sane JSON representation of the pAE
// Reads the score rows stored under "predicted_aligned_error" or "pae" and
// copies them into a freshly allocated square float array whose side is the
// number of rows. Values are written as elements[col][row], i.e. the outer
// list index becomes the second array index. maxscore is raised whenever a
// stored value exceeds it.
// NOTE(review): the statements that advance row and col are not visible in
// this listing. Rows longer than scoreRows.size() would overrun the square
// allocation - confirm the input is always square.
121 @SuppressWarnings("unchecked")
122 private void parse_version_2_pAE(Map<String, Object> pae_obj)
125 // look for a maxscore element - if there is one...
128 // this is never going to be reached by the integer rounding.. or is it ?
// Best effort: a missing key (null from getFirst) or a non-Double value
// fails here and is deliberately ignored by the catch below.
129 maxscore = ((Double) MapUtils.getFirst(pae_obj,
130 "max_predicted_aligned_error", "max_pae")).floatValue();
131 } catch (Throwable t)
133 // ignore if a key is not found.
// The declared element type is Long, but entries are handled as Object
// further down because a parsed value may also be a Double.
135 List<List<Long>> scoreRows = ((List<List<Long>>) MapUtils
136 .getFirst(pae_obj, "predicted_aligned_error", "pae"));
137 elements = new float[scoreRows.size()][scoreRows.size()];
138 int row = 0, col = 0;
139 for (List<Long> scoreRow : scoreRows)
141 Iterator<Long> scores = scoreRow.iterator();
142 while (scores.hasNext())
144 Object d = scores.next();
145 if (d instanceof Double)
// NOTE(review): longValue() drops the fractional part of a Double score
// before it is stored in the float matrix; floatValue() would keep it.
// Confirm whether this truncation is intended.
147 elements[col][row] = ((Double) d).longValue();
151 elements[col][row] = (float) ((Long) d).longValue();
154 if (maxscore < elements[col][row])
156 maxscore = elements[col][row];
168 * v1 format got ditched 28th July 2022 see
169 * https://alphafold.ebi.ac.uk/faq#:~:text=We%20updated%20the%20PAE%20JSON%20file%20format%20on%2028th%20July%202022
// Parses the older layout, which stores three parallel lists: "residue1",
// "residue2" and "distance". The index lists are walked twice: a first pass
// before the array is allocated, and a second pass that copies each score.
// NOTE(review): the body of the first pass is not visible in this listing;
// presumably it derives maxrow and maxcol from the indices - confirm.
173 @SuppressWarnings("unchecked")
174 private void parse_version_1_pAE(Map<String, Object> pae_obj)
176 // assume indices are with respect to range defined by _refSeq on the
178 Iterator<Long> rows = ((List<Long>) pae_obj.get("residue1")).iterator();
179 Iterator<Long> cols = ((List<Long>) pae_obj.get("residue2")).iterator();
180 // two pass - to allocate the elements array
181 while (rows.hasNext())
183 int row = rows.next().intValue();
184 int col = cols.next().intValue();
// Iterators are single use, so fresh ones are taken for the second pass.
195 rows = ((List<Long>) pae_obj.get("residue1")).iterator();
196 cols = ((List<Long>) pae_obj.get("residue2")).iterator();
197 Iterator<Double> scores = ((List<Double>) pae_obj.get("distance"))
199 elements = new float[maxcol][maxrow];
200 while (scores.hasNext())
202 float escore = scores.next().floatValue();
203 int row = rows.next().intValue();
204 int col = cols.next().intValue();
// The subtraction of one maps the file's indices onto zero-based array
// slots, so the indices in the file are taken to start at 1.
213 elements[col - 1][row-1] = escore;
// NOTE(review): no guard is visible around this lookup; if neither key is
// present the cast result is null and floatValue() fails - confirm.
216 maxscore = ((Double) MapUtils.getFirst(pae_obj,
217 "max_predicted_aligned_error", "max_pae")).floatValue();
221 * getContactList(column) @returns the vector of predicted alignment errors
222 * for reference position given by column
// Wraps one column of elements in a ContactListImpl backed by an anonymous
// ContactListProviderI. The column index is range-checked against
// elements.length first, and getContactAt range-checks its own argument
// against the length of that column before reading elements[column][mcolumn].
// NOTE(review): the values returned when either check fails, and the bodies
// of getPosition and getContactHeight, are not visible in this listing.
225 public ContactListI getContactList(final int column)
227 if (column < 0 || column >= elements.length)
232 return new ContactListImpl(new ContactListProviderI()
235 public int getPosition()
241 public int getContactHeight()
247 public double getContactAt(int mcolumn)
249 if (mcolumn < 0 || mcolumn >= elements[column].length)
253 return elements[column][mcolumn];
259 * getElementAt(column, i) @returns the predicted superposition error for the
260 * ith position when column is used as reference
263 protected double getElementAt(int _column, int i)
265 return elements[_column][i];
// NOTE(review): only the declaration is present in this listing. Presumably
// reports the lower bound of the stored scores - confirm against the body.
269 public float getMin()
// NOTE(review): only the declaration is present in this listing. Presumably
// reports maxscore, which both parsers assign - confirm against the body.
275 public float getMax()
281 public String getAnnotDescr()
283 return "Predicted Alignment Error"
284 + ((refSeq == null) ? "" : (" for " + refSeq.getName()));
288 public String getAnnotLabel()
290 StringBuilder label = new StringBuilder("PAE Matrix");
291 // if (this.getReferenceSeq() != null)
293 // label.append(":").append(this.getReferenceSeq().getDisplayId(false));
295 return label.toString();
// Public string constant whose value is "PAE_MATRIX".
// NOTE(review): presumably the type identifier returned by getType() - confirm.
298 public static final String PAEMATRIX = "PAE_MATRIX";
// NOTE(review): only the declaration is present in this listing. Presumably
// returns the PAEMATRIX constant - confirm against the body.
301 public String getType()
// validateContactMatrixFile treats a result of zero or less as "no data".
// NOTE(review): only the declaration is present in this listing; which
// extent is reported is not visible - confirm against the body.
307 public int getWidth()
// NOTE(review): only the declaration is present in this listing; which
// extent is reported is not visible - confirm against the body.
313 public int getHeight()
317 public static void validateContactMatrixFile(String fileName)
318 throws FileFormatException, IOException
320 FileInputStream infile = null;
323 infile = new FileInputStream(new File(fileName));
324 } catch (Throwable t)
326 new IOException("Couldn't open " + fileName, t);
328 JSONObject paeDict = null;
331 paeDict = EBIAlfaFold.parseJSONtoPAEContactMatrix(infile);
332 } catch (Throwable t)
334 new FileFormatException("Couldn't parse " + fileName
335 + " as a JSON dict or array containing a dict");
338 PAEContactMatrix matrix = new PAEContactMatrix(
339 new SequenceDummy("Predicted"), (Map<String, Object>) paeDict);
340 if (matrix.getWidth() <= 0)
342 throw new FileFormatException(
343 "No data in PAE matrix read from '" + fileName + "'");
347 protected PAEContactMatrix newMappableContactMatrix(SequenceI newRefSeq,
348 MapList newFromMapList)
350 PAEContactMatrix pae = new PAEContactMatrix(newRefSeq, newFromMapList,
351 elements, new GroupSet(grps));