1 package jalview.ws.datamodel.alphafold;
4 import java.io.FileInputStream;
5 import java.io.IOException;
6 import java.util.Iterator;
10 import org.json.simple.JSONObject;
12 import jalview.datamodel.ContactListI;
13 import jalview.datamodel.ContactListImpl;
14 import jalview.datamodel.ContactListProviderI;
15 import jalview.datamodel.ContactMatrixI;
16 import jalview.datamodel.GroupSet;
17 import jalview.datamodel.SequenceDummy;
18 import jalview.datamodel.SequenceI;
19 import jalview.io.FileFormatException;
20 import jalview.util.MapList;
21 import jalview.util.MapUtils;
22 import jalview.ws.dbsources.EBIAlfaFold;
24 public class PAEContactMatrix extends
25 MappableContactMatrix<PAEContactMatrix> implements ContactMatrixI
// Matrix dimensions. In the code visible here maxrow is set to the number of
// rows of a supplied matrix, maxcol is compared against each row's length, and
// both are used to size the elements array in the v1 parser.
29 int maxrow = 0, maxcol = 0;
// Builds a PAE matrix from a parsed PAE JSON dictionary, choosing a parser
// according to the keys found in pae_obj.
// NOTE(review): brace-only and other short lines of this constructor are not
// visible in this view, so how _refSeq is stored cannot be seen here.
37 @SuppressWarnings("unchecked")
38 public PAEContactMatrix(SequenceI _refSeq, Map<String, Object> pae_obj)
39 throws FileFormatException
42 // convert the lists to primitive arrays and store
// MapUtils.containsAKey presumably reports whether the map holds at least one
// of the listed keys; when it does not, the v1 parser is used.
44 if (!MapUtils.containsAKey(pae_obj, "predicted_aligned_error", "pae"))
46 parse_version_1_pAE(pae_obj);
// NOTE(review): presumably the else branch of the test above - confirm against
// the full file.
51 parse_version_2_pAE(pae_obj);
56 * construct a sequence associated PAE matrix directly from a float array
// Constructs a PAE matrix directly from an existing float array.
// NOTE(review): most of the loop body and the statements that store _refSeq
// and the matrix itself are not visible in this view.
61 public PAEContactMatrix(SequenceI _refSeq, float[][] matrix)
// scan every row; maxcol is compared against each row's length
65 for (float[] row : matrix)
67 if (row.length > maxcol)
// the row count is taken from the supplied matrix
80 maxrow = matrix.length;
86 * new matrix with specific mapping to a reference sequence
89 * @param newFromMapList
// Creates a matrix for newRefSeq by delegating to the
// (SequenceI, float[][]) constructor and then installing newFromMapList as
// the toSeq mapping.
// NOTE(review): grps2 is not used on any line visible here - presumably it is
// assigned on a line missing from this view; confirm.
93 public PAEContactMatrix(SequenceI newRefSeq, MapList newFromMapList,
94 float[][] elements2, GroupSet grps2)
96 this(newRefSeq, elements2);
97 toSeq = newFromMapList;
102 * parse a sane JSON representation of the pAE
// Parses the current ("v2") PAE JSON layout: a list of score rows stored under
// "predicted_aligned_error" or "pae", with an optional maximum stored under
// "max_predicted_aligned_error" or "max_pae". Scores are copied into the
// elements array and maxscore is raised to the largest value seen.
// NOTE(review): brace-only and other short lines of this method are not
// visible in this view, including everything after the maxscore update.
106 @SuppressWarnings("unchecked")
107 private void parse_version_2_pAE(Map<String, Object> pae_obj)
110 // look for a maxscore element - if there is one...
113 // this is never going to be reached by the integer rounding.. or is it ?
114 maxscore = ((Double) MapUtils.getFirst(pae_obj,
115 "max_predicted_aligned_error", "max_pae")).floatValue();
116 } catch (Throwable t)
118 // ignore if a key is not found.
// The declared element type is Long, but the cast is unchecked; each value is
// therefore inspected as an Object below and may be a Double or a Long.
120 List<List<Long>> scoreRows = ((List<List<Long>>) MapUtils
121 .getFirst(pae_obj, "predicted_aligned_error", "pae"));
// Allocated square: the number of rows is used for both dimensions.
122 elements = new float[scoreRows.size()][scoreRows.size()];
123 int row = 0, col = 0;
124 for (List<Long> scoreRow : scoreRows)
126 Iterator<Long> scores = scoreRow.iterator();
127 while (scores.hasNext())
129 Object d = scores.next();
130 if (d instanceof Double)
// NOTE(review): longValue() drops the fractional part of a Double score before
// it is stored as a float - floatValue() may be what was intended; confirm.
132 elements[row][col++] = ((Double) d).longValue();
136 elements[row][col++] = (float) ((Long) d).longValue();
// col was post-incremented above, so col - 1 is the cell that was just written.
139 if (maxscore < elements[row][col - 1])
141 maxscore = elements[row][col - 1];
152 * The v1 PAE JSON format was retired on 28th July 2022; see
153 * https://alphafold.ebi.ac.uk/faq#:~:text=We%20updated%20the%20PAE%20JSON%20file%20format%20on%2028th%20July%202022
// Parses the older ("v1") PAE JSON layout, which stores three parallel lists
// under "residue1", "residue2" and "distance".
// NOTE(review): a number of short lines of this method are not visible in
// this view, so what the first pass does with each (row, col) pair cannot be
// seen here.
157 @SuppressWarnings("unchecked")
158 private void parse_version_1_pAE(Map<String, Object> pae_obj)
160 // assume indices are with respect to range defined by _refSeq on the
162 Iterator<Long> rows = ((List<Long>) pae_obj.get("residue1")).iterator();
163 Iterator<Long> cols = ((List<Long>) pae_obj.get("residue2")).iterator();
164 // two pass - to allocate the elements array
165 while (rows.hasNext())
167 int row = rows.next().intValue();
168 int col = cols.next().intValue();
// second pass: fresh iterators over the same index lists, read alongside the
// scores
179 rows = ((List<Long>) pae_obj.get("residue1")).iterator();
180 cols = ((List<Long>) pae_obj.get("residue2")).iterator();
181 Iterator<Double> scores = ((List<Double>) pae_obj.get("distance"))
183 elements = new float[maxrow][maxcol];
184 while (scores.hasNext())
186 float escore = scores.next().floatValue();
187 int row = rows.next().intValue();
188 int col = cols.next().intValue();
// row and col are shifted down by one before indexing, i.e. the indices read
// from the lists are treated as 1-based
197 elements[row - 1][col - 1] = escore;
// NOTE(review): if neither key is present getFirst presumably returns null and
// the floatValue() call would throw; whether this is guarded cannot be seen
// in this view.
200 maxscore = ((Double) MapUtils.getFirst(pae_obj,
201 "max_predicted_aligned_error", "max_pae")).floatValue();
// Returns a contact list for one position of the matrix, backed by an
// anonymous ContactListProviderI that reads its values from elements[column].
// NOTE(review): the return statements of the short methods below, and the
// results for out-of-range arguments, are on lines not visible in this view.
205 public ContactListI getContactList(final int column)
// positions outside the first dimension of elements are handled separately
207 if (column < 0 || column >= elements.length)
212 return new ContactListImpl(new ContactListProviderI()
215 public int getPosition()
221 public int getContactHeight()
227 public double getContactAt(int mcolumn)
// mcolumn is bounds-checked against the length of the selected row
229 if (mcolumn < 0 || mcolumn >= elements[column].length)
233 return elements[column][mcolumn];
239 protected double getElementAt(int _column, int i)
241 return elements[_column][i];
// Accessors for the score range of the matrix.
// NOTE(review): the method bodies are not visible in this view; getMax
// presumably reports maxscore, which the parsers maintain as a running
// maximum - confirm against the full file.
245 public float getMin()
251 public float getMax()
257 public String getAnnotDescr()
259 return "Predicted Alignment Error"
260 + ((refSeq == null) ? "" : (" for " + refSeq.getName()));
264 public String getAnnotLabel()
266 StringBuilder label = new StringBuilder("PAE Matrix");
267 // if (this.getReferenceSeq() != null)
269 // label.append(":").append(this.getReferenceSeq().getDisplayId(false));
271 return label.toString();
// Type identifier string for PAE matrices.
274 public static final String PAEMATRIX = "PAE_MATRIX";
// NOTE(review): the bodies of the three accessors below are not visible in
// this view; getType presumably returns PAEMATRIX - confirm.
277 public String getType()
// validateContactMatrixFile treats getWidth() <= 0 as a matrix with no data.
283 public int getWidth()
289 public int getHeight()
293 public static void validateContactMatrixFile(String fileName)
294 throws FileFormatException, IOException
296 FileInputStream infile = null;
299 infile = new FileInputStream(new File(fileName));
300 } catch (Throwable t)
302 new IOException("Couldn't open " + fileName, t);
304 JSONObject paeDict = null;
307 paeDict = EBIAlfaFold.parseJSONtoPAEContactMatrix(infile);
308 } catch (Throwable t)
310 new FileFormatException("Couldn't parse " + fileName
311 + " as a JSON dict or array containing a dict");
314 PAEContactMatrix matrix = new PAEContactMatrix(
315 new SequenceDummy("Predicted"), (Map<String, Object>) paeDict);
316 if (matrix.getWidth() <= 0)
318 throw new FileFormatException(
319 "No data in PAE matrix read from '" + fileName + "'");
323 protected PAEContactMatrix newMappableContactMatrix(SequenceI newRefSeq,
324 MapList newFromMapList)
326 PAEContactMatrix pae = new PAEContactMatrix(newRefSeq, newFromMapList,
327 elements, new GroupSet(grps));