1 package jalview.ws.datamodel.alphafold;
4 import java.io.BufferedInputStream;
6 import java.io.FileInputStream;
7 import java.io.IOException;
8 import java.util.ArrayList;
9 import java.util.BitSet;
10 import java.util.HashMap;
11 import java.util.Iterator;
12 import java.util.List;
14 import java.util.Map.Entry;
16 import org.json.simple.JSONObject;
18 import jalview.analysis.AverageDistanceEngine;
19 import jalview.bin.Console;
20 import jalview.datamodel.Annotation;
21 import jalview.datamodel.BinaryNode;
22 import jalview.datamodel.ContactListI;
23 import jalview.datamodel.ContactListImpl;
24 import jalview.datamodel.ContactListProviderI;
25 import jalview.datamodel.ContactMatrixI;
26 import jalview.datamodel.GroupSet;
27 import jalview.datamodel.GroupSetI;
28 import jalview.datamodel.Mapping;
29 import jalview.datamodel.SequenceDummy;
30 import jalview.datamodel.SequenceI;
31 import jalview.io.DataSourceType;
32 import jalview.io.FileFormatException;
33 import jalview.io.FileParse;
34 import jalview.util.MapList;
35 import jalview.util.MapUtils;
36 import jalview.ws.dbsources.EBIAlfaFold;
// Contact matrix of predicted aligned error (PAE) scores associated with a
// reference sequence. Instances are built either from a parsed PAE JSON map
// or directly from a float[][] of scores.
38 public class PAEContactMatrix extends
39 MappableContactMatrix<PAEContactMatrix> implements ContactMatrixI
// Row and column extents of the matrix; the v1 JSON parser allocates the
// elements array as [maxrow][maxcol].
41 int maxrow = 0, maxcol = 0;
47 @SuppressWarnings("unchecked")
// Builds a PAE matrix for _refSeq from a parsed PAE JSON object.
// The JSON layout is detected by key: when neither "predicted_aligned_error"
// nor "pae" is present, the version 1 parser is used; the version 2 parser is
// the other path.
// NOTE(review): the branch keywords between the two parser calls are not
// visible in this excerpt - presumably an if/else; confirm.
48 public PAEContactMatrix(SequenceI _refSeq, Map<String, Object> pae_obj)
49 throws FileFormatException
52 // convert the lists to primitive arrays and store
54 if (!MapUtils.containsAKey(pae_obj, "predicted_aligned_error", "pae"))
56 parse_version_1_pAE(pae_obj);
61 parse_version_2_pAE(pae_obj);
66 * construct a sequence associated PAE matrix directly from a float array
// Builds a PAE matrix for _refSeq from an already-populated score array.
// Each row's length is compared with maxcol while scanning the rows, and
// maxrow is set to the number of rows.
// NOTE(review): what happens when a row is longer than maxcol, and how the
// matrix itself is stored, is not visible in this excerpt.
71 public PAEContactMatrix(SequenceI _refSeq, float[][] matrix)
75 for (float[] row : matrix)
77 if (row.length > maxcol)
90 maxrow = matrix.length;
96 * new matrix with specific mapping to a reference sequence
99 * @param newFromMapList
// Delegates to the (SequenceI, float[][]) constructor with newRefSeq and
// elements2, then records newFromMapList as this matrix's toSeq mapping.
// NOTE(review): no use of grps2 is visible in this excerpt - confirm that the
// supplied group set is actually applied to the new matrix.
103 public PAEContactMatrix(SequenceI newRefSeq, MapList newFromMapList,
104 float[][] elements2, GroupSet grps2)
106 this(newRefSeq, elements2);
107 toSeq = newFromMapList;
112 * parse a sane JSON representation of the pAE
116 @SuppressWarnings("unchecked")
// Fills elements and maxscore from the "predicted_aligned_error" (or "pae")
// entry of pae_obj, which is read as a list of rows of numeric scores.
117 private void parse_version_2_pAE(Map<String, Object> pae_obj)
120 // look for a maxscore element - if there is one...
123 // this is never going to be reached by the integer rounding.. or is it ?
// Optional maximum: any failure in this lookup or cast (including a missing
// key) lands in the catch below and is deliberately ignored.
// NOTE(review): catching Throwable also hides unrelated errors - consider
// narrowing.
124 maxscore = ((Double) MapUtils.getFirst(pae_obj,
125 "max_predicted_aligned_error", "max_pae")).floatValue();
126 } catch (Throwable t)
128 // ignore if a key is not found.
// The generic type says Long, but the runtime values may be Double as well,
// which is why each value is inspected with instanceof further down.
130 List<List<Long>> scoreRows = ((List<List<Long>>) MapUtils
131 .getFirst(pae_obj, "predicted_aligned_error", "pae"));
// The array is allocated square, sized by the number of rows.
// NOTE(review): a row holding more values than there are rows would index
// past the end of the array - confirm the input is always square.
132 elements = new float[scoreRows.size()][scoreRows.size()];
133 int row = 0, col = 0;
134 for (List<Long> scoreRow : scoreRows)
136 Iterator<Long> scores = scoreRow.iterator();
137 while (scores.hasNext())
139 Object d = scores.next();
141 if (d instanceof Double)
// NOTE(review): Double.longValue() discards the fractional part before the
// value is stored in the float array; floatValue() would keep it. Confirm
// whether this truncation is intended.
143 elements[row][col++] = ((Double) d).longValue();
147 elements[row][col++] = (float) ((Long) d).longValue();
// col has already been advanced, so col - 1 is the cell just written; keep
// maxscore at least as large as every stored value.
150 if (maxscore < elements[row][col - 1])
152 maxscore = elements[row][col - 1];
163 * v1 format got ditched 28th July 2022 see
164 * https://alphafold.ebi.ac.uk/faq#:~:text=We%20updated%20the%20PAE%20JSON%20file%20format%20on%2028th%20July%202022
168 @SuppressWarnings("unchecked")
// Fills elements and maxscore from the version 1 layout: three parallel lists
// under "residue1", "residue2" and "distance", one entry per matrix cell.
169 private void parse_version_1_pAE(Map<String, Object> pae_obj)
171 // assume indices are with respect to range defined by _refSeq on the
173 Iterator<Long> rows = ((List<Long>) pae_obj.get("residue1")).iterator();
174 Iterator<Long> cols = ((List<Long>) pae_obj.get("residue2")).iterator();
175 // two pass - to allocate the elements array
// First pass walks the index lists only. Just rows.hasNext() is tested, so the
// "residue2" list must be at least as long as "residue1".
176 while (rows.hasNext())
178 int row = rows.next().intValue();
179 int col = cols.next().intValue();
// Second pass: fresh iterators over the same index lists, now paired with the
// scores. The array allocated below is sized [maxrow][maxcol].
190 rows = ((List<Long>) pae_obj.get("residue1")).iterator();
191 cols = ((List<Long>) pae_obj.get("residue2")).iterator();
192 Iterator<Double> scores = ((List<Double>) pae_obj.get("distance"))
194 elements = new float[maxrow][maxcol];
195 while (scores.hasNext())
197 float escore = scores.next().floatValue();
198 int row = rows.next().intValue();
199 int col = cols.next().intValue();
// Indices in the file are 1-based; shift to 0-based array positions.
208 elements[row - 1][col - 1] = escore;
// NOTE(review): the version 2 parser wraps this same lookup in a catch that
// ignores a missing key; no such guard is visible here - confirm whether a
// missing max entry can fail this parse.
211 maxscore = ((Double) MapUtils.getFirst(pae_obj,
212 "max_predicted_aligned_error", "max_pae")).floatValue();
// Returns the contact list for one column of the matrix, backed by an
// anonymous ContactListProviderI that reads straight from elements[column].
// The column index is used as given; the mapping through toSeq is disabled
// (see the commented-out lines below).
216 public ContactListI getContactList(final int column)
218 // final int _column;
219 // if (toSeq != null)
221 // int[] word = toSeq.locateInTo(column, column);
226 // _column = word[0];
// Reject column indices outside the first dimension of elements.
// NOTE(review): the value returned for a rejected index is not visible in
// this excerpt.
232 if (column < 0 || column >= elements.length)
237 return new ContactListImpl(new ContactListProviderI()
240 public int getPosition()
246 public int getContactHeight()
252 public double getContactAt(int mcolumn)
// Bounds-check the second index against the length of this column's row
// before reading the score.
254 if (mcolumn < 0 || mcolumn >= elements[column].length)
258 return elements[column][mcolumn];
264 protected double getElementAt(int _column, int i)
266 return elements[_column][i];
270 public float getMin()
276 public float getMax()
282 public String getAnnotDescr()
284 return "Predicted Alignment Error"
285 + ((refSeq == null) ? "" : (" for " + refSeq.getName()));
289 public String getAnnotLabel()
291 StringBuilder label = new StringBuilder("PAE Matrix");
292 // if (this.getReferenceSeq() != null)
294 // label.append(":").append(this.getReferenceSeq().getDisplayId(false));
296 return label.toString();
299 public static final String PAEMATRIX = "PAE_MATRIX";
302 public String getType()
308 public int getWidth()
314 public int getHeight()
319 public static void validateContactMatrixFile(String fileName)
320 throws FileFormatException, IOException
322 FileInputStream infile = null;
325 infile = new FileInputStream(new File(fileName));
326 } catch (Throwable t)
328 new IOException("Couldn't open " + fileName, t);
331 JSONObject paeDict = null;
334 paeDict = EBIAlfaFold.parseJSONtoPAEContactMatrix(infile);
335 } catch (Throwable t)
337 new FileFormatException("Couldn't parse " + fileName
338 + " as a JSON dict or array containing a dict");
341 PAEContactMatrix matrix = new PAEContactMatrix(
342 new SequenceDummy("Predicted"), (Map<String, Object>) paeDict);
343 if (matrix.getWidth() <= 0)
345 throw new FileFormatException(
346 "No data in PAE matrix read from '" + fileName + "'");
// Creates a new PAEContactMatrix for newRefSeq with the mapping
// newFromMapList. This matrix's own elements array is passed to the new
// instance, together with a new GroupSet constructed from grps.
// NOTE(review): whether the elements array ends up shared or copied depends on
// the float[][] constructor, whose storage step is not visible here.
351 protected PAEContactMatrix newMappableContactMatrix(SequenceI newRefSeq,
352 MapList newFromMapList)
354 PAEContactMatrix pae = new PAEContactMatrix(newRefSeq, newFromMapList,
355 elements, new GroupSet(grps));