1 package jalview.ws.datamodel.alphafold;
4 import java.io.FileInputStream;
5 import java.io.IOException;
6 import java.util.Iterator;
10 import org.json.simple.JSONObject;
12 import jalview.datamodel.ContactListI;
13 import jalview.datamodel.ContactListImpl;
14 import jalview.datamodel.ContactListProviderI;
15 import jalview.datamodel.ContactMatrixI;
16 import jalview.datamodel.FloatContactMatrix;
17 import jalview.datamodel.GroupSet;
18 import jalview.datamodel.SequenceDummy;
19 import jalview.datamodel.SequenceI;
20 import jalview.io.FileFormatException;
21 import jalview.util.MapList;
22 import jalview.util.MapUtils;
23 import jalview.ws.dbsources.EBIAlfaFold;
26 * routines and class for holding predicted alignment error matrices as produced
29 * getContactList(column) returns the vector of predicted alignment errors for
30 * reference position given by column. getElementAt(column, i) returns the
31 * predicted superposition error for the ith position when column is used as
34 * Many thanks to Ora Schueler Furman for noticing that earlier development
35 * versions did not show the PAE oriented correctly
40 public class PAEContactMatrix extends
41 MappableContactMatrix<PAEContactMatrix> implements ContactMatrixI
// Constructor: builds a PAE matrix from an already-parsed PAE JSON object
// (pae_obj). The layout is chosen by key: when MapUtils.containsAKey reports
// that neither "predicted_aligned_error" nor "pae" is available, the version 1
// parser is called; the version 2 parser call on the last line is presumably
// the alternative branch (the branch keywords are not visible in this view -
// confirm).
// NOTE(review): how _refSeq is stored is not visible in this view.
44 @SuppressWarnings("unchecked")
45 public PAEContactMatrix(SequenceI _refSeq, Map<String, Object> pae_obj)
46 throws FileFormatException
49 // convert the lists to primitive arrays and store
51 if (!MapUtils.containsAKey(pae_obj, "predicted_aligned_error", "pae"))
53 parse_version_1_pAE(pae_obj);
58 parse_version_2_pAE(pae_obj);
63 * construct a sequence associated PAE matrix directly from a float array
// Constructor taking the scores as a float[][]: the array is wrapped in a new
// FloatContactMatrix, which is stored as mappedMatrix.
// NOTE(review): the handling of _refSeq is not visible in this view.
68 public PAEContactMatrix(SequenceI _refSeq, float[][] matrix)
70 mappedMatrix = new FloatContactMatrix(matrix);
75 * new matrix with specific mapping to a reference sequence
78 * @param newFromMapList
// Constructor: wraps elements2 and grps2 in a new FloatContactMatrix, passes it
// with newRefSeq to another two-argument constructor of this class, and then
// stores newFromMapList in toSeq.
82 public PAEContactMatrix(SequenceI newRefSeq, MapList newFromMapList,
83 float[][] elements2, GroupSet grps2)
85 this(newRefSeq, new FloatContactMatrix(elements2, grps2));
86 toSeq = newFromMapList;
// Constructor taking an existing ContactMatrixI: the given matrix object is
// stored directly as mappedMatrix (no copy is made on the visible line).
// NOTE(review): the handling of _refSeq is not visible in this view.
89 public PAEContactMatrix(SequenceI _refSeq,
90 ContactMatrixI floatContactMatrix)
92 mappedMatrix = floatContactMatrix;
// Constructor taking an existing ContactMatrixI plus a MapList: the matrix
// object is stored directly as mappedMatrix and newFromMapList is stored in
// toSeq.
// NOTE(review): the handling of _refSeq is not visible in this view.
96 public PAEContactMatrix(SequenceI _refSeq, MapList newFromMapList,
97 ContactMatrixI floatContactMatrix)
99 mappedMatrix = floatContactMatrix;
101 toSeq = newFromMapList;
105 protected PAEContactMatrix newMappableContactMatrix(SequenceI newRefSeq,
106 MapList newFromMapList)
108 return new PAEContactMatrix(newRefSeq, newFromMapList, mappedMatrix);
112 * parse a sane JSON representation of the pAE and update the mappedMatrix
116 @SuppressWarnings("unchecked")
117 private void parse_version_2_pAE(Map<String, Object> pae_obj)
120 // look for a maxscore element - if there is one...
123 // this is never going to be reached by the integer rounding.. or is it ?
124 maxscore = ((Double) MapUtils.getFirst(pae_obj,
125 "max_predicted_aligned_error", "max_pae")).floatValue();
126 } catch (Throwable t)
128 // ignore if a key is not found.
130 List<List<Long>> scoreRows = ((List<List<Long>>) MapUtils
131 .getFirst(pae_obj, "predicted_aligned_error", "pae"));
132 float[][] elements = new float[scoreRows.size()][scoreRows.size()];
133 int row = 0, col = 0;
134 for (List<Long> scoreRow : scoreRows)
136 Iterator<Long> scores = scoreRow.iterator();
137 while (scores.hasNext())
139 Object d = scores.next();
140 if (d instanceof Double)
142 elements[col][row] = ((Double) d).longValue();
146 elements[col][row] = (float) ((Long) d).longValue();
149 if (maxscore < elements[col][row])
151 maxscore = elements[col][row];
158 mappedMatrix = new FloatContactMatrix(elements);
162 * v1 format got ditched 28th July 2022 see
163 * https://alphafold.ebi.ac.uk/faq#:~:text=We%20updated%20the%20PAE%20JSON%20file%20format%20on%2028th%20July%202022
// Parses the version 1 pAE layout: three parallel lists held under the keys
// "residue1", "residue2" and "distance", which are read in step with each
// other, and stores the result as mappedMatrix.
167 @SuppressWarnings("unchecked")
168 private void parse_version_1_pAE(Map<String, Object> pae_obj)
170 // assume indices are with respect to range defined by _refSeq on the
172 Iterator<Long> rows = ((List<Long>) pae_obj.get("residue1")).iterator();
173 Iterator<Long> cols = ((List<Long>) pae_obj.get("residue2")).iterator();
174 // two pass - to allocate the elements array
176 int maxrow = -1, maxcol = -1;
// First pass over the two index lists.
// NOTE(review): the statements that update maxrow/maxcol are not visible in
// this view; presumably they track the largest row and column seen - confirm.
// cols.next() is called once per element of "residue1", so "residue2" must be
// at least as long.
177 while (rows.hasNext())
179 int row = rows.next().intValue();
180 int col = cols.next().intValue();
// Second pass: fresh iterators over the same index lists, plus the scores.
// The array is allocated as [maxcol][maxrow] and written at [col - 1][row - 1],
// i.e. each index is shifted down by one before use.
191 rows = ((List<Long>) pae_obj.get("residue1")).iterator();
192 cols = ((List<Long>) pae_obj.get("residue2")).iterator();
193 Iterator<Double> scores = ((List<Double>) pae_obj.get("distance"))
195 float[][] elements = new float[maxcol][maxrow];
196 while (scores.hasNext())
198 float escore = scores.next().floatValue();
199 int row = rows.next().intValue();
200 int col = cols.next().intValue();
209 elements[col - 1][row - 1] = escore;
212 mappedMatrix = new FloatContactMatrix(elements);
216 public String getAnnotDescr()
218 return "Predicted Alignment Error"
219 + ((refSeq == null) ? "" : (" for " + refSeq.getName()));
223 public String getAnnotLabel()
225 StringBuilder label = new StringBuilder("PAE Matrix");
226 // if (this.getReferenceSeq() != null)
228 // label.append(":").append(this.getReferenceSeq().getDisplayId(false));
230 return label.toString();
// String constant with the value "PAE_MATRIX".
233 public static final String PAEMATRIX = "PAE_MATRIX";
// NOTE(review): the body of getType() is not visible in this view; presumably
// it returns PAEMATRIX - confirm.
236 public String getType()
241 public static void validateContactMatrixFile(String fileName)
242 throws FileFormatException, IOException
244 FileInputStream infile = null;
247 infile = new FileInputStream(new File(fileName));
248 } catch (Throwable t)
250 new IOException("Couldn't open " + fileName, t);
252 JSONObject paeDict = null;
255 paeDict = EBIAlfaFold.parseJSONtoPAEContactMatrix(infile);
256 } catch (Throwable t)
258 new FileFormatException("Couldn't parse " + fileName
259 + " as a JSON dict or array containing a dict");
262 PAEContactMatrix matrix = new PAEContactMatrix(
263 new SequenceDummy("Predicted"), (Map<String, Object>) paeDict);
264 if (matrix.getWidth() <= 0)
266 throw new FileFormatException(
267 "No data in PAE matrix read from '" + fileName + "'");
272 public boolean equals(Object obj)
274 return super.equals(obj);
278 public int hashCode()
280 return super.hashCode();