1 package jalview.ws.datamodel.alphafold;
4 import java.io.BufferedInputStream;
6 import java.io.FileInputStream;
7 import java.io.IOException;
8 import java.util.ArrayList;
9 import java.util.BitSet;
10 import java.util.HashMap;
11 import java.util.Iterator;
12 import java.util.List;
14 import java.util.Map.Entry;
16 import org.json.simple.JSONObject;
18 import jalview.analysis.AverageDistanceEngine;
19 import jalview.bin.Console;
20 import jalview.datamodel.Annotation;
21 import jalview.datamodel.BinaryNode;
22 import jalview.datamodel.ContactListI;
23 import jalview.datamodel.ContactListImpl;
24 import jalview.datamodel.ContactListProviderI;
25 import jalview.datamodel.ContactMatrixI;
26 import jalview.datamodel.Mapping;
27 import jalview.datamodel.SequenceDummy;
28 import jalview.datamodel.SequenceI;
29 import jalview.io.DataSourceType;
30 import jalview.io.FileFormatException;
31 import jalview.io.FileParse;
32 import jalview.util.MapList;
33 import jalview.util.MapUtils;
34 import jalview.ws.dbsources.EBIAlfaFold;
36 public class PAEContactMatrix extends MappableContactMatrix<PAEContactMatrix> implements ContactMatrixI
38 int maxrow = 0, maxcol = 0;
45 @SuppressWarnings("unchecked")
46 public PAEContactMatrix(SequenceI _refSeq, Map<String, Object> pae_obj) throws FileFormatException
49 // convert the lists to primitive arrays and store
51 if (!MapUtils.containsAKey(pae_obj, "predicted_aligned_error", "pae"))
53 parse_version_1_pAE(pae_obj);
58 parse_version_2_pAE(pae_obj);
63 * construct a sequence associated PAE matrix directly from a float array
68 public PAEContactMatrix(SequenceI _refSeq, float[][] matrix)
72 for (float[] row : matrix)
74 if (row.length > maxcol)
87 maxrow = matrix.length;
93 * new matrix with specific mapping to a reference sequence
95 * @param newFromMapList
/**
 * Builds a matrix from a float array and then installs an explicit mapping.
 * All array handling is delegated to the (sequence, float[][]) constructor;
 * only the mapping assignment happens here.
 *
 * @param newRefSeq
 *          sequence handed on to the (sequence, float[][]) constructor
 * @param newFromMapList
 *          mapping stored in {@code toSeq}
 * @param elements2
 *          matrix values handed on to the (sequence, float[][]) constructor
 */
public PAEContactMatrix(SequenceI newRefSeq,
        MapList newFromMapList, float[][] elements2)
{
  this(newRefSeq, elements2);
  // assigned after delegation so it replaces whatever the delegate left there
  this.toSeq = newFromMapList;
}
106 * parse a sane JSON representation of the pAE
110 @SuppressWarnings("unchecked")
111 private void parse_version_2_pAE(Map<String, Object> pae_obj)
114 // look for a maxscore element - if there is one...
117 // this is never going to be reached by the integer rounding.. or is it ?
118 maxscore = ((Double) MapUtils.getFirst(pae_obj,
119 "max_predicted_aligned_error", "max_pae")).floatValue();
120 } catch (Throwable t)
122 // ignore if a key is not found.
124 List<List<Long>> scoreRows = ((List<List<Long>>) MapUtils
125 .getFirst(pae_obj, "predicted_aligned_error", "pae"));
126 elements = new float[scoreRows.size()][scoreRows.size()];
127 int row = 0, col = 0;
128 for (List<Long> scoreRow : scoreRows)
130 Iterator<Long> scores = scoreRow.iterator();
131 while (scores.hasNext())
133 Object d = scores.next();
135 if (d instanceof Double)
137 elements[row][col++] = ((Double) d).longValue();
141 elements[row][col++] = (float) ((Long) d).longValue();
144 if (maxscore < elements[row][col - 1])
146 maxscore = elements[row][col - 1];
157 * v1 format got ditched 28th July 2022 see
158 * https://alphafold.ebi.ac.uk/faq#:~:text=We%20updated%20the%20PAE%20JSON%20file%20format%20on%2028th%20July%202022
162 @SuppressWarnings("unchecked")
163 private void parse_version_1_pAE(Map<String, Object> pae_obj)
165 // assume indices are with respect to range defined by _refSeq on the
167 Iterator<Long> rows = ((List<Long>) pae_obj.get("residue1")).iterator();
168 Iterator<Long> cols = ((List<Long>) pae_obj.get("residue2")).iterator();
169 // two pass - to allocate the elements array
170 while (rows.hasNext())
172 int row = rows.next().intValue();
173 int col = cols.next().intValue();
184 rows = ((List<Long>) pae_obj.get("residue1")).iterator();
185 cols = ((List<Long>) pae_obj.get("residue2")).iterator();
186 Iterator<Double> scores = ((List<Double>) pae_obj.get("distance"))
188 elements = new float[maxrow][maxcol];
189 while (scores.hasNext())
191 float escore = scores.next().floatValue();
192 int row = rows.next().intValue();
193 int col = cols.next().intValue();
202 elements[row - 1][col - 1] = escore;
205 maxscore = ((Double) MapUtils.getFirst(pae_obj,
206 "max_predicted_aligned_error", "max_pae")).floatValue();
210 public ContactListI getContactList(final int column)
215 int[] word = toSeq.locateInTo(column, column);
226 if (_column < 0 || _column >= elements.length)
231 return new ContactListImpl(new ContactListProviderI()
234 public int getPosition()
240 public int getContactHeight()
246 public double getContactAt(int mcolumn)
248 int[] column=(toSeq==null) ? new int[] {mcolumn} : toSeq.locateInTo(mcolumn,mcolumn);
249 if (column==null || column[0] < 0 || column[0] >= elements[_column].length)
253 return elements[_column][column[0]];
259 protected double getElementAt(int _column, int i)
261 return elements[_column][i];
264 public float getMin()
270 public float getMax()
276 public String getAnnotDescr()
278 return "Predicted Alignment Error"+((refSeq==null) ? "" : (" for " + refSeq.getName()));
282 public String getAnnotLabel()
284 StringBuilder label = new StringBuilder("PAE Matrix");
285 //if (this.getReferenceSeq() != null)
287 // label.append(":").append(this.getReferenceSeq().getDisplayId(false));
289 return label.toString();
292 public static final String PAEMATRIX = "PAE_MATRIX";
295 public String getType()
301 public int getWidth()
307 public int getHeight()
311 List<BitSet> groups=null;
313 public boolean hasGroups()
319 public String getNewick()
324 public boolean hasTree()
326 return newick!=null && newick.length()>0;
330 String treeType=null;
331 public void makeGroups(float thresh,boolean abs)
333 AverageDistanceEngine clusterer = new AverageDistanceEngine(null, null, this);
334 double height = clusterer.findHeight(clusterer.getTopNode());
335 newick = new jalview.io.NewickFile(clusterer.getTopNode(),false,true).print();
337 Console.trace("Newick string\n"+newick);
339 List<BinaryNode> nodegroups;
340 if (abs ? height > thresh : 0 < thresh && thresh < 1)
342 float cut = abs ? (float) (thresh / height) : thresh;
343 Console.debug("Threshold "+cut+" for height="+height);
345 nodegroups = clusterer.groupNodes(cut);
349 nodegroups = new ArrayList<BinaryNode>();
350 nodegroups.add(clusterer.getTopNode());
354 groups = new ArrayList<>();
355 for (BinaryNode root:nodegroups)
357 BitSet gpset=new BitSet();
358 for (BinaryNode leaf:clusterer.findLeaves(root))
360 gpset.set((Integer)leaf.element());
366 public void updateGroups(List<BitSet> colGroups)
374 public BitSet getGroupsFor(int column)
378 for (BitSet gp : groups)
386 return ContactMatrixI.super.getGroupsFor(column);
389 HashMap<BitSet,Color> colorMap = new HashMap<>();
391 public Color getColourForGroup(BitSet bs)
396 Color groupCol=colorMap.get(bs);
404 public void setColorForGroup(BitSet bs,Color color)
406 colorMap.put(bs,color);
408 public void restoreGroups(List<BitSet> newgroups, String treeMethod,
409 String tree, double thresh2)
418 public boolean hasCutHeight() {
419 return groups!=null && thresh!=0;
422 public double getCutHeight()
427 public String getTreeMethod()
432 public static void validateContactMatrixFile(String fileName) throws FileFormatException,IOException
434 FileInputStream infile=null;
436 infile = new FileInputStream(new File(fileName));
437 } catch (Throwable t)
439 new IOException("Couldn't open "+fileName,t);
443 JSONObject paeDict=null;
445 paeDict = EBIAlfaFold.parseJSONtoPAEContactMatrix(infile);
446 } catch (Throwable t)
448 new FileFormatException("Couldn't parse "+fileName+" as a JSON dict or array containing a dict");
451 PAEContactMatrix matrix = new PAEContactMatrix(new SequenceDummy("Predicted"), (Map<String,Object>)paeDict);
452 if (matrix.getWidth()<=0)
454 throw new FileFormatException("No data in PAE matrix read from '"+fileName+"'");
459 protected PAEContactMatrix newMappableContactMatrix(
460 SequenceI newRefSeq, MapList newFromMapList)
462 return new PAEContactMatrix(newRefSeq, newFromMapList,