X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;ds=inline;f=src%2Fjalview%2Fws%2Fdatamodel%2Falphafold%2FPAEContactMatrix.java;h=22884f1608c8f7c4b6f17702971b75c9d0529f2e;hb=8a2a0cda7067530f8481c2aec203e18d555f2dfd;hp=d1a2e9daf81c4f40804782be8b82d216b3eebf4c;hpb=8fce85002e6f6a5957dc95662c6a3d80fcce8e82;p=jalview.git diff --git a/src/jalview/ws/datamodel/alphafold/PAEContactMatrix.java b/src/jalview/ws/datamodel/alphafold/PAEContactMatrix.java index d1a2e9d..22884f1 100644 --- a/src/jalview/ws/datamodel/alphafold/PAEContactMatrix.java +++ b/src/jalview/ws/datamodel/alphafold/PAEContactMatrix.java @@ -1,46 +1,57 @@ package jalview.ws.datamodel.alphafold; +import java.io.File; +import java.io.FileInputStream; +import java.io.IOException; import java.util.Iterator; import java.util.List; import java.util.Map; +import org.json.simple.JSONObject; + import jalview.datamodel.ContactListI; import jalview.datamodel.ContactListImpl; import jalview.datamodel.ContactListProviderI; import jalview.datamodel.ContactMatrixI; +import jalview.datamodel.GroupSet; +import jalview.datamodel.SequenceDummy; import jalview.datamodel.SequenceI; +import jalview.io.FileFormatException; +import jalview.util.MapList; import jalview.util.MapUtils; +import jalview.ws.dbsources.EBIAlfaFold; -public class PAEContactMatrix implements ContactMatrixI +/** + * routines and class for holding predicted alignment error matrices as produced + * by alphafold et al. + * + * getContactList(column) returns the vector of predicted alignment errors for + * reference position given by column getElementAt(column, i) returns the + * predicted superposition error for the ith position when column is used as + * reference + * + * Many thanks to Ora Schueler Furman for noticing that earlier development + * versions did not show the PAE oriented correctly + * + * @author jprocter + * + */ +public class PAEContactMatrix extends + MappableContactMatrix implements ContactMatrixI { - SequenceI refSeq = null; - - /** - * the length that refSeq is expected to be (excluding gaps, of course) - */ - int length; int maxrow = 0, maxcol = 0; - int[] indices1, indices2; float[][] elements; float maxscore; - private void setRefSeq(SequenceI _refSeq) - { - refSeq = _refSeq; - while (refSeq.getDatasetSequence() != null) - { - refSeq = refSeq.getDatasetSequence(); - } - length = _refSeq.getEnd() - _refSeq.getStart() + 1; - } @SuppressWarnings("unchecked") public PAEContactMatrix(SequenceI _refSeq, Map pae_obj) + throws FileFormatException { setRefSeq(_refSeq); // convert the lists to primitive arrays and store @@ -87,6 +98,22 @@ public class PAEContactMatrix implements ContactMatrixI } /** + * new matrix with specific mapping to a reference sequence + * + * @param newRefSeq + * @param newFromMapList + * @param elements2 + * @param grps2 + */ + public PAEContactMatrix(SequenceI newRefSeq, MapList newFromMapList, + float[][] elements2, GroupSet grps2) + { + this(newRefSeq, elements2); + toSeq = newFromMapList; + grps = grps2; + } + + /** * parse a sane JSON representation of the pAE * * @param pae_obj @@ -94,9 +121,17 @@ public class PAEContactMatrix implements ContactMatrixI @SuppressWarnings("unchecked") private void parse_version_2_pAE(Map pae_obj) { - // this is never going to be reached by the integer rounding.. or is it ? - maxscore = ((Double) MapUtils.getFirst(pae_obj, - "max_predicted_aligned_error", "max_pae")).floatValue(); + maxscore = -1; + // look for a maxscore element - if there is one... + try + { + // this is never going to be reached by the integer rounding.. or is it ? + maxscore = ((Double) MapUtils.getFirst(pae_obj, + "max_predicted_aligned_error", "max_pae")).floatValue(); + } catch (Throwable t) + { + // ignore if a key is not found. + } List> scoreRows = ((List>) MapUtils .getFirst(pae_obj, "predicted_aligned_error", "pae")); elements = new float[scoreRows.size()][scoreRows.size()]; @@ -108,9 +143,19 @@ public class PAEContactMatrix implements ContactMatrixI { Object d = scores.next(); if (d instanceof Double) - elements[row][col++] = ((Double) d).longValue(); + { + elements[col][row] = ((Double) d).longValue(); + } else - elements[row][col++] = (float) ((Long) d).longValue(); + { + elements[col][row] = (float) ((Long) d).longValue(); + } + + if (maxscore < elements[col][row]) + { + maxscore = elements[col][row]; + } + col++; } row++; col = 0; @@ -132,10 +177,26 @@ public class PAEContactMatrix implements ContactMatrixI // dataset refSeq Iterator rows = ((List) pae_obj.get("residue1")).iterator(); Iterator cols = ((List) pae_obj.get("residue2")).iterator(); + // two pass - to allocate the elements array + while (rows.hasNext()) + { + int row = rows.next().intValue(); + int col = cols.next().intValue(); + if (maxrow < row) + { + maxrow = row; + } + if (maxcol < col) + { + maxcol = col; + } + + } + rows = ((List) pae_obj.get("residue1")).iterator(); + cols = ((List) pae_obj.get("residue2")).iterator(); Iterator scores = ((List) pae_obj.get("distance")) .iterator(); - // assume square matrix - elements = new float[length][length]; + elements = new float[maxcol][maxrow]; while (scores.hasNext()) { float escore = scores.next().floatValue(); @@ -149,17 +210,21 @@ public class PAEContactMatrix implements ContactMatrixI { maxcol = col; } - elements[row - 1][col - 1] = escore; + elements[col - 1][row-1] = escore; } maxscore = ((Double) MapUtils.getFirst(pae_obj, "max_predicted_aligned_error", "max_pae")).floatValue(); } + /** + * getContactList(column) @returns the vector of predicted alignment errors + * for reference position given by column + */ @Override - public ContactListI getContactList(final int _column) + public ContactListI getContactList(final int column) { - if (_column < 0 || _column >= elements.length) + if (column < 0 || column >= elements.length) { return null; } @@ -169,7 +234,7 @@ public class PAEContactMatrix implements ContactMatrixI @Override public int getPosition() { - return _column; + return column; } @Override @@ -179,53 +244,54 @@ public class PAEContactMatrix implements ContactMatrixI } @Override - public double getContactAt(int column) + public double getContactAt(int mcolumn) { - if (column < 0 || column >= elements[_column].length) + if (mcolumn < 0 || mcolumn >= elements[column].length) { return -1; } - return elements[_column][column]; + return elements[column][mcolumn]; } }); } + /** + * getElementAt(column, i) @returns the predicted superposition error for the + * ith position when column is used as reference + */ @Override - public float getMin() - { - return 0; - } - - @Override - public float getMax() + protected double getElementAt(int _column, int i) { - return maxscore; + return elements[_column][i]; } @Override - public boolean hasReferenceSeq() + public float getMin() { - return (refSeq != null); + return 0; } @Override - public SequenceI getReferenceSeq() + public float getMax() { - return refSeq; + return maxscore; } @Override public String getAnnotDescr() { - return "Predicted Alignment Error for " + refSeq.getName(); + return "Predicted Alignment Error" + + ((refSeq == null) ? "" : (" for " + refSeq.getName())); } @Override public String getAnnotLabel() { - StringBuilder label = new StringBuilder("pAE Matrix"); - if (this.getReferenceSeq() != null) - label.append(":").append(this.getReferenceSeq().getDisplayId(false)); + StringBuilder label = new StringBuilder("PAE Matrix"); + // if (this.getReferenceSeq() != null) + // { + // label.append(":").append(this.getReferenceSeq().getDisplayId(false)); + // } return label.toString(); } @@ -240,12 +306,49 @@ public class PAEContactMatrix implements ContactMatrixI @Override public int getWidth() { - return length; + return maxcol; } @Override public int getHeight() { - return length; + return maxrow; + } + public static void validateContactMatrixFile(String fileName) + throws FileFormatException, IOException + { + FileInputStream infile = null; + try + { + infile = new FileInputStream(new File(fileName)); + } catch (Throwable t) + { + new IOException("Couldn't open " + fileName, t); + } + JSONObject paeDict = null; + try + { + paeDict = EBIAlfaFold.parseJSONtoPAEContactMatrix(infile); + } catch (Throwable t) + { + new FileFormatException("Couldn't parse " + fileName + + " as a JSON dict or array containing a dict"); + } + + PAEContactMatrix matrix = new PAEContactMatrix( + new SequenceDummy("Predicted"), (Map) paeDict); + if (matrix.getWidth() <= 0) + { + throw new FileFormatException( + "No data in PAE matrix read from '" + fileName + "'"); + } + } + @Override + protected PAEContactMatrix newMappableContactMatrix(SequenceI newRefSeq, + MapList newFromMapList) + { + PAEContactMatrix pae = new PAEContactMatrix(newRefSeq, newFromMapList, + elements, new GroupSet(grps)); + return pae; } }