X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=src%2Fjalview%2Fws%2Fdatamodel%2Falphafold%2FPAEContactMatrix.java;h=0ba958455f1b021dd7d8b09fc3601d4f71a4f8a4;hb=cc3341f9d1465b0226d184be4216d022a62e5cee;hp=8b6771e26cb402fae71ec519a92c13739b89aa6e;hpb=a3dae2a49b205b6700dd478739905f9ae7dc38d8;p=jalview.git diff --git a/src/jalview/ws/datamodel/alphafold/PAEContactMatrix.java b/src/jalview/ws/datamodel/alphafold/PAEContactMatrix.java index 8b6771e..0ba9584 100644 --- a/src/jalview/ws/datamodel/alphafold/PAEContactMatrix.java +++ b/src/jalview/ws/datamodel/alphafold/PAEContactMatrix.java @@ -1,46 +1,56 @@ package jalview.ws.datamodel.alphafold; +import java.awt.Color; +import java.io.BufferedInputStream; +import java.io.File; +import java.io.FileInputStream; +import java.io.IOException; +import java.util.ArrayList; +import java.util.BitSet; +import java.util.HashMap; import java.util.Iterator; import java.util.List; import java.util.Map; +import java.util.Map.Entry; +import org.json.simple.JSONObject; + +import jalview.analysis.AverageDistanceEngine; +import jalview.bin.Console; +import jalview.datamodel.Annotation; +import jalview.datamodel.BinaryNode; import jalview.datamodel.ContactListI; import jalview.datamodel.ContactListImpl; import jalview.datamodel.ContactListProviderI; import jalview.datamodel.ContactMatrixI; +import jalview.datamodel.GroupSet; +import jalview.datamodel.GroupSetI; +import jalview.datamodel.Mapping; +import jalview.datamodel.SequenceDummy; import jalview.datamodel.SequenceI; +import jalview.io.DataSourceType; +import jalview.io.FileFormatException; +import jalview.io.FileParse; +import jalview.util.MapList; +import jalview.util.MapUtils; +import jalview.ws.dbsources.EBIAlfaFold; -public class PAEContactMatrix implements ContactMatrixI +public class PAEContactMatrix extends MappableContactMatrix implements ContactMatrixI { - - SequenceI refSeq = null; - - /** - * the length that refSeq is expected to be (excluding gaps, of course) - */ - int length; - int maxrow = 0, maxcol = 0; - int[] indices1, indices2; - float[][] elements; float maxscore; + @SuppressWarnings("unchecked") - public PAEContactMatrix(SequenceI _refSeq, Map pae_obj) - throws Exception + public PAEContactMatrix(SequenceI _refSeq, Map pae_obj) throws FileFormatException { - refSeq = _refSeq; - while (refSeq.getDatasetSequence() != null) - { - refSeq = refSeq.getDatasetSequence(); - } + setRefSeq(_refSeq); // convert the lists to primitive arrays and store - length = _refSeq.getEnd() - _refSeq.getStart() + 1; - if (!pae_obj.containsKey("predicted_aligned_error")) + if (!MapUtils.containsAKey(pae_obj, "predicted_aligned_error", "pae")) { parse_version_1_pAE(pae_obj); return; @@ -52,29 +62,99 @@ public class PAEContactMatrix implements ContactMatrixI } /** + * construct a sequence associated PAE matrix directly from a float array + * + * @param _refSeq + * @param matrix + */ + public PAEContactMatrix(SequenceI _refSeq, float[][] matrix) + { + setRefSeq(_refSeq); + maxcol = 0; + for (float[] row : matrix) + { + if (row.length > maxcol) + { + maxcol = row.length; + } + maxscore = row[0]; + for (float f : row) + { + if (maxscore < f) + { + maxscore = f; + } + } + } + maxrow = matrix.length; + elements = matrix; + + } + + /** + * new matrix with specific mapping to a reference sequence + * @param newRefSeq + * @param newFromMapList + * @param elements2 + * @param grps2 + */ + public PAEContactMatrix(SequenceI newRefSeq, + MapList newFromMapList, float[][] elements2, GroupSet grps2) + { + this(newRefSeq,elements2); + toSeq = newFromMapList; + grps= grps2; + } + + /** * parse a sane JSON representation of the pAE * * @param pae_obj */ + @SuppressWarnings("unchecked") private void parse_version_2_pAE(Map pae_obj) { - elements = new float[length][length]; - // this is never going to be reached by the integer rounding.. or is it ? - maxscore = ((Double) pae_obj.get("max_predicted_aligned_error")) - .floatValue(); - Iterator> scoreRows = ((List>) pae_obj - .get("predicted_aligned_error")).iterator(); + maxscore = -1; + // look for a maxscore element - if there is one... + try + { + // this is never going to be reached by the integer rounding.. or is it ? + maxscore = ((Double) MapUtils.getFirst(pae_obj, + "max_predicted_aligned_error", "max_pae")).floatValue(); + } catch (Throwable t) + { + // ignore if a key is not found. + } + List> scoreRows = ((List>) MapUtils + .getFirst(pae_obj, "predicted_aligned_error", "pae")); + elements = new float[scoreRows.size()][scoreRows.size()]; int row = 0, col = 0; - while (scoreRows.hasNext()) + for (List scoreRow : scoreRows) { - Iterator scores = scoreRows.next().iterator(); + Iterator scores = scoreRow.iterator(); while (scores.hasNext()) { - elements[row][col++] = scores.next(); + Object d = scores.next(); + + if (d instanceof Double) + { + elements[row][col++] = ((Double) d).longValue(); + } + else + { + elements[row][col++] = (float) ((Long) d).longValue(); + } + + if (maxscore < elements[row][col - 1]) + { + maxscore = elements[row][col - 1]; + } } row++; col = 0; } + maxcol = length; + maxrow = length; } /** @@ -83,16 +163,33 @@ public class PAEContactMatrix implements ContactMatrixI * * @param pae_obj */ + @SuppressWarnings("unchecked") private void parse_version_1_pAE(Map pae_obj) { // assume indices are with respect to range defined by _refSeq on the // dataset refSeq Iterator rows = ((List) pae_obj.get("residue1")).iterator(); Iterator cols = ((List) pae_obj.get("residue2")).iterator(); + // two pass - to allocate the elements array + while (rows.hasNext()) + { + int row = rows.next().intValue(); + int col = cols.next().intValue(); + if (maxrow < row) + { + maxrow = row; + } + if (maxcol < col) + { + maxcol = col; + } + + } + rows = ((List) pae_obj.get("residue1")).iterator(); + cols = ((List) pae_obj.get("residue2")).iterator(); Iterator scores = ((List) pae_obj.get("distance")) .iterator(); - - elements = new float[length][length]; + elements = new float[maxrow][maxcol]; while (scores.hasNext()) { float escore = scores.next().floatValue(); @@ -109,14 +206,28 @@ public class PAEContactMatrix implements ContactMatrixI elements[row - 1][col - 1] = escore; } - maxscore = ((Double) pae_obj.get("max_predicted_aligned_error")) - .floatValue(); + maxscore = ((Double) MapUtils.getFirst(pae_obj, + "max_predicted_aligned_error", "max_pae")).floatValue(); } @Override - public ContactListI getContactList(final int _column) + public ContactListI getContactList(final int column) { - if (_column < 0 || _column >= elements.length) +// final int _column; +// if (toSeq != null) +// { +// int[] word = toSeq.locateInTo(column, column); +// if (word == null) +// { +// return null; +// } +// _column = word[0]; +// } +// else +// { +// _column = column; +// } + if (column < 0 || column >= elements.length) { return null; } @@ -124,25 +235,35 @@ public class PAEContactMatrix implements ContactMatrixI return new ContactListImpl(new ContactListProviderI() { @Override + public int getPosition() + { + return column; + } + + @Override public int getContactHeight() { return maxcol - 1; } @Override - public double getContactAt(int column) + public double getContactAt(int mcolumn) { - if (column < 0 || column >= elements[_column].length) + if (mcolumn < 0 || mcolumn >= elements[column].length) { return -1; } - // TODO Auto-generated method stub - return elements[_column][column]; + return elements[column][mcolumn]; } }); } @Override + protected double getElementAt(int _column, int i) + { + return elements[_column][i]; + } + @Override public float getMin() { return 0; @@ -155,15 +276,74 @@ public class PAEContactMatrix implements ContactMatrixI } @Override - public boolean hasReferenceSeq() + public String getAnnotDescr() { - return (refSeq != null); + return "Predicted Alignment Error"+((refSeq==null) ? "" : (" for " + refSeq.getName())); } @Override - public SequenceI getReferenceSeq() + public String getAnnotLabel() { - return refSeq; + StringBuilder label = new StringBuilder("PAE Matrix"); + //if (this.getReferenceSeq() != null) + //{ + // label.append(":").append(this.getReferenceSeq().getDisplayId(false)); + //} + return label.toString(); } + public static final String PAEMATRIX = "PAE_MATRIX"; + + @Override + public String getType() + { + return PAEMATRIX; + } + + @Override + public int getWidth() + { + return length; + } + + @Override + public int getHeight() + { + return length; + } + + public static void validateContactMatrixFile(String fileName) throws FileFormatException,IOException + { + FileInputStream infile=null; + try { + infile = new FileInputStream(new File(fileName)); + } catch (Throwable t) + { + new IOException("Couldn't open "+fileName,t); + } + + + JSONObject paeDict=null; + try { + paeDict = EBIAlfaFold.parseJSONtoPAEContactMatrix(infile); + } catch (Throwable t) + { + new FileFormatException("Couldn't parse "+fileName+" as a JSON dict or array containing a dict"); + } + + PAEContactMatrix matrix = new PAEContactMatrix(new SequenceDummy("Predicted"), (Map)paeDict); + if (matrix.getWidth()<=0) + { + throw new FileFormatException("No data in PAE matrix read from '"+fileName+"'"); + } + } + + @Override + protected PAEContactMatrix newMappableContactMatrix( + SequenceI newRefSeq, MapList newFromMapList) + { + PAEContactMatrix pae=new PAEContactMatrix(newRefSeq, newFromMapList, + elements, new GroupSet(grps)); + return pae; + } }