JAL-2375 JAL-2376 First commit of implementation for Phyre2 result browsing, template...
[jalview.git] / src / jalview / ws / phyre2 / Phyre2Client.java
diff --git a/src/jalview/ws/phyre2/Phyre2Client.java b/src/jalview/ws/phyre2/Phyre2Client.java
new file mode 100644 (file)
index 0000000..0a29412
--- /dev/null
@@ -0,0 +1,466 @@
+package jalview.ws.phyre2;
+
+import jalview.datamodel.AlignmentI;
+import jalview.datamodel.SequenceI;
+import jalview.fts.core.DecimalFormatTableCellRenderer;
+import jalview.io.DataSourceType;
+import jalview.io.FileFormat;
+import jalview.io.FormatAdapter;
+import jalview.io.StructureFile;
+import jalview.schemes.ResidueProperties;
+import jalview.structure.StructureMapping;
+import jalview.structure.StructureMappingClient;
+import jalview.structures.models.MappingOutputModel;
+import jalview.util.Comparison;
+import jalview.util.Format;
+
+import java.io.File;
+import java.io.IOException;
+import java.io.PrintStream;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+
+import javax.swing.JTable;
+import javax.swing.table.DefaultTableModel;
+
+import org.jsoup.Jsoup;
+import org.jsoup.nodes.Document;
+import org.jsoup.nodes.Element;
+import org.jsoup.select.Elements;
+
+public class Phyre2Client extends StructureMappingClient
+{
+  private final static String NEWLINE = System.lineSeparator(); // platform line terminator used when building the mapping report
+
+  public static final int UNASSIGNED = -1; // NOTE(review): not referenced in this file (the code uses literal -1); presumably a "no residue" sentinel - confirm
+
+  private final static String PATH_SEPARATOR = File.separator; // file-name separator (File.separator), not the path-list separator File.pathSeparator
+
+  public Phyre2Client(StructureFile structureFile) // structureFile: its id and data name are later used to locate the model's .fasta alignment
+  {
+    this.structureFile = structureFile; // field is not declared in this class; presumably inherited from StructureMappingClient - confirm
+  }
+
+  @Override
+  public StructureMapping getStructureMapping(SequenceI seq,
+          String pdbFile, String chain)
+  {
+    final StringBuilder mappingDetails = new StringBuilder(128);
+    PrintStream ps = new PrintStream(System.out)
+    {
+      @Override
+      public void print(String x)
+      {
+        mappingDetails.append(x);
+      }
+
+      @Override
+      public void println()
+      {
+        mappingDetails.append(NEWLINE);
+      }
+    };
+    HashMap<Integer, int[]> mapping = getPhyre2FastaMapping(seq, ps);
+
+    String mappingOutput = mappingDetails.toString();
+    StructureMapping phyre2ModelMapping = new StructureMapping(seq,
+            pdbFile, structureFile.getId(), chain, mapping, mappingOutput);
+    return phyre2ModelMapping;
+  }
+
+  public HashMap<Integer, int[]> getPhyre2FastaMapping(SequenceI inputSeq,
+          java.io.PrintStream os)
+  {
+    HashMap<Integer, int[]> mapping = new HashMap<Integer, int[]>();
+    AlignmentI seq2Phyre2ModelFastaMapping = null;
+    try
+    {
+      seq2Phyre2ModelFastaMapping = new FormatAdapter().readFile(
+              getFastaMappingFile(), DataSourceType.FILE, FileFormat.Fasta);
+    } catch (IOException e1)
+    {
+      e1.printStackTrace();
+    }
+    SequenceI[] seqs = seq2Phyre2ModelFastaMapping.getSequencesArray();
+    SequenceI tSequenceRes = seqs[0];
+    SequenceI tStructureRes = seqs[1];
+
+    // Essential to resolve fastaAlignment to input sequence and model sequence
+    // coordinates
+    tSequenceRes.setStart(inputSeq.getStart());
+    tSequenceRes.setEnd(inputSeq.getEnd());
+
+    tStructureRes.setStart(structureFile.getSeqsAsArray()[0].getStart());
+    tStructureRes.setEnd(structureFile.getSeqsAsArray()[0].getEnd());
+    try
+    {
+      int sequenceResLenght = tSequenceRes.getLength();
+      int structureResLenght = tStructureRes.getLength();
+      if (sequenceResLenght == structureResLenght)
+      {
+        int prevStructResNum = -1;
+        int alignmentLenght = sequenceResLenght + tSequenceRes.getStart();
+        for (int x = 0; x < alignmentLenght; x++)
+        {
+          int alignSeqResidueIndex = tSequenceRes.findIndex(x);
+          int structResNum = tStructureRes
+                  .findPosition(alignSeqResidueIndex);
+          int sequenceResNum = tSequenceRes
+                  .findPosition(alignSeqResidueIndex - 1);
+          boolean sameResNum = (structResNum == prevStructResNum);
+          // System.out.println(sequenceResNum + " : "
+          // + (sameResNum ? -1 : prevStructResNum));
+          mapping.put(sequenceResNum, new int[] {
+              sameResNum ? -1 : prevStructResNum, -1 });
+          prevStructResNum = structResNum;
+        }
+      }
+    } catch (Exception e)
+    {
+      e.printStackTrace();
+    }
+
+    try
+    {
+      populateAtomPositions(" ", mapping);
+    } catch (IllegalArgumentException e)
+    {
+      e.printStackTrace();
+    } catch (StructureMappingException e)
+    {
+      e.printStackTrace();
+    }
+
+    if (os != null)
+    {
+      MappingOutputModel mop = new MappingOutputModel();
+      mop.setSeqStart(tSequenceRes.getStart());
+      mop.setSeqEnd(tSequenceRes.getEnd());
+      mop.setSeqName(tSequenceRes.getName());
+      mop.setSeqResidue(tSequenceRes.getSequenceAsString());
+
+      mop.setStrStart(tStructureRes.getStart());
+      mop.setStrEnd(tStructureRes.getEnd());
+      mop.setStrName(tStructureRes.getName());
+      mop.setStrResidue(tStructureRes.getSequenceAsString());
+
+      mop.setType("pep");
+      try
+      {
+        os.print(getMappingOutput(mop).toString());
+      } catch (Exception e)
+      {
+        e.printStackTrace();
+      }
+      os.println();
+    }
+    return mapping;
+  }
+
+  /**
+   * Derives the path of the FASTA alignment that belongs to the model: the
+   * file sits in the same directory as the structure file's data name and is
+   * named after the structure id with a trailing ".pdb" part replaced by
+   * ".fasta".
+   * <p>
+   * An id without ".pdb" is used unchanged (previously this threw a
+   * StringIndexOutOfBoundsException), and a data name without a parent
+   * directory yields just the file name (previously the text "null" was
+   * prepended).
+   * 
+   * @return path of the expected FASTA mapping file
+   */
+  private String getFastaMappingFile()
+  {
+    File phyre2ModelFile = new File(structureFile.getDataName());
+    String phyre2ModelResultDir = phyre2ModelFile.getParent();
+
+    String structureId = structureFile.getId();
+    int extensionStart = structureId.lastIndexOf(".pdb");
+    String modelId = extensionStart < 0 ? structureId : structureId
+            .substring(0, extensionStart);
+
+    String fastaFileName = modelId + ".fasta";
+    if (phyre2ModelResultDir == null)
+    {
+      return fastaFileName;
+    }
+    return phyre2ModelResultDir + PATH_SEPARATOR + fastaFileName;
+  }
+
+  @Override
+  public StringBuffer getMappingOutput(MappingOutputModel mp)
+          throws StructureMappingException
+  {
+    String seqRes = mp.getSeqResidue();
+    String seqName = mp.getSeqName();
+    int sStart = mp.getSeqStart();
+    int sEnd = mp.getSeqEnd();
+
+    String strRes = mp.getStrResidue();
+    String strName = mp.getStrName();
+    int pdbStart = mp.getStrStart();
+    int pdbEnd = mp.getStrEnd();
+
+    String type = mp.getType();
+
+    int maxid = (seqName.length() >= strName.length()) ? seqName.length()
+            : strName.length();
+    int len = 72 - maxid - 1;
+
+    int nochunks = ((seqRes.length()) / len)
+            + ((seqRes.length()) % len > 0 ? 1 : 0);
+    // output mappings
+    StringBuffer output = new StringBuffer();
+    output.append(NEWLINE);
+    output.append("Sequence \u27f7 Structure mapping details").append(
+            NEWLINE);
+    output.append("Method: Phyre2 Alignment");
+    output.append(NEWLINE).append(NEWLINE);
+
+    output.append(new Format("%" + maxid + "s").form(seqName));
+    output.append(" :  ");
+    output.append(String.valueOf(sStart));
+    output.append(" - ");
+    output.append(String.valueOf(sEnd));
+    output.append(" Maps to ");
+    output.append(NEWLINE);
+    output.append(new Format("%" + maxid + "s").form(strName));
+    output.append(" :  ");
+    output.append(String.valueOf(pdbStart));
+    output.append(" - ");
+    output.append(String.valueOf(pdbEnd));
+    output.append(NEWLINE).append(NEWLINE);
+
+    int matchedSeqCount = 0;
+    for (int j = 0; j < nochunks; j++)
+    {
+      // Print the first aligned sequence
+      output.append(new Format("%" + (maxid) + "s").form(seqName)).append(
+              " ");
+
+      for (int i = 0; i < len; i++)
+      {
+        if ((i + (j * len)) < seqRes.length())
+        {
+          output.append(seqRes.charAt(i + (j * len)));
+        }
+      }
+
+      output.append(NEWLINE);
+      output.append(new Format("%" + (maxid) + "s").form(" ")).append(" ");
+
+      // Print out the matching chars
+      for (int i = 0; i < len; i++)
+      {
+        try
+        {
+          if ((i + (j * len)) < seqRes.length())
+          {
+            boolean sameChar = Comparison.isSameResidue(
+                    seqRes.charAt(i + (j * len)),
+                    strRes.charAt(i + (j * len)), false);
+            if (sameChar
+                    && !jalview.util.Comparison.isGap(seqRes.charAt(i
+                            + (j * len))))
+            {
+              matchedSeqCount++;
+              output.append("|");
+            }
+            else if (type.equals("pep"))
+            {
+              if (ResidueProperties.getPAM250(seqRes.charAt(i + (j * len)),
+                      strRes.charAt(i + (j * len))) > 0)
+              {
+                output.append(".");
+              }
+              else
+              {
+                output.append(" ");
+              }
+            }
+            else
+            {
+              output.append(" ");
+            }
+          }
+        } catch (IndexOutOfBoundsException e)
+        {
+          continue;
+        }
+      }
+      // Now print the second aligned sequence
+      output = output.append(NEWLINE);
+      output = output.append(new Format("%" + (maxid) + "s").form(strName))
+              .append(" ");
+      for (int i = 0; i < len; i++)
+      {
+        if ((i + (j * len)) < strRes.length())
+        {
+          output.append(strRes.charAt(i + (j * len)));
+        }
+      }
+      output.append(NEWLINE).append(NEWLINE);
+    }
+    float pid = (float) matchedSeqCount / seqRes.length() * 100;
+    // if (pid < SiftsSettings.getFailSafePIDThreshold())
+    // {
+    // throw new Exception(">>> Low PID detected for Phyre2 mapping...");
+    // }
+    output.append("Length of alignment = " + seqRes.length()).append(
+            NEWLINE);
+    output.append(new Format("Percentage ID = %2.2f").form(pid));
+    return output;
+  }
+
+
+
+  public static List<Phyre2SummaryPojo> parsePhyre2ResultSummaryTable(
+          String html)
+  {
+    List<Phyre2SummaryPojo> phyre2Results = new ArrayList<Phyre2SummaryPojo>();
+    try
+    {
+      File in = new File(html);
+      Document doc = Jsoup.parse(in, null);
+      // Document doc = Jsoup.connect(html).get();
+      Elements tableElements = doc.select("table.midshade");
+      for (Element table : tableElements)
+      {
+        System.out.println();
+        Elements tableRowElements = table.select(":not(thead) tr");
+        for (int i = 0; i < tableRowElements.size(); i++)
+        {
+          Element row = tableRowElements.get(i);
+          Elements rowItems = row.select("td");
+          if (rowItems.size() > 11)
+          {
+            // for (int j = 0; j < rowItems.size(); j++)
+            // {
+            // System.out.println(">>> r:" + j + "  =  "
+            // + rowItems.get(j).text());
+            // }
+
+            String c = rowItems.get(6).select("input").attr("onmouseover");
+            String alignedRange = c.substring(c.indexOf("Residues ") + 9,
+                    c.indexOf(" of your sequence aligned "));
+            String coverage = c.substring(c.lastIndexOf(" (") + 2,
+                    c.lastIndexOf(" coverage). Click to view detailed"));
+            // System.out.println("coverage" + coverage);
+            try
+            {
+              Phyre2SummaryPojo psp = new Phyre2SummaryPojo();
+              String sn = rowItems.get(0).text();
+              psp.setSerialNo(Integer.valueOf(sn));
+              psp.setTemplateId(rowItems.get(1).text());
+              psp.setCoverage(coverage);
+              psp.setAlignedRange(alignedRange);
+              psp.setConfidence(Double.valueOf(rowItems.get(8).text()));
+              psp.setPid(Integer.valueOf(rowItems.get(9).text()));
+              psp.setTemplateSummary(rowItems.get(10).text());
+              // System.out.println("row  >>>> " + psp.toString());
+              // System.out.println();
+              phyre2Results.add(psp);
+            } catch (NumberFormatException e)
+            {
+              e.printStackTrace();
+            } catch (IndexOutOfBoundsException e)
+            {
+              e.printStackTrace();
+            } catch (Exception e)
+            {
+              e.printStackTrace();
+            }
+          }
+        }
+      }
+      return phyre2Results;
+
+    } catch (Exception e)
+    {
+      e.printStackTrace();
+      return null;
+    }
+  }
+
+  public static DefaultTableModel getTableModel(
+          List<Phyre2SummaryPojo> phyreResults)
+  {
+    if (phyreResults == null)
+    {
+      return null;
+    }
+    DefaultTableModel tableModel = new DefaultTableModel()
+    {
+      @Override
+      public boolean isCellEditable(int row, int column)
+      {
+        return false;
+      }
+
+      @Override
+      public Class<?> getColumnClass(int columnIndex)
+      {
+        switch (columnIndex)
+        {
+        case 0:
+          return Integer.class;
+        case 1:
+          return String.class;
+        case 2:
+          return String.class;
+        case 3:
+          return String.class;
+        case 4:
+          return Double.class;
+        case 5:
+          return Integer.class;
+        case 6:
+          return String.class;
+        default:
+          return String.class;
+        }
+      }
+
+    };
+
+    tableModel.addColumn("#");
+    tableModel.addColumn("Template");
+    tableModel.addColumn("Aligned Range");
+    tableModel.addColumn("Coverage");
+    tableModel.addColumn("Confidence");
+    tableModel.addColumn("%.i.d");
+    tableModel.addColumn("Template Information");
+
+    for (Phyre2SummaryPojo res : phyreResults)
+    {
+      tableModel.addRow(new Object[] { res.getSerialNo(),
+          res.getTemplateId(), res.getAlignedRange(), res.getCoverage(),
+          res.getConfidence(), res.getPid(), res.getTemplateSummary() }); 
+    }
+    return tableModel;
+  }
+
+  public static void configurePhyreResultTable(JTable phyreResultTable)
+  {
+
+    DecimalFormatTableCellRenderer idCellRender = new DecimalFormatTableCellRenderer(
+            true, 0);
+    DecimalFormatTableCellRenderer pidCellRender = new DecimalFormatTableCellRenderer(
+            true, 1);
+    DecimalFormatTableCellRenderer confidenceCellRender = new DecimalFormatTableCellRenderer(
+            true, 1);
+
+    phyreResultTable.getColumn("#").setMinWidth(20);
+    phyreResultTable.getColumn("#").setPreferredWidth(30);
+    phyreResultTable.getColumn("#").setMaxWidth(40);
+    phyreResultTable.getColumn("#").setCellRenderer(idCellRender);
+
+    phyreResultTable.getColumn("Template").setMinWidth(60);
+    phyreResultTable.getColumn("Template").setPreferredWidth(60);
+    phyreResultTable.getColumn("Template").setMaxWidth(90);
+
+    phyreResultTable.getColumn("Aligned Range").setMinWidth(80);
+    phyreResultTable.getColumn("Aligned Range").setPreferredWidth(80);
+    phyreResultTable.getColumn("Aligned Range").setMaxWidth(120);
+
+    phyreResultTable.getColumn("Coverage").setMinWidth(60);
+    phyreResultTable.getColumn("Coverage").setPreferredWidth(60);
+    phyreResultTable.getColumn("Coverage").setMaxWidth(90);
+
+    phyreResultTable.getColumn("Confidence").setMinWidth(60);
+    phyreResultTable.getColumn("Confidence").setPreferredWidth(60);
+    phyreResultTable.getColumn("Confidence").setMaxWidth(90);
+    phyreResultTable.getColumn("Confidence").setCellRenderer(
+            confidenceCellRender);
+
+    phyreResultTable.getColumn("%.i.d").setMinWidth(45);
+    phyreResultTable.getColumn("%.i.d").setPreferredWidth(450);
+    phyreResultTable.getColumn("%.i.d").setMaxWidth(65);
+    phyreResultTable.getColumn("%.i.d").setCellRenderer(pidCellRender);
+
+    phyreResultTable.getColumn("Template Information").setMinWidth(400);
+    phyreResultTable.getColumn("Template Information").setPreferredWidth(
+            600);
+    phyreResultTable.getColumn("Template Information").setMaxWidth(1500);
+  }
+}