JAL-2434 omit unmapped positions from mapping
[jalview.git] / src / jalview / ws / phyre2 / Phyre2Client.java
index 0a29412..80dc841 100644 (file)
@@ -3,6 +3,7 @@ package jalview.ws.phyre2;
 import jalview.datamodel.AlignmentI;
 import jalview.datamodel.SequenceI;
 import jalview.fts.core.DecimalFormatTableCellRenderer;
+import jalview.io.AppletFormatAdapter;
 import jalview.io.DataSourceType;
 import jalview.io.FileFormat;
 import jalview.io.FormatAdapter;
@@ -14,7 +15,8 @@ import jalview.structures.models.MappingOutputModel;
 import jalview.util.Comparison;
 import jalview.util.Format;
 
-import java.io.File;
+import java.io.BufferedReader;
+import java.io.FileReader;
 import java.io.IOException;
 import java.io.PrintStream;
 import java.util.ArrayList;
@@ -24,24 +26,24 @@ import java.util.List;
 import javax.swing.JTable;
 import javax.swing.table.DefaultTableModel;
 
-import org.jsoup.Jsoup;
-import org.jsoup.nodes.Document;
-import org.jsoup.nodes.Element;
-import org.jsoup.select.Elements;
-
 public class Phyre2Client extends StructureMappingClient
 {
   private final static String NEWLINE = System.lineSeparator();
 
-  public static final int UNASSIGNED = -1;
-
-  private final static String PATH_SEPARATOR = File.separator;
+  private String fastaMappingFile;
 
   public Phyre2Client(StructureFile structureFile)
   {
     this.structureFile = structureFile;
   }
 
+  public StructureMapping getStructureMapping(SequenceI seq,
+          String pdbFile, String fMappingFile, String chain) // overload that also takes the FASTA mapping file path
+  {
+    this.fastaMappingFile = fMappingFile; // kept on the instance; getFastaMappingFile() returns this field
+    return getStructureMapping(seq, pdbFile, chain); // delegate to the three-argument overload
+  }
+
   @Override
   public StructureMapping getStructureMapping(SequenceI seq,
           String pdbFile, String chain)
@@ -76,8 +78,11 @@ public class Phyre2Client extends StructureMappingClient
     AlignmentI seq2Phyre2ModelFastaMapping = null;
     try
     {
-      seq2Phyre2ModelFastaMapping = new FormatAdapter().readFile(
-              getFastaMappingFile(), DataSourceType.FILE, FileFormat.Fasta);
+      String fastaFile = getFastaMappingFile();
+      DataSourceType protocol = AppletFormatAdapter
+              .checkProtocol(fastaFile);
+      seq2Phyre2ModelFastaMapping = new FormatAdapter().readFile(fastaFile,
+              protocol, FileFormat.Fasta);
     } catch (IOException e1)
     {
       e1.printStackTrace();
@@ -95,24 +100,25 @@ public class Phyre2Client extends StructureMappingClient
     tStructureRes.setEnd(structureFile.getSeqsAsArray()[0].getEnd());
     try
     {
-      int sequenceResLenght = tSequenceRes.getLength();
-      int structureResLenght = tStructureRes.getLength();
-      if (sequenceResLenght == structureResLenght)
+      int sequenceResLength = tSequenceRes.getLength();
+      int structureResLength = tStructureRes.getLength();
+      if (sequenceResLength == structureResLength)
       {
         int prevStructResNum = -1;
-        int alignmentLenght = sequenceResLenght + tSequenceRes.getStart();
-        for (int x = 0; x < alignmentLenght; x++)
+        int alignmentLength = sequenceResLength + tSequenceRes.getStart();
+        for (int x = 0; x < alignmentLength; x++)
         {
           int alignSeqResidueIndex = tSequenceRes.findIndex(x);
           int structResNum = tStructureRes
                   .findPosition(alignSeqResidueIndex);
           int sequenceResNum = tSequenceRes
                   .findPosition(alignSeqResidueIndex - 1);
-          boolean sameResNum = (structResNum == prevStructResNum);
-          // System.out.println(sequenceResNum + " : "
-          // + (sameResNum ? -1 : prevStructResNum));
-          mapping.put(sequenceResNum, new int[] {
-              sameResNum ? -1 : prevStructResNum, -1 });
+          if (structResNum != prevStructResNum)
+          {
+            // System.out.println(sequenceResNum + " : " + prevStructResNum);
+            mapping.put(sequenceResNum, new int[] { prevStructResNum,
+                StructureMapping.UNASSIGNED });
+          }
           prevStructResNum = structResNum;
         }
       }
@@ -121,6 +127,10 @@ public class Phyre2Client extends StructureMappingClient
       e.printStackTrace();
     }
 
+    /*
+     * now populate atom positions for structure residues (and remove
+     * residue if atom position cannot be found)
+     */
     try
     {
       populateAtomPositions(" ", mapping);
@@ -160,11 +170,12 @@ public class Phyre2Client extends StructureMappingClient
 
   private String getFastaMappingFile()
   {
-    File phyre2ModelFile = new File(structureFile.getDataName());
-    String phyre2ModelResultDir = phyre2ModelFile.getParent();
-    String modelId = structureFile.getId().substring(0,
-            structureFile.getId().lastIndexOf(".pdb"));
-    return phyre2ModelResultDir + PATH_SEPARATOR + modelId + ".fasta";
+    return fastaMappingFile; // caller-supplied path; no longer derived from the structure file's directory and id
+  }
+
+  void setFastaMappingFile(String fastaMappingFile) // package-private setter
+  {
+    this.fastaMappingFile = fastaMappingFile; // the value getFastaMappingFile() returns
   }
 
   @Override
@@ -282,10 +293,6 @@ public class Phyre2Client extends StructureMappingClient
       output.append(NEWLINE).append(NEWLINE);
     }
     float pid = (float) matchedSeqCount / seqRes.length() * 100;
-    // if (pid < SiftsSettings.getFailSafePIDThreshold())
-    // {
-    // throw new Exception(">>> Low PID detected for Phyre2 mapping...");
-    // }
     output.append("Length of alignment = " + seqRes.length()).append(
             NEWLINE);
     output.append(new Format("Percentage ID = %2.2f").form(pid));
@@ -293,73 +300,30 @@ public class Phyre2Client extends StructureMappingClient
   }
 
 
-
-  public static List<Phyre2SummaryPojo> parsePhyre2ResultSummaryTable(
-          String html)
+  // Reads the file at path crudeList, one whitespace-separated row per line; returns the parsed rows (never null).
+  public static List<Phyre2SummaryPojo> parsePhyreCrudeList(String crudeList)
   {
     List<Phyre2SummaryPojo> phyre2Results = new ArrayList<Phyre2SummaryPojo>();
-    try
+    try (BufferedReader br = new BufferedReader(new FileReader(crudeList)))
     {
-      File in = new File(html);
-      Document doc = Jsoup.parse(in, null);
-      // Document doc = Jsoup.connect(html).get();
-      Elements tableElements = doc.select("table.midshade");
-      for (Element table : tableElements)
+      String line;
+      while ((line = br.readLine()) != null)
       {
-        System.out.println();
-        Elements tableRowElements = table.select(":not(thead) tr");
-        for (int i = 0; i < tableRowElements.size(); i++)
-        {
-          Element row = tableRowElements.get(i);
-          Elements rowItems = row.select("td");
-          if (rowItems.size() > 11)
-          {
-            // for (int j = 0; j < rowItems.size(); j++)
-            // {
-            // System.out.println(">>> r:" + j + "  =  "
-            // + rowItems.get(j).text());
-            // }
-
-            String c = rowItems.get(6).select("input").attr("onmouseover");
-            String alignedRange = c.substring(c.indexOf("Residues ") + 9,
-                    c.indexOf(" of your sequence aligned "));
-            String coverage = c.substring(c.lastIndexOf(" (") + 2,
-                    c.lastIndexOf(" coverage). Click to view detailed"));
-            // System.out.println("coverage" + coverage);
-            try
-            {
-              Phyre2SummaryPojo psp = new Phyre2SummaryPojo();
-              String sn = rowItems.get(0).text();
-              psp.setSerialNo(Integer.valueOf(sn));
-              psp.setTemplateId(rowItems.get(1).text());
-              psp.setCoverage(coverage);
-              psp.setAlignedRange(alignedRange);
-              psp.setConfidence(Double.valueOf(rowItems.get(8).text()));
-              psp.setPid(Integer.valueOf(rowItems.get(9).text()));
-              psp.setTemplateSummary(rowItems.get(10).text());
-              // System.out.println("row  >>>> " + psp.toString());
-              // System.out.println();
-              phyre2Results.add(psp);
-            } catch (NumberFormatException e)
-            {
-              e.printStackTrace();
-            } catch (IndexOutOfBoundsException e)
-            {
-              e.printStackTrace();
-            } catch (Exception e)
-            {
-              e.printStackTrace();
-            }
-          }
-        }
+        String[] lineData = line.trim().split("\\s+"); // tolerate leading blanks, tabs and repeated spaces
+        if (lineData.length < 6) { continue; } // blank or truncated row: skip it and keep parsing
+        Phyre2SummaryPojo psp = new Phyre2SummaryPojo();
+        psp.setSerialNo(Integer.valueOf(lineData[0])); // a non-numeric field still ends the parse via the catch below
+        psp.setTemplateId(lineData[1]);
+        psp.setConfidence(100 * Double.valueOf(lineData[2]));
+        psp.setPid(Integer.valueOf(lineData[3]));
+        psp.setAlignedRange(lineData[4] + " - " + lineData[5]); // fields 4 and 5 joined as "start - end"
+        phyre2Results.add(psp); // coverage and template summary are not set from this file
       }
-      return phyre2Results;
-
     } catch (Exception e)
     {
       e.printStackTrace();
-      return null;
     }
+    return phyre2Results;
   }
 
   public static DefaultTableModel getTableModel(
@@ -436,8 +400,8 @@ public class Phyre2Client extends StructureMappingClient
     phyreResultTable.getColumn("#").setCellRenderer(idCellRender);
 
     phyreResultTable.getColumn("Template").setMinWidth(60);
-    phyreResultTable.getColumn("Template").setPreferredWidth(60);
-    phyreResultTable.getColumn("Template").setMaxWidth(90);
+    phyreResultTable.getColumn("Template").setPreferredWidth(90);
+    phyreResultTable.getColumn("Template").setMaxWidth(150);
 
     phyreResultTable.getColumn("Aligned Range").setMinWidth(80);
     phyreResultTable.getColumn("Aligned Range").setPreferredWidth(80);