Merge branch 'develop' into features/JAL-250_hideredundantseqs
[jalview.git] / src / jalview / ws / sifts / SiftsClient.java
index 27db604..68af7c3 100644 (file)
@@ -21,6 +21,8 @@
 package jalview.ws.sifts;
 
 import jalview.analysis.AlignSeq;
+import jalview.analysis.scoremodels.ScoreMatrix;
+import jalview.analysis.scoremodels.ScoreModels;
 import jalview.api.DBRefEntryI;
 import jalview.api.SiftsClientI;
 import jalview.datamodel.DBRefEntry;
@@ -74,6 +76,12 @@ import MCview.PDBChain;
 
 public class SiftsClient implements SiftsClientI
 {
+  /*
+   * for use in mocking out file fetch for tests only
+   * - reset to null after testing!
+   */
+  private static File mockSiftsFile;
+
   private Entry siftsEntry;
 
   private StructureFile pdb;
@@ -120,8 +128,8 @@ public class SiftsClient implements SiftsClientI
 
   private enum ResidueDetailType
   {
-    NAME_SEC_STRUCTURE("nameSecondaryStructure"), CODE_SEC_STRUCTURE(
-            "codeSecondaryStructure"), ANNOTATION("Annotation");
+    NAME_SEC_STRUCTURE("nameSecondaryStructure"),
+    CODE_SEC_STRUCTURE("codeSecondaryStructure"), ANNOTATION("Annotation");
     private String code;
 
     private ResidueDetailType(String code)
@@ -187,6 +195,14 @@ public class SiftsClient implements SiftsClientI
    */
   public static File getSiftsFile(String pdbId) throws SiftsException
   {
+    /*
+     * return mocked file if it has been set
+     */
+    if (mockSiftsFile != null)
+    {
+      return mockSiftsFile;
+    }
+
     String siftsFileName = SiftsSettings.getSiftDownloadDirectory()
             + pdbId.toLowerCase() + ".xml.gz";
     File siftsFile = new File(siftsFileName);
@@ -212,6 +228,10 @@ public class SiftsClient implements SiftsClientI
           return new File(siftsFileName);
         }
       }
+      else
+      {
+        return siftsFile;
+      }
     }
     try
     {
@@ -241,8 +261,9 @@ public class SiftsClient implements SiftsClientI
     try
     {
       attr = Files.readAttributes(filePath, BasicFileAttributes.class);
-      diffInDays = (int) ((new Date().getTime() - attr.lastModifiedTime()
-              .toMillis()) / (1000 * 60 * 60 * 24));
+      diffInDays = (int) ((new Date().getTime()
+              - attr.lastModifiedTime().toMillis())
+              / (1000 * 60 * 60 * 24));
       // System.out.println("Diff in days : " + diffInDays);
     } catch (IOException e)
     {
@@ -259,8 +280,8 @@ public class SiftsClient implements SiftsClientI
    * @throws SiftsException
    * @throws IOException
    */
-  public static File downloadSiftsFile(String pdbId) throws SiftsException,
-          IOException
+  public static File downloadSiftsFile(String pdbId)
+          throws SiftsException, IOException
   {
     if (pdbId.contains(".cif"))
     {
@@ -277,6 +298,7 @@ public class SiftsClient implements SiftsClientI
       siftsDownloadDir.mkdirs();
     }
     // System.out.println(">> Download ftp url : " + siftsFileFTPURL);
+    // long now = System.currentTimeMillis();
     URL url = new URL(siftsFileFTPURL);
     URLConnection conn = url.openConnection();
     InputStream inputStream = conn.getInputStream();
@@ -290,7 +312,8 @@ public class SiftsClient implements SiftsClientI
     }
     outputStream.close();
     inputStream.close();
-    // System.out.println(">>> File downloaded : " + downloadedSiftsFile);
+    // System.out.println(">>> File downloaded : " + downloadedSiftsFile
+    // + " took " + (System.currentTimeMillis() - now) + "ms");
     return new File(downloadedSiftsFile);
   }
 
@@ -338,11 +361,11 @@ public class SiftsClient implements SiftsClientI
       {
         continue;
       }
-      String canonicalSource = DBRefUtils.getCanonicalName(dbRef
-              .getSource());
+      String canonicalSource = DBRefUtils
+              .getCanonicalName(dbRef.getSource());
       if (isValidDBRefEntry(dbRef)
-              && (canonicalSource.equalsIgnoreCase(DBRefSource.UNIPROT) || canonicalSource
-                      .equalsIgnoreCase(DBRefSource.PDB)))
+              && (canonicalSource.equalsIgnoreCase(DBRefSource.UNIPROT)
+                      || canonicalSource.equalsIgnoreCase(DBRefSource.PDB)))
       {
         return dbRef;
       }
@@ -489,8 +512,9 @@ public class SiftsClient implements SiftsClientI
       int orignalSeqStart = seq.getStart();
       if (orignalSeqStart >= 1)
       {
-        int subSeqStart = (seqStart >= orignalSeqStart) ? seqStart
-                - orignalSeqStart : 0;
+        int subSeqStart = (seqStart >= orignalSeqStart)
+                ? seqStart - orignalSeqStart
+                : 0;
         int subSeqEnd = seqEnd - (orignalSeqStart - 1);
         subSeqEnd = originalSeq.length() < subSeqEnd ? originalSeq.length()
                 : subSeqEnd;
@@ -551,22 +575,11 @@ public class SiftsClient implements SiftsClientI
           {
             pdbRefDb = cRefDb;
           }
-          if (cRefDb.getDbCoordSys()
-                  .equalsIgnoreCase(seqCoordSys.getName())
+          if (cRefDb.getDbCoordSys().equalsIgnoreCase(seqCoordSys.getName())
                   && isAccessionMatched(cRefDb.getDbAccessionId()))
           {
-            String resNumIndexString = cRefDb.getDbResNum()
-                    .equalsIgnoreCase("None") ? String.valueOf(UNASSIGNED)
-                    : cRefDb.getDbResNum();
-            try
-            {
-              currSeqIndex = Integer.valueOf(resNumIndexString);
-            } catch (NumberFormatException nfe)
-            {
-              currSeqIndex = Integer.valueOf(resNumIndexString
-                      .split("[a-zA-Z]")[0]);
-              continue;
-            }
+            currSeqIndex = getLeadingIntegerValue(cRefDb.getDbResNum(),
+                    UNASSIGNED);
             if (pdbRefDb != null)
             {
               break;// exit loop if pdb and uniprot are already found
@@ -579,19 +592,12 @@ public class SiftsClient implements SiftsClientI
         }
         if (currSeqIndex >= seq.getStart() && currSeqIndex <= seq.getEnd())
         {
-          int resNum;
-          try
-          {
-            resNum = (pdbRefDb == null) ? Integer.valueOf(residue
-                    .getDbResNum()) : Integer.valueOf(pdbRefDb
-                    .getDbResNum());
-          } catch (NumberFormatException nfe)
-          {
-            resNum = (pdbRefDb == null) ? Integer.valueOf(residue
-                    .getDbResNum()) : Integer.valueOf(pdbRefDb
-                    .getDbResNum().split("[a-zA-Z]")[0]);
-            continue;
-          }
+
+          int resNum = (pdbRefDb == null)
+                  ? getLeadingIntegerValue(residue.getDbResNum(),
+                          UNASSIGNED)
+                  : getLeadingIntegerValue(pdbRefDb.getDbResNum(),
+                          UNASSIGNED);
 
           if (isResidueObserved(residue)
                   || seqCoordSys == CoordinateSys.UNIPROT)
@@ -606,14 +612,38 @@ public class SiftsClient implements SiftsClientI
             omitNonObserved.add(currSeqIndex);
             ++nonObservedShiftIndex;
           }
-          mapping.put(currSeqIndex - nonObservedShiftIndex, new int[] {
-              Integer.valueOf(resNum), UNASSIGNED });
+          mapping.put(currSeqIndex - nonObservedShiftIndex,
+                  new int[]
+                  { Integer.valueOf(resNum), UNASSIGNED });
         }
       }
     }
   }
 
   /**
+   * Get the leading integer part of a string that begins with an integer.
+   * 
+   * @param input
+   *          - the string input to process
+   * @param failValue
+   *          - value returned if unsuccessful
+   * @return
+   */
+  static int getLeadingIntegerValue(String input, int failValue)
+  {
+    if (input == null)
+    {
+      return failValue;
+    }
+    String[] parts = input.split("(?=\\D)(?<=\\d)");
+    if (parts != null && parts.length > 0 && parts[0].matches("[0-9]+"))
+    {
+      return Integer.valueOf(parts[0]);
+    }
+    return failValue;
+  }
+
+  /**
    * 
    * @param chainId
    *          Target chain to populate mapping of its atom positions.
@@ -787,7 +817,8 @@ public class SiftsClient implements SiftsClientI
    */
   public Entity getEntityByMostOptimalMatchedId(String chainId)
   {
-    // System.out.println("---> advanced greedy entityId matching block entered..");
+    // System.out.println("---> advanced greedy entityId matching block
+    // entered..");
     List<Entity> entities = siftsEntry.getEntity();
     SiftsEntitySortPojo[] sPojo = new SiftsEntitySortPojo[entities.size()];
     int count = 0;
@@ -843,8 +874,8 @@ public class SiftsClient implements SiftsClientI
     return null;
   }
 
-  private class SiftsEntitySortPojo implements
-          Comparable<SiftsEntitySortPojo>
+  private class SiftsEntitySortPojo
+          implements Comparable<SiftsEntitySortPojo>
   {
     public String entityId;
 
@@ -873,8 +904,7 @@ public class SiftsClient implements SiftsClientI
 
     private int nonObservedShiftIndex;
 
-    public SegmentHelperPojo(SequenceI seq,
-            HashMap<Integer, int[]> mapping,
+    public SegmentHelperPojo(SequenceI seq, HashMap<Integer, int[]> mapping,
             TreeMap<Integer, String> resNumMap,
             List<Integer> omitNonObserved, int nonObservedShiftIndex)
     {
@@ -937,7 +967,7 @@ public class SiftsClient implements SiftsClientI
   }
 
   @Override
-  public StringBuffer getMappingOutput(MappingOutputPojo mp)
+  public StringBuilder getMappingOutput(MappingOutputPojo mp)
           throws SiftsException
   {
     String seqRes = mp.getSeqResidue();
@@ -959,10 +989,10 @@ public class SiftsClient implements SiftsClientI
     int nochunks = ((seqRes.length()) / len)
             + ((seqRes.length()) % len > 0 ? 1 : 0);
     // output mappings
-    StringBuffer output = new StringBuffer();
+    StringBuilder output = new StringBuilder(512);
     output.append(NEWLINE);
-    output.append("Sequence \u27f7 Structure mapping details").append(
-            NEWLINE);
+    output.append("Sequence \u27f7 Structure mapping details")
+            .append(NEWLINE);
     output.append("Method: SIFTS");
     output.append(NEWLINE).append(NEWLINE);
 
@@ -980,12 +1010,13 @@ public class SiftsClient implements SiftsClientI
     output.append(String.valueOf(pdbEnd));
     output.append(NEWLINE).append(NEWLINE);
 
+    ScoreMatrix pam250 = ScoreModels.getInstance().getPam250();
     int matchedSeqCount = 0;
     for (int j = 0; j < nochunks; j++)
     {
       // Print the first aligned sequence
-      output.append(new Format("%" + (maxid) + "s").form(seqName)).append(
-              " ");
+      output.append(new Format("%" + (maxid) + "s").form(seqName))
+              .append(" ");
 
       for (int i = 0; i < len; i++)
       {
@@ -998,27 +1029,29 @@ public class SiftsClient implements SiftsClientI
       output.append(NEWLINE);
       output.append(new Format("%" + (maxid) + "s").form(" ")).append(" ");
 
-      // Print out the matching chars
+      /*
+       * Print out the match symbols:
+       * | for exact match (ignoring case)
+       * . if PAM250 score is positive
+       * else a space
+       */
       for (int i = 0; i < len; i++)
       {
         try
         {
           if ((i + (j * len)) < seqRes.length())
           {
-            boolean sameChar = Comparison.isSameResidue(
-                    seqRes.charAt(i + (j * len)),
-                    strRes.charAt(i + (j * len)), false);
-            if (sameChar
-                    && !jalview.util.Comparison.isGap(seqRes.charAt(i
-                            + (j * len))))
+            char c1 = seqRes.charAt(i + (j * len));
+            char c2 = strRes.charAt(i + (j * len));
+            boolean sameChar = Comparison.isSameResidue(c1, c2, false);
+            if (sameChar && !Comparison.isGap(c1))
             {
               matchedSeqCount++;
               output.append("|");
             }
             else if (type.equals("pep"))
             {
-              if (ResidueProperties.getPAM250(seqRes.charAt(i + (j * len)),
-                      strRes.charAt(i + (j * len))) > 0)
+              if (pam250.getPairwiseScore(c1, c2) > 0)
               {
                 output.append(".");
               }
@@ -1055,8 +1088,8 @@ public class SiftsClient implements SiftsClientI
     {
       throw new SiftsException(">>> Low PID detected for SIFTs mapping...");
     }
-    output.append("Length of alignment = " + seqRes.length()).append(
-            NEWLINE);
+    output.append("Length of alignment = " + seqRes.length())
+            .append(NEWLINE);
     output.append(new Format("Percentage ID = %2.2f").form(pid));
     return output;
   }
@@ -1091,4 +1124,9 @@ public class SiftsClient implements SiftsClientI
     return siftsEntry.getDbVersion();
   }
 
+  public static void setMockSiftsFile(File file)
+  {
+    mockSiftsFile = file;
+  }
+
 }