JAL-2616 add ability to change location of family and hmm data
author tzvanaalten <tzvanaalten@LS30916.ad.lifesci.dundee.ac.uk>
Mon, 24 Jul 2017 12:44:41 +0000 (13:44 +0100)
committer tzvanaalten <tzvanaalten@LS30916.ad.lifesci.dundee.ac.uk>
Mon, 24 Jul 2017 12:44:41 +0000 (13:44 +0100)
src/jalview/util/HMMProbabilityDistributionAnalyser.java
src/jalview/util/ProbabilityAnalyserKickstarter.java

index b30487d..6c1932a 100644 (file)
@@ -44,13 +44,13 @@ public class HMMProbabilityDistributionAnalyser
   Map<String, Double> binned = new HashMap<>();
 
   // location of the family file
-  final static String FAMILIES = "H:/Desktop/PFAM/Family/SeedFamilies.seed";
+  String families = "H:/Desktop/PFAM/Family/SeedFamilies.seed";
 
   // location of the file containing the family-clan links
   final static String FAMILIESTOCLAN = "H:/Desktop/PFAM/Family/Clanlinks.dat";
 
   // location of the HMM file
-  final static String HMMS = "H:/Desktop/PFAM/HMMs/Pfam-A.hmm";
+  String hmms = "H:/Desktop/PFAM/HMMs/Pfam-A.hmm";
 
   // suffix for raw file
   final static String RAW = "/Raw.csv";
@@ -59,7 +59,7 @@ public class HMMProbabilityDistributionAnalyser
   final static String BINNED = "/Binned.csv";
 
   // normalisation scale
-  final static double SCALE = 100000;
+  final static double SCALE = 1;
 
   // current position in file
   int currentFilePosition = 0;
@@ -107,7 +107,7 @@ public class HMMProbabilityDistributionAnalyser
   /**
    * Analyses a specified number of families and then saves the data. Before
    * analysing the data, the previous saved data will be imported and after
-   * analysing this data is exported back into the file.
+   * analysing this, the data is exported back into the file.
    * 
    * @param increments
    *          The number of families to read before saving.
@@ -116,17 +116,26 @@ public class HMMProbabilityDistributionAnalyser
   public void run(int increments, boolean keepRawData) throws IOException
   {
     keepRaw = keepRawData;
-    readPreviousData(currentFolder);
+    try
+    {
+      readPreviousData(currentFolder);
+      BufferedReader posReader = new BufferedReader(
+              new FileReader(currentFolder + "/CurrentPosition.txt"));
+
+      String line = posReader.readLine();
+      posReader.close();
+      currentFilePosition = Integer.parseInt(line);
+    } catch (Exception e)
+    {
+      System.out.println("No previous data found");
+    }
+
+
 
-    BufferedReader posReader = new BufferedReader(
-            new FileReader(currentFolder + "/CurrentPosition.txt"));
-    String line = posReader.readLine();
-    posReader.close();
+    BufferedReader inputSTO = new BufferedReader(new FileReader(families));
+    BufferedReader inputHMM = new BufferedReader(new FileReader(hmms));
 
-    BufferedReader inputSTO = new BufferedReader(new FileReader(FAMILIES));
-    BufferedReader inputHMM = new BufferedReader(new FileReader(HMMS));
 
-    currentFilePosition = Integer.parseInt(line);
 
     moveLocationBy(currentFilePosition, inputHMM);
     moveLocationBy(currentFilePosition, inputSTO);
@@ -164,6 +173,86 @@ public class HMMProbabilityDistributionAnalyser
   }
 
   /**
+   * Analyses all families and then saves the data. Before analysing the data,
+   * the previous saved data will be imported and after analysing this, the data
+   * is exported back into the file.
+   * 
+   * @param increments
+   *          The number of families to read before saving.
+   * @throws IOException
+   */
+  public void runToEnd(boolean keepRawData) throws IOException
+  {
+    keepRaw = keepRawData;
+    BufferedReader inputSTO = null;
+    BufferedReader inputHMM = null;
+    int size = 0;
+    try
+    {
+      readPreviousData(currentFolder);
+      BufferedReader posReader = new BufferedReader(
+              new FileReader(currentFolder + "/CurrentPosition.txt"));
+
+      String line = posReader.readLine();
+      posReader.close();
+      currentFilePosition = Integer.parseInt(line);
+      readPreviousData(currentFolder);
+      
+      inputSTO = new BufferedReader(new FileReader(families));
+      inputHMM = new BufferedReader(new FileReader(hmms));
+    } catch (Exception e)
+    {
+      System.out.println("No or incomplete previous data found");
+    }
+
+    
+
+    moveLocationBy(currentFilePosition, inputHMM);
+    moveLocationBy(currentFilePosition, inputSTO);
+
+    int filesRead = 0;
+    int i = 0;
+    inputSTO.mark(20);
+    String check = inputSTO.readLine();
+    inputSTO.reset();
+    while (!"".equals(check) && !" ".equals(check) && check != null)
+      {
+      inputSTO.mark(20);
+      String line = inputSTO.readLine();
+      inputSTO.reset();
+
+        FileParse parserSTO = new FileParse(inputSTO, "",
+                DataSourceType.FILE);
+        readStockholm(parserSTO);
+
+        FileParse parserHMM = new FileParse(inputHMM, "",
+                DataSourceType.FILE);
+        readHMM(parserHMM);
+
+        int count = countValidResidues();
+        processData(count);
+        filesRead++;
+
+        currentFilePosition++;
+        System.out.println(i);
+        i++;
+      inputSTO.mark(20);
+      check = inputSTO.readLine();
+      inputSTO.reset();
+      }
+
+
+    PrintWriter p = new PrintWriter(
+            new File(currentFolder + "/CurrentPosition.txt"));
+    p.print(currentFilePosition);
+    p.close();
+    exportData(currentFolder);
+    raw.clear();
+    binned.clear();
+
+  }
+
+  /**
    * Reads the previous data from both files
    * 
    * @param source
@@ -194,7 +283,9 @@ public class HMMProbabilityDistributionAnalyser
     {
       Scanner scanner = new Scanner(line);
       scanner.useDelimiter(",");
-      binned.put(scanner.next(), scanner.nextDouble());
+      String key = scanner.next();
+      String value = scanner.next();
+      binned.put(key, Double.valueOf(value));
       scanner.close();
       line = input.readLine();
     }
@@ -242,6 +333,10 @@ public class HMMProbabilityDistributionAnalyser
         {
           raw.get(i).add(Double.parseDouble(value));
         }
+        else
+        {
+          raw.get(i).add(null);
+        }
 
         i++;
       }
@@ -263,7 +358,7 @@ public class HMMProbabilityDistributionAnalyser
 
     for (int width = 0; width < sequences.size(); width++)
     {
-      for (int length = 1; length < hmm.getLength(); length++)
+      for (int length = 1; length < hmm.getLength() + 1; length++)
       {
         char symbol;
         int alignPos;
@@ -297,7 +392,7 @@ public class HMMProbabilityDistributionAnalyser
 
     for (int width = 0; width < sequences.size(); width++)
     {
-      for (int length = 1; length < hmm.getLength(); length++)
+      for (int length = 1; length < hmm.getLength() + 1; length++)
       {
         char symbol;
         int alignPos;
@@ -431,7 +526,7 @@ public class HMMProbabilityDistributionAnalyser
    */
   public void printFam(int index) throws IOException
   {
-    BufferedReader br = new BufferedReader(new FileReader(FAMILIES));
+    BufferedReader br = new BufferedReader(new FileReader(families));
 
     moveLocationBy(index, br);
 
@@ -455,7 +550,7 @@ public class HMMProbabilityDistributionAnalyser
    */
   public void printHMM(int index) throws IOException
   {
-    BufferedReader br = new BufferedReader(new FileReader(HMMS));
+    BufferedReader br = new BufferedReader(new FileReader(hmms));
 
     moveLocationBy(index, br);
 
@@ -479,7 +574,7 @@ public class HMMProbabilityDistributionAnalyser
    */
   public void exportFam(int index, String location) throws IOException
   {
-    BufferedReader br = new BufferedReader(new FileReader(FAMILIES));
+    BufferedReader br = new BufferedReader(new FileReader(families));
 
     moveLocationBy(index, br);
 
@@ -518,7 +613,7 @@ public class HMMProbabilityDistributionAnalyser
   {
     String name;
 
-    BufferedReader nameFinder = new BufferedReader(new FileReader(HMMS));
+    BufferedReader nameFinder = new BufferedReader(new FileReader(hmms));
 
     moveLocationBy(index, nameFinder);
 
@@ -536,7 +631,7 @@ public class HMMProbabilityDistributionAnalyser
     String name;
 
     BufferedReader nameFinder = new BufferedReader(
-            new FileReader(FAMILIES));
+            new FileReader(families));
 
     moveLocationBy(index, nameFinder);
 
@@ -560,7 +655,7 @@ public class HMMProbabilityDistributionAnalyser
   {
 
 
-    BufferedReader br = new BufferedReader(new FileReader(HMMS));
+    BufferedReader br = new BufferedReader(new FileReader(hmms));
 
     moveLocationBy(index, br);
 
@@ -603,8 +698,8 @@ public class HMMProbabilityDistributionAnalyser
   {
     BufferedReader clanFinder = new BufferedReader(new FileReader(FAMILIESTOCLAN));
     BufferedReader familyReader = new BufferedReader(
-            new FileReader(FAMILIES));
-    BufferedReader hmmReader = new BufferedReader(new FileReader(HMMS));
+            new FileReader(families));
+    BufferedReader hmmReader = new BufferedReader(new FileReader(hmms));
     HashMap<String, Integer> clanIndexes = new HashMap<>();
     int filePos = 0; 
     int clanCount = 0;
@@ -633,6 +728,7 @@ public class HMMProbabilityDistributionAnalyser
             clanCount++;
         }
 
+
           Integer clanI = clanIndexes.get(clanName);
           String clanPath = directory + "/Clan" + clanI.toString();
           File clanFolder = new File(clanPath);
@@ -661,7 +757,29 @@ public class HMMProbabilityDistributionAnalyser
     clanFinder.close();
       
     }
+
+  public String getFamilies()
+  {
+    return this.families; // location of the family file currently in use
+  }
+
+  public void setFamilies(String families)
+  {
+    this.families = currentFolder + families; // appended to currentFolder as-is
+  }
+
+  public String getHmms()
+  {
+    return this.hmms; // location of the HMM file currently in use
+  }
+
+  public void setHmms(String hmms)
+  {
+    this.hmms = currentFolder + hmms; // appended to currentFolder as-is
+  }
     
+
+
   }
 
 
index 86d9177..999dc1f 100644 (file)
@@ -94,7 +94,7 @@ public class ProbabilityAnalyserKickstarter
       // exported and re-read back into the program. This is to ensure that the
       // program can be terminated without losing a large quantity of data. The
       // increment is the number of families read per 'save'.
-      if (command.indexOf("run") > -1)
+      if (command.indexOf("run") > -1 && !(command.indexOf("ToEnd") > -1))
       {
         try
         {
@@ -117,6 +117,21 @@ public class ProbabilityAnalyserKickstarter
           System.out.println("Command failed");
         }
       }
+      if ((command.indexOf("runToEnd") > -1))
+      {
+        try
+        {
+
+          inputScanner.next();
+          boolean keepRaw = inputScanner.nextBoolean();
+          analyser.runToEnd(keepRaw);
+          System.out.println("Task completed");
+          continue;
+        } catch (Exception e)
+        {
+          System.out.println("Command failed");
+        }
+      }
       // terminates program. Syntax is terminate.
       if (command.indexOf("terminate") > -1)
       {
@@ -163,6 +178,26 @@ public class ProbabilityAnalyserKickstarter
           continue;
 
       }
+      if (command.indexOf("setFamilies") > -1)
+      {
+        inputScanner.next();
+        analyser.setFamilies(inputScanner.next());
+        continue;
+
+      }
+
+      if (command.indexOf("setHMMs") > -1)
+      {
+        inputScanner.next();
+        analyser.setHmms(inputScanner.next());
+        continue;
+
+      }
+      if (command.indexOf("hmmbuild") > -1)
+      {
+        analyser.hmmBuild();
+        continue;
+      }
     }