JAL-2616 add ability to perform random alignments within clans
author TZVanaalten <TZVanaalten@LS30916.ad.lifesci.dundee.ac.uk>
Tue, 25 Jul 2017 11:28:50 +0000 (12:28 +0100)
committer TZVanaalten <TZVanaalten@LS30916.ad.lifesci.dundee.ac.uk>
Tue, 25 Jul 2017 11:28:50 +0000 (12:28 +0100)
src/jalview/util/HMMProbabilityDistributionAnalyser.java
src/jalview/util/ProbabilityAnalyserKickstarter.java
test/jalview/util/HMMProbabilityDistributionAnalyserTest.java

index 6c1932a..520c874 100644 (file)
@@ -14,11 +14,13 @@ import java.io.FileNotFoundException;
 import java.io.FileOutputStream;
 import java.io.FileReader;
 import java.io.IOException;
+import java.io.InputStreamReader;
 import java.io.PrintWriter;
 import java.util.ArrayList;
 import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
+import java.util.Random;
 import java.util.Scanner;
 import java.util.Vector;
 
@@ -66,6 +68,8 @@ public class HMMProbabilityDistributionAnalyser
 
   final static String NL = "\n";
 
+  Random generator = new Random();
+
   // current directory
   String currentFolder;
 
@@ -145,13 +149,9 @@ public class HMMProbabilityDistributionAnalyser
     while (filesRead < increments)
     {
 
-      FileParse parserSTO = new FileParse(inputSTO, "",
-              DataSourceType.FILE);
-      readStockholm(parserSTO);
+      readStockholm(inputSTO);
 
-      FileParse parserHMM = new FileParse(inputHMM, "",
-              DataSourceType.FILE);
-      readHMM(parserHMM);
+      readHMM(inputHMM);
 
         int count = countValidResidues();
         processData(count);
@@ -212,22 +212,11 @@ public class HMMProbabilityDistributionAnalyser
 
     int filesRead = 0;
     int i = 0;
-    inputSTO.mark(20);
-    String check = inputSTO.readLine();
-    inputSTO.reset();
-    while (!"".equals(check) && !" ".equals(check) && check != null)
+    boolean endReached = atEnd(inputSTO);
+    while (!endReached)
       {
-      inputSTO.mark(20);
-      String line = inputSTO.readLine();
-      inputSTO.reset();
-
-        FileParse parserSTO = new FileParse(inputSTO, "",
-                DataSourceType.FILE);
-        readStockholm(parserSTO);
-
-        FileParse parserHMM = new FileParse(inputHMM, "",
-                DataSourceType.FILE);
-        readHMM(parserHMM);
+      readStockholm(inputSTO);
+      readHMM(inputHMM);
 
         int count = countValidResidues();
         processData(count);
@@ -236,9 +225,7 @@ public class HMMProbabilityDistributionAnalyser
         currentFilePosition++;
         System.out.println(i);
         i++;
-      inputSTO.mark(20);
-      check = inputSTO.readLine();
-      inputSTO.reset();
+      endReached = atEnd(inputSTO);
       }
 
 
@@ -442,9 +429,10 @@ public class HMMProbabilityDistributionAnalyser
    * @param source
    * @throws IOException
    */
-  public void readStockholm(FileParse source) throws IOException
+  public void readStockholm(BufferedReader inputSTO) throws IOException // now takes the open reader; callers no longer build the FileParse themselves
   {
-    StockholmFile file = new StockholmFile(source);
+    FileParse parserSTO = new FileParse(inputSTO, "", DataSourceType.FILE); // same wrapping the callers used to do before invoking this method
+    StockholmFile file = new StockholmFile(parserSTO); // NOTE(review): presumably parses on construction, since getSeqs() is called straight after - confirm
     sequences = file.getSeqs();
   }
 
@@ -454,10 +442,10 @@ public class HMMProbabilityDistributionAnalyser
    * @param source
    * @throws IOException
    */
-  public void readHMM(FileParse source) throws IOException
+  public void readHMM(BufferedReader inputHMM) throws IOException
   {
-
-    HMMFile file = new HMMFile(source);
+    FileParse parserHMM = new FileParse(inputHMM, "", DataSourceType.FILE);
+    HMMFile file = new HMMFile(parserHMM);
     file.parse();
     hmm = file.getHMM();
 
@@ -592,12 +580,12 @@ public class HMMProbabilityDistributionAnalyser
 
   }
 
-  public void exportFile(BufferedReader br, String location)
+  public void exportFile(BufferedReader br, String location, boolean append)
           throws IOException
   {
     String line = br.readLine();
     PrintWriter writer = new PrintWriter(
-            new FileOutputStream(new File(location), true));
+            new FileOutputStream(location, append));
     while (!"//".equals(line))
     {
       writer.println(line);
@@ -701,6 +689,7 @@ public class HMMProbabilityDistributionAnalyser
             new FileReader(families));
     BufferedReader hmmReader = new BufferedReader(new FileReader(hmms));
     HashMap<String, Integer> clanIndexes = new HashMap<>();
+    ArrayList<Integer> familyCounts = new ArrayList<>();
     int filePos = 0; 
     int clanCount = 0;
     String line;
@@ -726,23 +715,29 @@ public class HMMProbabilityDistributionAnalyser
         {
           clanIndexes.put(clanName, clanCount);
             clanCount++;
+            familyCounts.add(0);
         }
 
 
           Integer clanI = clanIndexes.get(clanName);
           String clanPath = directory + "/Clan" + clanI.toString();
-          File clanFolder = new File(clanPath);
-          String famPath = clanPath + "/Families.sto";
-          String hmmPath = clanPath + "/HMMs.hmm";
-          if (!clanFolder.exists())
-        {
-            clanFolder.mkdir();
-        }
-          exportFile(familyReader, famPath);
-          exportFile(hmmReader, hmmPath);
-
+          createFolders(clanPath);
+
+          int index = clanIndexes.get(clanName);
+          exportFile(familyReader,
+                  clanPath + "/Families/Fam" + familyCounts.get(index)
+                          + ".sto",
+                  false);
+          exportFile(hmmReader,
+                  clanPath + "/HMMs/HMM" + familyCounts.get(index) + ".hmm",
+                  false);
+
+          int count = familyCounts.get(index);
+          count++;
+          familyCounts.set(index, count);
       }
         line = clanFinder.readLine();
+
       }
       if (!inClan)
       {
@@ -755,6 +750,15 @@ public class HMMProbabilityDistributionAnalyser
 
      }
     clanFinder.close();
+
+    for (int clan = 0; clan < clanCount; clan++)
+    {
+      PrintWriter writer = new PrintWriter(
+              directory + "/Clan" + clan + "/NumberOfFamilies.txt");
+      int count = familyCounts.get(clan);
+      writer.print(count);
+      writer.close();
+    }
       
     }
 
@@ -778,8 +782,122 @@ public class HMMProbabilityDistributionAnalyser
     this.hmms = currentFolder + hmms;
   }
     
+  /**
+   * For each family of each clan, runs the external hmmalign program to align
+   * the family (Families/Fam&lt;k&gt;.sto) against the HMM of a different,
+   * randomly chosen family of the same clan (HMMs/HMM&lt;j&gt;.hmm). The
+   * alignment is written to exportLocation/Clan&lt;n&gt;/Fam&lt;k&gt;.sto, the
+   * HMM used is exported alongside it, and the number of alignments produced
+   * for the clan is written to NumberOfFamilies.txt in the export folder.
+   * <p>
+   * A clan that contains only one family has no other HMM to align against,
+   * so nothing is exported for it and its recorded count is 0.
+   * 
+   * @param exportLocation
+   *          folder under which one Clan&lt;n&gt; folder per clan is created
+   * @param clansLocation
+   *          folder holding the Clan&lt;n&gt; input folders, each with a
+   *          NumberOfFamilies.txt file
+   * @throws IOException
+   *           if a clan's files cannot be read or written, or hmmalign
+   *           cannot be started
+   * @throws InterruptedException
+   *           if interrupted while waiting for hmmalign to finish
+   */
+  public void alignWithinClan(String exportLocation, String clansLocation)
+          throws IOException, InterruptedException
+  {
+    int alignmentsExported = 0;
+    // NOTE(review): 604 is a hard-coded clan count, presumably that of the
+    // data set this was written for - confirm before running on other data
+    for (int clan = 0; clan < 604; clan++)
+    {
+      int famCount = 0;
+      String clanPath = clansLocation + "/Clan" + clan;
+      int numberOfFamilies;
+      // try-with-resources closes the reader even if the count is malformed
+      try (BufferedReader br = new BufferedReader(
+              new FileReader(clanPath + "/NumberOfFamilies.txt")))
+      {
+        numberOfFamilies = Integer.parseInt(br.readLine());
+      }
+      String commandExportLocation = exportLocation + "/Clan" + clan;
+      createFolders(commandExportLocation);
+      for (int family = 0; family < numberOfFamilies; family++)
+      {
+        // candidate HMMs: every family of the clan except the current one
+        ArrayList<Integer> indexes = new ArrayList<>();
+        for (int i = 0; i < numberOfFamilies; i++)
+        {
+          if (i != family)
+          {
+            indexes.add(i);
+          }
+        }
+        if (indexes.isEmpty())
+        {
+          // single-family clan: there is no other HMM to pick, and
+          // getRandom() would fail on an empty list
+          continue;
+        }
+        famCount++;
+        int hmmIndex = getRandom(indexes);
+        String famPath = clanPath + "/Families/Fam" + family + ".sto";
+        String hmmPath = clanPath + "/HMMs/HMM" + hmmIndex + ".hmm";
+
+        // arguments are passed individually so that paths containing spaces
+        // are not split; stderr is merged into stdout so the single reader
+        // thread drains both and hmmalign cannot stall on a full pipe
+        // NOTE(review): the hmmalign location is machine specific
+        ProcessBuilder builder = new ProcessBuilder("H:/Documents/hmmalign",
+                "-o", commandExportLocation + "/Fam" + family + ".sto",
+                hmmPath, famPath);
+        builder.redirectErrorStream(true);
+        final Process p = builder.start();
+
+        Thread outputPump = new Thread(new Runnable()
+        {
+          @Override
+          public void run()
+          {
+            try (BufferedReader input = new BufferedReader(
+                    new InputStreamReader(p.getInputStream())))
+            {
+              String line;
+              while ((line = input.readLine()) != null)
+              {
+                System.out.println(line);
+              }
+            } catch (IOException e)
+            {
+              e.printStackTrace();
+            }
+          }
+        });
+        outputPump.start();
 
+        p.waitFor();
+        // make sure all of hmmalign's output has been echoed before moving on
+        outputPump.join();
+
+        exportHMM(hmmIndex,
+                commandExportLocation + "/HMMs/HMM" + family + ".hmm");
+
+        alignmentsExported++;
+
+        System.out.println(alignmentsExported + " alignments exported");
+
+      }
+      try (PrintWriter writer = new PrintWriter(
+              commandExportLocation + "/NumberOfFamilies.txt"))
+      {
+        writer.print(famCount);
+      }
+    }
 
   }
 
+  /**
+   * Reports whether the reader has run out of usable input, i.e. it is at end
+   * of stream or positioned at the start of an empty line. The reader's
+   * position is left unchanged.
+   * <p>
+   * Only a single character is peeked instead of a whole line: reading a line
+   * longer than the mark's read-ahead limit can invalidate the mark, which
+   * makes the following reset() fail with an IOException.
+   * 
+   * @param br
+   *          reader to inspect
+   * @return true if there is no further input or the next line is empty
+   * @throws IOException
+   *           if the underlying reader fails
+   */
+  public boolean atEnd(BufferedReader br) throws IOException
+  {
+    // a limit of 2 leaves room for the '\n' of a "\r\n" pair, which read()
+    // may silently skip before returning the character of interest
+    br.mark(2);
+    int next = br.read();
+    br.reset();
+    return next == -1 || next == '\n' || next == '\r';
+  }
+
+  /**
+   * Draws the entry at a pseudo-random position of the list, deletes that
+   * entry from the list and hands its value back.
+   * 
+   * @param list
+   *          candidate values; one element shorter after the call
+   * @return the value that was removed
+   */
+  public int getRandom(ArrayList<Integer> list)
+  {
+    final int position = generator.nextInt(list.size());
+    final int chosen = list.get(position);
+    // position is an int, so this is remove-by-index, not remove-by-value
+    list.remove(position);
+    return chosen;
+  }
+
+  /**
+   * Ensures that the folder for a clan exists together with its "Families"
+   * and "HMMs" sub-folders. Each folder is handled independently, so a
+   * missing "HMMs" folder is created even when "Families" already exists.
+   * 
+   * @param clanPath
+   *          path of the clan folder
+   */
+  public void createFolders(String clanPath)
+  {
+    String[] folders = { clanPath, clanPath + "/Families",
+        clanPath + "/HMMs" };
+    for (String folder : folders)
+    {
+      // mkdirs() also creates missing parent folders and simply returns
+      // false when the directory is already there
+      new File(folder).mkdirs();
+    }
+  }
+}
+
+
+
 
index 999dc1f..ffc1dec 100644 (file)
@@ -116,6 +116,7 @@ public class ProbabilityAnalyserKickstarter
         {
           System.out.println("Command failed");
         }
+        continue;
       }
       if ((command.indexOf("runToEnd") > -1))
       {
@@ -126,11 +127,11 @@ public class ProbabilityAnalyserKickstarter
           boolean keepRaw = inputScanner.nextBoolean();
           analyser.runToEnd(keepRaw);
           System.out.println("Task completed");
-          continue;
         } catch (Exception e)
         {
           System.out.println("Command failed");
         }
+        continue;
       }
       // terminates program. Syntax is terminate.
       if (command.indexOf("terminate") > -1)
@@ -155,7 +156,9 @@ public class ProbabilityAnalyserKickstarter
         } catch (Exception e)
         {
           System.out.println("Command failed");
+
         }
+        continue;
       }
 
       if (command.indexOf("getFamName") > -1)
@@ -164,12 +167,12 @@ public class ProbabilityAnalyserKickstarter
         {
         inputScanner.next();
         System.out.println(analyser.getFamilyName(inputScanner.nextInt()));
-          inputScanner.close();
-          continue;
+
         } catch (Exception e)
         {
           System.out.println("Command failed");
         }
+        continue;
       }
       if (command.indexOf("sortIntoClans") > -1)
       {
@@ -193,11 +196,18 @@ public class ProbabilityAnalyserKickstarter
         continue;
 
       }
-      if (command.indexOf("hmmbuild") > -1)
+
+      if (command.indexOf("alignWithinClans") > -1)
       {
-        analyser.hmmBuild();
+        inputScanner.next();
+        String export = inputScanner.next();
+        String clans = inputScanner.next();
+        analyser.alignWithinClan(export, clans);
         continue;
+
       }
+
+      System.out.println("Unrecognised command");
     }
 
 
index 815ea66..3e775cc 100644 (file)
@@ -7,8 +7,6 @@ import jalview.datamodel.HMMNode;
 import jalview.datamodel.HiddenMarkovModel;
 import jalview.datamodel.Sequence;
 import jalview.datamodel.SequenceI;
-import jalview.io.DataSourceType;
-import jalview.io.FileParse;
 
 import java.io.BufferedReader;
 import java.io.FileReader;
@@ -103,10 +101,8 @@ public class HMMProbabilityDistributionAnalyserTest {
             "test/jalview/util/test_Fams_for_probability_analysis"));
     BufferedReader brHMM = new BufferedReader(new FileReader(
             "test/jalview/util/test_HMMs_for_probability_analysis"));
-    FileParse parserFam = new FileParse(brFam, "", DataSourceType.FILE);
-    FileParse parserHMM = new FileParse(brHMM, "", DataSourceType.FILE);
-    analyser.readStockholm(parserFam);
-    analyser.readHMM(parserHMM);
+    analyser.readStockholm(brFam);
+    analyser.readHMM(brHMM);
     analyser.processData(6);
     Map<String, Double> map = analyser.binned;
     List<ArrayList<Double>> list = analyser.raw;