Merge remote-tracking branch 'origin/tasks/JAL-3070_wsinterfaces' into alpha/JAL...
[jalview.git] / src / jalview / util / HMMProbabilityDistributionAnalyser.java
index 7d66878..66ae552 100644 (file)
@@ -48,13 +48,13 @@ public class HMMProbabilityDistributionAnalyser
   Map<String, Double> binned = new HashMap<>();
 
   // location of the family file
-  String families = "H:/Desktop//PFAM/Family/SeedFamilies.seed";
+  String families = "/media/sf_Shared_Folder/PFAM/Family/SeedFamilies.seed";
 
   // location of the file containing the family-clan links
-  final static String FAMILIESTOCLAN = "H:/Desktop//PFAM/Family/Clanlinks.dat";
+  final static String FAMILIESTOCLAN = "/media/sf_Shared_Folder/PFAM/Family/Clanlinks.dat";
 
   // location of the HMM file
-  String hmms = "H:/Desktop//PFAM/HMMs/Pfam-A.hmm";
+  String hmms = "/media/sf_Shared_Folder/PFAM/HMMs/Pfam-A.hmm";
 
   // suffix for raw file
   final static String RAW = "/Raw.csv";
@@ -185,7 +185,8 @@ public class HMMProbabilityDistributionAnalyser
    *          The number of families to read before saving.
    * @throws IOException
    */
-  public void runToEnd(boolean keepRawData, boolean forClans)
+  public void runToEnd(int minCount, int maxCount, boolean keepRawData,
+          boolean forClans)
           throws IOException
   {
     keepRaw = keepRawData;
@@ -193,18 +194,25 @@ public class HMMProbabilityDistributionAnalyser
     BufferedReader inputHMM = null;
     int size = 0;
     int files = 1;
+    try
+    {
     if (forClans)
     {
-      files = 604;
+        files = 603;
     }
     int filesRead = 0;
     for (int clan = 0; clan < files; clan++)
     {
+      System.out.println(clan);
       String clanPath = "";
       int numberOfFamilies = 0;
       if (forClans)
       {
         clanPath = currentFolder + "/Clan" + clan;
+          if (!new File(clanPath).exists())
+          {
+            continue;
+          }
         BufferedReader famCountReader = new BufferedReader(
                 new FileReader(clanPath + "/NumberOfFamilies.txt"));
         numberOfFamilies = Integer.parseInt(famCountReader.readLine());
@@ -234,16 +242,25 @@ public class HMMProbabilityDistributionAnalyser
           readHMM(inputHMM);
 
         int count = countValidResidues();
-        processData(count);
+            if (count >= minCount && count < maxCount)
+            {
+              processData(count);
+            }
         filesRead++;
           System.out.println(filesRead);
       endReached = atEnd(inputSTO);
       }
       }
     }
-    exportData(currentFolder);
-    raw.clear();
-    binned.clear();
+    } catch (Exception e)
+    {
+      e.printStackTrace();
+    } finally
+    {
+      exportData(currentFolder);
+      raw.clear();
+      binned.clear();
+    }
   }
 
   /**
@@ -356,10 +373,10 @@ public class HMMProbabilityDistributionAnalyser
       {
         char symbol;
         int alignPos;
-        alignPos = hmm.getNodeAlignmentColumn(length);
+        alignPos = hmm.getNodeMapPosition(length);
 
         symbol = sequences.get(width).getCharAt(alignPos);
-        if (ResidueProperties.aminoBackgroundFrequencies
+        if (ResidueProperties.backgroundFrequencies.get("amino")
                 .containsKey(symbol))
         {
           count++;
@@ -383,24 +400,29 @@ public class HMMProbabilityDistributionAnalyser
       raw.add(new ArrayList<Double>());
       rawPos = raw.size() - 1;
     }
-
+    Double total = 0d;
     for (int width = 0; width < sequences.size(); width++)
     {
       for (int length = 1; length < hmm.getLength() + 1; length++)
       {
         char symbol;
         int alignPos;
-        alignPos = hmm.getNodeAlignmentColumn(length);
+        alignPos = hmm.getNodeMapPosition(length);
         
         symbol = sequences.get(width).getCharAt(alignPos);
-        if (ResidueProperties.aminoBackgroundFrequencies
+        if (ResidueProperties.backgroundFrequencies.get("amino")
                 .containsKey(symbol))
         {
           Double prob;
           Float bfreq;
           Double llr;
           prob = hmm.getMatchEmissionProbability(alignPos, symbol);
-          bfreq = ResidueProperties.aminoBackgroundFrequencies.get(symbol);
+          bfreq = ResidueProperties.backgroundFrequencies.get("amino")
+                  .get(symbol);
+          if (prob == 0 || bfreq == 0)
+          {
+            System.out.println("error");
+          }
           llr = Math.log(prob / bfreq);
           if (keepRaw)
           {
@@ -409,6 +431,7 @@ public class HMMProbabilityDistributionAnalyser
 
           String output;
           output = String.format("%.1f", llr);
+          total += Double.parseDouble(output);
           if ("-0.0".equals(output))
           {
             output = "0.0";
@@ -427,6 +450,7 @@ public class HMMProbabilityDistributionAnalyser
         }
       }
     }
+    System.out.println(total / count);
   }
 
 
@@ -462,13 +486,8 @@ public class HMMProbabilityDistributionAnalyser
   {
     FileParse parserHMM = new FileParse(inputHMM, "", DataSourceType.FILE);
     HMMFile file = new HMMFile(parserHMM);
-    file.parse();
     hmm = file.getHMM();
 
-    if (reference != null)
-    {
-      hmm.mapToReferenceAnnotation(reference);
-    }
 
   }
 
@@ -709,6 +728,7 @@ public class HMMProbabilityDistributionAnalyser
     BufferedReader familyReader = new BufferedReader(
             new FileReader(families));
     BufferedReader hmmReader = new BufferedReader(new FileReader(hmms));
+    int families = 0;
     // moveLocationBy(7000, familyReader);
     // moveLocationBy(7000, clanFinder);
     // moveLocationBy(7000, hmmReader);
@@ -721,10 +741,6 @@ public class HMMProbabilityDistributionAnalyser
     
     while (!"".equals(line) && !" ".equals(line) && line != null)
     {
-      if (line.contains("HATP") || line.contains("CL0025"))
-      {
-        System.out.println(filePos);
-      }
      String clanName;
       boolean inClan = false;
      while (!(line.indexOf("//") > -1))
@@ -732,6 +748,8 @@ public class HMMProbabilityDistributionAnalyser
        
       if (line.indexOf("#=GF CL") > -1)
       {
+          families++;
+          System.out.println(families);
           inClan = true;
         Scanner scanner = new Scanner(line);
         scanner.next();
@@ -816,7 +834,7 @@ public class HMMProbabilityDistributionAnalyser
     int alignmentsExported = 0;
     for (int clan = 0; clan < 604; clan++)
     {
-
+      System.out.println(clan);
       int famCount = 0;
       String clanPath = clansLocation + "/Clan" + clan;
       int numberOfFamilies;
@@ -846,7 +864,7 @@ public class HMMProbabilityDistributionAnalyser
         int hmmIndex = getRandom(indexes);
         String famPath = clanPath + "/Families/Fam" + family + ".sto";
         String hmmPath = clanPath + "/HMMs/HMM" + hmmIndex + ".hmm";
-        String command = "H:/Desktop//hmmer/binaries/hmmalign --mapali "
+        String command = "/media/sf_Shared_Folder/hmmer/binaries/hmmalign --mapali "
                 + clanPath + "/Families/Fam" + hmmIndex + ".sto"
                 + " --trim ";
         command += hmmPath + " ";
@@ -902,8 +920,6 @@ public class HMMProbabilityDistributionAnalyser
 
         alignmentsExported++;
 
-        System.out.println(alignmentsExported + " alignments exported");
-        System.out.println("At clan " + clan);
 
       }
       PrintWriter writer = new PrintWriter(