X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;ds=sidebyside;f=src%2Fjalview%2Futil%2FHMMProbabilityDistributionAnalyser.java;h=24b3bc3cbf288e41da1430b58c841b6fb79f2dd5;hb=d6cace53173ae859bfd93f5e8a13be427864afd1;hp=520c87454d3acf7e8e526fa35d5a41b5fe8638dc;hpb=0772d75dd19824c5d960bd16f8e9efbb977c4b53;p=jalview.git diff --git a/src/jalview/util/HMMProbabilityDistributionAnalyser.java b/src/jalview/util/HMMProbabilityDistributionAnalyser.java index 520c874..24b3bc3 100644 --- a/src/jalview/util/HMMProbabilityDistributionAnalyser.java +++ b/src/jalview/util/HMMProbabilityDistributionAnalyser.java @@ -1,5 +1,6 @@ package jalview.util; +import jalview.datamodel.AlignmentAnnotation; import jalview.datamodel.HiddenMarkovModel; import jalview.datamodel.SequenceI; import jalview.io.DataSourceType; @@ -34,6 +35,7 @@ import java.util.Vector; */ public class HMMProbabilityDistributionAnalyser { + AlignmentAnnotation reference = null; Vector sequences; @@ -46,13 +48,13 @@ public class HMMProbabilityDistributionAnalyser Map binned = new HashMap<>(); // location of the family file - String families = "H:/Desktop/PFAM/Family/SeedFamilies.seed"; + String families = "/media/sf_Shared_Folder/PFAM/Family/SeedFamilies.seed"; // location of the file containing the family-clan links - final static String FAMILIESTOCLAN = "H:/Desktop/PFAM/Family/Clanlinks.dat"; + final static String FAMILIESTOCLAN = "/media/sf_Shared_Folder/PFAM/Family/Clanlinks.dat"; // location of the HMM file - String hmms = "H:/Desktop/PFAM/HMMs/Pfam-A.hmm"; + String hmms = "/media/sf_Shared_Folder/PFAM/HMMs/Pfam-A.hmm"; // suffix for raw file final static String RAW = "/Raw.csv"; @@ -103,6 +105,7 @@ public class HMMProbabilityDistributionAnalyser while (!"//".equals(line)) { line = br.readLine(); + } } @@ -111,7 +114,8 @@ public class HMMProbabilityDistributionAnalyser /** * Analyses a specified number of families and then saves the data. Before * analysing the data, the previous saved data will be imported and after - * analysing this, the data is exported back into the file. + * analysing this, the data is exported back into the file. The file must be + * in flat file format. * * @param increments * The number of families to read before saving. @@ -175,68 +179,84 @@ public class HMMProbabilityDistributionAnalyser /** * Analyses all families and then saves the data. Before analysing the data, * the previous saved data will be imported and after analysing this, the data - * is exported back into the file. + * is exported back into the file. The file must be in flat file format. * * @param increments * The number of families to read before saving. * @throws IOException */ - public void runToEnd(boolean keepRawData) throws IOException + public void runToEnd(boolean keepRawData, boolean forClans) + throws IOException { keepRaw = keepRawData; BufferedReader inputSTO = null; BufferedReader inputHMM = null; int size = 0; + int files = 1; try { - readPreviousData(currentFolder); - BufferedReader posReader = new BufferedReader( - new FileReader(currentFolder + "/CurrentPosition.txt")); - - String line = posReader.readLine(); - posReader.close(); - currentFilePosition = Integer.parseInt(line); - readPreviousData(currentFolder); - - inputSTO = new BufferedReader(new FileReader(families)); - inputHMM = new BufferedReader(new FileReader(hmms)); - } catch (Exception e) + if (forClans) { - System.out.println("No or incomplete previous data found"); + files = 603; } + int filesRead = 0; + for (int clan = 0; clan < files; clan++) + { + System.out.println(clan); + String clanPath = ""; + int numberOfFamilies = 0; + if (forClans) + { + clanPath = currentFolder + "/Clan" + clan; + if (!new File(clanPath).exists()) + { + continue; + } + BufferedReader famCountReader = new BufferedReader( + new FileReader(clanPath + "/NumberOfFamilies.txt")); + numberOfFamilies = Integer.parseInt(famCountReader.readLine()); + } + else + { + numberOfFamilies = 1; + } + + for (int fam = 0; fam < numberOfFamilies; fam++) + { + if (forClans) + { + families = clanPath + "/Families/Fam" + fam + ".sto"; + hmms = clanPath + "/HMMs/HMM" + fam + ".hmm"; + } - + inputSTO = new BufferedReader(new FileReader(families)); + inputHMM = new BufferedReader(new FileReader(hmms)); - moveLocationBy(currentFilePosition, inputHMM); - moveLocationBy(currentFilePosition, inputSTO); - int filesRead = 0; - int i = 0; - boolean endReached = atEnd(inputSTO); - while (!endReached) - { - readStockholm(inputSTO); - readHMM(inputHMM); + int i = 0; + boolean endReached = atEnd(inputSTO); + while (!endReached) + { + readStockholm(inputSTO); + readHMM(inputHMM); int count = countValidResidues(); processData(count); filesRead++; - - currentFilePosition++; - System.out.println(i); - i++; + System.out.println(filesRead); endReached = atEnd(inputSTO); } - - - PrintWriter p = new PrintWriter( - new File(currentFolder + "/CurrentPosition.txt")); - p.print(currentFilePosition); - p.close(); - exportData(currentFolder); - raw.clear(); - binned.clear(); - + } + } + } catch (Exception e) + { + e.printStackTrace(); + } finally + { + exportData(currentFolder); + raw.clear(); + binned.clear(); + } } /** @@ -376,7 +396,7 @@ public class HMMProbabilityDistributionAnalyser raw.add(new ArrayList()); rawPos = raw.size() - 1; } - + Double total = 0d; for (int width = 0; width < sequences.size(); width++) { for (int length = 1; length < hmm.getLength() + 1; length++) @@ -394,6 +414,10 @@ public class HMMProbabilityDistributionAnalyser Double llr; prob = hmm.getMatchEmissionProbability(alignPos, symbol); bfreq = ResidueProperties.aminoBackgroundFrequencies.get(symbol); + if (prob == 0 || bfreq == 0) + { + System.out.println("error"); + } llr = Math.log(prob / bfreq); if (keepRaw) { @@ -402,6 +426,7 @@ public class HMMProbabilityDistributionAnalyser String output; output = String.format("%.1f", llr); + total += Double.parseDouble(output); if ("-0.0".equals(output)) { output = "0.0"; @@ -420,6 +445,7 @@ public class HMMProbabilityDistributionAnalyser } } } + System.out.println(total / count); } @@ -433,6 +459,15 @@ public class HMMProbabilityDistributionAnalyser { FileParse parserSTO = new FileParse(inputSTO, "", DataSourceType.FILE); StockholmFile file = new StockholmFile(parserSTO); + Vector annots = file.getAnnotations(); + + for (AlignmentAnnotation annot : annots) + { + if (annot.label.contains("Reference")) + { + reference = annot; + } + } sequences = file.getSeqs(); } @@ -446,9 +481,9 @@ public class HMMProbabilityDistributionAnalyser { FileParse parserHMM = new FileParse(inputHMM, "", DataSourceType.FILE); HMMFile file = new HMMFile(parserHMM); - file.parse(); hmm = file.getHMM(); + } /** @@ -634,7 +669,7 @@ public class HMMProbabilityDistributionAnalyser } /** - * Prints the specified family to a .hmm file in the current directory. + * Prints the specified family to a .hmm file. * * @param index * @throws IOException @@ -688,6 +723,10 @@ public class HMMProbabilityDistributionAnalyser BufferedReader familyReader = new BufferedReader( new FileReader(families)); BufferedReader hmmReader = new BufferedReader(new FileReader(hmms)); + int families = 0; + // moveLocationBy(7000, familyReader); + // moveLocationBy(7000, clanFinder); + // moveLocationBy(7000, hmmReader); HashMap clanIndexes = new HashMap<>(); ArrayList familyCounts = new ArrayList<>(); int filePos = 0; @@ -704,6 +743,8 @@ public class HMMProbabilityDistributionAnalyser if (line.indexOf("#=GF CL") > -1) { + families++; + System.out.println(families); inClan = true; Scanner scanner = new Scanner(line); scanner.next(); @@ -745,7 +786,7 @@ public class HMMProbabilityDistributionAnalyser moveLocationBy(1, hmmReader); } filePos++; - System.out.println(filePos + " files read."); + // System.out.println(filePos + " files read."); line = clanFinder.readLine(); } @@ -788,6 +829,7 @@ public class HMMProbabilityDistributionAnalyser int alignmentsExported = 0; for (int clan = 0; clan < 604; clan++) { + System.out.println(clan); int famCount = 0; String clanPath = clansLocation + "/Clan" + clan; int numberOfFamilies; @@ -796,7 +838,11 @@ public class HMMProbabilityDistributionAnalyser String line = br.readLine(); numberOfFamilies = Integer.parseInt(line); br.close(); - String commandExportLocation = exportLocation + "/Clan" + clan; + if (numberOfFamilies == 1) + { + continue; + } + final String commandExportLocation = exportLocation + "/Clan" + clan; createFolders(commandExportLocation); for (int family = 0; family < numberOfFamilies; family++) { @@ -809,14 +855,16 @@ public class HMMProbabilityDistributionAnalyser indexes.add(i); } } + int hmmIndex = getRandom(indexes); String famPath = clanPath + "/Families/Fam" + family + ".sto"; String hmmPath = clanPath + "/HMMs/HMM" + hmmIndex + ".hmm"; - String command = "H:/Documents/hmmalign -o " + commandExportLocation - + "/Fam" + family + ".sto "; + String command = "/media/sf_Shared_Folder/hmmer/binaries/hmmalign --mapali " + + clanPath + "/Families/Fam" + hmmIndex + ".sto" + + " --trim "; command += hmmPath + " "; command += famPath; - + final int familyIndex = family; final Process p = Runtime.getRuntime().exec(command); new Thread(new Runnable() @@ -830,10 +878,25 @@ public class HMMProbabilityDistributionAnalyser try { + PrintWriter writer = new PrintWriter(commandExportLocation + + "/Families/Fam" + familyIndex + ".sto"); + String lastLine = ""; + boolean dataFound = false; while ((line = input.readLine()) != null) { - System.out.println(line); + if (line.contains("#=GR") && !dataFound) + { + writer.println(lastLine); + dataFound = true; + } + if (line.contains("#") || dataFound || " ".equals(line) + || "".equals(line) || "//".equals(line)) + { + writer.println(line); + } + lastLine = line; } + writer.close(); } catch (IOException e) { e.printStackTrace(); @@ -843,12 +906,15 @@ public class HMMProbabilityDistributionAnalyser p.waitFor(); - exportHMM(hmmIndex, - commandExportLocation + "/HMMs/HMM" + family + ".hmm"); + BufferedReader hmmExporter = new BufferedReader( + new FileReader(hmmPath)); + + exportFile(hmmExporter, + commandExportLocation + "/HMMs/HMM" + family + ".hmm", + false); alignmentsExported++; - System.out.println(alignmentsExported + " alignments exported"); } PrintWriter writer = new PrintWriter( @@ -874,6 +940,10 @@ public class HMMProbabilityDistributionAnalyser public int getRandom(ArrayList list) { + if (!(list.size() > 0)) + { + System.out.println("Error - size = " + list.size()); + } int index = generator.nextInt(list.size()); int value = list.get(index); list.remove(index);