From 0772d75dd19824c5d960bd16f8e9efbb977c4b53 Mon Sep 17 00:00:00 2001 From: TZVanaalten Date: Tue, 25 Jul 2017 12:28:50 +0100 Subject: [PATCH] JAL-2616 add ability to perform random alignments within clans --- .../util/HMMProbabilityDistributionAnalyser.java | 200 ++++++++++++++++---- .../util/ProbabilityAnalyserKickstarter.java | 20 +- .../HMMProbabilityDistributionAnalyserTest.java | 8 +- 3 files changed, 176 insertions(+), 52 deletions(-) diff --git a/src/jalview/util/HMMProbabilityDistributionAnalyser.java b/src/jalview/util/HMMProbabilityDistributionAnalyser.java index 6c1932a..520c874 100644 --- a/src/jalview/util/HMMProbabilityDistributionAnalyser.java +++ b/src/jalview/util/HMMProbabilityDistributionAnalyser.java @@ -14,11 +14,13 @@ import java.io.FileNotFoundException; import java.io.FileOutputStream; import java.io.FileReader; import java.io.IOException; +import java.io.InputStreamReader; import java.io.PrintWriter; import java.util.ArrayList; import java.util.HashMap; import java.util.List; import java.util.Map; +import java.util.Random; import java.util.Scanner; import java.util.Vector; @@ -66,6 +68,8 @@ public class HMMProbabilityDistributionAnalyser final static String NL = "\n"; + Random generator = new Random(); + // current directory String currentFolder; @@ -145,13 +149,9 @@ public class HMMProbabilityDistributionAnalyser while (filesRead < increments) { - FileParse parserSTO = new FileParse(inputSTO, "", - DataSourceType.FILE); - readStockholm(parserSTO); + readStockholm(inputSTO); - FileParse parserHMM = new FileParse(inputHMM, "", - DataSourceType.FILE); - readHMM(parserHMM); + readHMM(inputHMM); int count = countValidResidues(); processData(count); @@ -212,22 +212,11 @@ public class HMMProbabilityDistributionAnalyser int filesRead = 0; int i = 0; - inputSTO.mark(20); - String check = inputSTO.readLine(); - inputSTO.reset(); - while (!"".equals(check) && !" ".equals(check) && check != null) + boolean endReached = atEnd(inputSTO); + while (!endReached) { - inputSTO.mark(20); - String line = inputSTO.readLine(); - inputSTO.reset(); - - FileParse parserSTO = new FileParse(inputSTO, "", - DataSourceType.FILE); - readStockholm(parserSTO); - - FileParse parserHMM = new FileParse(inputHMM, "", - DataSourceType.FILE); - readHMM(parserHMM); + readStockholm(inputSTO); + readHMM(inputHMM); int count = countValidResidues(); processData(count); @@ -236,9 +225,7 @@ public class HMMProbabilityDistributionAnalyser currentFilePosition++; System.out.println(i); i++; - inputSTO.mark(20); - check = inputSTO.readLine(); - inputSTO.reset(); + endReached = atEnd(inputSTO); } @@ -442,9 +429,10 @@ public class HMMProbabilityDistributionAnalyser * @param source * @throws IOException */ - public void readStockholm(FileParse source) throws IOException + public void readStockholm(BufferedReader inputSTO) throws IOException { - StockholmFile file = new StockholmFile(source); + FileParse parserSTO = new FileParse(inputSTO, "", DataSourceType.FILE); + StockholmFile file = new StockholmFile(parserSTO); sequences = file.getSeqs(); } @@ -454,10 +442,10 @@ public class HMMProbabilityDistributionAnalyser * @param source * @throws IOException */ - public void readHMM(FileParse source) throws IOException + public void readHMM(BufferedReader inputHMM) throws IOException { - - HMMFile file = new HMMFile(source); + FileParse parserHMM = new FileParse(inputHMM, "", DataSourceType.FILE); + HMMFile file = new HMMFile(parserHMM); file.parse(); hmm = file.getHMM(); @@ -592,12 +580,12 @@ public class HMMProbabilityDistributionAnalyser } - public void exportFile(BufferedReader br, String location) + public void exportFile(BufferedReader br, String location, boolean append) throws IOException { String line = br.readLine(); PrintWriter writer = new PrintWriter( - new FileOutputStream(new File(location), true)); + new FileOutputStream(location, append)); while (!"//".equals(line)) { writer.println(line); @@ -701,6 +689,7 @@ public class HMMProbabilityDistributionAnalyser new FileReader(families)); BufferedReader hmmReader = new BufferedReader(new FileReader(hmms)); HashMap clanIndexes = new HashMap<>(); + ArrayList familyCounts = new ArrayList<>(); int filePos = 0; int clanCount = 0; String line; @@ -726,23 +715,29 @@ public class HMMProbabilityDistributionAnalyser { clanIndexes.put(clanName, clanCount); clanCount++; + familyCounts.add(0); } Integer clanI = clanIndexes.get(clanName); String clanPath = directory + "/Clan" + clanI.toString(); - File clanFolder = new File(clanPath); - String famPath = clanPath + "/Families.sto"; - String hmmPath = clanPath + "/HMMs.hmm"; - if (!clanFolder.exists()) - { - clanFolder.mkdir(); - } - exportFile(familyReader, famPath); - exportFile(hmmReader, hmmPath); - + createFolders(clanPath); + + int index = clanIndexes.get(clanName); + exportFile(familyReader, + clanPath + "/Families/Fam" + familyCounts.get(index) + + ".sto", + false); + exportFile(hmmReader, + clanPath + "/HMMs/HMM" + familyCounts.get(index) + ".hmm", + false); + + int count = familyCounts.get(index); + count++; + familyCounts.set(index, count); } line = clanFinder.readLine(); + } if (!inClan) { @@ -755,6 +750,15 @@ public class HMMProbabilityDistributionAnalyser } clanFinder.close(); + + for (int clan = 0; clan < clanCount; clan++) + { + PrintWriter writer = new PrintWriter( + directory + "/Clan" + clan + "/NumberOfFamilies.txt"); + int count = familyCounts.get(clan); + writer.print(count); + writer.close(); + } } @@ -778,8 +782,122 @@ public class HMMProbabilityDistributionAnalyser this.hmms = currentFolder + hmms; } + public void alignWithinClan(String exportLocation, String clansLocation) + throws IOException, InterruptedException + { + int alignmentsExported = 0; + for (int clan = 0; clan < 604; clan++) + { + int famCount = 0; + String clanPath = clansLocation + "/Clan" + clan; + int numberOfFamilies; + BufferedReader br = new BufferedReader( + new FileReader(clanPath + "/NumberOfFamilies.txt")); + String line = br.readLine(); + numberOfFamilies = Integer.parseInt(line); + br.close(); + String commandExportLocation = exportLocation + "/Clan" + clan; + createFolders(commandExportLocation); + for (int family = 0; family < numberOfFamilies; family++) + { + famCount++; + ArrayList indexes = new ArrayList<>(); + for (int i = 0; i < numberOfFamilies; i++) + { + if (i != family) + { + indexes.add(i); + } + } + int hmmIndex = getRandom(indexes); + String famPath = clanPath + "/Families/Fam" + family + ".sto"; + String hmmPath = clanPath + "/HMMs/HMM" + hmmIndex + ".hmm"; + String command = "H:/Documents/hmmalign -o " + commandExportLocation + + "/Fam" + family + ".sto "; + command += hmmPath + " "; + command += famPath; + + final Process p = Runtime.getRuntime().exec(command); + + new Thread(new Runnable() + { + @Override + public void run() + { + BufferedReader input = new BufferedReader( + new InputStreamReader(p.getInputStream())); + String line = null; + + try + { + while ((line = input.readLine()) != null) + { + System.out.println(line); + } + } catch (IOException e) + { + e.printStackTrace(); + } + } + }).start(); + p.waitFor(); + + exportHMM(hmmIndex, + commandExportLocation + "/HMMs/HMM" + family + ".hmm"); + + alignmentsExported++; + + System.out.println(alignmentsExported + " alignments exported"); + + } + PrintWriter writer = new PrintWriter( + commandExportLocation + "/NumberOfFamilies.txt"); + writer.print(famCount); + writer.close(); + } } + public boolean atEnd(BufferedReader br) throws IOException + { + boolean end = false; + br.mark(80); + String line = br.readLine(); + if ("".equals(line) || line == null) + { + end = true; + } + br.reset(); + return end; + } + + public int getRandom(ArrayList list) + { + int index = generator.nextInt(list.size()); + int value = list.get(index); + list.remove(index); + return value; + } + + public void createFolders(String clanPath) + { + File clanFolder = new File(clanPath); + if (!clanFolder.exists()) + { + clanFolder.mkdir(); + } + + File famFolder = new File(clanPath + "/Families"); + File hmmFolder = new File(clanPath + "/HMMs"); + if (!famFolder.exists()) + { + famFolder.mkdir(); + hmmFolder.mkdir(); + } + } +} + + + diff --git a/src/jalview/util/ProbabilityAnalyserKickstarter.java b/src/jalview/util/ProbabilityAnalyserKickstarter.java index 999dc1f..ffc1dec 100644 --- a/src/jalview/util/ProbabilityAnalyserKickstarter.java +++ b/src/jalview/util/ProbabilityAnalyserKickstarter.java @@ -116,6 +116,7 @@ public class ProbabilityAnalyserKickstarter { System.out.println("Command failed"); } + continue; } if ((command.indexOf("runToEnd") > -1)) { @@ -126,11 +127,11 @@ public class ProbabilityAnalyserKickstarter boolean keepRaw = inputScanner.nextBoolean(); analyser.runToEnd(keepRaw); System.out.println("Task completed"); - continue; } catch (Exception e) { System.out.println("Command failed"); } + continue; } // terminates program. Syntax is terminate. if (command.indexOf("terminate") > -1) @@ -155,7 +156,9 @@ public class ProbabilityAnalyserKickstarter } catch (Exception e) { System.out.println("Command failed"); + } + continue; } if (command.indexOf("getFamName") > -1) @@ -164,12 +167,12 @@ public class ProbabilityAnalyserKickstarter { inputScanner.next(); System.out.println(analyser.getFamilyName(inputScanner.nextInt())); - inputScanner.close(); - continue; + } catch (Exception e) { System.out.println("Command failed"); } + continue; } if (command.indexOf("sortIntoClans") > -1) { @@ -193,11 +196,18 @@ public class ProbabilityAnalyserKickstarter continue; } - if (command.indexOf("hmmbuild") > -1) + + if (command.indexOf("alignWithinClans") > -1) { - analyser.hmmBuild(); + inputScanner.next(); + String export = inputScanner.next(); + String clans = inputScanner.next(); + analyser.alignWithinClan(export, clans); continue; + } + + System.out.println("Unrecognised command"); } diff --git a/test/jalview/util/HMMProbabilityDistributionAnalyserTest.java b/test/jalview/util/HMMProbabilityDistributionAnalyserTest.java index 815ea66..3e775cc 100644 --- a/test/jalview/util/HMMProbabilityDistributionAnalyserTest.java +++ b/test/jalview/util/HMMProbabilityDistributionAnalyserTest.java @@ -7,8 +7,6 @@ import jalview.datamodel.HMMNode; import jalview.datamodel.HiddenMarkovModel; import jalview.datamodel.Sequence; import jalview.datamodel.SequenceI; -import jalview.io.DataSourceType; -import jalview.io.FileParse; import java.io.BufferedReader; import java.io.FileReader; @@ -103,10 +101,8 @@ public class HMMProbabilityDistributionAnalyserTest { "test/jalview/util/test_Fams_for_probability_analysis")); BufferedReader brHMM = new BufferedReader(new FileReader( "test/jalview/util/test_HMMs_for_probability_analysis")); - FileParse parserFam = new FileParse(brFam, "", DataSourceType.FILE); - FileParse parserHMM = new FileParse(brHMM, "", DataSourceType.FILE); - analyser.readStockholm(parserFam); - analyser.readHMM(parserHMM); + analyser.readStockholm(brFam); + analyser.readHMM(brHMM); analyser.processData(6); Map map = analyser.binned; List> list = analyser.raw; -- 1.7.10.2