From 4324ab9c09ac74782aef9493b98280060bccd5e8 Mon Sep 17 00:00:00 2001 From: tzvanaalten Date: Mon, 24 Jul 2017 13:44:41 +0100 Subject: [PATCH] JAL-2616 add ability to change location of family and hmm data --- .../util/HMMProbabilityDistributionAnalyser.java | 164 +++++++++++++++++--- .../util/ProbabilityAnalyserKickstarter.java | 37 ++++- 2 files changed, 177 insertions(+), 24 deletions(-) diff --git a/src/jalview/util/HMMProbabilityDistributionAnalyser.java b/src/jalview/util/HMMProbabilityDistributionAnalyser.java index b30487d..6c1932a 100644 --- a/src/jalview/util/HMMProbabilityDistributionAnalyser.java +++ b/src/jalview/util/HMMProbabilityDistributionAnalyser.java @@ -44,13 +44,13 @@ public class HMMProbabilityDistributionAnalyser Map binned = new HashMap<>(); // location of the family file - final static String FAMILIES = "H:/Desktop/PFAM/Family/SeedFamilies.seed"; + String families = "H:/Desktop/PFAM/Family/SeedFamilies.seed"; // location of the file containing the family-clan links final static String FAMILIESTOCLAN = "H:/Desktop/PFAM/Family/Clanlinks.dat"; // location of the HMM file - final static String HMMS = "H:/Desktop/PFAM/HMMs/Pfam-A.hmm"; + String hmms = "H:/Desktop/PFAM/HMMs/Pfam-A.hmm"; // suffix for raw file final static String RAW = "/Raw.csv"; @@ -59,7 +59,7 @@ public class HMMProbabilityDistributionAnalyser final static String BINNED = "/Binned.csv"; // normalisation scale - final static double SCALE = 100000; + final static double SCALE = 1; // current position in file int currentFilePosition = 0; @@ -107,7 +107,7 @@ public class HMMProbabilityDistributionAnalyser /** * Analyses a specified number of families and then saves the data. Before * analysing the data, the previous saved data will be imported and after - * analysing this data is exported back into the file. + * analysing this, the data is exported back into the file. * * @param increments * The number of families to read before saving. 
@@ -116,17 +116,26 @@ public class HMMProbabilityDistributionAnalyser public void run(int increments, boolean keepRawData) throws IOException { keepRaw = keepRawData; - readPreviousData(currentFolder); + try + { + readPreviousData(currentFolder); + BufferedReader posReader = new BufferedReader( + new FileReader(currentFolder + "/CurrentPosition.txt")); + + String line = posReader.readLine(); + posReader.close(); + currentFilePosition = Integer.parseInt(line); + } catch (Exception e) + { + System.out.println("No previous data found"); + } + + + - BufferedReader posReader = new BufferedReader( - new FileReader(currentFolder + "/CurrentPosition.txt")); - String line = posReader.readLine(); - posReader.close(); + BufferedReader inputSTO = new BufferedReader(new FileReader(families)); + BufferedReader inputHMM = new BufferedReader(new FileReader(hmms)); - BufferedReader inputSTO = new BufferedReader(new FileReader(FAMILIES)); - BufferedReader inputHMM = new BufferedReader(new FileReader(HMMS)); - currentFilePosition = Integer.parseInt(line); moveLocationBy(currentFilePosition, inputHMM); moveLocationBy(currentFilePosition, inputSTO); @@ -164,6 +173,86 @@ public class HMMProbabilityDistributionAnalyser } /** + * Analyses all families and then saves the data. Before analysing the data, + * the previous saved data will be imported and after analysing this, the data + * is exported back into the file. + * + * @param keepRawData + * The value stored in the keepRaw flag (whether raw data is kept). 
+ * @throws IOException + */ + public void runToEnd(boolean keepRawData) throws IOException + { + keepRaw = keepRawData; + BufferedReader inputSTO = null; + BufferedReader inputHMM = null; + int size = 0; + try + { + readPreviousData(currentFolder); + BufferedReader posReader = new BufferedReader( + new FileReader(currentFolder + "/CurrentPosition.txt")); + + String line = posReader.readLine(); + posReader.close(); + currentFilePosition = Integer.parseInt(line); + readPreviousData(currentFolder); + + inputSTO = new BufferedReader(new FileReader(families)); + inputHMM = new BufferedReader(new FileReader(hmms)); + } catch (Exception e) + { + System.out.println("No or incomplete previous data found"); + } + + + + moveLocationBy(currentFilePosition, inputHMM); + moveLocationBy(currentFilePosition, inputSTO); + + int filesRead = 0; + int i = 0; + inputSTO.mark(20); + String check = inputSTO.readLine(); + inputSTO.reset(); + while (!"".equals(check) && !" ".equals(check) && check != null) + { + inputSTO.mark(20); + String line = inputSTO.readLine(); + inputSTO.reset(); + + FileParse parserSTO = new FileParse(inputSTO, "", + DataSourceType.FILE); + readStockholm(parserSTO); + + FileParse parserHMM = new FileParse(inputHMM, "", + DataSourceType.FILE); + readHMM(parserHMM); + + int count = countValidResidues(); + processData(count); + filesRead++; + + currentFilePosition++; + System.out.println(i); + i++; + inputSTO.mark(20); + check = inputSTO.readLine(); + inputSTO.reset(); + } + + + PrintWriter p = new PrintWriter( + new File(currentFolder + "/CurrentPosition.txt")); + p.print(currentFilePosition); + p.close(); + exportData(currentFolder); + raw.clear(); + binned.clear(); + + } + + /** * Reads the previous data from both files * * @param source @@ -194,7 +283,9 @@ public class HMMProbabilityDistributionAnalyser { Scanner scanner = new Scanner(line); scanner.useDelimiter(","); - binned.put(scanner.next(), scanner.nextDouble()); + String key = scanner.next(); + String 
value = scanner.next(); + binned.put(key, Double.valueOf(value)); scanner.close(); line = input.readLine(); } @@ -242,6 +333,10 @@ public class HMMProbabilityDistributionAnalyser { raw.get(i).add(Double.parseDouble(value)); } + else + { + raw.get(i).add(null); + } i++; } @@ -263,7 +358,7 @@ public class HMMProbabilityDistributionAnalyser for (int width = 0; width < sequences.size(); width++) { - for (int length = 1; length < hmm.getLength(); length++) + for (int length = 1; length < hmm.getLength() + 1; length++) { char symbol; int alignPos; @@ -297,7 +392,7 @@ public class HMMProbabilityDistributionAnalyser for (int width = 0; width < sequences.size(); width++) { - for (int length = 1; length < hmm.getLength(); length++) + for (int length = 1; length < hmm.getLength() + 1; length++) { char symbol; int alignPos; @@ -431,7 +526,7 @@ public class HMMProbabilityDistributionAnalyser */ public void printFam(int index) throws IOException { - BufferedReader br = new BufferedReader(new FileReader(FAMILIES)); + BufferedReader br = new BufferedReader(new FileReader(families)); moveLocationBy(index, br); @@ -455,7 +550,7 @@ public class HMMProbabilityDistributionAnalyser */ public void printHMM(int index) throws IOException { - BufferedReader br = new BufferedReader(new FileReader(HMMS)); + BufferedReader br = new BufferedReader(new FileReader(hmms)); moveLocationBy(index, br); @@ -479,7 +574,7 @@ public class HMMProbabilityDistributionAnalyser */ public void exportFam(int index, String location) throws IOException { - BufferedReader br = new BufferedReader(new FileReader(FAMILIES)); + BufferedReader br = new BufferedReader(new FileReader(families)); moveLocationBy(index, br); @@ -518,7 +613,7 @@ public class HMMProbabilityDistributionAnalyser { String name; - BufferedReader nameFinder = new BufferedReader(new FileReader(HMMS)); + BufferedReader nameFinder = new BufferedReader(new FileReader(hmms)); moveLocationBy(index, nameFinder); @@ -536,7 +631,7 @@ public class 
HMMProbabilityDistributionAnalyser String name; BufferedReader nameFinder = new BufferedReader( - new FileReader(FAMILIES)); + new FileReader(families)); moveLocationBy(index, nameFinder); @@ -560,7 +655,7 @@ public class HMMProbabilityDistributionAnalyser { - BufferedReader br = new BufferedReader(new FileReader(HMMS)); + BufferedReader br = new BufferedReader(new FileReader(hmms)); moveLocationBy(index, br); @@ -603,8 +698,8 @@ public class HMMProbabilityDistributionAnalyser { BufferedReader clanFinder = new BufferedReader(new FileReader(FAMILIESTOCLAN)); BufferedReader familyReader = new BufferedReader( - new FileReader(FAMILIES)); - BufferedReader hmmReader = new BufferedReader(new FileReader(HMMS)); + new FileReader(families)); + BufferedReader hmmReader = new BufferedReader(new FileReader(hmms)); HashMap clanIndexes = new HashMap<>(); int filePos = 0; int clanCount = 0; @@ -633,6 +728,7 @@ public class HMMProbabilityDistributionAnalyser clanCount++; } + Integer clanI = clanIndexes.get(clanName); String clanPath = directory + "/Clan" + clanI.toString(); File clanFolder = new File(clanPath); @@ -661,7 +757,29 @@ public class HMMProbabilityDistributionAnalyser clanFinder.close(); } + + public String getFamilies() + { + return families; + } + + public void setFamilies(String families) + { + this.families = currentFolder + families; + } + + public String getHmms() + { + return hmms; + } + + public void setHmms(String hmms) + { + this.hmms = currentFolder + hmms; + } + + } diff --git a/src/jalview/util/ProbabilityAnalyserKickstarter.java b/src/jalview/util/ProbabilityAnalyserKickstarter.java index 86d9177..999dc1f 100644 --- a/src/jalview/util/ProbabilityAnalyserKickstarter.java +++ b/src/jalview/util/ProbabilityAnalyserKickstarter.java @@ -94,7 +94,7 @@ public class ProbabilityAnalyserKickstarter // exported and re-read back into the program. This is to ensure that the // program can be terminated without losing a large quantity of data. 
The // increment is the number of families read per 'save'. - if (command.indexOf("run") > -1) + if (command.indexOf("run") > -1 && !(command.indexOf("ToEnd") > -1)) { try { @@ -117,6 +117,21 @@ public class ProbabilityAnalyserKickstarter System.out.println("Command failed"); } } + if ((command.indexOf("runToEnd") > -1)) + { + try + { + + inputScanner.next(); + boolean keepRaw = inputScanner.nextBoolean(); + analyser.runToEnd(keepRaw); + System.out.println("Task completed"); + continue; + } catch (Exception e) + { + System.out.println("Command failed"); + } + } // terminates program. Syntax is terminate. if (command.indexOf("terminate") > -1) { @@ -163,6 +178,26 @@ public class ProbabilityAnalyserKickstarter continue; } + if (command.indexOf("setFamilies") > -1) + { + inputScanner.next(); + analyser.setFamilies(inputScanner.next()); + continue; + + } + + if (command.indexOf("setHMMs") > -1) + { + inputScanner.next(); + analyser.setHmms(inputScanner.next()); + continue; + + } + if (command.indexOf("hmmbuild") > -1) + { + analyser.hmmBuild(); + continue; + } } -- 1.7.10.2