--- /dev/null
+package jalview.util;
+
+import jalview.datamodel.HiddenMarkovModel;
+import jalview.datamodel.SequenceI;
+import jalview.io.DataSourceType;
+import jalview.io.FileParse;
+import jalview.io.HMMFile;
+import jalview.io.StockholmFile;
+import jalview.schemes.ResidueProperties;
+
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.FileNotFoundException;
+import java.io.FileReader;
+import java.io.IOException;
+import java.io.PrintWriter;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Locale;
+import java.util.Map;
+import java.util.Scanner;
+import java.util.Vector;
+
+public class HMMProbabilityDistributionAnalyser
+{
+
+ Vector<SequenceI> sequences; // sequences of the alignment most recently parsed by readStockholm
+ // profile HMM most recently parsed by readHMM
+ HiddenMarkovModel hmm;
+ // one list per processed family, holding its log(emission probability / background frequency) values
+ List<ArrayList<Double>> raw = new ArrayList<>();
+ // histogram: key is a value formatted to one decimal place, value is the accumulated SCALE / count weight
+ Map<String, Double> binned = new HashMap<>();
+ // file read as a sequence of "//"-terminated alignment records; NOTE(review): hard-coded, machine-specific path
+ final static String FAMILIES = "C:/Users/TZVanaalten/Pfam-A.full";
+ // file read as a sequence of "//"-terminated HMM records; NOTE(review): hard-coded, machine-specific path
+ final static String HMMS = "H:/Desktop/PFAM/HMMs/Pfam-A.hmm";
+ // name (appended to a folder path) of the CSV file holding the raw values
+ final static String RAW = "/Raw.csv";
+ // name (appended to a folder path) of the CSV file holding the binned values
+ final static String BINNED = "/Binned.csv";
+ // numerator of the weight (SCALE / count) that processData adds to a bin per residue
+ final static double SCALE = 100000;
+ // number of records to skip in both input files before processing; loaded and advanced by run
+ int currentFilePosition = 0;
+ // newline constant; not referenced by any code shown in this class
+ final static String NL = "\n";
+ // folder holding the CSV and position files; assigned by setFolder
+ String currentFolder;
+
+ /**
+  * Selects the folder that the other methods of this class use for their
+  * position, raw and binned data files.
+  *
+  * @param path
+  *          path of the folder, without a trailing separator
+  */
+ public void setFolder(String path)
+ {
+   this.currentFolder = path;
+ }
+
+ /**
+  * Advances a reader past the given number of records, where a record ends
+  * with a line consisting solely of "//".
+  *
+  * @param index
+  *          number of records to skip
+  * @param br
+  *          reader to advance
+  * @throws IOException
+  *           if reading fails, or if the input ends before {@code index}
+  *           record terminators have been seen
+  */
+ public void moveToFile(int index, BufferedReader br) throws IOException
+ {
+   for (int skipped = 0; skipped < index; skipped++)
+   {
+     String line;
+     do
+     {
+       line = br.readLine();
+       if (line == null)
+       {
+         // readLine() yields null at end of stream; without this check the
+         // search for the next "//" would spin forever
+         throw new IOException("End of input reached after skipping "
+                 + skipped + " of " + index + " records");
+       }
+     } while (!"//".equals(line));
+   }
+ }
+ /**
+ * Analyses probability data
+ *
+ * @param args
+ * @throws IOException
+ */
+ public void run(int increments) throws IOException
+ {
+
+ readPreviousData(currentFolder);
+
+ BufferedReader posReader = new BufferedReader(
+ new FileReader(currentFolder + "/CurrentPosition.txt"));
+ String line = posReader.readLine();
+ posReader.close();
+ currentFilePosition = Integer.parseInt(line);
+
+ BufferedReader inputSTO = new BufferedReader(
+ new FileReader(FAMILIES));
+ BufferedReader inputHMM = new BufferedReader(
+ new FileReader(HMMS));
+
+ moveToFile(currentFilePosition, inputHMM);
+ moveToFile(currentFilePosition, inputSTO);
+
+ int filesRead = 0;
+ while (filesRead < increments)
+ {
+ FileParse parserSTO = new FileParse(inputSTO, "",
+ DataSourceType.FILE);
+ readStockholm(parserSTO);
+
+ FileParse parserHMM = new FileParse(inputHMM, "",
+ DataSourceType.FILE);
+ readHMM(parserHMM);
+
+ if (hmm.getAlphabetType().equals("amino"))
+ {
+ int count = countValidResidues();
+ processData(count);
+ filesRead++;
+ }
+ currentFilePosition++;
+ }
+
+ PrintWriter p = new PrintWriter(
+ new File(currentFolder + "/CurrentPosition"));
+ p.print(currentFilePosition);
+ p.close();
+ exportData(currentFolder);
+ raw.clear();
+ binned.clear();
+
+ }
+
+ /**
+  * Loads the data written by an earlier export: first the binned file, then
+  * the raw file.
+  *
+  * @param source
+  *          folder containing the two data files
+  * @throws IOException
+  *           if either file cannot be read
+  */
+ public void readPreviousData(String source) throws IOException
+ {
+   this.readBinned(source);
+   this.readRaw(source);
+ }
+
+ public void readBinned(String source) throws IOException
+ {
+ BufferedReader input = new BufferedReader(
+ new FileReader(source + BINNED));
+ String line = input.readLine();
+ while (!("".equals(line) || line == null))
+ {
+ binned = new HashMap<>();
+ Scanner scanner = new Scanner(line);
+ scanner.useDelimiter(",");
+ binned.put(scanner.next(), scanner.nextDouble());
+ scanner.close();
+ line = input.readLine();
+ }
+
+ input.close();
+ }
+
+ public void readRaw(String source) throws IOException
+ {
+ BufferedReader input = new BufferedReader(new FileReader(source + RAW));
+ String line = input.readLine();
+ if (line == null)
+ {
+ input.close();
+ return;
+ }
+ Scanner numberScanner = new Scanner(line);
+ numberScanner.useDelimiter(",");
+ raw = new ArrayList<>();
+ while (numberScanner.hasNext())
+ {
+ numberScanner.next();
+ raw.add(new ArrayList<Double>());
+ }
+ numberScanner.close();
+
+ line = input.readLine();
+ while (!("".equals(line) || line == null))
+ {
+ Scanner scanner = new Scanner(line);
+ scanner.useDelimiter(",");
+
+ int i = 0;
+ while (scanner.hasNext())
+ {
+ String value;
+ value = scanner.next();
+ if (!value.equals("EMPTY"))
+ {
+ raw.get(i).add(Double.parseDouble(value));
+ }
+
+ i++;
+ }
+ scanner.close();
+ line = input.readLine();
+ }
+
+ input.close();
+ }
+
+ /**
+  * Counts, over every loaded sequence and every HMM node from 1 up to (but
+  * excluding) {@code hmm.getLength()}, the characters found at the node's
+  * alignment column that are keys of
+  * {@code ResidueProperties.aminoBackgroundFrequencies}.
+  *
+  * @return the number of such characters
+  */
+ public int countValidResidues()
+ {
+   int valid = 0;
+
+   for (SequenceI sequence : sequences)
+   {
+     for (int node = 1; node < hmm.getLength(); node++)
+     {
+       int column = hmm.getNodeAlignmentColumn(node);
+       char residue = sequence.getCharAt(column);
+       boolean known = ResidueProperties.aminoBackgroundFrequencies
+               .containsKey(residue);
+       if (known)
+       {
+         valid++;
+       }
+     }
+   }
+
+   return valid;
+ }
+
+ public void processData(int count)
+ {
+
+ raw.add(new ArrayList<Double>());
+ int rawPos = raw.size() - 1;
+ for (int width = 0; width < sequences.size(); width++)
+ {
+ for (int length = 1; length < hmm.getLength(); length++)
+ {
+ char symbol;
+ int alignPos;
+ alignPos = hmm.getNodeAlignmentColumn(length);
+
+ symbol = sequences.get(width).getCharAt(alignPos);
+ if (ResidueProperties.aminoBackgroundFrequencies
+ .containsKey(symbol))
+ {
+
+ Double prob;
+ Float bfreq;
+ Double llr;
+ prob = hmm.getMatchEmissionProbability(alignPos, symbol);
+ bfreq = ResidueProperties.aminoBackgroundFrequencies.get(symbol);
+ llr = Math.log(prob / bfreq);
+ raw.get(rawPos).add(llr);
+ String output;
+ output = String.format("%.1f", llr);
+ if ("-0.0".equals(output))
+ {
+ output = "0.0";
+ }
+ if (binned.containsKey(output))
+ {
+ double prev = binned.get(output);
+ prev += (SCALE / count);
+ binned.put(output, prev);
+
+ }
+ else
+ {
+ binned.put(output, SCALE / count);
+ }
+ }
+ }
+ }
+ }
+
+
+ /**
+  * Parses the given source with a {@code StockholmFile} and keeps the
+  * resulting sequences in {@code sequences}.
+  *
+  * @param source
+  *          parser input positioned at the record to read
+  * @throws IOException
+  *           if parsing fails
+  */
+ public void readStockholm(FileParse source) throws IOException
+ {
+   StockholmFile stockholm = new StockholmFile(source);
+   stockholm.parse();
+   this.sequences = stockholm.getSeqs();
+ }
+
+ /**
+  * Parses the given source with an {@code HMMFile} and keeps the resulting
+  * model in {@code hmm}.
+  *
+  * @param source
+  *          parser input positioned at the record to read
+  * @throws IOException
+  *           if parsing fails
+  */
+ public void readHMM(FileParse source) throws IOException
+ {
+   HMMFile hmmFile = new HMMFile(source);
+   hmmFile.parse();
+   this.hmm = hmmFile.getHMM();
+ }
+
+ /**
+  * Writes the binned and raw data to the given folder. The binned file has
+  * one {@code key,value} line per bin. The raw file starts with a header of
+  * {@code Fam N,} cells, one per raw column, followed by one line per row in
+  * which a column that has no value for that row is written as
+  * {@code EMPTY}.
+  *
+  * @param location
+  *          folder to write the two files to
+  * @throws FileNotFoundException
+  *           if either file cannot be opened for writing
+  */
+ public void exportData(String location) throws FileNotFoundException
+ {
+   try (PrintWriter writerBin = new PrintWriter(
+           new File(location + BINNED)))
+   {
+     for (Map.Entry<String, Double> entry : binned.entrySet())
+     {
+       writerBin.println(entry.getKey() + "," + entry.getValue());
+     }
+   }
+
+   // The longest column decides how many rows are written, so no trailing
+   // row consisting only of EMPTY cells is produced.
+   int rowCount = 0;
+   for (List<Double> column : raw)
+   {
+     rowCount = Math.max(rowCount, column.size());
+   }
+
+   try (PrintWriter writerRaw = new PrintWriter(new File(location + RAW)))
+   {
+     StringBuilder identifier = new StringBuilder();
+     for (int family = 1; family <= raw.size(); family++)
+     {
+       identifier.append("Fam ").append(family).append(',');
+     }
+     writerRaw.println(identifier);
+
+     for (int row = 0; row < rowCount; row++)
+     {
+       StringBuilder cells = new StringBuilder();
+       for (List<Double> column : raw)
+       {
+         if (row < column.size())
+         {
+           cells.append(column.get(row)).append(',');
+         }
+         else
+         {
+           cells.append("EMPTY,");
+         }
+       }
+       writerRaw.println(cells);
+     }
+   }
+ }
+
+ public void printFam(int index) throws IOException
+ {
+ BufferedReader br = new BufferedReader(new FileReader(FAMILIES));
+
+ moveToFile(index, br);
+
+ String line = br.readLine();
+
+ while (!"//".equals(line))
+ {
+ System.out.println(line);
+ line = br.readLine();
+ }
+ System.out.println(line);
+ br.close();
+
+ }
+
+ public void printHMM(int index) throws IOException
+ {
+ BufferedReader br = new BufferedReader(new FileReader(HMMS));
+
+ moveToFile(index, br);
+
+ String line = br.readLine();
+
+ while (!"//".equals(line))
+ {
+ System.out.println(line);
+ line = br.readLine();
+ }
+ System.out.println(line);
+ br.close();
+
+ }
+
+ public void printFamToFile(int index) throws IOException
+ {
+ String name;
+
+ BufferedReader nameFinder = new BufferedReader(
+ new FileReader(FAMILIES));
+
+ moveToFile(index, nameFinder);
+
+ nameFinder.readLine();
+
+ Scanner scanner = new Scanner(nameFinder.readLine());
+ scanner.next();
+ scanner.next();
+ name = scanner.next();
+ scanner.close();
+
+ BufferedReader br = new BufferedReader(new FileReader(FAMILIES));
+
+ moveToFile(index, br);
+
+ String line = br.readLine();
+ PrintWriter writer = new PrintWriter(
+ currentFolder + "/" + name + ".sto");
+ while (!"//".equals(line))
+ {
+ writer.println(line);
+ line = br.readLine();
+ }
+ writer.println(line);
+ writer.close();
+ br.close();
+
+ }
+
+ public void printHMMToFile(int index) throws IOException
+ {
+
+ String name;
+
+ BufferedReader nameFinder = new BufferedReader(new FileReader(HMMS));
+
+ moveToFile(index, nameFinder);
+
+ nameFinder.readLine();
+
+ Scanner scanner = new Scanner(nameFinder.readLine());
+ name = scanner.next();
+ name = scanner.next();
+ scanner.close();
+
+ BufferedReader br = new BufferedReader(new FileReader(HMMS));
+
+ moveToFile(index, br);
+
+ String line = br.readLine();
+
+ PrintWriter writer = new PrintWriter(
+ currentFolder + "/" + name + ".hmm");
+ while (!"//".equals(line))
+ {
+ writer.println(line);
+ line = br.readLine();
+ }
+ writer.println(line);
+ writer.close();
+ br.close();
+
+ }
+
+ public void clear() throws FileNotFoundException
+ {
+ PrintWriter pos = new PrintWriter(
+ currentFolder + "/CurrentPosition.txt");
+ pos.println("0");
+
+ PrintWriter raw = new PrintWriter(currentFolder + RAW);
+
+ PrintWriter bin = new PrintWriter(currentFolder + BINNED);
+
+ pos.close();
+ bin.close();
+ raw.close();
+ }
+
+}
--- /dev/null
+package jalview.util;
+
+import java.io.IOException;
+import java.util.Scanner;
+
+public class ProbabilityAnalyserKickstarter
+{
+
+ public static void main(String[] args)
+ throws IOException, InterruptedException
+ {
+
+ HMMProbabilityDistributionAnalyser analyser = new HMMProbabilityDistributionAnalyser();
+
+ boolean running = true;
+ System.out.println("ACTIVATED");
+ while (running)
+ {
+ Scanner keyboard = new Scanner(System.in);
+ String command = keyboard.nextLine();
+
+ Scanner inputScanner = new Scanner(command);
+ if (command.indexOf("printFam") > -1)
+ {
+ inputScanner.next();
+ int index = inputScanner.nextInt();
+ analyser.printFam(index);
+ continue;
+ }
+
+ if (command.indexOf("printHMM") > -1)
+ {
+
+ inputScanner.next();
+ int index = inputScanner.nextInt();
+ analyser.printHMM(index);
+ continue;
+ }
+
+ if (command.indexOf("exportFam") > -1)
+ {
+
+ inputScanner.next();
+ int index = inputScanner.nextInt();
+ String location = inputScanner.next();
+ analyser.printFamToFile(index);
+ continue;
+ }
+
+ if (command.indexOf("exportHMM") > -1)
+ {
+
+ inputScanner.next();
+ int index = inputScanner.nextInt();
+ String location = inputScanner.next();
+ analyser.printHMMToFile(index);
+ continue;
+ }
+
+ if (command.indexOf("run") > -1)
+ {
+ inputScanner.next();
+
+ int loops = inputScanner.nextInt();
+ int increments = inputScanner.nextInt();
+
+ for (int i = 0; i < loops; i++)
+ {
+ analyser.run(increments);
+ }
+ continue;
+ }
+
+ if (command.indexOf("terminate") > -1)
+ {
+ running = false;
+ continue;
+ }
+
+ if (command.indexOf("clear") > -1)
+ {
+ analyser.clear();
+ continue;
+ }
+
+ if (command.indexOf("cd") > -1)
+ {
+ inputScanner.next();
+ analyser.setFolder(inputScanner.next());
+ }
+ inputScanner.close();
+ continue;
+ }
+
+
+
+
+ }
+
+}