// Origin: jalview.git commit bbb12f31b7130e384a3e930b265a561ffe29902d
// ("JAL-2616 add probability distribution analyser class")
// File: src/jalview/util/HMMProbabilityDistributionAnalyser.java
package jalview.util;

import jalview.datamodel.HiddenMarkovModel;
import jalview.datamodel.SequenceI;
import jalview.io.DataSourceType;
import jalview.io.FileParse;
import jalview.io.HMMFile;
import jalview.io.StockholmFile;
import jalview.schemes.ResidueProperties;

import java.io.BufferedReader;
import java.io.EOFException;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.io.PrintWriter;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Scanner;
import java.util.Vector;

/**
 * Walks through a concatenated Stockholm alignment file and a concatenated
 * HMM file in step, and for every residue found in
 * {@code ResidueProperties.aminoBackgroundFrequencies} records the natural
 * log of (match emission probability / background frequency).
 * <p>
 * Results are kept in two forms, both persisted as CSV files in the working
 * folder set with {@link #setFolder(String)}:
 * <ul>
 * <li>{@code Raw.csv}: one column per processed family, one value per
 * residue</li>
 * <li>{@code Binned.csv}: the values rounded to one decimal place, with a
 * weight of {@code SCALE / count} accumulated per bin</li>
 * </ul>
 * The number of records already consumed is stored in
 * {@code CurrentPosition.txt} so that successive calls to {@link #run(int)}
 * continue where the previous call stopped.
 * <p>
 * Records in both input files are expected to be terminated by a line
 * holding only {@code //}.
 */
public class HMMProbabilityDistributionAnalyser
{

  /** Sequences of the alignment most recently read by readStockholm. */
  Vector<SequenceI> sequences;

  /** Model most recently read by readHMM. */
  HiddenMarkovModel hmm;

  /** One list of log-odds values per processed family. */
  List<ArrayList<Double>> raw = new ArrayList<>();

  /** Accumulated weight per log-odds value rounded to one decimal place. */
  Map<String, Double> binned = new HashMap<>();

  // NOTE(review): both input paths are machine-specific absolute paths.
  final static String FAMILIES = "C:/Users/TZVanaalten/Pfam-A.full";

  final static String HMMS = "H:/Desktop/PFAM/HMMs/Pfam-A.hmm";

  final static String RAW = "/Raw.csv";

  final static String BINNED = "/Binned.csv";

  /**
   * Total weight spread over the valid residues of one family; each residue
   * adds {@code SCALE / count} to its bin.
   */
  final static double SCALE = 100000;

  int currentFilePosition = 0;

  final static String NL = "\n";

  /** File (relative to the working folder) holding the resume position. */
  private static final String POSITION = "/CurrentPosition.txt";

  /** Line that terminates a record in both input files. */
  private static final String RECORD_END = "//";

  String currentFolder;

  /**
   * Sets the working folder used for the position file and the CSV files.
   * 
   * @param path
   *          folder path, without a trailing separator
   */
  public void setFolder(String path)
  {
    currentFolder = path;
  }

  /**
   * Advances the reader past the first {@code index} records, leaving it
   * positioned on the first line of record number {@code index} (0-based).
   * 
   * @param index
   *          number of records to skip
   * @param br
   *          reader positioned at the start of a record
   * @throws EOFException
   *           if the input ends before {@code index} records were skipped
   *           (previously this case looped forever)
   * @throws IOException
   *           if reading fails
   */
  public void moveToFile(int index, BufferedReader br) throws IOException
  {
    for (int skipped = 0; skipped < index; skipped++)
    {
      String line = br.readLine();
      while (!RECORD_END.equals(line))
      {
        if (line == null)
        {
          // readLine() keeps returning null at end of stream, so without
          // this check the loop would never terminate
          throw new EOFException("Input ended after " + skipped
                  + " complete records; cannot skip " + index);
        }
        line = br.readLine();
      }
    }
  }

  /**
   * Reloads the previously exported data, resumes at the stored position and
   * processes further families until {@code increments} of them with the
   * "amino" alphabet have been analysed. Afterwards the new position and
   * the updated data are written back and the in-memory data is cleared.
   * 
   * @param increments
   *          number of amino acid families to analyse in this call
   * @throws IOException
   *           if any of the input or output files cannot be read or written
   */
  public void run(int increments) throws IOException
  {
    readPreviousData(currentFolder);
    currentFilePosition = readPosition();

    try (BufferedReader inputSTO = new BufferedReader(
            new FileReader(FAMILIES));
            BufferedReader inputHMM = new BufferedReader(
                    new FileReader(HMMS)))
    {
      moveToFile(currentFilePosition, inputHMM);
      moveToFile(currentFilePosition, inputSTO);

      int filesRead = 0;
      while (filesRead < increments)
      {
        FileParse parserSTO = new FileParse(inputSTO, "",
                DataSourceType.FILE);
        readStockholm(parserSTO);

        FileParse parserHMM = new FileParse(inputHMM, "",
                DataSourceType.FILE);
        readHMM(parserHMM);

        // only amino acid families contribute to the statistics, but every
        // family read moves the resume position forward
        if ("amino".equals(hmm.getAlphabetType()))
        {
          int count = countValidResidues();
          processData(count);
          filesRead++;
        }
        currentFilePosition++;
      }
    }

    // must be the same file that readPosition() and clear() use, otherwise
    // the next call starts again from the old position
    try (PrintWriter p = new PrintWriter(
            new File(currentFolder + POSITION)))
    {
      p.print(currentFilePosition);
    }
    exportData(currentFolder);
    raw.clear();
    binned.clear();
  }

  /**
   * Reads the resume position from the position file in the working folder.
   * 
   * @return the stored record index
   * @throws IOException
   *           if the file cannot be read or is empty
   */
  private int readPosition() throws IOException
  {
    try (BufferedReader posReader = new BufferedReader(
            new FileReader(currentFolder + POSITION)))
    {
      String line = posReader.readLine();
      if (line == null)
      {
        throw new IOException(
                "Position file is empty: " + currentFolder + POSITION);
      }
      return Integer.parseInt(line.trim());
    }
  }

  /**
   * Loads both CSV files previously written by exportData.
   * 
   * @param source
   *          folder containing the CSV files
   * @throws IOException
   *           if either file cannot be read
   */
  public void readPreviousData(String source) throws IOException
  {
    readBinned(source);
    readRaw(source);
  }

  /**
   * Loads the binned data. Each line has the form {@code key,value}; reading
   * stops at the first empty line or at end of file. An empty file leaves
   * the current binned data untouched.
   * 
   * @param source
   *          folder containing the binned CSV file
   * @throws IOException
   *           if the file cannot be read or a line has no comma
   */
  public void readBinned(String source) throws IOException
  {
    // one map for the whole file: creating it per line would keep only the
    // last entry
    Map<String, Double> loaded = new HashMap<>();
    try (BufferedReader input = new BufferedReader(
            new FileReader(source + BINNED)))
    {
      String line = input.readLine();
      while (line != null && !line.isEmpty())
      {
        int comma = line.indexOf(',');
        if (comma < 0)
        {
          throw new IOException("Malformed line in " + source + BINNED
                  + ": " + line);
        }
        // values are written with Double.toString, so parse them the same
        // locale-independent way
        loaded.put(line.substring(0, comma),
                Double.parseDouble(line.substring(comma + 1)));
        line = input.readLine();
      }
    }
    if (!loaded.isEmpty())
    {
      binned = loaded;
    }
  }

  /**
   * Loads the raw data. The first line is a header with one comma separated
   * entry per family; every following line holds one value per family, with
   * the text {@code EMPTY} where a family has no more values. Reading stops
   * at the first empty line or at end of file. An empty file leaves the
   * current raw data untouched.
   * 
   * @param source
   *          folder containing the raw CSV file
   * @throws IOException
   *           if the file cannot be read
   */
  public void readRaw(String source) throws IOException
  {
    try (BufferedReader input = new BufferedReader(
            new FileReader(source + RAW)))
    {
      String header = input.readLine();
      if (header == null)
      {
        return;
      }

      // one (initially empty) column per header entry
      raw = new ArrayList<>();
      try (Scanner headerScanner = new Scanner(header))
      {
        headerScanner.useDelimiter(",");
        while (headerScanner.hasNext())
        {
          headerScanner.next();
          raw.add(new ArrayList<Double>());
        }
      }

      String line = input.readLine();
      while (line != null && !line.isEmpty())
      {
        try (Scanner scanner = new Scanner(line))
        {
          scanner.useDelimiter(",");
          int column = 0;
          while (scanner.hasNext())
          {
            String value = scanner.next();
            if (!value.equals("EMPTY"))
            {
              raw.get(column).add(Double.parseDouble(value));
            }
            column++;
          }
        }
        line = input.readLine();
      }
    }
  }

  /**
   * Counts the residues of the current alignment that sit in a column mapped
   * to an HMM node and are present in
   * {@code ResidueProperties.aminoBackgroundFrequencies}.
   * 
   * @return number of such residues over all sequences
   */
  public int countValidResidues()
  {
    int count = 0;

    for (int seq = 0; seq < sequences.size(); seq++)
    {
      // NOTE(review): nodes 1 .. getLength() - 1 are visited; whether the
      // node numbered getLength() should be included depends on
      // HiddenMarkovModel - confirm
      for (int node = 1; node < hmm.getLength(); node++)
      {
        int alignPos = hmm.getNodeAlignmentColumn(node);
        char symbol = sequences.get(seq).getCharAt(alignPos);
        if (ResidueProperties.aminoBackgroundFrequencies
                .containsKey(symbol))
        {
          count++;
        }
      }
    }

    return count;
  }

  /**
   * Adds the log-odds values of the current family to the raw data (as a new
   * column) and to the binned data.
   * 
   * @param count
   *          number of valid residues in the family, as returned by
   *          countValidResidues; each residue adds {@code SCALE / count} to
   *          its bin
   */
  public void processData(int count)
  {
    ArrayList<Double> familyValues = new ArrayList<>();
    raw.add(familyValues);
    double weight = SCALE / count;

    for (int seq = 0; seq < sequences.size(); seq++)
    {
      for (int node = 1; node < hmm.getLength(); node++)
      {
        int alignPos = hmm.getNodeAlignmentColumn(node);
        char symbol = sequences.get(seq).getCharAt(alignPos);
        if (!ResidueProperties.aminoBackgroundFrequencies
                .containsKey(symbol))
        {
          continue;
        }

        double prob = hmm.getMatchEmissionProbability(alignPos, symbol);
        double bfreq = ResidueProperties.aminoBackgroundFrequencies
                .get(symbol);
        double llr = Math.log(prob / bfreq);
        familyValues.add(llr);

        // Locale.ROOT keeps '.' as the decimal separator, so the key can
        // never contain the comma used as field separator in the CSV file
        String bin = String.format(Locale.ROOT, "%.1f", llr);
        if ("-0.0".equals(bin))
        {
          // merge negative zero into the zero bin
          bin = "0.0";
        }
        Double previous = binned.get(bin);
        binned.put(bin, previous == null ? weight : previous + weight);
      }
    }
  }

  /**
   * Parses one Stockholm alignment and stores its sequences.
   * 
   * @param source
   *          parser positioned at the alignment
   * @throws IOException
   *           if parsing fails
   */
  public void readStockholm(FileParse source) throws IOException
  {
    StockholmFile file = new StockholmFile(source);
    file.parse();
    sequences = file.getSeqs();
  }

  /**
   * Parses one HMM and stores it.
   * 
   * @param source
   *          parser positioned at the model
   * @throws IOException
   *           if parsing fails
   */
  public void readHMM(FileParse source) throws IOException
  {
    HMMFile file = new HMMFile(source);
    file.parse();
    hmm = file.getHMM();
  }

  /**
   * Writes the binned and raw data as CSV files. The raw file starts with a
   * header line ({@code Fam 1,Fam 2,...}) followed by one line per row;
   * columns that have run out of values are filled with {@code EMPTY}. The
   * last line written consists of {@code EMPTY} entries only.
   * 
   * @param location
   *          folder to write the files to
   * @throws FileNotFoundException
   *           if either file cannot be created
   */
  public void exportData(String location) throws FileNotFoundException
  {
    try (PrintWriter writerBin = new PrintWriter(
            new File(location + BINNED)))
    {
      for (Map.Entry<String, Double> entry : binned.entrySet())
      {
        writerBin.println(entry.getKey() + "," + entry.getValue());
      }
    }

    try (PrintWriter writerRaw = new PrintWriter(new File(location + RAW)))
    {
      StringBuilder identifier = new StringBuilder();
      for (int family = 1; family <= raw.size(); family++)
      {
        identifier.append("Fam ").append(family).append(',');
      }
      writerRaw.println(identifier);

      boolean rowIsEmpty = false;
      int row = 0;
      while (!rowIsEmpty)
      {
        rowIsEmpty = true;
        StringBuilder text = new StringBuilder();
        for (List<Double> column : raw)
        {
          if (column.size() <= row)
          {
            text.append("EMPTY,");
          }
          else
          {
            text.append(column.get(row)).append(',');
            rowIsEmpty = false;
          }
        }
        row++;
        writerRaw.println(text);
      }
    }
  }

  /**
   * Prints the alignment record with the given index to standard output.
   * 
   * @param index
   *          0-based record index in the families file
   * @throws IOException
   *           if the file cannot be read or has fewer records
   */
  public void printFam(int index) throws IOException
  {
    printRecord(FAMILIES, index);
  }

  /**
   * Prints the HMM record with the given index to standard output.
   * 
   * @param index
   *          0-based record index in the HMM file
   * @throws IOException
   *           if the file cannot be read or has fewer records
   */
  public void printHMM(int index) throws IOException
  {
    printRecord(HMMS, index);
  }

  /**
   * Writes the alignment record with the given index to
   * {@code <name>.sto} in the working folder, where the name is the third
   * whitespace separated token on the second line of the record
   * (presumably the {@code #=GF ID} line - confirm against the input).
   * 
   * @param index
   *          0-based record index in the families file
   * @throws IOException
   *           if the file cannot be read or has fewer records
   */
  public void printFamToFile(int index) throws IOException
  {
    exportRecord(FAMILIES, index, 3, ".sto");
  }

  /**
   * Writes the HMM record with the given index to {@code <name>.hmm} in the
   * working folder, where the name is the second whitespace separated token
   * on the second line of the record (presumably the {@code NAME} line -
   * confirm against the input).
   * 
   * @param index
   *          0-based record index in the HMM file
   * @throws IOException
   *           if the file cannot be read or has fewer records
   */
  public void printHMMToFile(int index) throws IOException
  {
    exportRecord(HMMS, index, 2, ".hmm");
  }

  /**
   * Resets the stored position to 0 and truncates both CSV files in the
   * working folder.
   * 
   * @throws FileNotFoundException
   *           if any of the files cannot be created
   */
  public void clear() throws FileNotFoundException
  {
    // opening a PrintWriter on a path truncates the file; the two CSV files
    // are deliberately left empty
    try (PrintWriter pos = new PrintWriter(currentFolder + POSITION);
            PrintWriter rawFile = new PrintWriter(currentFolder + RAW);
            PrintWriter binFile = new PrintWriter(currentFolder + BINNED))
    {
      pos.println("0");
    }
  }

  /** Prints record number index of the given file to standard output. */
  private void printRecord(String sourceFile, int index) throws IOException
  {
    try (BufferedReader br = new BufferedReader(
            new FileReader(sourceFile)))
    {
      moveToFile(index, br);
      // System.out must stay open, so the wrapper is flushed, not closed
      PrintWriter out = new PrintWriter(System.out, true);
      copyRecord(br, out);
      out.flush();
    }
  }

  /**
   * Copies record number index of the given file to a new file in the
   * working folder, named after the nameToken-th token of the record's
   * second line plus the given extension.
   */
  private void exportRecord(String sourceFile, int index, int nameToken,
          String extension) throws IOException
  {
    String name = null;
    try (BufferedReader nameFinder = new BufferedReader(
            new FileReader(sourceFile)))
    {
      moveToFile(index, nameFinder);
      nameFinder.readLine();
      String nameLine = nameFinder.readLine();
      if (nameLine == null)
      {
        throw new EOFException(
                "No record " + index + " in " + sourceFile);
      }
      try (Scanner scanner = new Scanner(nameLine))
      {
        for (int token = 0; token < nameToken; token++)
        {
          name = scanner.next();
        }
      }
    }

    try (BufferedReader br = new BufferedReader(
            new FileReader(sourceFile)))
    {
      moveToFile(index, br);
      try (PrintWriter writer = new PrintWriter(
              currentFolder + "/" + name + extension))
      {
        copyRecord(br, writer);
      }
    }
  }

  /**
   * Copies lines up to and including the next record terminator; stops
   * quietly if the input ends first.
   */
  private static void copyRecord(BufferedReader in, PrintWriter out)
          throws IOException
  {
    String line = in.readLine();
    while (line != null)
    {
      out.println(line);
      if (RECORD_END.equals(line))
      {
        return;
      }
      line = in.readLine();
    }
  }

}
// File: src/jalview/util/ProbabilityAnalyserKickstarter.java
package jalview.util;

import java.io.IOException;
import java.util.NoSuchElementException;
import java.util.Scanner;

/**
 * Minimal interactive console for HMMProbabilityDistributionAnalyser. Reads
 * one command per line from standard input until {@code terminate} is
 * entered or the input ends.
 * <p>
 * Commands (the first word on the line selects the command):
 * <ul>
 * <li>{@code cd <folder>} - set the working folder</li>
 * <li>{@code run <loops> <increments>} - call {@code run(increments)}
 * {@code loops} times</li>
 * <li>{@code printFam <index>} / {@code printHMM <index>} - print a record
 * to standard output</li>
 * <li>{@code exportFam <index>} / {@code exportHMM <index>} - write a record
 * to a file in the working folder; any further arguments are ignored</li>
 * <li>{@code clear} - reset the stored position and data files</li>
 * <li>{@code terminate} - leave the loop</li>
 * </ul>
 */
public class ProbabilityAnalyserKickstarter
{

  /**
   * Runs the command loop.
   * 
   * @param args
   *          not used
   * @throws IOException
   *           if the analyser fails to read or write one of its files
   * @throws InterruptedException
   *           declared for compatibility; not thrown by this method
   */
  public static void main(String[] args)
          throws IOException, InterruptedException
  {
    HMMProbabilityDistributionAnalyser analyser = new HMMProbabilityDistributionAnalyser();

    System.out.println("ACTIVATED");

    // a single scanner for the whole session: a scanner buffers its input,
    // so creating a new one per line can drop lines that are already
    // buffered. It is not closed, because that would close System.in.
    Scanner keyboard = new Scanner(System.in);
    boolean running = true;
    while (running && keyboard.hasNextLine())
    {
      running = execute(analyser, keyboard.nextLine());
    }
  }

  /**
   * Executes a single command line.
   * 
   * @param analyser
   *          the analyser to operate on
   * @param command
   *          the line entered by the user
   * @return false if the command asks to terminate, true otherwise
   * @throws IOException
   *           if the analyser fails to read or write one of its files
   */
  private static boolean execute(
          HMMProbabilityDistributionAnalyser analyser, String command)
          throws IOException
  {
    try (Scanner input = new Scanner(command))
    {
      if (!input.hasNext())
      {
        return true;
      }

      // match on the first word only: a substring search over the whole
      // line would let an argument such as a folder named "clear" or
      // "run" trigger the wrong command
      switch (input.next())
      {
      case "printFam":
        analyser.printFam(input.nextInt());
        break;
      case "printHMM":
        analyser.printHMM(input.nextInt());
        break;
      case "exportFam":
        analyser.printFamToFile(input.nextInt());
        break;
      case "exportHMM":
        analyser.printHMMToFile(input.nextInt());
        break;
      case "run":
      {
        int loops = input.nextInt();
        int increments = input.nextInt();
        for (int i = 0; i < loops; i++)
        {
          analyser.run(increments);
        }
        break;
      }
      case "terminate":
        return false;
      case "clear":
        analyser.clear();
        break;
      case "cd":
        analyser.setFolder(input.next());
        break;
      default:
        System.err.println("Unknown command: " + command);
        break;
      }
    } catch (NoSuchElementException e)
    {
      // missing or non-numeric argument (InputMismatchException is a
      // subclass); report it and keep the session alive
      System.err.println("Malformed command: " + command);
    }
    return true;
  }

}