3 import jalview.datamodel.HiddenMarkovModel;
4 import jalview.datamodel.SequenceI;
5 import jalview.io.DataSourceType;
6 import jalview.io.FileParse;
7 import jalview.io.HMMFile;
8 import jalview.io.StockholmFile;
9 import jalview.schemes.ResidueProperties;
11 import java.io.BufferedReader;
13 import java.io.FileNotFoundException;
14 import java.io.FileReader;
15 import java.io.IOException;
16 import java.io.PrintWriter;
17 import java.util.ArrayList;
18 import java.util.HashMap;
19 import java.util.List;
21 import java.util.Scanner;
22 import java.util.Vector;
/**
 * Collects log-likelihood-ratio statistics by pairing the records of a
 * Stockholm alignment file with the records of an HMM file. The fields below
 * hold the record currently being processed, the accumulated results and the
 * input/output locations.
 */
24 public class HMMProbabilityDistributionAnalyser
// sequences of the alignment most recently read by readStockholm
27 Vector<SequenceI> sequences;
// the HMM consulted by run, countValidResidues and processData
29 HiddenMarkovModel hmm;
// one inner list of log-ratio values per processData call; exportData
// writes each inner list as a "Fam n" column
31 List<ArrayList<Double>> raw = new ArrayList<>();
// running total per bin; the key is the log-ratio formatted to one decimal
// place (see processData)
33 Map<String, Double> binned = new HashMap<>();
// NOTE(review): FAMILIES and HMMS are absolute paths on one particular
// machine; the visible code offers no way to override them
// Stockholm input opened by run, printFam and printFamToFile
35 final static String FAMILIES = "C:/Users/TZVanaalten/Pfam-A.full";
// HMM input opened by run, printHMM and printHMMToFile
37 final static String HMMS = "H:/Desktop/PFAM/HMMs/Pfam-A.hmm";
// file name (with leading slash) appended to a folder for the raw values
39 final static String RAW = "/Raw.csv";
// file name (with leading slash) appended to a folder for the bin totals
41 final static String BINNED = "/Binned.csv";
// numerator of the per-residue bin increment (SCALE / count) in processData
43 final static double SCALE = 100000;
// number of records to skip in both input files before processing; loaded
// from CurrentPosition.txt by run
45 int currentFilePosition = 0;
// newline constant; not referenced by any of the lines visible here
47 final static String NL = "\n";
/**
 * Sets the folder this analyser works in.
 * NOTE(review): the method body is not visible here; presumably it stores
 * {@code path} in the currentFolder field that the other methods read from
 * and write to - confirm.
 *
 * @param path
 *          location of the working folder
 */
51 public void setFolder(String path)
56 public void moveToFile(int index, BufferedReader br) throws IOException
58 for (int i = 0; i < index; i++)
60 String line = br.readLine();
61 while (!"//".equals(line))
69 * Analyses probability data
/**
 * Resumes the analysis from the saved position: reloads earlier results,
 * skips the already-processed records in both input files, processes further
 * Stockholm/HMM record pairs while {@code filesRead < increments}, then
 * writes the new position and exports the data.
 * NOTE(review): the declaration and update of filesRead, and the calls that
 * read the HMM and process the counted residues, are not visible here.
 *
 * @param increments
 *          upper bound compared against filesRead in the processing loop
 * @throws IOException
 *           if any of the files cannot be read or written
 */
74 public void run(int increments) throws IOException
// reload the results exported by earlier runs
77 readPreviousData(currentFolder);
// the saved position is the first line of CurrentPosition.txt
// NOTE(review): posReader is not visibly closed; a null or non-numeric line
// makes Integer.parseInt throw NumberFormatException
79 BufferedReader posReader = new BufferedReader(
80 new FileReader(currentFolder + "/CurrentPosition.txt"));
81 String line = posReader.readLine();
83 currentFilePosition = Integer.parseInt(line);
// NOTE(review): neither input reader is visibly closed
85 BufferedReader inputSTO = new BufferedReader(
86 new FileReader(FAMILIES));
87 BufferedReader inputHMM = new BufferedReader(
88 new FileReader(HMMS));
// skip the same number of records in both files so that they stay paired
90 moveToFile(currentFilePosition, inputHMM);
91 moveToFile(currentFilePosition, inputSTO);
94 while (filesRead < increments)
// wrap the shared reader so the parser consumes the next record from it
96 FileParse parserSTO = new FileParse(inputSTO, "",
98 readStockholm(parserSTO);
100 FileParse parserHMM = new FileParse(inputHMM, "",
101 DataSourceType.FILE);
// only HMMs whose alphabet type is "amino" reach the residue count
104 if (hmm.getAlphabetType().equals("amino"))
106 int count = countValidResidues();
110 currentFilePosition++;
// NOTE(review): the position is read from "/CurrentPosition.txt" above but
// written to "/CurrentPosition" here (no extension), so the saved progress
// looks like it is never picked up by the next run - confirm and align
113 PrintWriter p = new PrintWriter(
114 new File(currentFolder + "/CurrentPosition"));
115 p.print(currentFilePosition);
117 exportData(currentFolder);
/**
 * Loads the results of earlier runs from the given folder; called by run
 * before any new records are processed.
 * NOTE(review): the method body is not visible here; presumably it delegates
 * to readBinned and readRaw - confirm.
 *
 * @param source
 *          folder holding the previously exported files
 * @throws IOException
 *           if the files cannot be read
 */
123 public void readPreviousData(String source) throws IOException
129 public void readBinned(String source) throws IOException
131 BufferedReader input = new BufferedReader(
132 new FileReader(source + BINNED));
133 String line = input.readLine();
134 while (!("".equals(line) || line == null))
136 binned = new HashMap<>();
137 Scanner scanner = new Scanner(line);
138 scanner.useDelimiter(",");
139 binned.put(scanner.next(), scanner.nextDouble());
141 line = input.readLine();
/**
 * Rebuilds the raw value lists from the raw CSV file in the given folder.
 * The first line is used only to count columns; every following non-empty
 * line contributes its parseable cells to the column lists.
 * NOTE(review): the declarations of i and value and the update of i are not
 * visible here; i is presumably the index of the current column - confirm.
 *
 * @param source
 *          folder containing the raw CSV file
 * @throws IOException
 *           if the file cannot be opened or read
 */
147 public void readRaw(String source) throws IOException
// NOTE(review): input is not visibly closed
149 BufferedReader input = new BufferedReader(new FileReader(source + RAW));
150 String line = input.readLine();
// header line: create one empty list per comma-separated token
156 Scanner numberScanner = new Scanner(line);
157 numberScanner.useDelimiter(",");
158 raw = new ArrayList<>();
159 while (numberScanner.hasNext())
161 numberScanner.next();
162 raw.add(new ArrayList<Double>());
164 numberScanner.close();
// data lines: stop at the first empty line or at end of file
166 line = input.readLine();
167 while (!("".equals(line) || line == null))
169 Scanner scanner = new Scanner(line);
170 scanner.useDelimiter(",");
173 while (scanner.hasNext())
176 value = scanner.next();
// "EMPTY" is the placeholder exportData writes for a column that has no
// value in this row, so it is not parsed
177 if (!value.equals("EMPTY"))
179 raw.get(i).add(Double.parseDouble(value));
185 line = input.readLine();
/**
 * Visits, for every sequence, the alignment column of each HMM node and
 * tests whether the character found there is a key of
 * ResidueProperties.aminoBackgroundFrequencies.
 * NOTE(review): the counter and the return statement are not visible here;
 * presumably the number of characters passing the test is returned -
 * confirm.
 *
 * @return an int; see the note above
 */
191 public int countValidResidues()
195 for (int width = 0; width < sequences.size(); width++)
// NOTE(review): nodes are visited from 1 up to getLength() - 1; confirm
// whether the node at index getLength() should be included as well
197 for (int length = 1; length < hmm.getLength(); length++)
// alignment column associated with this node
201 alignPos = hmm.getNodeAlignmentColumn(length);
203 symbol = sequences.get(width).getCharAt(alignPos);
204 if (ResidueProperties.aminoBackgroundFrequencies
205 .containsKey(symbol))
/**
 * Computes, for every character of the current sequences that sits on an HMM
 * node column and has a background frequency, the natural log of (match
 * emission probability / background frequency). Each value is appended to a
 * new raw list and added to the bin named after the value rounded to one
 * decimal place.
 * NOTE(review): the local declarations and the branch structure around the
 * "-0.0" and bin-update statements are not visible here.
 *
 * @param count
 *          divisor of the per-value bin increment (SCALE / count); run
 *          passes the result of countValidResidues
 */
215 public void processData(int count)
// values gathered by this call go into a fresh list at the end of raw
218 raw.add(new ArrayList<Double>());
219 int rawPos = raw.size() - 1;
220 for (int width = 0; width < sequences.size(); width++)
// NOTE(review): same node range as countValidResidues (1 to getLength() - 1)
222 for (int length = 1; length < hmm.getLength(); length++)
226 alignPos = hmm.getNodeAlignmentColumn(length);
228 symbol = sequences.get(width).getCharAt(alignPos);
229 if (ResidueProperties.aminoBackgroundFrequencies
230 .containsKey(symbol))
// NOTE(review): the probability is looked up with the alignment column
// rather than the node index - confirm this matches the HMM API
236 prob = hmm.getMatchEmissionProbability(alignPos, symbol);
237 bfreq = ResidueProperties.aminoBackgroundFrequencies.get(symbol);
// Math.log is the natural logarithm
238 llr = Math.log(prob / bfreq);
239 raw.get(rawPos).add(llr);
// bin label: the value formatted to one decimal place
// NOTE(review): String.format uses the default locale here; a locale with a
// comma decimal separator would give labels such as "0,1", which clash with
// the comma-separated output of exportData
241 output = String.format("%.1f", llr);
// NOTE(review): the statement guarded by this test is not visible;
// presumably "-0.0" is folded into the "0.0" bin - confirm
242 if ("-0.0".equals(output))
// add SCALE / count to an existing bin ...
246 if (binned.containsKey(output))
248 double prev = binned.get(output);
249 prev += (SCALE / count);
250 binned.put(output, prev);
// ... or start the bin with that amount
255 binned.put(output, SCALE / count);
/**
 * Wraps the given source in a StockholmFile and stores the sequences it
 * yields in the sequences field.
 * NOTE(review): a line between the two visible statements may be missing
 * from this view (for example an explicit parse call) - confirm.
 *
 * @param source
 *          parser input positioned on a Stockholm record
 * @throws IOException
 *           if the source cannot be read
 */
263 public void readStockholm(FileParse source) throws IOException
265 StockholmFile file = new StockholmFile(source);
267 sequences = file.getSeqs();
/**
 * Wraps the given source in an HMMFile.
 * NOTE(review): the remainder of the body is not visible here; presumably it
 * parses the file and assigns the resulting model to the hmm field -
 * confirm.
 *
 * @param source
 *          parser input positioned on an HMM record
 * @throws IOException
 *           if the source cannot be read
 */
270 public void readHMM(FileParse source) throws IOException
273 HMMFile file = new HMMFile(source);
/**
 * Writes the binned totals and the raw values as CSV files into the given
 * folder. The binned file holds one "key,value" line per bin. The raw file
 * starts with a "Fam n," header and then holds one line per row index, with
 * "EMPTY," wherever a column has no value at that row.
 * NOTE(review): the row loop control (declaration and update of row and the
 * use of rowIsEmpty) and any close() calls on the writers are not visible
 * here.
 *
 * @param location
 *          folder the two files are written into
 * @throws FileNotFoundException
 *           if either file cannot be created or opened for writing
 */
279 public void exportData(String location) throws FileNotFoundException
281 PrintWriter writerBin = new PrintWriter(new File(location + BINNED));
282 for (Map.Entry<String, Double> entry : binned.entrySet())
284 writerBin.println(entry.getKey() + "," + entry.getValue());
288 PrintWriter writerRaw = new PrintWriter(new File(location + RAW));
// header: "Fam 1,Fam 2,..." - one label per inner list of raw
290 StringBuilder identifier = new StringBuilder();
292 for (int i = 1; i < raw.size() + 1; i++)
294 identifier.append("Fam " + i + ",");
297 writerRaw.println(identifier);
299 boolean rowIsEmpty = false;
// build one output line for the current row across all columns
304 StringBuilder string = new StringBuilder();
305 for (int column = 0; column < raw.size(); column++)
// this column has fewer values than the current row index
307 if (raw.get(column).size() <= row)
309 string.append("EMPTY,");
313 string.append(raw.get(column).get(row) + ",");
318 writerRaw.println(string);
324 public void printFam(int index) throws IOException
326 BufferedReader br = new BufferedReader(new FileReader(FAMILIES));
328 moveToFile(index, br);
330 String line = br.readLine();
332 while (!"//".equals(line))
334 System.out.println(line);
335 line = br.readLine();
337 System.out.println(line);
342 public void printHMM(int index) throws IOException
344 BufferedReader br = new BufferedReader(new FileReader(HMMS));
346 moveToFile(index, br);
348 String line = br.readLine();
350 while (!"//".equals(line))
352 System.out.println(line);
353 line = br.readLine();
355 System.out.println(line);
/**
 * Copies one record of the Stockholm families file, including its
 * terminating "//" line, into a ".sto" file in the current folder. The file
 * name is taken from a token on the second line of the record.
 * NOTE(review): the declaration of name and two lines just before the
 * visible scanner.next() call are not visible here, so which token of the
 * second line becomes the name cannot be confirmed from this view.
 * NOTE(review): no close() call is visible for nameFinder, scanner, br or
 * writer; the copy loop has no end-of-input check, so a record without a
 * "//" line would make it write "null" indefinitely.
 *
 * @param index
 *          number of records to skip before the one that is copied
 * @throws IOException
 *           if the families file cannot be read or the output cannot be
 *           written
 */
360 public void printFamToFile(int index) throws IOException
// first pass over the file: find the name of the record
364 BufferedReader nameFinder = new BufferedReader(
365 new FileReader(FAMILIES));
367 moveToFile(index, nameFinder);
// discard the first line of the record; the name is on the second
369 nameFinder.readLine();
371 Scanner scanner = new Scanner(nameFinder.readLine());
374 name = scanner.next();
// second pass over the file: copy the record
377 BufferedReader br = new BufferedReader(new FileReader(FAMILIES));
379 moveToFile(index, br);
381 String line = br.readLine();
382 PrintWriter writer = new PrintWriter(
383 currentFolder + "/" + name + ".sto");
384 while (!"//".equals(line))
386 writer.println(line);
387 line = br.readLine();
// write the terminator line as well
389 writer.println(line);
395 public void printHMMToFile(int index) throws IOException
400 BufferedReader nameFinder = new BufferedReader(new FileReader(HMMS));
402 moveToFile(index, nameFinder);
404 nameFinder.readLine();
406 Scanner scanner = new Scanner(nameFinder.readLine());
407 name = scanner.next();
408 name = scanner.next();
411 BufferedReader br = new BufferedReader(new FileReader(HMMS));
413 moveToFile(index, br);
415 String line = br.readLine();
417 PrintWriter writer = new PrintWriter(
418 currentFolder + "/" + name + ".hmm");
419 while (!"//".equals(line))
421 writer.println(line);
422 line = br.readLine();
424 writer.println(line);
/**
 * Opens the position file, the raw CSV file and the binned CSV file of the
 * current folder for writing; constructing a PrintWriter on a file name
 * creates the file or truncates an existing one to zero length.
 * NOTE(review): the rest of the method is not visible here; presumably a
 * start position is written and the three writers are closed - confirm.
 *
 * @throws FileNotFoundException
 *           if any of the files cannot be created or opened for writing
 */
430 public void clear() throws FileNotFoundException
432 PrintWriter pos = new PrintWriter(
433 currentFolder + "/CurrentPosition.txt");
// this local variable shadows the raw field for the rest of the method
436 PrintWriter raw = new PrintWriter(currentFolder + RAW);
438 PrintWriter bin = new PrintWriter(currentFolder + BINNED);