3 import jalview.datamodel.HMMNode;
4 import jalview.datamodel.HiddenMarkovModel;
5 import jalview.datamodel.SequenceI;
7 import java.io.BufferedReader;
8 import java.io.FileNotFoundException;
9 import java.io.IOException;
10 import java.io.PrintWriter;
11 import java.io.UnsupportedEncodingException;
12 import java.util.ArrayList;
13 import java.util.List;
14 import java.util.Scanner;
18 * reads in and writes out a HMMER standard file
24 public class HMMFile extends AlignFile
25 implements AlignmentFileReaderI, AlignmentFileWriterI
27 // HMM to store file data
28 private HiddenMarkovModel hmm = new HiddenMarkovModel();
33 // number of possible transitions
34 private final int NUMBER_OF_TRANSITIONS = 7;
36 private final String NEW_LINE = "\n";
44 private final String SPACE = " ";
46 private final String COMPO = "COMPO";
48 private final String EMPTY = "";
// Constructor taking a FileParse source; declared to throw IOException.
// The constructor body is not part of this excerpt, so what it does with
// `source` cannot be stated from here.
50 public HMMFile(FileParse source) throws IOException
// Accessor returning a HiddenMarkovModel. Body not shown in this excerpt;
// presumably returns the `hmm` field - confirm against the full file.
60 public HiddenMarkovModel getHMM()
// Mutator accepting a HiddenMarkovModel. Body not shown in this excerpt;
// presumably replaces the `hmm` field with `model` - confirm.
65 public void setHMM(HiddenMarkovModel model)
// Returns a name as a String. Body not shown in this excerpt; presumably
// delegates to hmm.getName() (which appendFileProperties also uses) - confirm.
70 public String getName()
76 * reads data from HMM file
// Entry point for reading an HMM file. Passes the reader `dataIn` (declared
// outside this excerpt) to parseFileProperties, which reads the file header
// and the file-property lines.
// NOTE(review): the original lines following the call below are missing from
// this excerpt; a call to parseModel(dataIn) presumably follows - confirm.
81 public void parse() throws IOException
83 parseFileProperties(dataIn);
90 * imports file properties from hmm file
93 * buffered reader used to read in file
// Reads the property section at the top of an HMM file from `input` and
// records each property on `hmm` via addFileProperty(key, value).
// NOTE(review): the loop header, the null-check on `line`, the `else` header
// and the declarations/assignments of `key` are on original lines that are
// missing from this excerpt; comments below only describe the visible lines.
96 void parseFileProperties(BufferedReader input) throws IOException
98 boolean readingFile = true;
// the very first line of the file is stored as-is in `fileHeader` (declared
// outside this excerpt); appendFileProperties writes it back out on export
99 fileHeader = input.readLine();
100 String line = input.readLine();
// split the current line into whitespace-delimited tokens (Scanner default);
// the first token decides which branch handles the line.
// NOTE(review): parser.next() throws NoSuchElementException on a blank line
// unless a guard on a missing line prevents it - confirm.
105 Scanner parser = new Scanner(line);
106 String next = parser.next();
107 if ("HMM".equals(next)) // indicates start of HMM data (end of file
// the remaining tokens of the "HMM" line are handed to the model, and the
// resulting symbol count is cached for the emission-line readers
111 hmm.fillSymbols(parser);
112 numberOfSymbols = hmm.getNumberOfSymbols();
114 else if ("STATS".equals(next))
// STATS value = two consecutive tokens joined by TWO spaces
120 value = parser.next() + SPACE + SPACE + parser.next();
121 hmm.addFileProperty(key, value);
// any other property: value = all remaining tokens joined by single spaces
// (runs of whitespace in the file are therefore collapsed)
126 String value = parser.next();
127 while (parser.hasNext())
129 value = value + SPACE + parser.next();
131 hmm.addFileProperty(key, value);
// advance to the next line of the file
135 line = input.readLine();
145 * parses the model data from the hmm file
148 * buffered reader used to read file
149 * @throws IOException
// Reads the model section from `input`: for each index 0..hmm.getLength()
// (inclusive) a new HMMNode is appended to hmm.getNodes() and filled from
// three consecutive lines - match emissions, insert emissions and state
// transitions.
// NOTE(review): the declarations of `line` and `next`, and any guard around
// the parseAnnotations call, are on lines missing from this excerpt.
151 void parseModel(BufferedReader input) throws IOException
153 for (int i = 0; i < hmm.getLength() + 1; i++)
155 hmm.getNodes().add(new HMMNode());
158 line = input.readLine();
159 Scanner matchReader = new Scanner(line);
// first token of the match line: "COMPO" is expected for node 0; for later
// nodes the token is consumed but its value is not checked
160 next = matchReader.next();
161 if (next.equals(COMPO) || i > 0)
163 // stores match emission line in list
// the list created on the next line is discarded immediately - fillList
// returns its own list
164 List<Double> matches = new ArrayList<>();
165 matches = fillList(matchReader, numberOfSymbols);
166 hmm.getNodes().get(i).setMatchEmissions(matches);
// the remaining tokens on the match line are the per-node annotations
169 parseAnnotations(matchReader, i);
// NOTE(review): no matchReader.close() is visible in this excerpt, unlike the
// two scanners below - confirm against the full file.
173 // stores insert emission line in list
174 line = input.readLine();
175 Scanner insertReader = new Scanner(line);
176 List<Double> inserts = new ArrayList<>();
177 inserts = fillList(insertReader, numberOfSymbols);
178 hmm.getNodes().get(i).setInsertEmissions(inserts);
179 insertReader.close();
181 // stores state transition line in list
182 line = input.readLine();
183 Scanner transitionReader = new Scanner(line);
184 List<Double> transitions = new ArrayList<>();
// a transition line always carries NUMBER_OF_TRANSITIONS (7) values
185 transitions = fillList(transitionReader, NUMBER_OF_TRANSITIONS);
186 hmm.getNodes().get(i).setStateTransitions(transitions);
187 transitionReader.close();
193 * parses annotations on match emission line
196 * scanner which is processing match emission line
198 * index of node which is being scanned
// Consumes the annotation tokens that follow the emission values on a match
// line and stores them on node `index`: alignment column (only when the map
// is active), then consensus residue, reference annotation, mask value and
// consensus structure, in that order.
// NOTE(review): the local declarations and whatever happens when
// hmm.mapIsActive() is false (original lines 208-213) are missing from this
// excerpt; presumably the column token is skipped in that case - confirm.
200 void parseAnnotations(Scanner scanner, int index)
202 if (hmm.mapIsActive())
// nextInt() throws InputMismatchException if the token is not an integer
205 column = scanner.nextInt();
206 hmm.getNodes().get(index).setAlignmentColumn(column);
// reverse lookup: alignment column -> node index
207 hmm.getNodeLookup().put(column, index);
// each remaining annotation is a single character: only the first character
// of the token is kept (see charValue)
215 consensusR = charValue(scanner.next());
216 hmm.getNodes().get(index).setConsensusResidue(consensusR);
219 reference = charValue(scanner.next());
220 hmm.getNodes().get(index).setReferenceAnnotation(reference);
224 value = charValue(scanner.next());
225 hmm.getNodes().get(index).setMaskValue(value);
228 consensusS = charValue(scanner.next());
229 hmm.getNodes().get(index).setConsensusStructure(consensusS);
236 * scanner for line containing data to be transferred to list
237 * @param numberOfElements
238 * number of elements in the list to be filled
239 * @return filled list
241 static List<Double> fillList(Scanner input,
242 int numberOfElements)
244 List<Double> list = new ArrayList<>();
245 for (int i = 0; i < numberOfElements; i++)
248 String next = input.next();
249 if (next.contains("*")) // state transitions to or from delete states
250 // occasionally have values of -infinity. These
251 // values are represented by an * in the .hmm
252 // file, and by a null value in the
253 // HiddenMarkovModel class
255 list.add(Double.NEGATIVE_INFINITY);
259 double prob = Double.valueOf(next);
260 prob = Math.pow(Math.E, -prob);
269 * writes a HiddenMarkovModel to a file
271 * @param exportLocation
272 * Filename, URL or Pasted String to write to
273 * @throws FileNotFoundException
274 * @throws UnsupportedEncodingException
// Builds the text of the HMM file in memory and opens a PrintWriter on
// `exportLocation`.
// NOTE(review): the lines that append the model, write `file` to `output` and
// close `output` are missing from this excerpt - confirm that the writer is
// closed on every path (e.g. try-with-resources).
// NOTE(review): PrintWriter(String) interprets its argument as a file name
// and encodes with the platform default charset.
278 public void exportFile(String exportLocation) throws IOException
280 StringBuilder file = new StringBuilder();
// property section first
281 appendFileProperties(file);
285 PrintWriter output = new PrintWriter(exportLocation);
291 String addData(int initialColumnSeparation,
292 int columnSeparation, List<String> data)
296 for (String value : data)
300 line += String.format("%" + initialColumnSeparation + "s", value);
304 line += String.format("%" + columnSeparation + "s", value);
311 List<String> charListToStringList(List<Character> list)
313 List<String> strList = new ArrayList<>();
314 for (char value : list)
316 String strValue = Character.toString(value);
317 strList.add(strValue);
// Converts each double in `list` to the text that is written to the HMM
// file and returns the texts in the same order.
// NOTE(review): the second parameter line (callers pass the int 5), the
// declaration of `strValue`, the first branch condition, the final `else`
// branch and the return statement are on lines missing from this excerpt.
// NOTE(review): String.format without an explicit Locale uses the default
// locale, so the decimal separator may not be '.' on every machine.
322 List<String> doubleListToStringList(List<Double> list,
325 List<String> strList = new ArrayList<>();
// auto-unboxing: a null element would throw NullPointerException here
326 for (double value : list)
// fixed five decimal places, regardless of the second argument as far as the
// visible lines show
331 strValue = String.format("%.5f", value);
// IEEE comparison: true for both 0.0 and -0.0, which are written unsigned
334 else if (value == -0.00000d)
336 strValue = "0.00000";
343 strList.add(strValue);
348 List<String> stringArrayToStringList(String[] array)
350 List<String> list = new ArrayList<>();
351 for (String value : array)
// Appends the model section to `file`: a symbol header line, a
// transition-type header line, then for every node 0..hmm.getLength()
// (inclusive) a match-emission line, an insert-emission line and a
// state-transition line.
// NOTE(review): the declaration of `matchLine` and the conditions that choose
// between the "COMPO" and numeric labels and that guard the annotation
// columns are on lines missing from this excerpt.
// NOTE(review): convertListToLogSpace overwrites the elements of the list it
// is given. If getMatchEmissions()/getInsertEmissions()/getStateTransitions()
// return the node's live lists, exporting alters the in-memory model and a
// second export would write different values - confirm.
359 void appendModel(StringBuilder file)
// header: "HMM" followed by the alphabet symbols in fixed-width columns
361 String symbolLine = "HMM";
362 List<Character> charSymbols = hmm.getSymbols();
363 List<String> strSymbols;
364 strSymbols = charListToStringList(charSymbols);
365 symbolLine += addData(11, 9, strSymbols);
366 file.append(symbolLine + NEW_LINE);
// second header line: the names of the transition types
368 String transitionTypeLine = "";
369 List<String> transitionTypes;
370 transitionTypes = stringArrayToStringList(hmm.getTransitionTypes());
371 transitionTypeLine += addData(16, 9, transitionTypes);
372 file.append(transitionTypeLine + NEW_LINE);
374 int length = hmm.getLength();
376 for (int node = 0; node <= length; node++)
// row label, right-aligned in 7 characters: "COMPO" or the node number
381 matchLine = String.format("%7s", "COMPO");
385 matchLine = String.format("%7s", node);
// match emissions: converted to -ln(p), formatted, then laid out in columns
388 List<String> strMatches;
389 List<Double> doubleMatches;
390 doubleMatches = hmm.getNode(node).getMatchEmissions();
391 convertListToLogSpace(doubleMatches);
392 strMatches = doubleListToStringList(doubleMatches, 5);
393 matchLine += addData(10, 9, strMatches);
// per-node annotations, single-space separated, in the same order that
// parseAnnotations reads them
398 matchLine += SPACE + hmm.getNodeAlignmentColumn(node);
399 matchLine += SPACE + hmm.getConsensusResidue(node);
400 matchLine += SPACE + hmm.getReferenceAnnotation(node);
401 matchLine += SPACE + hmm.getMaskedValue(node);
402 matchLine += SPACE + hmm.getConsensusStructure(node);
406 file.append(matchLine + NEW_LINE);
// insert emissions: no row label, so the first column is wider (17)
408 String insertLine = EMPTY;
409 List<String> strInserts;
410 List<Double> doubleInserts;
411 doubleInserts = hmm.getNode(node).getInsertEmissions();
412 convertListToLogSpace(doubleInserts);
413 strInserts = doubleListToStringList(doubleInserts, 5);
414 insertLine += addData(17, 9, strInserts);
416 file.append(insertLine + NEW_LINE);
// state transitions: same layout as the insert line
418 String transitionLine = EMPTY;
419 List<String> strTransitions;
420 List<Double> doubleTransitions;
421 doubleTransitions = hmm.getNode(node).getStateTransitions();
422 convertListToLogSpace(doubleTransitions);
423 strTransitions = doubleListToStringList(doubleTransitions, 5);
424 transitionLine += addData(17, 9, strTransitions);
426 file.append(transitionLine + NEW_LINE);
// Appends the property section to `file`, one "KEY value" line per property,
// starting with the stored file header. Keys are left-aligned and padded to
// 5 characters ("%-5s"), followed by a space and the value. Properties whose
// getter returns null are omitted where a null-check is visible.
// NOTE(review): the declarations of `line`, `status` and `statusStr`, the
// continuation lines of several String.format calls (their last argument)
// and possibly further guards are on lines missing from this excerpt.
430 void appendFileProperties(StringBuilder file)
// first line of the original file, as captured by parseFileProperties
434 file.append(fileHeader + NEW_LINE);
436 line = String.format("%-5s %1s", "NAME", hmm.getName());
437 file.append((line + NEW_LINE));
439 if (hmm.getAccessionNumber() != null)
441 line = String.format("%-5s %1s", "ACC", hmm.getAccessionNumber());
442 file.append((line + NEW_LINE));
445 if (hmm.getDescription() != null)
447 line = String.format("%-5s %1s", "DESC", hmm.getDescription());
448 file.append((line + NEW_LINE));
450 line = String.format("%-5s %1s", "LENG", hmm.getLength());
451 file.append((line + NEW_LINE));
453 if (hmm.getMaxInstanceLength() != null)
455 line = String.format("%-5s %1s", "MAXL", hmm.getMaxInstanceLength());
456 file.append((line + NEW_LINE));
458 line = String.format("%-5s %1s", "ALPH", hmm.getAlphabetType());
459 file.append((line + NEW_LINE));
// the five annotation flags: each boolean is turned into text by
// HiddenMarkovModel.findStringFromBoolean before being written
464 status = hmm.referenceAnnotationIsActive();
465 statusStr = HiddenMarkovModel.findStringFromBoolean(status);
466 line = String.format("%-5s %1s", "RF",
468 file.append((line + NEW_LINE));
470 status = hmm.maskValueIsActive();
471 statusStr = HiddenMarkovModel.findStringFromBoolean(status);
472 line = String.format("%-5s %1s", "MM",
474 file.append((line + NEW_LINE));
476 status = hmm.consensusResidueIsActive();
477 statusStr = HiddenMarkovModel.findStringFromBoolean(status);
478 line = String.format("%-5s %1s", "CONS",
480 file.append((line + NEW_LINE));
482 status = hmm.consensusStructureIsActive();
483 statusStr = HiddenMarkovModel.findStringFromBoolean(status);
484 line = String.format("%-5s %1s", "CS",
486 file.append((line + NEW_LINE));
488 status = hmm.mapIsActive();
489 statusStr = HiddenMarkovModel.findStringFromBoolean(status);
490 line = String.format("%-5s %1s", "MAP",
492 file.append((line + NEW_LINE));
// optional properties: written only when present
495 if (hmm.getDate() != null)
497 line = String.format("%-5s %1s", "DATE", hmm.getDate());
498 file.append((line + NEW_LINE));
500 if (hmm.getNumberOfSequences() != null)
502 line = String.format("%-5s %1s", "NSEQ", hmm.getNumberOfSequences());
503 file.append((line + NEW_LINE));
505 if (hmm.getEffectiveNumberOfSequences() != null)
507 line = String.format("%-5s %1s", "EFFN",
508 hmm.getEffectiveNumberOfSequences());
509 file.append((line + NEW_LINE));
511 if (hmm.getCheckSum() != null)
513 line = String.format("%-5s %1s", "CKSUM", hmm.getCheckSum());
514 file.append((line + NEW_LINE));
516 if (hmm.getGatheringThreshold() != null)
518 line = String.format("%-5s %1s", "GA", hmm.getGatheringThreshold());
519 file.append((line + NEW_LINE));
522 if (hmm.getTrustedCutoff() != null)
524 line = String.format("%-5s %1s", "TC", hmm.getTrustedCutoff());
525 file.append((line + NEW_LINE));
527 if (hmm.getNoiseCutoff() != null)
529 line = String.format("%-5s %1s", "NC", hmm.getNoiseCutoff());
530 file.append((line + NEW_LINE));
// STATS lines use a wider layout: 19-character key, 18-character
// right-aligned value
// NOTE(review): only the MSV null-check is visible; whether the VITERBI and
// FORWARD lines sit inside the same guard cannot be told from this excerpt.
532 if (hmm.getMSV() != null)
534 line = String.format("%-19s %18s", "STATS LOCAL MSV", hmm.getMSV());
535 file.append((line + NEW_LINE));
537 line = String.format("%-19s %18s", "STATS LOCAL VITERBI",
539 file.append((line + NEW_LINE));
541 line = String.format("%-19s %18s", "STATS LOCAL FORWARD",
543 file.append((line + NEW_LINE));
549 char charValue(String string)
552 character = string.charAt(0);
// Takes an array of sequences and a `jvsuffix` flag and returns a String.
// The body is not part of this excerpt, so what (if anything) is produced
// for the sequences cannot be stated from here.
557 public String print(SequenceI[] seqs, boolean jvsuffix)
563 void convertListToLogSpace(List<Double> list)
566 for (int i = 0; i < list.size(); i++)
568 double prob = list.get(i);
569 double logProb = -1 * Math.log(prob);
571 list.set(i, logProb);