3 import jalview.api.AlignExportSettingI;
4 import jalview.api.AlignmentViewPanel;
5 import jalview.datamodel.HMMNode;
6 import jalview.datamodel.HiddenMarkovModel;
7 import jalview.datamodel.SequenceI;
9 import java.io.BufferedReader;
10 import java.io.IOException;
11 import java.util.ArrayList;
12 import java.util.List;
13 import java.util.Scanner;
17 * Adds capability to read in and write out HMMER3 files.
23 public class HMMFile extends AlignFile
24 implements AlignmentFileReaderI, AlignmentFileWriterI
26 private static final int NUMBER_OF_TRANSITIONS = 7;
28 private static final String SPACE = " ";
30 private static final String COMPO = "COMPO";
32 private static final String EMPTY = "";
35 * guide line added to an output HMMER file, purely for readability
37 private static final String TRANSITIONTYPELINE = " m->m m->i m->d i->m i->i d->m d->d";
39 private static String NL = "\n";
41 private HiddenMarkovModel hmm;
43 // number of symbols in the alphabet used in the hidden Markov model
53 public HMMFile(String inFile, DataSourceType type) throws IOException
64 public HMMFile(FileParse source) throws IOException
70 * Default constructor, do not use!
78 * Constructor for HMMFile used for exporting.
81 * @param markov the hidden Markov model to be exported
83 public HMMFile(HiddenMarkovModel markov)
89 * For testing, do not use.
93 HMMFile(BufferedReader br)
99 * Returns the HMM produced by reading in a HMMER3 file.
103 public HiddenMarkovModel getHMM()
109 * Sets the HMM used in this file.
113 public void setHMM(HiddenMarkovModel model)
119 * Gets the name of the hidden Markov model.
123 public String getName()
125 return hmm.getName();
129 * Reads the data from HMM file into the HMM field on this object.
131 * @throws IOException
134 public void parse() throws IOException
138 hmm = new HiddenMarkovModel();
139 parseFileProperties(dataIn);
141 } catch (Exception e)
148 * Reads the data from the given reader into the HMM field on this object.
150 * @throws IOException
153 public void parse(BufferedReader br) throws IOException
155 hmm = new HiddenMarkovModel();
156 parseFileProperties(br);
163 * Imports the file properties from a HMMER3 file.
166 * The buffered reader used to read in the file.
167 * @throws IOException
169 void parseFileProperties(BufferedReader input) throws IOException
171 boolean readingFile = true;
172 hmm.setFileHeader(input.readLine());
173 String line = input.readLine();
178 Scanner parser = new Scanner(line);
179 String next = parser.next();
180 if ("HMM".equals(next)) // indicates start of HMM data (end of file
185 numberOfSymbols = hmm.getNumberOfSymbols();
187 else if ("STATS".equals(next))
193 value = parser.next() + SPACE + SPACE + parser.next();
194 hmm.addFileProperty(key, value);
199 String value = parser.next();
200 while (parser.hasNext())
202 value = value + SPACE + parser.next();
204 hmm.addFileProperty(key, value);
208 line = input.readLine();
218 * Parses the model data from the HMMER3 file
221 * The buffered reader used to read the file.
222 * @throws IOException
224 void parseModel(BufferedReader input) throws IOException
226 boolean first = true;
227 String line = input.readLine();
228 while (!"//".equals(line))
230 HMMNode node = new HMMNode();
231 hmm.getNodes().add(node);
232 Scanner matchReader = new Scanner(line);
233 String next = matchReader.next();
234 if (next.equals(COMPO) || !first)
236 // stores match emission line in list
237 List<Double> matches = new ArrayList<>();
238 matches = fillList(matchReader, numberOfSymbols);
239 node.setMatchEmissions(matches);
242 int column = parseAnnotations(matchReader, node);
243 hmm.setAlignmentColumn(node, column - 1);
247 // stores insert emission line in list
248 line = input.readLine();
249 Scanner insertReader = new Scanner(line);
250 List<Double> inserts = new ArrayList<>();
251 inserts = fillList(insertReader, numberOfSymbols);
252 node.setInsertEmissions(inserts);
253 insertReader.close();
255 // stores state transition line in list
256 line = input.readLine();
257 Scanner transitionReader = new Scanner(line);
258 List<Double> transitions = new ArrayList<>();
259 transitions = fillList(transitionReader, NUMBER_OF_TRANSITIONS);
260 node.setStateTransitions(transitions);
261 transitionReader.close();
262 line = input.readLine();
269 * Parses the annotations on the match emission line and adds them to the node.
270 * (See p109 of the HMMER User Guide (V3.1b2) for the specification.) Returns
271 * the alignment column number (base 1) that the node maps to, if provided,
277 int parseAnnotations(Scanner scanner, HMMNode node)
280 * map from hmm node to alignment column index, if provided
281 * HMM counts columns from 1, convert to base 0 for Jalview
284 if (hmm.mapIsActive() && scanner.hasNext())
286 column = scanner.nextInt();
287 node.setAlignmentColumn(column - 1);
295 * hmm consensus residue if provided, else -
297 if (scanner.hasNext())
300 consensusR = charValue(scanner.next());
301 node.setConsensusResidue(consensusR);
305 * RF reference annotation, if provided, else -
307 if (scanner.hasNext())
310 reference = charValue(scanner.next());
311 node.setReferenceAnnotation(reference);
315 * 'm' for masked position, if provided, else -
317 if (scanner.hasNext())
320 value = charValue(scanner.next());
321 node.setMaskValue(value);
325 * structure consensus symbol, if provided, else -
327 if (scanner.hasNext())
330 consensusS = charValue(scanner.next());
331 node.setConsensusStructure(consensusS);
338 * Fills a list of doubles from an input line
341 * The scanner for the line containing the data to be transferred to
343 * @param numberOfElements
344 * The number of elements in the list to be filled.
345 * @return the filled list of doubles
346 * @throws IOException
348 static List<Double> fillList(Scanner input,
349 int numberOfElements) throws IOException
351 List<Double> list = new ArrayList<>();
352 for (int i = 0; i < numberOfElements; i++)
355 String next = input.next();
356 if (next.contains("*")) // state transitions to or from delete states
357 // occasionally have values of -infinity. These
358 // values are represented by an * in the .hmm
361 list.add(Double.NEGATIVE_INFINITY);
365 double prob = Double.valueOf(next);
366 prob = Math.pow(Math.E, -prob);
370 if (list.size() < numberOfElements)
372 throw new IOException("Incomplete data");
378 * Returns a string to be added to the StringBuilder containing the entire
381 * @param initialColumnSeparation
382 * The initial whitespace separation between the left side of the
383 * file and first character.
384 * @param columnSeparation
385 * The separation between subsequent data entries.
387 * The list of data to be added to the String.
390 String addData(int initialColumnSeparation,
391 int columnSeparation, List<String> data)
395 for (String value : data)
399 line += String.format("%" + initialColumnSeparation + "s", value);
403 line += String.format("%" + columnSeparation + "s", value);
411 * Converts list of characters into a list of Strings.
414 * @return the list of Strings
416 List<String> charListToStringList(List<Character> list)
418 List<String> strList = new ArrayList<>();
419 for (char value : list)
421 String strValue = Character.toString(value);
422 strList.add(strValue);
428 * Converts a list of doubles into a list of Strings, rounded to the nearest
432 * @param noOfDecimals
435 List<String> doubleListToStringList(List<Double> list)
437 List<String> strList = new ArrayList<>();
438 for (double value : list)
443 strValue = String.format("%.5f", value);
446 else if (value == -0.00000d)
448 strValue = "0.00000";
455 strList.add(strValue);
461 * Converts an array of Strings to a list of Strings.
466 List<String> stringArrayToStringList(String[] array)
468 List<String> list = new ArrayList<>();
469 for (String value : array)
478 * Returns a string containing the model data.
480 String getModelAsString()
482 StringBuilder output = new StringBuilder();
483 String symbolLine = "HMM";
484 List<Character> charSymbols = hmm.getSymbols();
485 List<String> strSymbols;
486 strSymbols = charListToStringList(charSymbols);
487 symbolLine += addData(11, 9, strSymbols);
488 output.append(symbolLine);
489 output.append(NL).append(TRANSITIONTYPELINE);
491 int length = hmm.getLength();
493 for (int node = 0; node <= length; node++)
498 matchLine = String.format("%7s", "COMPO");
502 matchLine = String.format("%7s", node);
505 List<String> strMatches;
506 List<Double> doubleMatches;
507 doubleMatches = convertListToLogSpace(
508 hmm.getNode(node).getMatchEmissions());
509 strMatches = doubleListToStringList(doubleMatches);
510 matchLine += addData(10, 9, strMatches);
515 matchLine += SPACE + (hmm.getNodeAlignmentColumn(node) + 1);
516 matchLine += SPACE + hmm.getConsensusResidue(node);
517 matchLine += SPACE + hmm.getReferenceAnnotation(node);
518 if (hmm.getFileHeader().contains("HMMER3/f"))
520 matchLine += SPACE + hmm.getMaskedValue(node);
521 matchLine += SPACE + hmm.getConsensusStructure(node);
526 output.append(NL).append(matchLine);
528 String insertLine = EMPTY;
529 List<String> strInserts;
530 List<Double> doubleInserts;
531 doubleInserts = convertListToLogSpace(
532 hmm.getNode(node).getInsertEmissions());
533 strInserts = doubleListToStringList(doubleInserts);
534 insertLine += addData(17, 9, strInserts);
536 output.append(NL).append(insertLine);
538 String transitionLine = EMPTY;
539 List<String> strTransitions;
540 List<Double> doubleTransitions;
541 doubleTransitions = convertListToLogSpace(
542 hmm.getNode(node).getStateTransitions());
543 strTransitions = doubleListToStringList(doubleTransitions);
544 transitionLine += addData(17, 9, strTransitions);
546 output.append(NL).append(transitionLine);
548 return output.toString();
552 * Returns a String containing the HMM file properties
554 String getFilePropertiesAsString()
556 StringBuffer output = new StringBuffer();
559 output.append(hmm.getFileHeader());
561 line = String.format("%-5s %1s", "NAME", hmm.getName());
562 output.append(NL + line);
564 if (hmm.getAccessionNumber() != null)
566 line = String.format("%-5s %1s", "ACC", hmm.getAccessionNumber());
567 output.append(NL + line);
570 if (hmm.getDescription() != null)
572 line = String.format("%-5s %1s", "DESC", hmm.getDescription());
573 output.append(NL + line);
575 line = String.format("%-5s %1s", "LENG", hmm.getLength());
576 output.append(NL + line);
578 if (hmm.getMaxInstanceLength() != null)
580 line = String.format("%-5s %1s", "MAXL", hmm.getMaxInstanceLength());
581 output.append(NL + line);
583 line = String.format("%-5s %1s", "ALPH", hmm.getAlphabetType());
584 output.append(NL + line);
589 status = hmm.referenceAnnotationIsActive();
590 statusStr = HiddenMarkovModel.findStringFromBoolean(status);
591 line = String.format("%-5s %1s", "RF",
593 output.append(NL + line);
595 status = hmm.maskValueIsActive();
596 statusStr = HiddenMarkovModel.findStringFromBoolean(status);
597 line = String.format("%-5s %1s", "MM",
599 output.append(NL + line);
601 status = hmm.consensusResidueIsActive();
602 statusStr = HiddenMarkovModel.findStringFromBoolean(status);
603 line = String.format("%-5s %1s", "CONS",
605 output.append(NL + line);
607 status = hmm.consensusStructureIsActive();
608 statusStr = HiddenMarkovModel.findStringFromBoolean(status);
609 line = String.format("%-5s %1s", "CS",
611 output.append(NL + line);
613 status = hmm.mapIsActive();
614 statusStr = HiddenMarkovModel.findStringFromBoolean(status);
615 line = String.format("%-5s %1s", "MAP",
617 output.append(NL + line);
620 if (hmm.getDate() != null)
622 line = String.format("%-5s %1s", "DATE", hmm.getDate());
623 output.append(NL + line);
625 if (hmm.getNumberOfSequences() != null)
627 line = String.format("%-5s %1s", "NSEQ", hmm.getNumberOfSequences());
628 output.append(NL + line);
630 if (hmm.getEffectiveNumberOfSequences() != null)
632 line = String.format("%-5s %1s", "EFFN",
633 hmm.getEffectiveNumberOfSequences());
634 output.append(NL + line);
636 if (hmm.getCheckSum() != null)
638 line = String.format("%-5s %1s", "CKSUM", hmm.getCheckSum());
639 output.append(NL + line);
641 if (hmm.getGatheringThreshold() != null)
643 line = String.format("%-5s %1s", "GA", hmm.getGatheringThreshold());
644 output.append(NL + line);
647 if (hmm.getTrustedCutoff() != null)
649 line = String.format("%-5s %1s", "TC", hmm.getTrustedCutoff());
650 output.append(NL + line);
652 if (hmm.getNoiseCutoff() != null)
654 line = String.format("%-5s %1s", "NC", hmm.getNoiseCutoff());
655 output.append(NL + line);
657 if (hmm.getMSV() != null)
659 line = String.format("%-19s %18s", "STATS LOCAL MSV", hmm.getMSV());
660 output.append(NL + line);
662 line = String.format("%-19s %18s", "STATS LOCAL VITERBI",
664 output.append(NL + line);
666 line = String.format("%-19s %18s", "STATS LOCAL FORWARD",
668 output.append(NL + line);
670 return output.toString();
675 * Returns the char value of a single-letter String.
680 char charValue(String string)
683 character = string.charAt(0);
689 public String print(SequenceI[] seqs, boolean jvsuffix)
691 if (seqs[0].getHMM() != null)
693 hmm = seqs[0].getHMM();
699 * Prints the .hmm file to a String.
703 public String print()
705 StringBuffer output = new StringBuffer();
706 output.append(getFilePropertiesAsString());
708 output.append(getModelAsString());
709 output.append(NL + "//");
710 return output.toString();
714 * Converts the probabilities contained in a list into log space.
718 List<Double> convertListToLogSpace(List<Double> list)
721 List<Double> convertedList = new ArrayList<>();
722 for (int i = 0; i < list.size(); i++)
724 double prob = list.get(i);
725 double logProb = -1 * Math.log(prob);
727 convertedList.add(logProb);
729 return convertedList;
735 * Returns the HMM sequence produced by reading a .hmm file.
738 public SequenceI[] getSeqsAsArray()
740 SequenceI hmmSeq = hmm.initHMMSequence();
741 SequenceI[] seq = new SequenceI[1];
748 * Fills the symbol list and adds each symbol to an index lookup
751 * The scanner scanning the symbol line in the file.
753 public void fillSymbols(Scanner parser)
756 while (parser.hasNext())
758 String strSymbol = parser.next();
759 char[] symbol = strSymbol.toCharArray();
760 hmm.getSymbols().add(symbol[0]);
761 hmm.setSymbolIndex(symbol[0], i);
767 public void setNewlineString(String newLine)
773 public void setExportSettings(AlignExportSettingI exportSettings)
779 public void configureForView(AlignmentViewPanel viewpanel)
785 public boolean hasWarningMessage()
791 public String getWarningMessage()
793 return "warning message";