3 import jalview.api.AlignExportSettingI;
4 import jalview.api.AlignmentViewPanel;
5 import jalview.datamodel.HMMNode;
6 import jalview.datamodel.HiddenMarkovModel;
7 import jalview.datamodel.SequenceI;
9 import java.io.BufferedReader;
10 import java.io.IOException;
11 import java.util.ArrayList;
12 import java.util.List;
13 import java.util.Scanner;
17 * Adds capability to read in and write out HMMER3 files.
23 public class HMMFile extends AlignFile
24 implements AlignmentFileReaderI, AlignmentFileWriterI
26 // HMM to store file data
27 private HiddenMarkovModel hmm;
29 // number of possible transitions
30 private static final int NUMBER_OF_TRANSITIONS = 7;
32 private String NL = "\n";
34 //number of symbols in the alphabet used in the hidden Markov model
37 private final String SPACE = " ";
39 private final String COMPO = "COMPO";
41 private final String EMPTY = "";
43 //This is a line that needs to be added to each HMMER3 file. It is purely for readability.
44 private static final String TRANSITIONTYPELINE = " m->m m->i m->d i->m i->i d->m d->d";
53 public HMMFile(String inFile, DataSourceType type) throws IOException
64 public HMMFile(FileParse source) throws IOException
70 * Default constructor, do not use!
78 * Constructor for HMMFile used for exporting.
81 * @param exportImmediately
83 public HMMFile(HiddenMarkovModel markov)
89 * For testing, do not use.
93 HMMFile(BufferedReader br)
99 * Returns the HMM produced by reading in a HMMER3 file.
103 public HiddenMarkovModel getHMM()
109 * Sets the HMM used in this file.
113 public void setHMM(HiddenMarkovModel model)
119 * Gets the name of the hidden Markov model.
123 public String getName()
125 return hmm.getName();
129 * Reads the data from HMM file into the HMM field on this object.
131 * @throws IOException
134 public void parse() throws IOException
138 hmm = new HiddenMarkovModel();
139 parseFileProperties(dataIn);
141 } catch (Exception e)
148 * Reads the data from HMM file into the HMM field on this object.
150 * @throws IOException
153 public void parse(BufferedReader br) throws IOException
155 hmm = new HiddenMarkovModel();
156 parseFileProperties(br);
163 * Imports the file properties from a HMMER3 file.
166 * The buffered reader used to read in the file.
167 * @throws IOException
169 void parseFileProperties(BufferedReader input) throws IOException
171 boolean readingFile = true;
172 hmm.setFileHeader(input.readLine());
173 String line = input.readLine();
178 Scanner parser = new Scanner(line);
179 String next = parser.next();
180 if ("HMM".equals(next)) // indicates start of HMM data (end of file
185 numberOfSymbols = hmm.getNumberOfSymbols();
187 else if ("STATS".equals(next))
193 value = parser.next() + SPACE + SPACE + parser.next();
194 hmm.addFileProperty(key, value);
199 String value = parser.next();
200 while (parser.hasNext())
202 value = value + SPACE + parser.next();
204 hmm.addFileProperty(key, value);
208 line = input.readLine();
218 * Parses the model data from the HMMER3 file
221 * The buffered reader used to read the file.
222 * @throws IOException
224 void parseModel(BufferedReader input) throws IOException
226 String line = input.readLine();
228 while (!"//".equals(line))
230 hmm.getNodes().add(new HMMNode());
232 Scanner matchReader = new Scanner(line);
233 next = matchReader.next();
234 if (next.equals(COMPO) || node > 0)
236 // stores match emission line in list
237 List<Double> matches = new ArrayList<>();
238 matches = fillList(matchReader, numberOfSymbols);
239 hmm.getNodes().get(node).setMatchEmissions(matches);
242 parseAnnotations(matchReader, node);
246 // stores insert emission line in list
247 line = input.readLine();
248 Scanner insertReader = new Scanner(line);
249 List<Double> inserts = new ArrayList<>();
250 inserts = fillList(insertReader, numberOfSymbols);
251 hmm.getNodes().get(node).setInsertEmissions(inserts);
252 insertReader.close();
254 // stores state transition line in list
255 line = input.readLine();
256 Scanner transitionReader = new Scanner(line);
257 List<Double> transitions = new ArrayList<>();
258 transitions = fillList(transitionReader, NUMBER_OF_TRANSITIONS);
259 hmm.getNodes().get(node).setStateTransitions(transitions);
260 transitionReader.close();
261 line = input.readLine();
268 * Parses the annotations on the match emission line.
271 * The scanner which is processing match emission line.
273 * The index of node which is being scanned.
275 void parseAnnotations(Scanner scanner, int index)
277 if (hmm.mapIsActive() && scanner.hasNext())
280 column = scanner.nextInt();
281 hmm.getNodes().get(index).setAlignmentColumn(column - 1);
282 hmm.getNodeLookup().put(column - 1, index);
289 if (scanner.hasNext())
292 consensusR = charValue(scanner.next());
293 hmm.getNodes().get(index).setConsensusResidue(consensusR);
296 if (scanner.hasNext())
299 reference = charValue(scanner.next());
300 hmm.getNodes().get(index).setReferenceAnnotation(reference);
303 if (scanner.hasNext())
306 value = charValue(scanner.next());
307 hmm.getNodes().get(index).setMaskValue(value);
309 if (scanner.hasNext())
312 consensusS = charValue(scanner.next());
313 hmm.getNodes().get(index).setConsensusStructure(consensusS);
320 * Fills a list of doubles based on an input line.
323 * The scanner for the line containing the data to be transferred to
325 * @param numberOfElements
326 * The number of elements in the list to be filled.
327 * @return filled list Returns the list of doubles.
328 * @throws IOException
330 static List<Double> fillList(Scanner input,
331 int numberOfElements) throws IOException
333 List<Double> list = new ArrayList<>();
334 for (int i = 0; i < numberOfElements; i++)
337 String next = input.next();
338 if (next.contains("*")) // state transitions to or from delete states
339 // occasionally have values of -infinity. These
340 // values are represented by an * in the .hmm
343 list.add(Double.NEGATIVE_INFINITY);
347 double prob = Double.valueOf(next);
348 prob = Math.pow(Math.E, -prob);
352 if (list.size() < numberOfElements)
354 throw new IOException("Incomplete data");
360 * Returns a string to be added to the StringBuilder containing the entire
363 * @param initialColumnSeparation
364 * The initial whitespace separation between the left side of the
365 * file and first character.
366 * @param columnSeparation
367 * The separation between subsequent data entries.
369 * The list of data to be added to the String.
372 String addData(int initialColumnSeparation,
373 int columnSeparation, List<String> data)
377 for (String value : data)
381 line += String.format("%" + initialColumnSeparation + "s", value);
385 line += String.format("%" + columnSeparation + "s", value);
393 * Converts list of characters into a list of Strings.
396 * @return Returns the list of Strings.
398 List<String> charListToStringList(List<Character> list)
400 List<String> strList = new ArrayList<>();
401 for (char value : list)
403 String strValue = Character.toString(value);
404 strList.add(strValue);
410 * Converts a list of doubles into a list of Strings, rounded to the nearest
414 * @param noOfDecimals
417 List<String> doubleListToStringList(List<Double> list)
419 List<String> strList = new ArrayList<>();
420 for (double value : list)
425 strValue = String.format("%.5f", value);
428 else if (value == -0.00000d)
430 strValue = "0.00000";
437 strList.add(strValue);
443 * Converts a primitive array of Strings to a list of Strings.
448 List<String> stringArrayToStringList(String[] array)
450 List<String> list = new ArrayList<>();
451 for (String value : array)
460 * Returns a string containing the model data.
462 String getModelAsString()
464 StringBuilder output = new StringBuilder();
465 String symbolLine = "HMM";
466 List<Character> charSymbols = hmm.getSymbols();
467 List<String> strSymbols;
468 strSymbols = charListToStringList(charSymbols);
469 symbolLine += addData(11, 9, strSymbols);
470 output.append(symbolLine);
471 output.append(NL).append(TRANSITIONTYPELINE);
473 int length = hmm.getLength();
475 for (int node = 0; node <= length; node++)
480 matchLine = String.format("%7s", "COMPO");
484 matchLine = String.format("%7s", node);
487 List<String> strMatches;
488 List<Double> doubleMatches;
489 doubleMatches = convertListToLogSpace(
490 hmm.getNode(node).getMatchEmissions());
491 strMatches = doubleListToStringList(doubleMatches);
492 matchLine += addData(10, 9, strMatches);
497 matchLine += SPACE + (hmm.getNodeAlignmentColumn(node) + 1);
498 matchLine += SPACE + hmm.getConsensusResidue(node);
499 matchLine += SPACE + hmm.getReferenceAnnotation(node);
500 if (hmm.getFileHeader().contains("HMMER3/f"))
502 matchLine += SPACE + hmm.getMaskedValue(node);
503 matchLine += SPACE + hmm.getConsensusStructure(node);
508 output.append(NL).append(matchLine);
510 String insertLine = EMPTY;
511 List<String> strInserts;
512 List<Double> doubleInserts;
513 doubleInserts = convertListToLogSpace(
514 hmm.getNode(node).getInsertEmissions());
515 strInserts = doubleListToStringList(doubleInserts);
516 insertLine += addData(17, 9, strInserts);
518 output.append(NL).append(insertLine);
520 String transitionLine = EMPTY;
521 List<String> strTransitions;
522 List<Double> doubleTransitions;
523 doubleTransitions = convertListToLogSpace(
524 hmm.getNode(node).getStateTransitions());
525 strTransitions = doubleListToStringList(doubleTransitions);
526 transitionLine += addData(17, 9, strTransitions);
528 output.append(NL).append(transitionLine);
530 return output.toString();
534 * Returns a String containing the HMM file properties
536 String getFilePropertiesAsString()
538 StringBuffer output = new StringBuffer();
541 output.append(hmm.getFileHeader());
543 line = String.format("%-5s %1s", "NAME", hmm.getName());
544 output.append(NL + line);
546 if (hmm.getAccessionNumber() != null)
548 line = String.format("%-5s %1s", "ACC", hmm.getAccessionNumber());
549 output.append(NL + line);
552 if (hmm.getDescription() != null)
554 line = String.format("%-5s %1s", "DESC", hmm.getDescription());
555 output.append(NL + line);
557 line = String.format("%-5s %1s", "LENG", hmm.getLength());
558 output.append(NL + line);
560 if (hmm.getMaxInstanceLength() != null)
562 line = String.format("%-5s %1s", "MAXL", hmm.getMaxInstanceLength());
563 output.append(NL + line);
565 line = String.format("%-5s %1s", "ALPH", hmm.getAlphabetType());
566 output.append(NL + line);
571 status = hmm.referenceAnnotationIsActive();
572 statusStr = HiddenMarkovModel.findStringFromBoolean(status);
573 line = String.format("%-5s %1s", "RF",
575 output.append(NL + line);
577 status = hmm.maskValueIsActive();
578 statusStr = HiddenMarkovModel.findStringFromBoolean(status);
579 line = String.format("%-5s %1s", "MM",
581 output.append(NL + line);
583 status = hmm.consensusResidueIsActive();
584 statusStr = HiddenMarkovModel.findStringFromBoolean(status);
585 line = String.format("%-5s %1s", "CONS",
587 output.append(NL + line);
589 status = hmm.consensusStructureIsActive();
590 statusStr = HiddenMarkovModel.findStringFromBoolean(status);
591 line = String.format("%-5s %1s", "CS",
593 output.append(NL + line);
595 status = hmm.mapIsActive();
596 statusStr = HiddenMarkovModel.findStringFromBoolean(status);
597 line = String.format("%-5s %1s", "MAP",
599 output.append(NL + line);
602 if (hmm.getDate() != null)
604 line = String.format("%-5s %1s", "DATE", hmm.getDate());
605 output.append(NL + line);
607 if (hmm.getNumberOfSequences() != null)
609 line = String.format("%-5s %1s", "NSEQ", hmm.getNumberOfSequences());
610 output.append(NL + line);
612 if (hmm.getEffectiveNumberOfSequences() != null)
614 line = String.format("%-5s %1s", "EFFN",
615 hmm.getEffectiveNumberOfSequences());
616 output.append(NL + line);
618 if (hmm.getCheckSum() != null)
620 line = String.format("%-5s %1s", "CKSUM", hmm.getCheckSum());
621 output.append(NL + line);
623 if (hmm.getGatheringThreshold() != null)
625 line = String.format("%-5s %1s", "GA", hmm.getGatheringThreshold());
626 output.append(NL + line);
629 if (hmm.getTrustedCutoff() != null)
631 line = String.format("%-5s %1s", "TC", hmm.getTrustedCutoff());
632 output.append(NL + line);
634 if (hmm.getNoiseCutoff() != null)
636 line = String.format("%-5s %1s", "NC", hmm.getNoiseCutoff());
637 output.append(NL + line);
639 if (hmm.getMSV() != null)
641 line = String.format("%-19s %18s", "STATS LOCAL MSV", hmm.getMSV());
642 output.append(NL + line);
644 line = String.format("%-19s %18s", "STATS LOCAL VITERBI",
646 output.append(NL + line);
648 line = String.format("%-19s %18s", "STATS LOCAL FORWARD",
650 output.append(NL + line);
652 return output.toString();
657 * Returns the char value of a single lettered String.
662 char charValue(String string)
665 character = string.charAt(0);
671 public String print(SequenceI[] seqs, boolean jvsuffix)
673 if (seqs[0].getHMM() != null)
675 hmm = seqs[0].getHMM();
681 * Prints the .hmm file to a String.
685 public String print()
687 StringBuffer output = new StringBuffer();
688 output.append(getFilePropertiesAsString());
690 output.append(getModelAsString());
691 output.append(NL + "//");
692 return output.toString();
696 * Converts the probabilities contained in a list into log space.
700 List<Double> convertListToLogSpace(List<Double> list)
703 List<Double> convertedList = new ArrayList<>();
704 for (int i = 0; i < list.size(); i++)
706 double prob = list.get(i);
707 double logProb = -1 * Math.log(prob);
709 convertedList.add(logProb);
711 return convertedList;
717 * Returns the HMM sequence produced by reading a .hmm file.
720 public SequenceI[] getSeqsAsArray()
722 SequenceI hmmSeq = hmm.initHMMSequence();
723 SequenceI[] seq = new SequenceI[1];
730 * Fills symbol array and adds each symbol to an index lookup
733 * The scanner scanning the symbol line in the file.
735 public void fillSymbols(Scanner parser)
738 while (parser.hasNext())
740 String strSymbol = parser.next();
741 char[] symbol = strSymbol.toCharArray();
742 hmm.getSymbols().add(symbol[0]);
743 hmm.setSymbolIndex(symbol[0], i);
749 public void setNewlineString(String newLine)
755 public void setExportSettings(AlignExportSettingI exportSettings)
761 public void configureForView(AlignmentViewPanel viewpanel)
767 public boolean hasWarningMessage()
773 public String getWarningMessage()
775 return "warning message";