3 import jalview.api.AlignExportSettingI;
4 import jalview.api.AlignmentViewPanel;
5 import jalview.datamodel.HMMNode;
6 import jalview.datamodel.HiddenMarkovModel;
7 import jalview.datamodel.SequenceI;
9 import java.io.BufferedReader;
10 import java.io.IOException;
11 import java.util.ArrayList;
12 import java.util.List;
13 import java.util.Scanner;
17 * Adds capability to read in and write out HMMER3 files.
23 public class HMMFile extends AlignFile
24 implements AlignmentFileReaderI, AlignmentFileWriterI
26 // HMM to store file data
27 private HiddenMarkovModel hmm;
29 // number of possible transitions
30 private static final int NUMBER_OF_TRANSITIONS = 7;
32 private String NL = "\n";
34 //number of symbols in the alphabet used in the hidden Markov model
37 private final String SPACE = " ";
39 private final String COMPO = "COMPO";
41 private final String EMPTY = "";
43 // This is a line that needs to be added to each HMMER3 file. It is purely for readability.
44 private static final String TRANSITIONTYPELINE = " m->m m->i m->d i->m i->i d->m d->d";
53 public HMMFile(String inFile, DataSourceType type) throws IOException
64 public HMMFile(FileParse source) throws IOException
70 * Default constructor, do not use!
78 * Constructor for HMMFile used for exporting.
81 * @param exportImmediately
83 public HMMFile(HiddenMarkovModel markov)
89 * For testing, do not use.
93 HMMFile(BufferedReader br)
99 * Returns the HMM produced by reading in a HMMER3 file.
103 public HiddenMarkovModel getHMM()
109 * Sets the HMM used in this file.
113 public void setHMM(HiddenMarkovModel model)
119 * Gets the name of the hidden Markov model.
123 public String getName()
125 return hmm.getName();
129 * Reads the data from HMM file into the HMM field on this object.
131 * @throws IOException
134 public void parse() throws IOException
136 hmm = new HiddenMarkovModel();
137 parseFileProperties(dataIn);
142 * Reads the data from HMM file into the HMM field on this object.
144 * @throws IOException
147 public void parse(BufferedReader br) throws IOException
149 hmm = new HiddenMarkovModel();
150 parseFileProperties(br);
157 * Imports the file properties from a HMMER3 file.
160 * The buffered reader used to read in the file.
161 * @throws IOException
163 void parseFileProperties(BufferedReader input) throws IOException
165 boolean readingFile = true;
166 hmm.setFileHeader(input.readLine());
167 String line = input.readLine();
172 Scanner parser = new Scanner(line);
173 String next = parser.next();
174 if ("HMM".equals(next)) // indicates start of HMM data (end of file
179 numberOfSymbols = hmm.getNumberOfSymbols();
181 else if ("STATS".equals(next))
187 value = parser.next() + SPACE + SPACE + parser.next();
188 hmm.addFileProperty(key, value);
193 String value = parser.next();
194 while (parser.hasNext())
196 value = value + SPACE + parser.next();
198 hmm.addFileProperty(key, value);
202 line = input.readLine();
212 * Parses the model data from the HMMER3 file
215 * The buffered reader used to read the file.
216 * @throws IOException
218 void parseModel(BufferedReader input) throws IOException
220 String line = input.readLine();
222 while (!"//".equals(line))
224 hmm.getNodes().add(new HMMNode());
226 Scanner matchReader = new Scanner(line);
227 next = matchReader.next();
228 if (next.equals(COMPO) || node > 0)
230 // stores match emission line in list
231 List<Double> matches = new ArrayList<>();
232 matches = fillList(matchReader, numberOfSymbols);
233 hmm.getNodes().get(node).setMatchEmissions(matches);
236 parseAnnotations(matchReader, node);
240 // stores insert emission line in list
241 line = input.readLine();
242 Scanner insertReader = new Scanner(line);
243 List<Double> inserts = new ArrayList<>();
244 inserts = fillList(insertReader, numberOfSymbols);
245 hmm.getNodes().get(node).setInsertEmissions(inserts);
246 insertReader.close();
248 // stores state transition line in list
249 line = input.readLine();
250 Scanner transitionReader = new Scanner(line);
251 List<Double> transitions = new ArrayList<>();
252 transitions = fillList(transitionReader, NUMBER_OF_TRANSITIONS);
253 hmm.getNodes().get(node).setStateTransitions(transitions);
254 transitionReader.close();
255 line = input.readLine();
262 * Parses the annotations on the match emission line.
265 * The scanner which is processing match emission line.
267 * The index of node which is being scanned.
269 void parseAnnotations(Scanner scanner, int index)
271 if (hmm.mapIsActive() && scanner.hasNext())
274 column = scanner.nextInt();
275 hmm.getNodes().get(index).setAlignmentColumn(column - 1);
276 hmm.getNodeLookup().put(column - 1, index);
283 if (scanner.hasNext())
286 consensusR = charValue(scanner.next());
287 hmm.getNodes().get(index).setConsensusResidue(consensusR);
290 if (scanner.hasNext())
293 reference = charValue(scanner.next());
294 hmm.getNodes().get(index).setReferenceAnnotation(reference);
297 if (scanner.hasNext())
300 value = charValue(scanner.next());
301 hmm.getNodes().get(index).setMaskValue(value);
303 if (scanner.hasNext())
306 consensusS = charValue(scanner.next());
307 hmm.getNodes().get(index).setConsensusStructure(consensusS);
314 * Fills a list of doubles based on an input line.
317 * The scanner for the line containing the data to be transferred to
319 * @param numberOfElements
320 * The number of elements in the list to be filled.
321 * @return filled list Returns the list of doubles.
322 * @throws IOException
324 static List<Double> fillList(Scanner input,
325 int numberOfElements) throws IOException
327 List<Double> list = new ArrayList<>();
328 for (int i = 0; i < numberOfElements; i++)
331 String next = input.next();
332 if (next.contains("*")) // state transitions to or from delete states
333 // occasionally have values of -infinity. These
334 // values are represented by an * in the .hmm
337 list.add(Double.NEGATIVE_INFINITY);
341 double prob = Double.valueOf(next);
342 prob = Math.pow(Math.E, -prob);
346 if (list.size() < numberOfElements)
348 throw new IOException("Incomplete data");
354 * Returns a string to be added to the StringBuilder containing the entire
357 * @param initialColumnSeparation
358 * The initial whitespace separation between the left side of the
359 * file and first character.
360 * @param columnSeparation
361 * The separation between subsequent data entries.
363 * The list of data to be added to the String.
366 String addData(int initialColumnSeparation,
367 int columnSeparation, List<String> data)
371 for (String value : data)
375 line += String.format("%" + initialColumnSeparation + "s", value);
379 line += String.format("%" + columnSeparation + "s", value);
387 * Converts list of characters into a list of Strings.
390 * @return Returns the list of Strings.
392 List<String> charListToStringList(List<Character> list)
394 List<String> strList = new ArrayList<>();
395 for (char value : list)
397 String strValue = Character.toString(value);
398 strList.add(strValue);
404 * Converts a list of doubles into a list of Strings, rounded to the nearest
408 * @param noOfDecimals
411 List<String> doubleListToStringList(List<Double> list)
413 List<String> strList = new ArrayList<>();
414 for (double value : list)
419 strValue = String.format("%.5f", value);
422 else if (value == -0.00000d)
424 strValue = "0.00000";
431 strList.add(strValue);
437 * Converts a primitive array of Strings to a list of Strings.
442 List<String> stringArrayToStringList(String[] array)
444 List<String> list = new ArrayList<>();
445 for (String value : array)
454 * Returns a string containing the model data.
456 String getModelAsString()
458 StringBuffer output = new StringBuffer();
459 String symbolLine = "HMM";
460 List<Character> charSymbols = hmm.getSymbols();
461 List<String> strSymbols;
462 strSymbols = charListToStringList(charSymbols);
463 symbolLine += addData(11, 9, strSymbols);
464 output.append(symbolLine);
465 output.append(NL + TRANSITIONTYPELINE);
467 int length = hmm.getLength();
469 for (int node = 0; node <= length; node++)
474 matchLine = String.format("%7s", "COMPO");
478 matchLine = String.format("%7s", node);
481 List<String> strMatches;
482 List<Double> doubleMatches;
483 doubleMatches = convertListToLogSpace(
484 hmm.getNode(node).getMatchEmissions());
485 strMatches = doubleListToStringList(doubleMatches);
486 matchLine += addData(10, 9, strMatches);
491 matchLine += SPACE + (hmm.getNodeAlignmentColumn(node) + 1);
492 matchLine += SPACE + hmm.getConsensusResidue(node);
493 matchLine += SPACE + hmm.getReferenceAnnotation(node);
494 if (hmm.getFileHeader().contains("HMMER3/f"))
496 matchLine += SPACE + hmm.getMaskedValue(node);
497 matchLine += SPACE + hmm.getConsensusStructure(node);
502 output.append(NL + matchLine);
504 String insertLine = EMPTY;
505 List<String> strInserts;
506 List<Double> doubleInserts;
507 doubleInserts = convertListToLogSpace(
508 hmm.getNode(node).getInsertEmissions());
509 strInserts = doubleListToStringList(doubleInserts);
510 insertLine += addData(17, 9, strInserts);
512 output.append(NL + insertLine);
514 String transitionLine = EMPTY;
515 List<String> strTransitions;
516 List<Double> doubleTransitions;
517 doubleTransitions = convertListToLogSpace(
518 hmm.getNode(node).getStateTransitions());
519 strTransitions = doubleListToStringList(doubleTransitions);
520 transitionLine += addData(17, 9, strTransitions);
522 output.append(NL + transitionLine);
524 return output.toString();
528 * Returns a String containing the HMM file properties
530 String getFilePropertiesAsString()
532 StringBuffer output = new StringBuffer();
535 output.append(hmm.getFileHeader());
537 line = String.format("%-5s %1s", "NAME", hmm.getName());
538 output.append(NL + line);
540 if (hmm.getAccessionNumber() != null)
542 line = String.format("%-5s %1s", "ACC", hmm.getAccessionNumber());
543 output.append(NL + line);
546 if (hmm.getDescription() != null)
548 line = String.format("%-5s %1s", "DESC", hmm.getDescription());
549 output.append(NL + line);
551 line = String.format("%-5s %1s", "LENG", hmm.getLength());
552 output.append(NL + line);
554 if (hmm.getMaxInstanceLength() != null)
556 line = String.format("%-5s %1s", "MAXL", hmm.getMaxInstanceLength());
557 output.append(NL + line);
559 line = String.format("%-5s %1s", "ALPH", hmm.getAlphabetType());
560 output.append(NL + line);
565 status = hmm.referenceAnnotationIsActive();
566 statusStr = HiddenMarkovModel.findStringFromBoolean(status);
567 line = String.format("%-5s %1s", "RF",
569 output.append(NL + line);
571 status = hmm.maskValueIsActive();
572 statusStr = HiddenMarkovModel.findStringFromBoolean(status);
573 line = String.format("%-5s %1s", "MM",
575 output.append(NL + line);
577 status = hmm.consensusResidueIsActive();
578 statusStr = HiddenMarkovModel.findStringFromBoolean(status);
579 line = String.format("%-5s %1s", "CONS",
581 output.append(NL + line);
583 status = hmm.consensusStructureIsActive();
584 statusStr = HiddenMarkovModel.findStringFromBoolean(status);
585 line = String.format("%-5s %1s", "CS",
587 output.append(NL + line);
589 status = hmm.mapIsActive();
590 statusStr = HiddenMarkovModel.findStringFromBoolean(status);
591 line = String.format("%-5s %1s", "MAP",
593 output.append(NL + line);
596 if (hmm.getDate() != null)
598 line = String.format("%-5s %1s", "DATE", hmm.getDate());
599 output.append(NL + line);
601 if (hmm.getNumberOfSequences() != null)
603 line = String.format("%-5s %1s", "NSEQ", hmm.getNumberOfSequences());
604 output.append(NL + line);
606 if (hmm.getEffectiveNumberOfSequences() != null)
608 line = String.format("%-5s %1s", "EFFN",
609 hmm.getEffectiveNumberOfSequences());
610 output.append(NL + line);
612 if (hmm.getCheckSum() != null)
614 line = String.format("%-5s %1s", "CKSUM", hmm.getCheckSum());
615 output.append(NL + line);
617 if (hmm.getGatheringThreshold() != null)
619 line = String.format("%-5s %1s", "GA", hmm.getGatheringThreshold());
620 output.append(NL + line);
623 if (hmm.getTrustedCutoff() != null)
625 line = String.format("%-5s %1s", "TC", hmm.getTrustedCutoff());
626 output.append(NL + line);
628 if (hmm.getNoiseCutoff() != null)
630 line = String.format("%-5s %1s", "NC", hmm.getNoiseCutoff());
631 output.append(NL + line);
633 if (hmm.getMSV() != null)
635 line = String.format("%-19s %18s", "STATS LOCAL MSV", hmm.getMSV());
636 output.append(NL + line);
638 line = String.format("%-19s %18s", "STATS LOCAL VITERBI",
640 output.append(NL + line);
642 line = String.format("%-19s %18s", "STATS LOCAL FORWARD",
644 output.append(NL + line);
646 return output.toString();
651 * Returns the char value of a single lettered String.
656 char charValue(String string)
659 character = string.charAt(0);
665 public String print(SequenceI[] seqs, boolean jvsuffix)
667 if (seqs[0].getHMM() != null)
669 hmm = seqs[0].getHMM();
675 * Prints the .hmm file to a String.
679 public String print()
681 StringBuffer output = new StringBuffer();
682 output.append(getFilePropertiesAsString());
684 output.append(getModelAsString());
685 output.append(NL + "//");
686 return output.toString();
690 * Converts the probabilities contained in a list into log space.
694 List<Double> convertListToLogSpace(List<Double> list)
697 List<Double> convertedList = new ArrayList<>();
698 for (int i = 0; i < list.size(); i++)
700 double prob = list.get(i);
701 double logProb = -1 * Math.log(prob);
703 convertedList.add(logProb);
705 return convertedList;
711 * Returns the HMM sequence produced by reading a .hmm file.
714 public SequenceI[] getSeqsAsArray()
716 SequenceI hmmSeq = hmm.initHMMSequence();
717 SequenceI[] seq = new SequenceI[1];
724 * Fills symbol array and adds each symbol to an index lookup
727 * The scanner scanning the symbol line in the file.
729 public void fillSymbols(Scanner parser)
732 while (parser.hasNext())
734 String strSymbol = parser.next();
735 char[] symbol = strSymbol.toCharArray();
736 hmm.getSymbols().add(symbol[0]);
737 hmm.setSymbolIndex(symbol[0], i);
743 public void setNewlineString(String newLine)
749 public void setExportSettings(AlignExportSettingI exportSettings)
755 public void configureForView(AlignmentViewPanel viewpanel)
761 public boolean hasWarningMessage()
767 public String getWarningMessage()
769 return "warning message";