3 import jalview.datamodel.HMMNode;
4 import jalview.datamodel.HiddenMarkovModel;
5 import jalview.datamodel.SequenceI;
7 import java.io.BufferedReader;
9 import java.io.FileNotFoundException;
10 import java.io.IOException;
11 import java.io.PrintWriter;
12 import java.io.UnsupportedEncodingException;
13 import java.util.ArrayList;
14 import java.util.List;
15 import java.util.Scanner;
19 * Adds capability to read in and write out HMMER3 files.
25 public class HMMFile extends AlignFile
26 implements AlignmentFileReaderI, AlignmentFileWriterI
28 // HMM to store file data
29 private HiddenMarkovModel hmm = new HiddenMarkovModel();
31 // number of possible transitions
32 private final int NUMBER_OF_TRANSITIONS = 7;
34 private final String NEW_LINE = "\n";
36 //number of symbols in the alphabet used in the hidden Markov model
39 private final String SPACE = " ";
41 private final String COMPO = "COMPO";
43 private final String EMPTY = "";
45 //This is a line that needs to be added to each HMMER3 file. It is purely for readability.
46 private static final String TRANSITIONTYPELINE = "m->m m->i m->d i->m i->i d->m d->d";
49 * Constructor for HMMFile, parses immediately
54 public HMMFile(FileParse source) throws IOException
61 * Default constructor, do not use!
69 * Constructor for HMMFile used for exporting.
72 * @param exportImmediately
74 public HMMFile(HiddenMarkovModel markov)
80 * Returns the HMM produced by reading in a HMMER3 file.
84 public HiddenMarkovModel getHMM()
90 * Sets the HMM used in this file.
94 public void setHMM(HiddenMarkovModel model)
100 * Gets the name of the hidden Markov model.
104 public String getName()
106 return hmm.getName();
110 * Reads the data from HMM file into the HMM field on this object.
112 * @throws IOException
115 public void parse() throws IOException
117 parseFileProperties(dataIn);
122 * Reads the data from HMM file into the HMM field on this object.
124 * @throws IOException
127 public void parse(BufferedReader br) throws IOException
129 parseFileProperties(br);
136 * Imports the file properties from a HMMER3 file.
139 * The buffered reader used to read in the file.
140 * @throws IOException
142 void parseFileProperties(BufferedReader input) throws IOException
144 boolean readingFile = true;
145 hmm.setFileHeader(input.readLine());
146 String line = input.readLine();
151 Scanner parser = new Scanner(line);
152 String next = parser.next();
153 if ("HMM".equals(next)) // indicates start of HMM data (end of file
158 numberOfSymbols = hmm.getNumberOfSymbols();
160 else if ("STATS".equals(next))
166 value = parser.next() + SPACE + SPACE + parser.next();
167 hmm.addFileProperty(key, value);
172 String value = parser.next();
173 while (parser.hasNext())
175 value = value + SPACE + parser.next();
177 hmm.addFileProperty(key, value);
181 line = input.readLine();
191 * Parses the model data from the HMMER3 file
194 * The buffered reader used to read the file.
195 * @throws IOException
197 void parseModel(BufferedReader input) throws IOException
199 String line = input.readLine();
201 while (!"//".equals(line))
203 hmm.getNodes().add(new HMMNode());
205 Scanner matchReader = new Scanner(line);
206 next = matchReader.next();
207 if (next.equals(COMPO) || node > 0)
209 // stores match emission line in list
210 List<Double> matches = new ArrayList<>();
211 matches = fillList(matchReader, numberOfSymbols);
212 hmm.getNodes().get(node).setMatchEmissions(matches);
215 parseAnnotations(matchReader, node);
219 // stores insert emission line in list
220 line = input.readLine();
221 Scanner insertReader = new Scanner(line);
222 List<Double> inserts = new ArrayList<>();
223 inserts = fillList(insertReader, numberOfSymbols);
224 hmm.getNodes().get(node).setInsertEmissions(inserts);
225 insertReader.close();
227 // stores state transition line in list
228 line = input.readLine();
229 Scanner transitionReader = new Scanner(line);
230 List<Double> transitions = new ArrayList<>();
231 transitions = fillList(transitionReader, NUMBER_OF_TRANSITIONS);
232 hmm.getNodes().get(node).setStateTransitions(transitions);
233 transitionReader.close();
234 line = input.readLine();
241 * Parses the annotations on the match emission line.
244 * The scanner which is processing match emission line.
246 * The index of node which is being scanned.
248 void parseAnnotations(Scanner scanner, int index)
250 if (hmm.mapIsActive() && scanner.hasNext())
253 column = scanner.nextInt();
254 hmm.getNodes().get(index).setAlignmentColumn(column - 1);
255 hmm.getNodeLookup().put(column - 1, index);
262 if (scanner.hasNext())
265 consensusR = charValue(scanner.next());
266 hmm.getNodes().get(index).setConsensusResidue(consensusR);
269 if (scanner.hasNext())
272 reference = charValue(scanner.next());
273 hmm.getNodes().get(index).setReferenceAnnotation(reference);
276 if (scanner.hasNext())
279 value = charValue(scanner.next());
280 hmm.getNodes().get(index).setMaskValue(value);
282 if (scanner.hasNext())
285 consensusS = charValue(scanner.next());
286 hmm.getNodes().get(index).setConsensusStructure(consensusS);
293 * Fills a list of doubles based on an input line.
296 * The scanner for the line containing the data to be transferred to
298 * @param numberOfElements
299 * The number of elements in the list to be filled.
300 * @return filled list Returns the list of doubles.
301 * @throws IOException
303 static List<Double> fillList(Scanner input,
304 int numberOfElements) throws IOException
306 List<Double> list = new ArrayList<>();
307 for (int i = 0; i < numberOfElements; i++)
310 String next = input.next();
311 if (next.contains("*")) // state transitions to or from delete states
312 // occasionally have values of -infinity. These
313 // values are represented by an * in the .hmm
316 list.add(Double.NEGATIVE_INFINITY);
320 double prob = Double.valueOf(next);
321 prob = Math.pow(Math.E, -prob);
325 if (list.size() < numberOfElements)
327 throw new IOException("Incomplete data");
334 * Writes a HMM to a file/
336 * @param exportLocation
337 * Filename, URL or Pasted String to write to.
338 * @throws FileNotFoundException
339 * @throws UnsupportedEncodingException
343 public void exportFile(String exportLocation) throws IOException
345 PrintWriter writer = new PrintWriter(exportLocation);
346 appendFileProperties(writer);
348 writer.println("//");
355 * Writes a HMM to a file/
357 * @param exportLocation
358 * Filename, URL or Pasted String to write to.
359 * @throws FileNotFoundException
360 * @throws UnsupportedEncodingException
364 public void exportFile(File exportLocation) throws IOException
366 PrintWriter writer = new PrintWriter(exportLocation);
367 appendFileProperties(writer);
369 writer.println("//");
376 * Returns a string to be added to the StringBuilder containing the entire
379 * @param initialColumnSeparation
380 * The initial whitespace separation between the left side of the
381 * file and first character.
382 * @param columnSeparation
383 * The separation between subsequent data entries.
385 * The list of data to be added to the String.
388 String addData(int initialColumnSeparation,
389 int columnSeparation, List<String> data)
393 for (String value : data)
397 line += String.format("%" + initialColumnSeparation + "s", value);
401 line += String.format("%" + columnSeparation + "s", value);
409 * Converts list of characters into a list of Strings.
412 * @return Returns the list of Strings.
414 List<String> charListToStringList(List<Character> list)
416 List<String> strList = new ArrayList<>();
417 for (char value : list)
419 String strValue = Character.toString(value);
420 strList.add(strValue);
426 * Converts a list of doubles into a list of Strings, rounded to the nearest
430 * @param noOfDecimals
433 List<String> doubleListToStringList(List<Double> list)
435 List<String> strList = new ArrayList<>();
436 for (double value : list)
441 strValue = String.format("%.5f", value);
444 else if (value == -0.00000d)
446 strValue = "0.00000";
453 strList.add(strValue);
459 * Converts a primitive array of Strings to a list of Strings.
464 List<String> stringArrayToStringList(String[] array)
466 List<String> list = new ArrayList<>();
467 for (String value : array)
476 * Appends the hidden Markov model data to the PrintWriter receiving the
480 * The PrintWriter receiving the output.
482 void appendModel(PrintWriter writer)
484 String symbolLine = "HMM";
485 List<Character> charSymbols = hmm.getSymbols();
486 List<String> strSymbols;
487 strSymbols = charListToStringList(charSymbols);
488 symbolLine += addData(11, 9, strSymbols);
489 writer.println(symbolLine);
490 writer.println(TRANSITIONTYPELINE);
492 int length = hmm.getLength();
494 for (int node = 0; node <= length; node++)
499 matchLine = String.format("%7s", "COMPO");
503 matchLine = String.format("%7s", node);
506 List<String> strMatches;
507 List<Double> doubleMatches;
508 doubleMatches = convertListToLogSpace(
509 hmm.getNode(node).getMatchEmissions());
510 strMatches = doubleListToStringList(doubleMatches);
511 matchLine += addData(10, 9, strMatches);
516 matchLine += SPACE + (hmm.getNodeAlignmentColumn(node) + 1);
517 matchLine += SPACE + hmm.getConsensusResidue(node);
518 matchLine += SPACE + hmm.getReferenceAnnotation(node);
519 if (hmm.getFileHeader().contains("HMMER3/f"))
521 matchLine += SPACE + hmm.getMaskedValue(node);
522 matchLine += SPACE + hmm.getConsensusStructure(node);
527 writer.println(matchLine);
529 String insertLine = EMPTY;
530 List<String> strInserts;
531 List<Double> doubleInserts;
532 doubleInserts = convertListToLogSpace(
533 hmm.getNode(node).getInsertEmissions());
534 strInserts = doubleListToStringList(doubleInserts);
535 insertLine += addData(17, 9, strInserts);
537 writer.println(insertLine);
539 String transitionLine = EMPTY;
540 List<String> strTransitions;
541 List<Double> doubleTransitions;
542 doubleTransitions = convertListToLogSpace(
543 hmm.getNode(node).getStateTransitions());
544 strTransitions = doubleListToStringList(doubleTransitions);
545 transitionLine += addData(17, 9, strTransitions);
547 writer.println(transitionLine);
552 * Appends the hidden Markov model file properties to the PrintWriter
553 * receiving the output
556 * The PrintWriter receiving the output.
558 void appendFileProperties(PrintWriter writer)
562 writer.println(hmm.getFileHeader());
564 line = String.format("%-5s %1s", "NAME", hmm.getName());
565 writer.println((line));
567 if (hmm.getAccessionNumber() != null)
569 line = String.format("%-5s %1s", "ACC", hmm.getAccessionNumber());
570 writer.println((line));
573 if (hmm.getDescription() != null)
575 line = String.format("%-5s %1s", "DESC", hmm.getDescription());
576 writer.println((line));
578 line = String.format("%-5s %1s", "LENG", hmm.getLength());
579 writer.println((line));
581 if (hmm.getMaxInstanceLength() != null)
583 line = String.format("%-5s %1s", "MAXL", hmm.getMaxInstanceLength());
584 writer.println((line));
586 line = String.format("%-5s %1s", "ALPH", hmm.getAlphabetType());
587 writer.println((line));
592 status = hmm.referenceAnnotationIsActive();
593 statusStr = HiddenMarkovModel.findStringFromBoolean(status);
594 line = String.format("%-5s %1s", "RF",
596 writer.println((line));
598 status = hmm.maskValueIsActive();
599 statusStr = HiddenMarkovModel.findStringFromBoolean(status);
600 line = String.format("%-5s %1s", "MM",
602 writer.println((line));
604 status = hmm.consensusResidueIsActive();
605 statusStr = HiddenMarkovModel.findStringFromBoolean(status);
606 line = String.format("%-5s %1s", "CONS",
608 writer.println((line));
610 status = hmm.consensusStructureIsActive();
611 statusStr = HiddenMarkovModel.findStringFromBoolean(status);
612 line = String.format("%-5s %1s", "CS",
614 writer.println((line));
616 status = hmm.mapIsActive();
617 statusStr = HiddenMarkovModel.findStringFromBoolean(status);
618 line = String.format("%-5s %1s", "MAP",
620 writer.println((line));
623 if (hmm.getDate() != null)
625 line = String.format("%-5s %1s", "DATE", hmm.getDate());
626 writer.println((line));
628 if (hmm.getNumberOfSequences() != null)
630 line = String.format("%-5s %1s", "NSEQ", hmm.getNumberOfSequences());
631 writer.println((line));
633 if (hmm.getEffectiveNumberOfSequences() != null)
635 line = String.format("%-5s %1s", "EFFN",
636 hmm.getEffectiveNumberOfSequences());
637 writer.println((line));
639 if (hmm.getCheckSum() != null)
641 line = String.format("%-5s %1s", "CKSUM", hmm.getCheckSum());
642 writer.println((line));
644 if (hmm.getGatheringThreshold() != null)
646 line = String.format("%-5s %1s", "GA", hmm.getGatheringThreshold());
647 writer.println((line));
650 if (hmm.getTrustedCutoff() != null)
652 line = String.format("%-5s %1s", "TC", hmm.getTrustedCutoff());
653 writer.println((line));
655 if (hmm.getNoiseCutoff() != null)
657 line = String.format("%-5s %1s", "NC", hmm.getNoiseCutoff());
658 writer.println((line));
660 if (hmm.getMSV() != null)
662 line = String.format("%-19s %18s", "STATS LOCAL MSV", hmm.getMSV());
663 writer.println((line));
665 line = String.format("%-19s %18s", "STATS LOCAL VITERBI",
667 writer.println((line));
669 line = String.format("%-19s %18s", "STATS LOCAL FORWARD",
671 writer.println((line));
677 * Returns the char value of a single lettered String.
682 char charValue(String string)
685 character = string.charAt(0);
692 public String print(SequenceI[] seqs, boolean jvsuffix)
699 * Converts the probabilities contained in a list into log space.
703 List<Double> convertListToLogSpace(List<Double> list)
706 List<Double> convertedList = new ArrayList<>();
707 for (int i = 0; i < list.size(); i++)
709 double prob = list.get(i);
710 double logProb = -1 * Math.log(prob);
712 convertedList.add(logProb);
714 return convertedList;
720 * Returns the HMM sequence produced by reading a .hmm file.
723 public SequenceI[] getSeqsAsArray()
725 SequenceI hmmSeq = hmm.initHMMSequence();
726 SequenceI[] seq = new SequenceI[1];
733 * Fills symbol array and adds each symbol to an index lookup
736 * The scanner scanning the symbol line in the file.
738 public void fillSymbols(Scanner parser)
741 while (parser.hasNext())
743 String strSymbol = parser.next();
744 char[] symbol = strSymbol.toCharArray();
745 hmm.getSymbols().add(symbol[0]);
746 hmm.setSymbolIndex(symbol[0], i);