3 import jalview.datamodel.HMMNode;
4 import jalview.datamodel.HiddenMarkovModel;
5 import jalview.datamodel.SequenceI;
7 import java.io.BufferedReader;
8 import java.io.FileNotFoundException;
9 import java.io.IOException;
10 import java.io.PrintWriter;
11 import java.io.UnsupportedEncodingException;
12 import java.util.ArrayList;
13 import java.util.List;
14 import java.util.Scanner;
18 * reads in and writes out a HMMER standard file
24 public class HMMFile extends AlignFile
25 implements AlignmentFileReaderI, AlignmentFileWriterI
27 // HMM to store file data
28 private HiddenMarkovModel hmm = new HiddenMarkovModel();
33 // number of possible transitions
34 private final int NUMBER_OF_TRANSITIONS = 7;
36 private final String NEW_LINE = "\n";
44 private final String SPACE = " ";
46 private final String COMPO = "COMPO";
48 private final String EMPTY = "";
50 private static final String TRANSITIONTYPELINE = "m->m m->i m->d i->m i->i d->m d->d";
52 public HMMFile(FileParse source) throws IOException
62 public HiddenMarkovModel getHMM()
67 public void setHMM(HiddenMarkovModel model)
72 public String getName()
78 * reads data from HMM file
83 public void parse() throws IOException
85 parseFileProperties(dataIn);
92 * imports file properties from hmm file
95 * buffered reader used to read in file
98 void parseFileProperties(BufferedReader input) throws IOException
100 boolean readingFile = true;
101 fileHeader = input.readLine();
102 String line = input.readLine();
107 Scanner parser = new Scanner(line);
108 String next = parser.next();
109 if ("HMM".equals(next)) // indicates start of HMM data (end of file
113 hmm.fillSymbols(parser);
114 numberOfSymbols = hmm.getNumberOfSymbols();
116 else if ("STATS".equals(next))
122 value = parser.next() + SPACE + SPACE + parser.next();
123 hmm.addFileProperty(key, value);
128 String value = parser.next();
129 while (parser.hasNext())
131 value = value + SPACE + parser.next();
133 hmm.addFileProperty(key, value);
137 line = input.readLine();
147 * parses the model data from the hmm file
150 * buffered reader used to read file
151 * @throws IOException
153 void parseModel(BufferedReader input) throws IOException
155 for (int i = 0; i < hmm.getLength() + 1; i++)
157 hmm.getNodes().add(new HMMNode());
160 line = input.readLine();
161 Scanner matchReader = new Scanner(line);
162 next = matchReader.next();
163 if (next.equals(COMPO) || i > 0)
165 // stores match emission line in list
166 List<Double> matches = new ArrayList<>();
167 matches = fillList(matchReader, numberOfSymbols);
168 hmm.getNodes().get(i).setMatchEmissions(matches);
171 parseAnnotations(matchReader, i);
175 // stores insert emission line in list
176 line = input.readLine();
177 Scanner insertReader = new Scanner(line);
178 List<Double> inserts = new ArrayList<>();
179 inserts = fillList(insertReader, numberOfSymbols);
180 hmm.getNodes().get(i).setInsertEmissions(inserts);
181 insertReader.close();
183 // stores state transition line in list
184 line = input.readLine();
185 Scanner transitionReader = new Scanner(line);
186 List<Double> transitions = new ArrayList<>();
187 transitions = fillList(transitionReader, NUMBER_OF_TRANSITIONS);
188 hmm.getNodes().get(i).setStateTransitions(transitions);
189 transitionReader.close();
195 * parses annotations on match emission line
198 * scanner which is processing match emission line
200 * index of node which is being scanned
202 void parseAnnotations(Scanner scanner, int index)
204 if (hmm.mapIsActive())
207 column = scanner.nextInt();
208 hmm.getNodes().get(index).setAlignmentColumn(column);
209 hmm.getNodeLookup().put(column, index);
217 consensusR = charValue(scanner.next());
218 hmm.getNodes().get(index).setConsensusResidue(consensusR);
221 reference = charValue(scanner.next());
222 hmm.getNodes().get(index).setReferenceAnnotation(reference);
226 value = charValue(scanner.next());
227 hmm.getNodes().get(index).setMaskValue(value);
230 consensusS = charValue(scanner.next());
231 hmm.getNodes().get(index).setConsensusStructure(consensusS);
238 * scanner for line containing data to be transferred to list
239 * @param numberOfElements
240 * number of elements in the list to be filled
241 * @return filled list
243 static List<Double> fillList(Scanner input,
244 int numberOfElements)
246 List<Double> list = new ArrayList<>();
247 for (int i = 0; i < numberOfElements; i++)
250 String next = input.next();
251 if (next.contains("*")) // state transitions to or from delete states
252 // occasionally have values of -infinity. These
253 // values are represented by an * in the .hmm
254 // file, and are stored as Double.NEGATIVE_INFINITY
255 // in the list built here
257 list.add(Double.NEGATIVE_INFINITY);
261 double prob = Double.valueOf(next);
262 prob = Math.pow(Math.E, -prob);
271 * writes a HiddenMarkovModel to a file
273 * @param exportLocation
274 * Filename, URL or Pasted String to write to
275 * @throws FileNotFoundException
276 * @throws UnsupportedEncodingException
280 public void exportFile(String exportLocation) throws IOException
282 StringBuilder file = new StringBuilder();
283 appendFileProperties(file);
287 PrintWriter output = new PrintWriter(exportLocation);
293 String addData(int initialColumnSeparation,
294 int columnSeparation, List<String> data)
298 for (String value : data)
302 line += String.format("%" + initialColumnSeparation + "s", value);
306 line += String.format("%" + columnSeparation + "s", value);
313 List<String> charListToStringList(List<Character> list)
315 List<String> strList = new ArrayList<>();
316 for (char value : list)
318 String strValue = Character.toString(value);
319 strList.add(strValue);
324 List<String> doubleListToStringList(List<Double> list,
327 List<String> strList = new ArrayList<>();
328 for (double value : list)
333 strValue = String.format("%.5f", value);
336 else if (value == -0.00000d)
338 strValue = "0.00000";
345 strList.add(strValue);
350 List<String> stringArrayToStringList(String[] array)
352 List<String> list = new ArrayList<>();
353 for (String value : array)
361 void appendModel(StringBuilder file)
363 String symbolLine = "HMM";
364 List<Character> charSymbols = hmm.getSymbols();
365 List<String> strSymbols;
366 strSymbols = charListToStringList(charSymbols);
367 symbolLine += addData(11, 9, strSymbols);
368 file.append(symbolLine + NEW_LINE);
369 file.append(TRANSITIONTYPELINE + NEW_LINE);
371 int length = hmm.getLength();
373 for (int node = 0; node <= length; node++)
378 matchLine = String.format("%7s", "COMPO");
382 matchLine = String.format("%7s", node);
385 List<String> strMatches;
386 List<Double> doubleMatches;
387 doubleMatches = hmm.getNode(node).getMatchEmissions();
388 convertListToLogSpace(doubleMatches);
389 strMatches = doubleListToStringList(doubleMatches, 5);
390 matchLine += addData(10, 9, strMatches);
395 matchLine += SPACE + hmm.getNodeAlignmentColumn(node);
396 matchLine += SPACE + hmm.getConsensusResidue(node);
397 matchLine += SPACE + hmm.getReferenceAnnotation(node);
398 matchLine += SPACE + hmm.getMaskedValue(node);
399 matchLine += SPACE + hmm.getConsensusStructure(node);
403 file.append(matchLine + NEW_LINE);
405 String insertLine = EMPTY;
406 List<String> strInserts;
407 List<Double> doubleInserts;
408 doubleInserts = hmm.getNode(node).getInsertEmissions();
409 convertListToLogSpace(doubleInserts);
410 strInserts = doubleListToStringList(doubleInserts, 5);
411 insertLine += addData(17, 9, strInserts);
413 file.append(insertLine + NEW_LINE);
415 String transitionLine = EMPTY;
416 List<String> strTransitions;
417 List<Double> doubleTransitions;
418 doubleTransitions = hmm.getNode(node).getStateTransitions();
419 convertListToLogSpace(doubleTransitions);
420 strTransitions = doubleListToStringList(doubleTransitions, 5);
421 transitionLine += addData(17, 9, strTransitions);
423 file.append(transitionLine + NEW_LINE);
427 void appendFileProperties(StringBuilder file)
431 file.append(fileHeader + NEW_LINE);
433 line = String.format("%-5s %1s", "NAME", hmm.getName());
434 file.append((line + NEW_LINE));
436 if (hmm.getAccessionNumber() != null)
438 line = String.format("%-5s %1s", "ACC", hmm.getAccessionNumber());
439 file.append((line + NEW_LINE));
442 if (hmm.getDescription() != null)
444 line = String.format("%-5s %1s", "DESC", hmm.getDescription());
445 file.append((line + NEW_LINE));
447 line = String.format("%-5s %1s", "LENG", hmm.getLength());
448 file.append((line + NEW_LINE));
450 if (hmm.getMaxInstanceLength() != null)
452 line = String.format("%-5s %1s", "MAXL", hmm.getMaxInstanceLength());
453 file.append((line + NEW_LINE));
455 line = String.format("%-5s %1s", "ALPH", hmm.getAlphabetType());
456 file.append((line + NEW_LINE));
461 status = hmm.referenceAnnotationIsActive();
462 statusStr = HiddenMarkovModel.findStringFromBoolean(status);
463 line = String.format("%-5s %1s", "RF",
465 file.append((line + NEW_LINE));
467 status = hmm.maskValueIsActive();
468 statusStr = HiddenMarkovModel.findStringFromBoolean(status);
469 line = String.format("%-5s %1s", "MM",
471 file.append((line + NEW_LINE));
473 status = hmm.consensusResidueIsActive();
474 statusStr = HiddenMarkovModel.findStringFromBoolean(status);
475 line = String.format("%-5s %1s", "CONS",
477 file.append((line + NEW_LINE));
479 status = hmm.consensusStructureIsActive();
480 statusStr = HiddenMarkovModel.findStringFromBoolean(status);
481 line = String.format("%-5s %1s", "CS",
483 file.append((line + NEW_LINE));
485 status = hmm.mapIsActive();
486 statusStr = HiddenMarkovModel.findStringFromBoolean(status);
487 line = String.format("%-5s %1s", "MAP",
489 file.append((line + NEW_LINE));
492 if (hmm.getDate() != null)
494 line = String.format("%-5s %1s", "DATE", hmm.getDate());
495 file.append((line + NEW_LINE));
497 if (hmm.getNumberOfSequences() != null)
499 line = String.format("%-5s %1s", "NSEQ", hmm.getNumberOfSequences());
500 file.append((line + NEW_LINE));
502 if (hmm.getEffectiveNumberOfSequences() != null)
504 line = String.format("%-5s %1s", "EFFN",
505 hmm.getEffectiveNumberOfSequences());
506 file.append((line + NEW_LINE));
508 if (hmm.getCheckSum() != null)
510 line = String.format("%-5s %1s", "CKSUM", hmm.getCheckSum());
511 file.append((line + NEW_LINE));
513 if (hmm.getGatheringThreshold() != null)
515 line = String.format("%-5s %1s", "GA", hmm.getGatheringThreshold());
516 file.append((line + NEW_LINE));
519 if (hmm.getTrustedCutoff() != null)
521 line = String.format("%-5s %1s", "TC", hmm.getTrustedCutoff());
522 file.append((line + NEW_LINE));
524 if (hmm.getNoiseCutoff() != null)
526 line = String.format("%-5s %1s", "NC", hmm.getNoiseCutoff());
527 file.append((line + NEW_LINE));
529 if (hmm.getMSV() != null)
531 line = String.format("%-19s %18s", "STATS LOCAL MSV", hmm.getMSV());
532 file.append((line + NEW_LINE));
534 line = String.format("%-19s %18s", "STATS LOCAL VITERBI",
536 file.append((line + NEW_LINE));
538 line = String.format("%-19s %18s", "STATS LOCAL FORWARD",
540 file.append((line + NEW_LINE));
546 char charValue(String string)
549 character = string.charAt(0);
554 public String print(SequenceI[] seqs, boolean jvsuffix)
560 void convertListToLogSpace(List<Double> list)
563 for (int i = 0; i < list.size(); i++)
565 double prob = list.get(i);
566 double logProb = -1 * Math.log(prob);
568 list.set(i, logProb);