3 import jalview.datamodel.HMMNode;
4 import jalview.datamodel.HiddenMarkovModel;
6 import java.io.BufferedReader;
8 import java.io.FileNotFoundException;
9 import java.io.FileReader;
10 import java.io.IOException;
11 import java.io.UnsupportedEncodingException;
12 import java.util.ArrayList;
13 import java.util.List;
14 import java.util.Scanner;
18 * reads in and writes out a HMMER standard file
24 public class HMMFile extends FileParse
// Model instance the parse methods of this class fill in and the append
// methods read back out when building the export text.
26 // HMM to store file data
27 HiddenMarkovModel hmm = new HiddenMarkovModel();
// Count of values read from each state-transition line (passed to fillList
// by parseModel).
33 // number of possible transitions
34 final static int NUMBER_OF_TRANSITIONS = 7;
// Terminator appended after every line assembled for export.
36 final static String NEW_LINE = "\n";
// Separator used when joining tokens into a property value or match line.
44 final static String SPACE = " ";
// First token parseModel compares against on the match-emission line.
46 final static String COMPO = "COMPO";
// Starting value for the insert and transition lines built in appendModel.
48 final static String EMPTY = "";
52 * Constructor which contains model to be filled or exported
55 * Filename, URL or Pasted String to read from
// Records the supplied source string in dataObject. The visible body does
// no I/O; reading happens later in parse().
57 public HMMFile(String dataSource)
59 dataObject = dataSource;
// Accessor declared to return a HiddenMarkovModel.
// NOTE(review): the body is not shown in this listing - presumably returns
// the hmm field; confirm.
62 public HiddenMarkovModel getHmm()
// Mutator taking a HiddenMarkovModel.
// NOTE(review): the body is not shown in this listing - presumably replaces
// the hmm field with model; confirm.
67 public void setHmm(HiddenMarkovModel model)
73 * reads data from HMM file
// Opens dataObject as a local file, wraps it in a BufferedReader and hands
// that reader to parseFileProperties().
// NOTE(review): the constructor documentation mentions a URL or pasted
// string, but here dataObject is only ever used as a File path.
// NOTE(review): no close() on fr/br is visible in this listing - confirm the
// reader is released (try-with-resources would be the usual remedy).
// NOTE(review): FileReader(File) decodes using the default charset.
77 public void parse() throws IOException
79 File file = new File(dataObject);
80 FileReader fr = new FileReader(file);
81 BufferedReader br = new BufferedReader(fr);
// header/property section is consumed first, from the same reader
82 parseFileProperties(br);
// Accessor declared to return a String.
// NOTE(review): the body is not shown in this listing - presumably returns
// the dataObject field; confirm.
87 public String getDataObject()
// Replaces the stored data source with the given value. Nothing is re-read
// here; a later parse() call would use the new value.
92 public void setDataObject(String value)
94 this.dataObject = value;
98 * imports file properties from hmm file
101 * buffered reader used to read in file
102 * @throws IOException
// Reads the header/property section from input. The first line is kept
// verbatim in fileHeader; each following line is tokenised with a Scanner
// and handled according to its first token.
// NOTE(review): the loop construct, the use of readingFile and the
// declaration/derivation of key sit on lines not shown in this listing.
// NOTE(review): readLine() returns null at end of stream and Scanner.next()
// throws NoSuchElementException on a blank line - confirm both cases are
// guarded in the elided lines.
104 public void parseFileProperties(BufferedReader input) throws IOException
106 boolean readingFile = true;
107 fileHeader = input.readLine();
108 String line = input.readLine();
113 Scanner parser = new Scanner(line);
// the first token on the line selects the branch below
114 String next = parser.next();
115 if ("HMM".equals(next)) // indicates start of HMM data (end of file
// the rest of the HMM line is handed to the model; the resulting symbol
// count is cached because parseModel passes it to fillList
119 hmm.fillSymbols(parser);
120 numberOfSymbols = hmm.getNumberOfSymbols();
122 else if ("STATS".equals(next))
// value is built from two further tokens joined by two spaces
128 value = parser.next() + SPACE + SPACE + parser.next();
129 hmm.addFileProperty(key, value);
// remaining case: value is every remaining token joined by single spaces
134 String value = parser.next();
135 while (parser.hasNext())
137 value = value + SPACE + parser.next();
139 hmm.addFileProperty(key, value);
// advance to the next input line
143 line = input.readLine();
153 * parses the model data from the hmm file
156 * buffered reader used to read file
157 * @throws IOException
// Reads the per-node model section from input. The loop runs
// hmm.getLength() + 1 times (indices 0..length); each pass appends a new
// HMMNode and then consumes three lines: match emissions, insert emissions
// and state transitions.
// NOTE(review): declarations of line/next and any guard around the
// parseAnnotations call sit on lines not shown in this listing.
// NOTE(review): readLine() may return null on a truncated file, which would
// make new Scanner(line) throw - confirm upstream validation.
159 public void parseModel(BufferedReader input) throws IOException
161 for (int i = 0; i < hmm.getLength() + 1; i++)
163 hmm.getNodes().add(new HMMNode());
166 line = input.readLine();
167 Scanner matchReader = new Scanner(line);
// leading token of the match line (compared with COMPO below)
168 next = matchReader.next();
169 if (next.equals(COMPO) || i > 0)
171 // stores match emission line in list
// the fresh ArrayList is immediately replaced by fillList's result
172 List<Double> matches = new ArrayList<>();
173 matches = fillList(matchReader, numberOfSymbols);
174 hmm.getNodes().get(i).setMatchEmissions(matches);
// whatever follows the emission values on this line is read as annotations
177 parseAnnotations(matchReader, i);
181 // stores insert emission line in list
182 line = input.readLine();
183 Scanner insertReader = new Scanner(line);
184 List<Double> inserts = new ArrayList<>();
185 inserts = fillList(insertReader, numberOfSymbols);
186 hmm.getNodes().get(i).setInsertEmissions(inserts);
187 insertReader.close();
189 // stores state transition line in list
190 line = input.readLine();
191 Scanner transitionReader = new Scanner(line);
192 List<Double> transitions = new ArrayList<>();
// transition lines carry a fixed number of values, not numberOfSymbols
193 transitions = fillList(transitionReader, NUMBER_OF_TRANSITIONS);
194 hmm.getNodes().get(i).setStateTransitions(transitions);
195 transitionReader.close();
201 * parses annotations on match emission line
204 * scanner which is processing match emission line
206 * index of node which is being scanned
// Reads the annotation tokens that follow the emission values on a match
// line and stores them on node `index`. Tokens are consumed from scanner in
// this order: alignment column, consensus residue, reference annotation,
// mask value, consensus structure.
// NOTE(review): lines between the visible statements are elided in this
// listing, so whether the four character reads are individually guarded
// cannot be told from here. Each scanner.next()/nextInt() throws if the
// expected token is absent.
208 public void parseAnnotations(Scanner scanner, int index)
// the alignment column is read when the model reports an active map
210 if (hmm.mapIsActive())
213 column = scanner.nextInt();
214 hmm.getNodes().get(index).setAlignmentColumn(column);
// each remaining annotation is reduced to the first character of its token
222 consensusR = charValue(scanner.next());
223 hmm.getNodes().get(index).setConsensusResidue(consensusR);
226 reference = charValue(scanner.next());
227 hmm.getNodes().get(index).setReferenceAnnotation(reference);
231 value = charValue(scanner.next());
232 hmm.getNodes().get(index).setMaskValue(value);
235 consensusS = charValue(scanner.next());
236 hmm.getNodes().get(index).setConsensusStructure(consensusS);
242 * type of transition occurring
243 * @return index value representing position along stateTransition array.
// Maps a transition name to a boxed Integer index.
// NOTE(review): the body is not shown in this listing; because the return
// type is Integer rather than int, callers should confirm whether null can
// be returned for an unrecognised name.
245 public Integer getTransitionType(String transition)
280 * scanner for line containing data to be transferred to list
281 * @param numberOfElements
282 * number of elements in the list to be filled
283 * @return filled list
// Pulls exactly numberOfElements whitespace-delimited tokens from input and
// converts each to a Double, collecting them in a new ArrayList.
// Scanner.next() throws NoSuchElementException if the line has fewer tokens
// than requested, and Double.valueOf throws NumberFormatException for a
// token that is neither numeric nor contains '*'.
// NOTE(review): the return statement is on a line not shown in this
// listing - presumably returns list; confirm.
285 public static List<Double> fillList(Scanner input,
286 int numberOfElements)
288 List<Double> list = new ArrayList<>();
289 for (int i = 0; i < numberOfElements; i++)
292 String next = input.next();
293 if (next.contains("*")) // state transitions to or from delete states
294 // occasionally have values of -infinity. These
295 // values are represented by an * in the .hmm
296 // file, and by Double.NEGATIVE_INFINITY in the
297 // list built here
299 list.add(Double.NEGATIVE_INFINITY);
// any other token is parsed as a plain decimal number
303 list.add(Double.valueOf(next));
311 * writes a HiddenMarkovModel to a file
313 * @param exportLocation
314 * Filename, URL or Pasted String to write to
315 * @throws FileNotFoundException
316 * @throws UnsupportedEncodingException
// Builds the export text in a StringBuilder, starting with the property
// section produced by appendFileProperties().
// NOTE(review): the remaining steps (model section, writing to
// exportLocation) sit on lines not shown in this listing.
// The signature declares IOException; the two exception types named in the
// Javadoc above are both subclasses of it.
320 public void exportFile(String exportLocation) throws IOException
322 StringBuilder file = new StringBuilder();
323 appendFileProperties(file);
// Formats every entry of data into a fixed-width, right-aligned column
// ("%<width>s") and concatenates the columns into one String.
// initialColumnSeparation and columnSeparation are the two widths used.
// NOTE(review): the test that picks between the two widths, the
// declaration of line and the return are on lines not shown in this
// listing - presumably the first entry uses initialColumnSeparation and the
// rest columnSeparation; confirm.
// A width of 0 would produce the pattern "%0s", which Formatter rejects.
330 public String addData(int initialColumnSeparation,
331 int columnSeparation, List<String> data)
335 for (String value : data)
339 line += String.format("%" + initialColumnSeparation + "s", value);
343 line += String.format("%" + columnSeparation + "s", value);
// Converts each Character in list to a one-character String, preserving
// order, and collects the results in a new ArrayList.
// The loop variable is a primitive char, so a null element in list causes a
// NullPointerException on unboxing.
// NOTE(review): the return statement is on a line not shown in this
// listing - presumably returns strList; confirm.
350 public static List<String> charListToStringList(List<Character> list)
352 List<String> strList = new ArrayList<>();
353 for (char value : list)
355 String strValue = Character.toString(value);
356 strList.add(strValue);
// Converts each Double in list to its textual form for export, preserving
// order. Finite values are rendered with five decimal places.
// NOTE(review): the second parameter and the text used for
// Double.NEGATIVE_INFINITY are on lines not shown in this listing. Callers
// in this file pass 5 as the second argument, yet the visible format string
// hard-codes "%.5f" - confirm whether the parameter is actually used.
// NOTE(review): String.format without an explicit Locale uses the default
// format locale, so the decimal separator may not be '.' on some systems -
// confirm this cannot corrupt exported files.
361 public static List<String> doubleListToStringList(List<Double> list,
364 List<String> strList = new ArrayList<>();
365 for (double value : list)
// negative infinity gets its own representation instead of a number
368 if (value == Double.NEGATIVE_INFINITY)
374 strValue = String.format("%.5f", value);
377 strList.add(strValue);
// Walks array in order with a new ArrayList<String> as the target.
// NOTE(review): the loop body and return are on lines not shown in this
// listing - presumably each value is added to list and list is returned;
// confirm. A null array would throw NullPointerException at the loop.
382 public static List<String> stringArrayToStringList(String[] array)
384 List<String> list = new ArrayList<>();
385 for (String value : array)
// Appends the model section to file: a symbol header line starting with
// "HMM", a transition-type header line, then for every node index from 0 to
// hmm.getLength() inclusive a match line, an insert line and a transition
// line. Column layout comes from addData with the widths given at each call.
// NOTE(review): the test that chooses between the "COMPO" label and the
// node number, and any guard around the annotation fields, are on lines not
// shown in this listing.
393 void appendModel(StringBuilder file)
395 String symbolLine = "HMM";
396 List<Character> charSymbols = hmm.getSymbols();
397 List<String> strSymbols;
398 strSymbols = charListToStringList(charSymbols);
399 symbolLine += addData(11, 9, strSymbols);
400 file.append(symbolLine + NEW_LINE);
// second header line: names of the state transitions
402 String transitionTypeLine = "";
403 List<String> transitionTypes;
404 transitionTypes = stringArrayToStringList(hmm.getTransitionTypes());
405 transitionTypeLine += addData(16, 9, transitionTypes);
406 file.append(transitionTypeLine + NEW_LINE);
408 int length = hmm.getLength();
// inclusive upper bound: length + 1 nodes are written, starting at index 0
410 for (int node = 0; node <= length; node++)
// line label is right-aligned in a 7-character field
415 matchLine = String.format("%7s", "COMPO");
419 matchLine = String.format("%7s", node);
422 List<String> strMatches;
423 List<Double> doubleMatches;
424 doubleMatches = hmm.getNode(node).getMatchEmissions();
425 strMatches = doubleListToStringList(doubleMatches, 5);
426 matchLine += addData(10, 9, strMatches);
// annotation fields follow the emissions, single-space separated, in the
// same order parseAnnotations reads them
431 matchLine += SPACE + hmm.getNodeAlignmentColumn(node);
432 matchLine += SPACE + hmm.getConsensusResidue(node);
433 matchLine += SPACE + hmm.getReferenceAnnotation(node);
434 matchLine += SPACE + hmm.getMaskedValue(node);
435 matchLine += SPACE + hmm.getConsensusStructure(node);
439 file.append(matchLine + NEW_LINE);
// insert-emission line for this node
441 String insertLine = EMPTY;
442 List<String> strInserts;
443 List<Double> doubleInserts;
444 doubleInserts = hmm.getNode(node).getInsertEmissions();
445 strInserts = doubleListToStringList(doubleInserts, 5);
446 insertLine += addData(17, 9, strInserts);
448 file.append(insertLine + NEW_LINE);
// state-transition line for this node
450 String transitionLine = EMPTY;
451 List<String> strTransitions;
452 List<Double> doubleTransitions;
453 doubleTransitions = hmm.getNode(node).getStateTransitions();
454 strTransitions = doubleListToStringList(doubleTransitions, 5);
455 transitionLine += addData(17, 9, strTransitions);
457 file.append(transitionLine + NEW_LINE);
// Appends the header/property section to file: the stored fileHeader line
// followed by one "TAG value" line per property. The pattern "%-5s %1s"
// left-justifies the tag in a 5-character field, then a space, then the
// value. Tags guarded by a null check are skipped when the model has no
// value for them.
// NOTE(review): NAME, LENG, ALPH, RF, MM, CONS, CS and MAP are written with
// no visible null check; String.format renders a null argument as "null",
// so a missing property would be exported literally - confirm these are
// always populated.
// NOTE(review): the declaration of line and the value arguments of the
// VITERBI/FORWARD STATS lines are on lines not shown in this listing.
461 void appendFileProperties(StringBuilder file)
465 file.append(fileHeader + NEW_LINE);
467 line = String.format("%-5s %1s", "NAME", hmm.getName());
468 file.append((line + NEW_LINE));
470 if (hmm.getAccessionNumber() != null)
472 line = String.format("%-5s %1s", "ACC", hmm.getAccessionNumber());
473 file.append((line + NEW_LINE));
476 if (hmm.getDescription() != null)
478 line = String.format("%-5s %1s", "DESC", hmm.getDescription());
479 file.append((line + NEW_LINE));
481 line = String.format("%-5s %1s", "LENG", hmm.getLength());
482 file.append((line + NEW_LINE));
484 if (hmm.getMaxInstanceLength() != null)
486 line = String.format("%-5s %1s", "MAXL", hmm.getMaxInstanceLength());
487 file.append((line + NEW_LINE));
489 line = String.format("%-5s %1s", "ALPH", hmm.getAlphabetType());
490 file.append((line + NEW_LINE));
// the next five flags are taken straight from the raw property map
492 line = String.format("%-5s %1s", "RF",
493 hmm.getFileProperties().get("RF"));
494 file.append((line + NEW_LINE));
496 line = String.format("%-5s %1s", "MM",
497 hmm.getFileProperties().get("MM"));
498 file.append((line + NEW_LINE));
500 line = String.format("%-5s %1s", "CONS",
501 hmm.getFileProperties().get("CONS"));
502 file.append((line + NEW_LINE));
504 line = String.format("%-5s %1s", "CS",
505 hmm.getFileProperties().get("CS"));
506 file.append((line + NEW_LINE));
508 line = String.format("%-5s %1s", "MAP",
509 hmm.getFileProperties().get("MAP"));
510 file.append((line + NEW_LINE));
512 if (hmm.getDate() != null)
514 line = String.format("%-5s %1s", "DATE", hmm.getDate());
515 file.append((line + NEW_LINE));
517 if (hmm.getNumberOfSequences() != null)
519 line = String.format("%-5s %1s", "NSEQ", hmm.getNumberOfSequences());
520 file.append((line + NEW_LINE));
522 if (hmm.getEffectiveNumberOfSequences() != null)
524 line = String.format("%-5s %1s", "EFFN",
525 hmm.getEffectiveNumberOfSequences());
526 file.append((line + NEW_LINE));
529 if (hmm.getCheckSum() != null)
530 line = String.format("%-5s %1s", "CKSUM", hmm.getCheckSum());
531 file.append((line + NEW_LINE));
533 if (hmm.getGatheringThreshold() != null)
535 line = String.format("%-5s %1s", "GA", hmm.getGatheringThreshold());
536 file.append((line + NEW_LINE));
539 if (hmm.getTrustedCutoff() != null)
541 line = String.format("%-5s %1s", "TC", hmm.getTrustedCutoff());
542 file.append((line + NEW_LINE));
544 if (hmm.getNoiseCutoff() != null)
546 line = String.format("%-5s %1s", "NC", hmm.getNoiseCutoff());
547 file.append((line + NEW_LINE));
// STATS lines use a wider layout: 19-character left-justified label, then
// the value right-aligned in 18 characters
549 if (hmm.getMSV() != null)
551 line = String.format("%-19s %18s", "STATS LOCAL MSV", hmm.getMSV());
552 file.append((line + NEW_LINE));
554 line = String.format("%-19s %18s", "STATS LOCAL VITERBI",
556 file.append((line + NEW_LINE));
558 line = String.format("%-19s %18s", "STATS LOCAL FORWARD",
560 file.append((line + NEW_LINE));
// Reduces a token to its first character via charAt(0).
// An empty string makes charAt(0) throw StringIndexOutOfBoundsException and
// a null argument throws NullPointerException.
// NOTE(review): the declaration of character and the return are on lines
// not shown in this listing - presumably character is returned; confirm.
566 public static char charValue(String string)
569 character = string.charAt(0);