3 import jalview.datamodel.EValueStatistic;
4 import jalview.datamodel.HiddenMarkovModel;
6 import java.io.BufferedReader;
8 import java.io.FileNotFoundException;
9 import java.io.FileReader;
10 import java.io.IOException;
11 import java.io.PrintWriter;
12 import java.io.UnsupportedEncodingException;
13 import java.util.ArrayList;
14 import java.util.HashMap;
15 import java.util.List;
17 import java.util.Scanner;
20 * reads in and writes out a HMMER standard file
26 public class HMMFile extends FileParse
28 // HMM to store file data
29 HiddenMarkovModel hmm = new HiddenMarkovModel();
37 // number of possible transitions
38 final int NUMBER_OF_TRANSITIONS = 7;
/**
 * Creates an HMMFile bound to the given data source. The constructor only
 * records the source; nothing is read here.
 *
 * @param dataSource
 *          Filename, URL or Pasted String to read from
 */
public HMMFile(String dataSource)
{
  this.dataObject = dataSource;
}
55 * reads data from HMM file
59 public void parse() throws IOException
61 File file = new File(dataObject);
62 FileReader fr = new FileReader(file);
63 BufferedReader br = new BufferedReader(fr);
64 parseFileProperties(br);
70 * imports file properties from hmm file
73 * buffered reader used to read in file
76 public void parseFileProperties(BufferedReader input) throws IOException
78 boolean readingFile = true;
79 fileHeader = input.readLine();
80 String line = input.readLine();
85 Scanner parser = new Scanner(line);
86 String next = parser.next();
87 if ("HMM".equals(next)) // indicates start of HMM data (end of file
91 hmm.fillSymbols(line);
92 numberOfSymbols = hmm.getSymbols().size();
94 else if ("STATS".equals(next)) // reads e-value stats into separate
100 else if ("GA".equals(next) || "TC".equals(next)
101 || "NC".equals(next)) // reads
111 Double[] data = new Double[2];
112 data[0] = parser.nextDouble();
113 data[1] = parser.nextDouble();
114 hmm.setPFAMData(next, data);
119 String value = parser.next();
120 while (parser.hasNext())
122 value = value + " " + parser.next();
128 line = input.readLine();
138 * creates a new EValueStatistic object to store stats
141 * Scanner which contains data for STATS line
144 public void readStats(Scanner parser)
146 if (parser.hasNext())
151 String configuration;
153 configuration = parser.next();
154 name = parser.next();
155 slope = parser.nextDouble();
156 location = parser.nextDouble();
157 hmm.addStatistic(name,
158 new EValueStatistic(configuration, slope, location));
163 * parses the model data from the hmm file
166 * buffered reader used to read file
167 * @throws IOException
169 public void parseModel(BufferedReader input) throws IOException
172 String line = input.readLine();
173 Scanner scanner = new Scanner(line);
174 String next = scanner.next();
175 if ("COMPO".equals(next)) // checks to and stores COMPO data if present
177 for (int i = 0; i < numberOfSymbols; i++)
180 hmm.getAverageMatchStateEmissionProbabilities()
181 .add(scanner.nextDouble());
185 parseBeginNodeData(input);
186 for (int i = 0; i < hmm.getLength(); i++)
188 Scanner matchReader = new Scanner(input.readLine());
189 matchReader.nextInt(); // skips number indicating position in HMM
190 hmm.getMatchEmissions()
191 .add(fillList(matchReader, numberOfSymbols));
192 parseAnnotations(matchReader, i);
194 Scanner insertReader = new Scanner(input.readLine());
195 hmm.getInsertEmissions().add(fillList(insertReader, numberOfSymbols));
196 insertReader.close();
197 Scanner transitionReader = new Scanner(input.readLine());
198 hmm.getStateTransitions()
199 .add(fillList(transitionReader, NUMBER_OF_TRANSITIONS));
200 transitionReader.close();
206 * parses the begin state transitions and insert 0 emissions
209 * buffered reader used to read model
211 * string contain all data on current line of buffered reader
212 * @throws IOException
215 public void parseBeginNodeData(BufferedReader input)
218 Scanner scanner = new Scanner(input.readLine());
219 hmm.setInsertZeroEmissions(fillList(scanner, hmm.getSymbols().size()));
221 Scanner scannerTransitions = new Scanner(input.readLine());
222 hmm.setBeginStateTransitions(
223 fillList(scannerTransitions, NUMBER_OF_TRANSITIONS));
224 scannerTransitions.close();
228 * parses annotations on match emission line
231 * scanner which is processing match emission line
233 * index of node which is being scanned
235 public void parseAnnotations(Scanner scanner, int index)
237 if (hmm.getMapAnnotationFlag())
239 hmm.getAlignmentColumnIndexes().add(scanner.nextInt());
245 hmm.getAnnotations().add(new HashMap<String, Character>());
246 hmm.getAnnotations().get(index).put("CONS", scanner.next().charAt(0));
247 hmm.getAnnotations().get(index).put("RF", scanner.next().charAt(0));
248 hmm.getAnnotations().get(index).put("MM", scanner.next().charAt(0));
249 hmm.getAnnotations().get(index).put("CS", scanner.next().charAt(0));
254 * type of transition occurring
255 * @return index value representing position along stateTransition array.
257 public Integer getTransitionType(String transition)
292 * scanner for line containing data to be transferred to list
293 * @param numberOfElements
294 * number of elements in the list to be filled
295 * @return filled list
297 public static List<Double> fillList(Scanner input,
298 int numberOfElements)
300 List<Double> list = new ArrayList<>();
302 for (int i = 0; i < numberOfElements; i++)
305 if (next.contains("*")) // state transitions to or from delete states
306 // occasionally have values of -infinity. These
307 // values are represented by an * in the .hmm
308 // file, and by a null value in the
309 // HiddenMarkovModel class
315 list.add(Double.valueOf(next));
322 * writes a HiddenMarkovModel to a file. Needs mode work to make file more
323 * readable for humans (align columns)
325 * @param exportLocation
326 * Filename, URL or Pasted String to write to
327 * @throws FileNotFoundException
328 * @throws UnsupportedEncodingException
330 public void exportFile(String exportLocation)
331 throws FileNotFoundException, UnsupportedEncodingException
333 PrintWriter writer = new PrintWriter(exportLocation, "UTF-8");
334 writer.println(fileHeader);
335 for (Map.Entry<String, String> entry : hmm.getFileProperties()
338 writer.println(entry.getKey() + " " + entry.getValue());
341 "HMM" + " " + convertCharListToString(hmm.getSymbols()));
342 writer.println("m->m m->i m->d i->m i->i d->m d->d");
343 if (false == hmm.getAverageMatchStateEmissionProbabilities().isEmpty())
345 writer.println("COMPO" + " " + convertDoubleListToString(
346 hmm.getAverageMatchStateEmissionProbabilities()));
348 writer.println(convertDoubleListToString(hmm.getInsertZeroEmissions()));
350 convertDoubleListToString(hmm.getBeginStateTransitions()));
352 for (Integer i = 0; i < hmm.getLength(); i++)
354 String matchEmissionLine = i.toString() + " "; // adds node index
355 matchEmissionLine += convertDoubleListToString(
356 hmm.getMatchEmissions().get(i)); // adds match emissions
357 matchEmissionLine += " "
358 + hmm.getAlignmentColumnIndexes().get(i).toString(); // adds MAP
360 matchEmissionLine += " "
361 + hmm.getAnnotations().get(i).get("CONS").toString(); // adds CONS
363 matchEmissionLine += " "
364 + hmm.getAnnotations().get(i).get("RF").toString(); // adds RF
366 matchEmissionLine += " "
367 + hmm.getAnnotations().get(i).get("MM").toString(); // adds MM
369 matchEmissionLine += " "
370 + hmm.getAnnotations().get(i).get("CS").toString(); // adds CS
372 writer.println(matchEmissionLine);
375 convertDoubleListToString(hmm.getInsertEmissions().get(i)));
377 convertDoubleListToString(hmm.getStateTransitions().get(i)));
379 writer.println("//");
385 * converts an list of characters to a string with items separated by spaces
388 * character list to be converted
389 * @return string value of char list
391 public String convertCharListToString(List<Character> list)
394 for (Character item : list)
396 string = string + item.toString() + " ";
403 * converts a list of doubles to a string with items separated by spaces
406 * double list to be converted
407 * @return string value of double list
409 public String convertDoubleListToString(List<Double> list)
412 for (Double item : list)
416 string = string + item.toString() + " ";
420 string = string + "*" + " ";