--- /dev/null
+package jalview.io;
+
+import jalview.datamodel.EValueStatistic;
+import jalview.datamodel.HiddenMarkovModel;
+
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.FileNotFoundException;
+import java.io.FileReader;
+import java.io.IOException;
+import java.io.PrintWriter;
+import java.io.UnsupportedEncodingException;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Scanner;
+
+/**
+ * Reads a profile hidden Markov model from a HMMER format text file into a
+ * {@link HiddenMarkovModel}, and writes such a model back out again.
+ * <p>
+ * Typical use is {@code new HMMFile(path)}, then {@link #parse()} to load the
+ * file and/or {@link #exportFile(String)} to write the currently held model.
+ *
+ * @author TZVanaalten
+ *
+ */
+public class HMMFile extends FileParse
+{
+  // model that parse() fills and exportFile() writes
+  HiddenMarkovModel hmm = new HiddenMarkovModel();
+
+  // location of the file to read; opened as a local file path by parse()
+  String dataObject;
+
+  // number of symbols in the model's alphabet, taken from the HMM line
+  int numberOfSymbols;
+
+  // number of values on every state transition line
+  final int NUMBER_OF_TRANSITIONS = 7;
+
+  // first line of the file, kept verbatim so it can be written back on export
+  String fileHeader;
+
+  // per-node annotation fields, in the order they follow the MAP field on a
+  // match emission line
+  private static final String[] ANNOTATION_KEYS = { "CONS", "RF", "MM",
+      "CS" };
+
+  /**
+   * Creates a file object bound to a data source. Nothing is read until
+   * {@link #parse()} is called.
+   *
+   * @param dataSource
+   *          location of the HMM file; {@link #parse()} treats it as a local
+   *          file path
+   */
+  public HMMFile(String dataSource)
+  {
+    dataObject = dataSource;
+  }
+
+  /**
+   * Reads the whole HMM file: first the header properties, then the model
+   * data. The underlying reader is always closed, also when parsing fails.
+   *
+   * @throws IOException
+   *           if the file cannot be opened or read, or ends before the model
+   *           is complete
+   */
+  public void parse() throws IOException
+  {
+    try (BufferedReader br = new BufferedReader(
+            new FileReader(new File(dataObject))))
+    {
+      parseFileProperties(br);
+      parseModel(br);
+    }
+  }
+
+  /**
+   * Reads the header section of the file. The first line is stored as the
+   * file header. Every following line is dispatched on its first token until
+   * the line starting with "HMM" is reached:
+   * <ul>
+   * <li>"STATS" lines are passed to {@link #readStats(Scanner)}</li>
+   * <li>"GA", "TC" and "NC" lines are stored as a pair of numbers</li>
+   * <li>any other line is stored as a key / value property</li>
+   * </ul>
+   * Blank lines are ignored. After the "HMM" line one further line is read and
+   * discarded, so that the reader is positioned on the first line handled by
+   * {@link #parseModel(BufferedReader)}.
+   *
+   * @param input
+   *          buffered reader positioned at the start of the file
+   * @throws IOException
+   *           if reading from the file fails
+   * @throws NumberFormatException
+   *           if a numeric field cannot be parsed
+   */
+  public void parseFileProperties(BufferedReader input) throws IOException
+  {
+    fileHeader = input.readLine();
+    String line;
+    while ((line = input.readLine()) != null)
+    {
+      try (Scanner parser = new Scanner(line))
+      {
+        if (!parser.hasNext())
+        {
+          // blank line: nothing to store
+          continue;
+        }
+        String next = parser.next();
+        if ("HMM".equals(next))
+        {
+          // start of HMM data (end of file properties)
+          hmm.fillSymbols(line);
+          numberOfSymbols = hmm.getSymbols().size();
+          // The line after the HMM line is skipped on purpose (exportFile
+          // writes the transition column labels there); the model data
+          // starts on the line after it.
+          input.readLine();
+          return;
+        }
+        else if ("STATS".equals(next))
+        {
+          // e-value statistics are held in their own field on the HMM
+          readStats(parser);
+        }
+        else if ("GA".equals(next) || "TC".equals(next)
+                || "NC".equals(next))
+        {
+          // score cutoffs are held in their own field on the HMM
+          Double[] data = new Double[2];
+          data[0] = parseCutoff(parser.next());
+          data[1] = parseCutoff(parser.next());
+          hmm.setPFAMData(next, data);
+        }
+        else
+        {
+          // generic property: remaining tokens joined by single spaces; a
+          // key without a value is stored with an empty value
+          StringBuilder value = new StringBuilder();
+          while (parser.hasNext())
+          {
+            if (value.length() > 0)
+            {
+              value.append(' ');
+            }
+            value.append(parser.next());
+          }
+          hmm.put(next, value.toString());
+        }
+      }
+    }
+  }
+
+  /**
+   * Parses one cutoff value from a GA / TC / NC line.
+   *
+   * @param token
+   *          text of the value
+   * @return the parsed value
+   */
+  private static Double parseCutoff(String token)
+  {
+    // NOTE(review): a trailing ';' is tolerated because Pfam-distributed
+    // files appear to end these lines with one - confirm against real files
+    String cleaned = token.endsWith(";")
+            ? token.substring(0, token.length() - 1)
+            : token;
+    // Double.valueOf is used instead of Scanner.nextDouble so that parsing
+    // does not depend on the default locale's decimal separator
+    return Double.valueOf(cleaned);
+  }
+
+  /**
+   * Stores the e-value statistics found on a STATS line as a new
+   * EValueStatistic on the model. Does nothing if no tokens follow the STATS
+   * keyword.
+   *
+   * @param parser
+   *          Scanner over the STATS line, positioned just after the keyword
+   * @throws NumberFormatException
+   *           if a numeric field cannot be parsed
+   *
+   */
+  public void readStats(Scanner parser)
+  {
+    if (parser.hasNext())
+    {
+      String configuration = parser.next();
+      String name = parser.next();
+      // NOTE(review): the HMMER user guide describes the two numbers as
+      // location then slope; here the first is used as slope. The order is
+      // left unchanged - confirm against the EValueStatistic constructor.
+      // Double.parseDouble keeps number parsing independent of the locale.
+      double slope = Double.parseDouble(parser.next());
+      double location = Double.parseDouble(parser.next());
+      hmm.addStatistic(name,
+              new EValueStatistic(configuration, slope, location));
+    }
+  }
+
+  /**
+   * Reads the model section of the file: an optional COMPO line, the begin
+   * node (insert 0 emissions and begin state transitions), and then three
+   * lines per node (match emissions with annotations, insert emissions, state
+   * transitions) for as many nodes as the model reports via its length.
+   *
+   * @param input
+   *          buffered reader positioned on the first line of the model data
+   * @throws IOException
+   *           if reading fails or the file ends before the model is complete
+   */
+  public void parseModel(BufferedReader input) throws IOException
+  {
+    String firstLine = nextModelLine(input);
+    boolean hasCompo = false;
+    try (Scanner scanner = new Scanner(firstLine))
+    {
+      if (scanner.hasNext() && "COMPO".equals(scanner.next()))
+      {
+        hasCompo = true;
+        for (int i = 0; i < numberOfSymbols; i++)
+        {
+          hmm.getAverageMatchStateEmissionProbabilities()
+                  .add(Double.valueOf(scanner.next()));
+        }
+      }
+    }
+    // When there is no COMPO line, the line just read is already the insert
+    // 0 emission line and must be parsed rather than thrown away.
+    parseBeginNode(hasCompo ? nextModelLine(input) : firstLine, input);
+
+    for (int i = 0; i < hmm.getLength(); i++)
+    {
+      try (Scanner matchReader = new Scanner(nextModelLine(input)))
+      {
+        matchReader.nextInt(); // skips number indicating position in HMM
+        hmm.getMatchEmissions()
+                .add(fillList(matchReader, numberOfSymbols));
+        parseAnnotations(matchReader, i);
+      }
+      try (Scanner insertReader = new Scanner(nextModelLine(input)))
+      {
+        hmm.getInsertEmissions()
+                .add(fillList(insertReader, numberOfSymbols));
+      }
+      try (Scanner transitionReader = new Scanner(nextModelLine(input)))
+      {
+        hmm.getStateTransitions()
+                .add(fillList(transitionReader, NUMBER_OF_TRANSITIONS));
+      }
+    }
+  }
+
+  /**
+   * Reads the next line of model data.
+   *
+   * @param input
+   *          buffered reader used to read the model
+   * @return the line read, never null
+   * @throws IOException
+   *           if reading fails or the end of the file has been reached
+   */
+  private static String nextModelLine(BufferedReader input)
+          throws IOException
+  {
+    String line = input.readLine();
+    if (line == null)
+    {
+      throw new IOException(
+              "Unexpected end of HMM file while reading model data");
+    }
+    return line;
+  }
+
+  /**
+   * Reads the next two lines as the insert 0 emissions and the begin state
+   * transitions, and stores them on the model.
+   *
+   * @param input
+   *          buffered reader positioned on the insert 0 emission line
+   * @throws IOException
+   *           if reading fails or the file ends early
+   */
+  public void parseBeginNodeData(BufferedReader input)
+          throws IOException
+  {
+    parseBeginNode(nextModelLine(input), input);
+  }
+
+  /**
+   * Stores the begin node data when the insert 0 emission line has already
+   * been read; the begin state transitions are read from the next line.
+   *
+   * @param insertZeroLine
+   *          text of the insert 0 emission line
+   * @param input
+   *          buffered reader positioned on the begin state transition line
+   * @throws IOException
+   *           if reading fails or the file ends early
+   */
+  private void parseBeginNode(String insertZeroLine, BufferedReader input)
+          throws IOException
+  {
+    try (Scanner emissions = new Scanner(insertZeroLine))
+    {
+      hmm.setInsertZeroEmissions(
+              fillList(emissions, hmm.getSymbols().size()));
+    }
+    try (Scanner transitions = new Scanner(nextModelLine(input)))
+    {
+      hmm.setBeginStateTransitions(
+              fillList(transitions, NUMBER_OF_TRANSITIONS));
+    }
+  }
+
+  /**
+   * Reads the annotation fields that follow the emission values on a match
+   * emission line. The first field is stored as an alignment column index
+   * when the model's map annotation flag is set, and skipped otherwise. The
+   * first character of each of the next four fields is stored under the keys
+   * "CONS", "RF", "MM" and "CS" in a new annotation map appended to the model.
+   *
+   * @param scanner
+   *          scanner positioned just after the match emission values
+   * @param index
+   *          index of the node being read; not needed to locate the new
+   *          annotation map, kept for compatibility with existing callers
+   */
+  public void parseAnnotations(Scanner scanner, int index)
+  {
+    if (hmm.getMapAnnotationFlag())
+    {
+      hmm.getAlignmentColumnIndexes().add(scanner.nextInt());
+    }
+    else
+    {
+      scanner.next();
+    }
+    // Fill the map that was just added rather than looking it up by index,
+    // so that entries already in the list cannot be overwritten.
+    HashMap<String, Character> annotation = new HashMap<>();
+    hmm.getAnnotations().add(annotation);
+    for (String key : ANNOTATION_KEYS)
+    {
+      annotation.put(key, scanner.next().charAt(0));
+    }
+  }
+
+  /**
+   * Maps a two letter transition code to its position on a state transition
+   * line.
+   *
+   * @param transition
+   *          one of "mm", "mi", "md", "im", "ii", "dm" or "dd"
+   * @return index between 0 and 6, or null if the code is null or not one of
+   *         the codes listed
+   */
+  public Integer getTransitionType(String transition)
+  {
+    if (transition == null)
+    {
+      // switching on a null String would throw; treat it as unknown
+      return null;
+    }
+    switch (transition)
+    {
+    case "mm":
+      return 0;
+    case "mi":
+      return 1;
+    case "md":
+      return 2;
+    case "im":
+      return 3;
+    case "ii":
+      return 4;
+    case "dm":
+      return 5;
+    case "dd":
+      return 6;
+    default:
+      return null;
+    }
+  }
+
+  /**
+   * Reads a fixed number of values from a scanner into a list. A token
+   * containing "*" is stored as null; every other token is parsed as a
+   * double.
+   *
+   * @param input
+   *          scanner for line containing data to be transferred to list
+   * @param numberOfElements
+   *          number of tokens to read
+   * @return list of numberOfElements values, holding null for "*" tokens
+   * @throws java.util.NoSuchElementException
+   *           if the scanner runs out of tokens
+   * @throws NumberFormatException
+   *           if a token is neither "*" nor a number
+   */
+  public static List<Double> fillList(Scanner input,
+          int numberOfElements)
+  {
+    List<Double> list = new ArrayList<>();
+    for (int i = 0; i < numberOfElements; i++)
+    {
+      String next = input.next();
+      // state transitions to or from delete states occasionally have
+      // values of -infinity. These are represented by an * in the .hmm
+      // file, and by a null value in the HiddenMarkovModel class
+      list.add(next.contains("*") ? null : Double.valueOf(next));
+    }
+    return list;
+  }
+
+  /**
+   * Writes the held model to a file, encoded as UTF-8. Columns are separated
+   * by single spaces and are not aligned. Output consists of the stored file
+   * header, the file properties, the HMM line, the transition label line, the
+   * COMPO line (when averages are present), the begin node, three lines per
+   * node and a closing "//". Nodes are numbered from 1. "-" is written for a
+   * node with no alignment column index and for any missing annotation.
+   * <p>
+   * Only the entries of the model's file properties are written in the header
+   * section; nothing is written here from the STATS or GA / TC / NC data
+   * unless the model also exposes it through its file properties.
+   *
+   * @param exportLocation
+   *          path of the file to write
+   * @throws FileNotFoundException
+   *           if the file cannot be created or opened for writing
+   * @throws UnsupportedEncodingException
+   *           if UTF-8 is not supported
+   */
+  public void exportFile(String exportLocation)
+          throws FileNotFoundException, UnsupportedEncodingException
+  {
+    try (PrintWriter writer = new PrintWriter(exportLocation, "UTF-8"))
+    {
+      writer.println(fileHeader);
+      for (Map.Entry<String, String> entry : hmm.getFileProperties()
+              .entrySet())
+      {
+        writer.println(entry.getKey() + " " + entry.getValue());
+      }
+      writer.println(
+              "HMM" + " " + convertCharListToString(hmm.getSymbols()));
+      writer.println("m->m m->i m->d i->m i->i d->m d->d");
+      if (!hmm.getAverageMatchStateEmissionProbabilities().isEmpty())
+      {
+        writer.println("COMPO" + " " + convertDoubleListToString(
+                hmm.getAverageMatchStateEmissionProbabilities()));
+      }
+      writer.println(
+              convertDoubleListToString(hmm.getInsertZeroEmissions()));
+      writer.println(
+              convertDoubleListToString(hmm.getBeginStateTransitions()));
+
+      for (int i = 0; i < hmm.getLength(); i++)
+      {
+        StringBuilder matchEmissionLine = new StringBuilder();
+        // node number; HMMER numbers nodes from 1
+        matchEmissionLine.append(i + 1).append(' ');
+        matchEmissionLine.append(
+                convertDoubleListToString(hmm.getMatchEmissions().get(i)));
+        // MAP annotation, or "-" when no column index is held for the node
+        matchEmissionLine.append(' ')
+                .append(i < hmm.getAlignmentColumnIndexes().size()
+                        ? hmm.getAlignmentColumnIndexes().get(i).toString()
+                        : "-");
+        // CONS, RF, MM and CS annotations, "-" for any that is missing
+        for (String key : ANNOTATION_KEYS)
+        {
+          Object value = hmm.getAnnotations().get(i).get(key);
+          matchEmissionLine.append(' ')
+                  .append(value == null ? "-" : value.toString());
+        }
+        writer.println(matchEmissionLine.toString());
+
+        writer.println(
+                convertDoubleListToString(hmm.getInsertEmissions().get(i)));
+        writer.println(
+                convertDoubleListToString(hmm.getStateTransitions().get(i)));
+      }
+      writer.println("//");
+    }
+  }
+
+  /**
+   * Converts a list of characters to a string in which every item, including
+   * the last, is followed by a single space.
+   *
+   * @param list
+   *          character list to be converted
+   * @return the items in order, each followed by a space
+   */
+  public String convertCharListToString(List<Character> list)
+  {
+    StringBuilder string = new StringBuilder();
+    for (Character item : list)
+    {
+      string.append(item.toString()).append(' ');
+    }
+    return string.toString();
+  }
+
+  /**
+   * Converts a list of doubles to a string in which every item, including the
+   * last, is followed by a single space. Null items are written as "*".
+   *
+   * @param list
+   *          double list to be converted
+   * @return the items in order, each followed by a space
+   */
+  public String convertDoubleListToString(List<Double> list)
+  {
+    StringBuilder string = new StringBuilder();
+    for (Double item : list)
+    {
+      string.append(item != null ? item.toString() : "*").append(' ');
+    }
+    return string.toString();
+  }
+}
+