package jalview.io;
-import jalview.datamodel.EValueStatistic;
+import jalview.datamodel.HMMNode;
import jalview.datamodel.HiddenMarkovModel;
+import jalview.datamodel.SequenceI;
import java.io.BufferedReader;
-import java.io.File;
import java.io.FileNotFoundException;
-import java.io.FileReader;
import java.io.IOException;
import java.io.PrintWriter;
import java.io.UnsupportedEncodingException;
import java.util.ArrayList;
-import java.util.HashMap;
import java.util.List;
-import java.util.Map;
import java.util.Scanner;
+
/**
* reads in and writes out a HMMER standard file
*
* @author TZVanaalten
*
*/
-public class HMMFile extends FileParse
+public class HMMFile extends AlignFile
+ implements AlignmentFileReaderI, AlignmentFileWriterI
{
// HMM to store file data
- HiddenMarkovModel hmm = new HiddenMarkovModel();
+ private HiddenMarkovModel hmm = new HiddenMarkovModel();
+
- // Source of file
- String dataObject;
- // number of symbols
- int numberOfSymbols;
// number of possible transitions
- final int NUMBER_OF_TRANSITIONS = 7;
+ private final int NUMBER_OF_TRANSITIONS = 7;
+
+ private final String NEW_LINE = "\n";
+
// file header
String fileHeader;
- /**
- * Constructor which contains model to be filled or exported
- *
- * @param dataSource
- * Filename, URL or Pasted String to read from
- */
- public HMMFile(String dataSource)
+ int numberOfSymbols;
+
+ private final String SPACE = " ";
+
+ private final String COMPO = "COMPO";
+
+ private final String EMPTY = "";
+
+ /**
+ * Creates an HMM file reader over an existing source by delegating to the
+ * superclass constructor as super(false, source).
+ *
+ * NOTE(review): the meaning of the 'false' argument is defined by the
+ * superclass, which is not visible here - confirm against AlignFile.
+ *
+ * @param source
+ * the source the file is read from
+ * @throws IOException
+ * if the superclass constructor throws it
+ */
+ public HMMFile(FileParse source) throws IOException
+ {
+ super(false, source);
+ }
+
+ /**
+ * Creates an HMMFile with no input source. The hmm field keeps the newly
+ * constructed HiddenMarkovModel it is initialised with until setHMM is
+ * called.
+ */
+ public HMMFile()
+ {
+
+ }
+
+ /**
+ * Returns the model held by this file object (the same instance, not a
+ * copy).
+ *
+ * @return the HiddenMarkovModel currently stored in the hmm field
+ */
+ public HiddenMarkovModel getHMM()
+ {
+ return hmm;
+ }
+
+ /**
+ * Replaces the model held by this file object. The reference is stored as
+ * given; no copy is made.
+ *
+ * @param model
+ * the HiddenMarkovModel to store
+ */
+ public void setHMM(HiddenMarkovModel model)
{
- dataObject = dataSource;
+ this.hmm = model;
+ }
+
+ /**
+ * Returns the name of the stored model, as reported by hmm.getName().
+ *
+ * @return the model name
+ */
+ public String getName()
+ {
+ return hmm.getName();
}
/**
*
* @throws IOException
*/
+ @Override
public void parse() throws IOException
{
- File file = new File(dataObject);
- FileReader fr = new FileReader(file);
- BufferedReader br = new BufferedReader(fr);
- parseFileProperties(br);
- parseModel(br);
-
+ // The property section is read first and the model section second, both
+ // from the same reader, so parseModel continues from wherever
+ // parseFileProperties stopped reading.
+ // NOTE(review): dataIn is not declared in this file - presumably the
+ // reader inherited from the superclass; confirm.
+ parseFileProperties(dataIn);
+ parseModel(dataIn);
}
+
+
/**
* imports file properties from hmm file
*
* buffered reader used to read in file
* @throws IOException
*/
- public void parseFileProperties(BufferedReader input) throws IOException
+ void parseFileProperties(BufferedReader input) throws IOException
{
boolean readingFile = true;
fileHeader = input.readLine();
// properties)
{
readingFile = false;
- hmm.fillSymbols(line);
- numberOfSymbols = hmm.getSymbols().size();
+ hmm.fillSymbols(parser);
+ numberOfSymbols = hmm.getNumberOfSymbols();
}
- else if ("STATS".equals(next)) // reads e-value stats into separate
- // field
- // on HMM object
+ else if ("STATS".equals(next))
{
- readStats(parser);
- }
- else if ("GA".equals(next) || "TC".equals(next)
- || "NC".equals(next)) // reads
- // pfam
- // data
- // into
- // separate
- // field
- // on
- // HMM
- // object
- {
- Double[] data = new Double[2];
- data[0] = parser.nextDouble();
- data[1] = parser.nextDouble();
- hmm.setPFAMData(next, data);
+ parser.next();
+ String key;
+ String value;
+ key = parser.next();
+ value = parser.next() + SPACE + SPACE + parser.next();
+ hmm.addFileProperty(key, value);
}
else
{
String value = parser.next();
while (parser.hasNext())
{
- value = value + " " + parser.next();
+ value = value + SPACE + parser.next();
}
- hmm.put(key, value);
+ hmm.addFileProperty(key, value);
}
parser.close();
}
}
/**
- * creates a new EValueStatistic object to store stats
- *
- * @param parser
- * Scanner which contains data for STATS line
- *
- */
- public void readStats(Scanner parser)
- {
- if (parser.hasNext())
- {
- String name;
- double slope;
- double location;
- String configuration;
-
- configuration = parser.next();
- name = parser.next();
- slope = parser.nextDouble();
- location = parser.nextDouble();
- hmm.addStatistic(name,
- new EValueStatistic(configuration, slope, location));
- }
- }
-
- /**
* parses the model data from the hmm file
*
* @param input
* buffered reader used to read file
* @throws IOException
*/
- public void parseModel(BufferedReader input) throws IOException
+ void parseModel(BufferedReader input) throws IOException
{
-
- String line = input.readLine();
- Scanner scanner = new Scanner(line);
- String next = scanner.next();
- if ("COMPO".equals(next)) // checks to and stores COMPO data if present
+ // One iteration per node: index 0 plus hmm.getLength() further nodes.
+ // Each iteration reads three lines (match, insert, transition).
+ for (int i = 0; i < hmm.getLength() + 1; i++)
{
- for (int i = 0; i < numberOfSymbols; i++)
-
+ hmm.getNodes().add(new HMMNode());
+ String next;
+ String line;
+ line = input.readLine();
+ Scanner matchReader = new Scanner(line);
+ // first token of the line: "COMPO" is tested for below; for i > 0 the
+ // token is consumed here and not used again
+ next = matchReader.next();
+ // NOTE(review): when i == 0 and the first token is not COMPO, the line
+ // just read is not stored anywhere, yet the insert and transition lines
+ // below are still read from the following lines - confirm this is the
+ // intended handling of a file without a COMPO line.
+ // NOTE(review): readLine() returns null at end of input and
+ // new Scanner(null) throws, so a truncated file fails here with a
+ // NullPointerException rather than an IOException.
+ if (next.equals(COMPO) || i > 0)
{
- hmm.getAverageMatchStateEmissionProbabilities()
- .add(scanner.nextDouble());
+ // stores match emission line in list
+ List<Double> matches = new ArrayList<>();
+ matches = fillList(matchReader, numberOfSymbols);
+ hmm.getNodes().get(i).setMatchEmissions(matches);
+ // the tokens after the emissions are only parsed for i > 0
+ if (i > 0)
+ {
+ parseAnnotations(matchReader, i);
+ }
}
- }
- scanner.close();
- parseBeginNodeData(input);
- for (int i = 0; i < hmm.getLength(); i++)
- {
- Scanner matchReader = new Scanner(input.readLine());
- matchReader.nextInt(); // skips number indicating position in HMM
- hmm.getMatchEmissions()
- .add(fillList(matchReader, numberOfSymbols));
- parseAnnotations(matchReader, i);
matchReader.close();
- Scanner insertReader = new Scanner(input.readLine());
- hmm.getInsertEmissions().add(fillList(insertReader, numberOfSymbols));
+ // stores insert emission line in list
+ line = input.readLine();
+ Scanner insertReader = new Scanner(line);
+ List<Double> inserts = new ArrayList<>();
+ inserts = fillList(insertReader, numberOfSymbols);
+ hmm.getNodes().get(i).setInsertEmissions(inserts);
insertReader.close();
- Scanner transitionReader = new Scanner(input.readLine());
- hmm.getStateTransitions()
- .add(fillList(transitionReader, NUMBER_OF_TRANSITIONS));
+
+ // stores state transition line in list
+ line = input.readLine();
+ Scanner transitionReader = new Scanner(line);
+ List<Double> transitions = new ArrayList<>();
+ transitions = fillList(transitionReader, NUMBER_OF_TRANSITIONS);
+ hmm.getNodes().get(i).setStateTransitions(transitions);
transitionReader.close();
}
}
/**
- * parses the begin state transitions and insert 0 emissions
- *
- * @param input
- * buffered reader used to read model
- * @param currentline
- * string contain all data on current line of buffered reader
- * @throws IOException
- */
-
- public void parseBeginNodeData(BufferedReader input)
- throws IOException
- {
- Scanner scanner = new Scanner(input.readLine());
- hmm.setInsertZeroEmissions(fillList(scanner, hmm.getSymbols().size()));
- scanner.close();
- Scanner scannerTransitions = new Scanner(input.readLine());
- hmm.setBeginStateTransitions(
- fillList(scannerTransitions, NUMBER_OF_TRANSITIONS));
- scannerTransitions.close();
- }
-
- /**
* parses annotations on match emission line
*
* @param scanner
* @param index
* index of node which is being scanned
*/
- public void parseAnnotations(Scanner scanner, int index)
+ void parseAnnotations(Scanner scanner, int index)
{
- if (hmm.getMapAnnotationFlag())
+ // The scanner is expected to be positioned just after the emission
+ // values. Five tokens are consumed in order: alignment column, consensus
+ // residue, reference annotation, mask value, consensus structure.
+ if (hmm.mapIsActive())
{
- hmm.getAlignmentColumnIndexes().add(scanner.nextInt());
+ int column;
+ column = scanner.nextInt();
+ hmm.getNodes().get(index).setAlignmentColumn(column);
+ // also record the reverse mapping, alignment column -> node index
+ hmm.getNodeLookup().put(column, index);
}
else
{
scanner.next();
}
- hmm.getAnnotations().add(new HashMap<String, Character>());
- hmm.getAnnotations().get(index).put("CONS", scanner.next().charAt(0));
- hmm.getAnnotations().get(index).put("RF", scanner.next().charAt(0));
- hmm.getAnnotations().get(index).put("MM", scanner.next().charAt(0));
- hmm.getAnnotations().get(index).put("CS", scanner.next().charAt(0));
- }
- /**
- *
- * @param transition
- * type of transition occuring
- * @return index value representing position along stateTransition array.
- */
- public Integer getTransitionType(String transition)
- {
- Integer index;
- switch (transition)
- {
- case "mm":
- index = 0;
- break;
- case "mi":
- index = 1;
- break;
- case "md":
- index = 2;
- break;
- case "im":
- index = 3;
- break;
- case "ii":
- index = 4;
- break;
- case "dm":
- index = 5;
- break;
- case "dd":
- index = 6;
- break;
- default:
- index = null;
- }
- return index;
+
+ // only the first character of each of the remaining tokens is kept
+ char consensusR;
+ consensusR = charValue(scanner.next());
+ hmm.getNodes().get(index).setConsensusResidue(consensusR);
+
+ char reference;
+ reference = charValue(scanner.next());
+ hmm.getNodes().get(index).setReferenceAnnotation(reference);
+
+
+ char value;
+ value = charValue(scanner.next());
+ hmm.getNodes().get(index).setMaskValue(value);
+
+ char consensusS;
+ consensusS = charValue(scanner.next());
+ hmm.getNodes().get(index).setConsensusStructure(consensusS);
}
+
/**
*
* @param input
* scanner positioned at the first value to be read
* @param numberOfElements
* number of elements in the list to be filled
* @return filled list
*/
- public static List<Double> fillList(Scanner input,
+ static List<Double> fillList(Scanner input,
int numberOfElements)
{
List<Double> list = new ArrayList<>();
- String next;
for (int i = 0; i < numberOfElements; i++)
{
- next = input.next();
+
+ String next = input.next();
if (next.contains("*")) // state transitions to or from delete states
// occasionally have values of -infinity. These
// values are represented by an * in the .hmm
// file, and by Double.NEGATIVE_INFINITY in the
// HiddenMarkovModel class
{
- list.add(null);
+ // NOTE(review): every other entry in this list is e^-x of the file
+ // value (see the else branch), so this sentinel is not on the same
+ // scale as its neighbours - confirm consumers treat it specially.
+ list.add(Double.NEGATIVE_INFINITY);
}
else
{
- list.add(Double.valueOf(next));
+ // the value read from the file is replaced by e^-value before storing
+ double prob = Double.valueOf(next);
+ prob = Math.pow(Math.E, -prob);
+ list.add(prob);
}
}
return list;
}
+
/**
- * writes a HiddenMarkovModel to a file. Needs mode work to make file more
- * readable for humans (align columns)
+ * writes a HiddenMarkovModel to a file
*
* @param exportLocation
* Filename, URL or Pasted String to write to
* @throws FileNotFoundException
* @throws UnsupportedEncodingException
- */
- public void exportFile(String exportLocation)
- throws FileNotFoundException, UnsupportedEncodingException
+ *
+ **/
+
+ public void exportFile(String exportLocation) throws IOException
+ {
+   // Build the whole file text in memory first: header and properties, then
+   // the model, then the "//" terminator.
+   StringBuilder file = new StringBuilder();
+   appendFileProperties(file);
+   appendModel(file);
+   file.append("//");
+
+   // try-with-resources closes the writer even when writing fails. The
+   // charset is given explicitly so the output does not depend on the
+   // platform default encoding.
+   try (PrintWriter output = new PrintWriter(exportLocation, "UTF-8"))
+   {
+     output.append(file);
+     // PrintWriter never throws on write errors; checkError() flushes and
+     // reports them, so a failed export is not silently ignored.
+     if (output.checkError())
+     {
+       throw new IOException(
+               "Failed to write HMM file to " + exportLocation);
+     }
+   }
+ }
+
+ /**
+ * Lays the given values out as one line of right-aligned columns.
+ *
+ * @param initialColumnSeparation
+ * field width used for the first value
+ * @param columnSeparation
+ * field width used for every following value
+ * @param data
+ * the values, in output order
+ * @return the formatted line (empty when data is empty)
+ */
+ String addData(int initialColumnSeparation,
+ int columnSeparation, List<String> data)
{
- PrintWriter writer = new PrintWriter(exportLocation, "UTF-8");
- writer.println(fileHeader);
- for (Map.Entry<String, String> entry : hmm.getFileProperties()
- .entrySet())
+ StringBuilder row = new StringBuilder();
+ boolean first = true;
+ for (String cell : data)
{
- writer.println(entry.getKey() + " " + entry.getValue());
+ // only the leading column uses the initial width
+ int width = first ? initialColumnSeparation : columnSeparation;
+ row.append(String.format("%" + width + "s", cell));
+ first = false;
}
- writer.println(
- "HMM" + " " + convertCharListToString(hmm.getSymbols()));
- writer.println("m->m m->i m->d i->m i->i d->m d->d");
- if (false == hmm.getAverageMatchStateEmissionProbabilities().isEmpty())
+ return row.toString();
+ }
+
+ /**
+ * Converts a list of characters into a list of one-character strings,
+ * keeping the order.
+ *
+ * @param list
+ * characters to convert
+ * @return a new list holding each character as a String
+ */
+ List<String> charListToStringList(List<Character> list)
+ {
+ List<String> symbols = new ArrayList<>();
+ for (char symbol : list)
{
- writer.println("COMPO" + " " + convertDoubleListToString(
- hmm.getAverageMatchStateEmissionProbabilities()));
+ symbols.add(String.valueOf(symbol));
}
- writer.println(convertDoubleListToString(hmm.getInsertZeroEmissions()));
- writer.println(
- convertDoubleListToString(hmm.getBeginStateTransitions()));
+ return symbols;
+ }
- for (Integer i = 0; i < hmm.getLength(); i++)
+ /**
+ * Converts each value into the text written to the file: a fixed-point
+ * number for finite values greater than zero, a zero with the same number
+ * of decimals for 0.0 and -0.0, and "*" for everything else (negative
+ * values, NaN and infinities).
+ *
+ * @param list
+ * values to convert
+ * @param noOfDecimals
+ * number of digits written after the decimal point
+ * @return a new list with one string per input value, in the same order
+ */
+ List<String> doubleListToStringList(List<Double> list,
+ int noOfDecimals)
+ {
+ // Locale.ROOT keeps '.' as the decimal separator whatever the default
+ // locale is; a decimal comma could not be parsed back by Double.valueOf
+ String numberFormat = "%." + noOfDecimals + "f";
+ String zero = String.format(java.util.Locale.ROOT, numberFormat, 0d);
+ List<String> strList = new ArrayList<>();
+ for (double value : list)
{
- String matchEmissionLine = i.toString() + " "; // adds node index
- matchEmissionLine += convertDoubleListToString(
- hmm.getMatchEmissions().get(i)); // adds match emissions
- matchEmissionLine += " "
- + hmm.getAlignmentColumnIndexes().get(i).toString(); // adds MAP
- // annotation
- matchEmissionLine += " "
- + hmm.getAnnotations().get(i).get("CONS").toString(); // adds CONS
- // annotation
- matchEmissionLine += " "
- + hmm.getAnnotations().get(i).get("RF").toString(); // adds RF
- // annotation
- matchEmissionLine += " "
- + hmm.getAnnotations().get(i).get("MM").toString(); // adds MM
- // annotation
- matchEmissionLine += " "
- + hmm.getAnnotations().get(i).get("CS").toString(); // adds CS
- // annotation
- writer.println(matchEmissionLine);
-
- writer.println(
- convertDoubleListToString(hmm.getInsertEmissions().get(i)));
- writer.println(
- convertDoubleListToString(hmm.getStateTransitions().get(i)));
- }
- writer.println("//");
+ String strValue;
+ // +Infinity is excluded here so it is written as "*" rather than as
+ // the text "Infinity"
+ if (value > 0 && !Double.isInfinite(value))
+ {
+ strValue = String.format(java.util.Locale.ROOT, numberFormat, value);
+ }
+ else if (value == 0d)
+ {
+ // true for both 0.0 and -0.0
+ strValue = zero;
+ }
+ else
+ {
+ strValue = "*";
+ }
- writer.close();
+ strList.add(strValue);
+ }
+ return strList;
}
- /**
- * converts an list of characters to a string with items separated by spaces
- *
- * @param list
- * character list to be converted
- * @return string value of char list
- */
- public String convertCharListToString(List<Character> list)
+ /**
+ * Copies the elements of an array into a new ArrayList, keeping the order.
+ *
+ * @param array
+ * the strings to copy
+ * @return a new list holding the array's elements
+ */
+ List<String> stringArrayToStringList(String[] array)
{
- String string = "";
- for (Character item : list)
+ List<String> list = new ArrayList<>();
+ for (String value : array)
{
- string = string + item.toString() + " ";
+ list.add(value);
}
- return string;
+ return list;
}
-
- /**
- * converts an list of doubles to a string with items separated by spaces
- *
- * @param list
- * double list to be converted
- * @return string value of double list
- */
- public String convertDoubleListToString(List<Double> list)
+
+ /**
+ * Appends the model section to the buffer: the symbol line, the
+ * transition-type line, and then for node 0 and each following node up to
+ * hmm.getLength() a match line, an insert line and a transition line.
+ * Values are converted with convertListToLogSpace before being written;
+ * the conversion is applied to copies so the stored model is not changed.
+ *
+ * @param file
+ * buffer the lines are appended to
+ */
+ void appendModel(StringBuilder file)
{
- String string = "";
- for (Double item : list)
+ String symbolLine = "HMM";
+ List<Character> charSymbols = hmm.getSymbols();
+ List<String> strSymbols;
+ strSymbols = charListToStringList(charSymbols);
+ symbolLine += addData(11, 9, strSymbols);
+ file.append(symbolLine + NEW_LINE);
+
+ String transitionTypeLine = "";
+ List<String> transitionTypes;
+ transitionTypes = stringArrayToStringList(hmm.getTransitionTypes());
+ transitionTypeLine += addData(16, 9, transitionTypes);
+ file.append(transitionTypeLine + NEW_LINE);
+
+ int length = hmm.getLength();
+
+ for (int node = 0; node <= length; node++)
{
- if (item != null)
+ String matchLine;
+ if (node == 0)
{
- string = string + item.toString() + " ";
+ matchLine = String.format("%7s", "COMPO");
}
else
{
- string = string + "*" + " ";
+ matchLine = String.format("%7s", node);
}
+ List<String> strMatches;
+ List<Double> doubleMatches;
+ // convertListToLogSpace overwrites the list it is given, so convert a
+ // copy; otherwise exporting would replace the node's stored values
+ doubleMatches = new ArrayList<>(hmm.getNode(node).getMatchEmissions());
+ convertListToLogSpace(doubleMatches);
+ strMatches = doubleListToStringList(doubleMatches, 5);
+ matchLine += addData(10, 9, strMatches);
+
+
+ // the annotation columns are only written for nodes after node 0
+ if (node != 0)
+ {
+ matchLine += SPACE + hmm.getNodeAlignmentColumn(node);
+ matchLine += SPACE + hmm.getConsensusResidue(node);
+ matchLine += SPACE + hmm.getReferenceAnnotation(node);
+ matchLine += SPACE + hmm.getMaskedValue(node);
+ matchLine += SPACE + hmm.getConsensusStructure(node);
+
+ }
+
+ file.append(matchLine + NEW_LINE);
+
+ String insertLine = EMPTY;
+ List<String> strInserts;
+ List<Double> doubleInserts;
+ // copy before converting, for the same reason as the match emissions
+ doubleInserts = new ArrayList<>(hmm.getNode(node).getInsertEmissions());
+ convertListToLogSpace(doubleInserts);
+ strInserts = doubleListToStringList(doubleInserts, 5);
+ insertLine += addData(17, 9, strInserts);
+
+ file.append(insertLine + NEW_LINE);
+
+ String transitionLine = EMPTY;
+ List<String> strTransitions;
+ List<Double> doubleTransitions;
+ // copy before converting, for the same reason as the match emissions
+ doubleTransitions = new ArrayList<>(
+ hmm.getNode(node).getStateTransitions());
+ convertListToLogSpace(doubleTransitions);
+ strTransitions = doubleListToStringList(doubleTransitions, 5);
+ transitionLine += addData(17, 9, strTransitions);
+
+ file.append(transitionLine + NEW_LINE);
}
+ }
+
+ /**
+ * Appends the file header line followed by the property lines of the HMM
+ * to the buffer, in the order NAME, ACC, DESC, LENG, MAXL, ALPH, RF, MM,
+ * CONS, CS, MAP, DATE, NSEQ, EFFN, CKSUM, GA, TC, NC and the three
+ * STATS LOCAL lines. NAME, LENG, ALPH and the five flag lines are always
+ * written; every other line is omitted when its getter returns null. The
+ * three STATS lines are written together, only when getMSV() is not null.
+ *
+ * NOTE(review): fileHeader is only assigned in parseFileProperties. For a
+ * model installed with setHMM without parsing a file first, the first line
+ * written is the text "null".
+ *
+ * @param file
+ * buffer the lines are appended to
+ */
+ void appendFileProperties(StringBuilder file)
+ {
+   file.append(fileHeader + NEW_LINE);
+
+   appendProperty(file, "NAME", hmm.getName());
+   appendOptionalProperty(file, "ACC", hmm.getAccessionNumber());
+   appendOptionalProperty(file, "DESC", hmm.getDescription());
+   appendProperty(file, "LENG", hmm.getLength());
+   appendOptionalProperty(file, "MAXL", hmm.getMaxInstanceLength());
+   appendProperty(file, "ALPH", hmm.getAlphabetType());
+
+   appendFlag(file, "RF", hmm.referenceAnnotationIsActive());
+   appendFlag(file, "MM", hmm.maskValueIsActive());
+   appendFlag(file, "CONS", hmm.consensusResidueIsActive());
+   appendFlag(file, "CS", hmm.consensusStructureIsActive());
+   appendFlag(file, "MAP", hmm.mapIsActive());
+
+   appendOptionalProperty(file, "DATE", hmm.getDate());
+   appendOptionalProperty(file, "NSEQ", hmm.getNumberOfSequences());
+   appendOptionalProperty(file, "EFFN",
+           hmm.getEffectiveNumberOfSequences());
+   appendOptionalProperty(file, "CKSUM", hmm.getCheckSum());
+   appendOptionalProperty(file, "GA", hmm.getGatheringThreshold());
+   appendOptionalProperty(file, "TC", hmm.getTrustedCutoff());
+   appendOptionalProperty(file, "NC", hmm.getNoiseCutoff());
+
+   // the MSV value alone decides whether all three STATS lines are written
+   if (hmm.getMSV() != null)
+   {
+     appendStatistic(file, "STATS LOCAL MSV", hmm.getMSV());
+     appendStatistic(file, "STATS LOCAL VITERBI", hmm.getViterbi());
+     appendStatistic(file, "STATS LOCAL FORWARD", hmm.getForward());
+   }
+ }
+
+ /**
+ * Appends one "KEY value" property line, with the key left-aligned in a
+ * five-character field.
+ */
+ private void appendProperty(StringBuilder file, String key, Object value)
+ {
+   file.append(String.format("%-5s %1s", key, value) + NEW_LINE);
+ }
+
+ /**
+ * Appends a property line only when the value is not null.
+ */
+ private void appendOptionalProperty(StringBuilder file, String key,
+         Object value)
+ {
+   if (value != null)
+   {
+     appendProperty(file, key, value);
+   }
+ }
+
+ /**
+ * Appends a property line whose value is the text returned by
+ * HiddenMarkovModel.findStringFromBoolean for the given flag.
+ */
+ private void appendFlag(StringBuilder file, String key, boolean active)
+ {
+   appendProperty(file, key,
+           HiddenMarkovModel.findStringFromBoolean(active));
+ }
+
+ /**
+ * Appends one STATS line: key left-aligned in 19 characters, value
+ * right-aligned in 18.
+ */
+ private void appendStatistic(StringBuilder file, String key, Object value)
+ {
+   file.append(String.format("%-19s %18s", key, value) + NEW_LINE);
+ }
+
+
+
+ /**
+ * Returns the first character of the given string.
+ *
+ * @param string
+ * a string with at least one character
+ * @return the character at index 0
+ */
+ char charValue(String string)
+ {
+ return string.charAt(0);
+ }
+
+ /**
+ * Currently a stub: both arguments are ignored and null is always
+ * returned.
+ *
+ * NOTE(review): callers that expect file text from print() will receive
+ * null - confirm whether this should return the same text that exportFile
+ * writes.
+ *
+ * @param seqs
+ * not used
+ * @param jvsuffix
+ * not used
+ * @return always null
+ */
+ @Override
+ public String print(SequenceI[] seqs, boolean jvsuffix)
+ {
+
+ return null;
+ }
+
+ /**
+ * Replaces every element of the list, in place, with the negative natural
+ * logarithm of its value.
+ *
+ * @param list
+ * the list to convert; it is modified directly
+ */
+ void convertListToLogSpace(List<Double> list)
+ {
+ int count = list.size();
+ for (int pos = 0; pos < count; pos++)
+ {
+ list.set(pos, -Math.log(list.get(pos)));
+ }
- return string;
}
}