X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=src%2Fjalview%2Fio%2FHMMFile.java;h=95c6f3233e2c75be49f5326980c97695238f0fda;hb=7d950017bb9262f2eff563192071b5ed9ccc76b4;hp=7063fe941cb2861fd33f6201349460f695b11ac2;hpb=a6eac8873a084ad41c392ec27566f23258b0d026;p=jalview.git diff --git a/src/jalview/io/HMMFile.java b/src/jalview/io/HMMFile.java index 7063fe9..95c6f32 100644 --- a/src/jalview/io/HMMFile.java +++ b/src/jalview/io/HMMFile.java @@ -1,110 +1,175 @@ package jalview.io; +import jalview.api.AlignExportSettingI; +import jalview.api.AlignmentViewPanel; import jalview.datamodel.HMMNode; import jalview.datamodel.HiddenMarkovModel; +import jalview.datamodel.SequenceI; import java.io.BufferedReader; -import java.io.File; -import java.io.FileNotFoundException; -import java.io.FileReader; import java.io.IOException; -import java.io.UnsupportedEncodingException; import java.util.ArrayList; import java.util.List; import java.util.Scanner; /** - * reads in and writes out a HMMER standard file + * Adds capability to read in and write out HMMER3 files. . * * * @author TZVanaalten * */ -public class HMMFile extends FileParse +public class HMMFile extends AlignFile + implements AlignmentFileReaderI, AlignmentFileWriterI { // HMM to store file data - HiddenMarkovModel hmm = new HiddenMarkovModel(); + private HiddenMarkovModel hmm; + // number of possible transitions + private static final int NUMBER_OF_TRANSITIONS = 7; - // Source of file - String dataObject; + private String NL = "\n"; - // number of possible transitions - final static int NUMBER_OF_TRANSITIONS = 7; + //number of symbols in the alphabet used in the hidden Markov model + int numberOfSymbols; - final static String NEW_LINE = "\n"; + private final String SPACE = " "; + private final String COMPO = "COMPO"; - // file header - String fileHeader; + private final String EMPTY = ""; - int numberOfSymbols; + //This is a line that needs to be added to each HMMER� file. It is purely for readability. + private static final String TRANSITIONTYPELINE = " m->m m->i m->d i->m i->i d->m d->d"; - final static String SPACE = " "; + /** + * Parses immediately. + * + * @param inFile + * @param type + * @throws IOException + */ + public HMMFile(String inFile, DataSourceType type) throws IOException + { + super(inFile, type); + } - final static String COMPO = "COMPO"; + /** + * Parses immediately. + * + * @param source + * @throws IOException + */ + public HMMFile(FileParse source) throws IOException + { + super(source); + } - final static String EMPTY = ""; + /** + * Default constructor, do not use! + */ + public HMMFile() + { + + } + /** + * Constructor for HMMFile used for exporting. + * + * @param hmm + * @param exportImmediately + */ + public HMMFile(HiddenMarkovModel markov) + { + hmm = markov; + } /** - * Constructor which contains model to be filled or exported + * For testing, do not use. * - * @param dataSource - * Filename, URL or Pasted String to read from + * @param br */ - public HMMFile(String dataSource) + HMMFile(BufferedReader br) { - dataObject = dataSource; + dataIn = br; } - public HiddenMarkovModel getHmm() + /** + * Returns the HMM produced by reading in a HMMER3 file. + * + * @return + */ + public HiddenMarkovModel getHMM() { return hmm; } - public void setHmm(HiddenMarkovModel model) + /** + * Sets the HMM used in this file. + * + * @param model + */ + public void setHMM(HiddenMarkovModel model) { this.hmm = model; } /** - * reads data from HMM file + * Gets the name of the hidden Markov model. * - * @throws IOException + * @return */ - public void parse() throws IOException + public String getName() { - File file = new File(dataObject); - FileReader fr = new FileReader(file); - BufferedReader br = new BufferedReader(fr); - parseFileProperties(br); - parseModel(br); - + return hmm.getName(); } - public String getDataObject() + /** + * Reads the data from HMM file into the HMM field on this object. + * + * @throws IOException + */ + @Override + public void parse() throws IOException { - return dataObject; + try + { + hmm = new HiddenMarkovModel(); + parseFileProperties(dataIn); + parseModel(dataIn); + } catch (Exception e) + { + e.printStackTrace(); + } } - public void setDataObject(String value) + /** + * Reads the data from HMM file into the HMM field on this object. + * + * @throws IOException + */ + + public void parse(BufferedReader br) throws IOException { - this.dataObject = value; + hmm = new HiddenMarkovModel(); + parseFileProperties(br); + parseModel(br); } + + /** - * imports file properties from hmm file + * Imports the file properties from a HMMER3 file. * * @param input - * buffered reader used to read in file + * The buffered reader used to read in the file. * @throws IOException */ - public void parseFileProperties(BufferedReader input) throws IOException + void parseFileProperties(BufferedReader input) throws IOException { boolean readingFile = true; - fileHeader = input.readLine(); + hmm.setFileHeader(input.readLine()); String line = input.readLine(); while (readingFile) { @@ -116,7 +181,7 @@ public class HMMFile extends FileParse // properties) { readingFile = false; - hmm.fillSymbols(parser); + fillSymbols(parser); numberOfSymbols = hmm.getNumberOfSymbols(); } else if ("STATS".equals(next)) @@ -150,31 +215,31 @@ public class HMMFile extends FileParse } /** - * parses the model data from the hmm file + * Parses the model data from the HMMER3 file * * @param input - * buffered reader used to read file + * The buffered reader used to read the file. * @throws IOException */ - public void parseModel(BufferedReader input) throws IOException + void parseModel(BufferedReader input) throws IOException { - for (int i = 0; i < hmm.getLength() + 1; i++) + String line = input.readLine(); + int node = 0; + while (!"//".equals(line)) { hmm.getNodes().add(new HMMNode()); String next; - String line; - line = input.readLine(); Scanner matchReader = new Scanner(line); next = matchReader.next(); - if (next.equals(COMPO) || i > 0) + if (next.equals(COMPO) || node > 0) { // stores match emission line in list List matches = new ArrayList<>(); matches = fillList(matchReader, numberOfSymbols); - hmm.getNodes().get(i).setMatchEmissions(matches); - if (i > 0) + hmm.getNodes().get(node).setMatchEmissions(matches); + if (node > 0) { - parseAnnotations(matchReader, i); + parseAnnotations(matchReader, node); } } matchReader.close(); @@ -183,7 +248,7 @@ public class HMMFile extends FileParse Scanner insertReader = new Scanner(line); List inserts = new ArrayList<>(); inserts = fillList(insertReader, numberOfSymbols); - hmm.getNodes().get(i).setInsertEmissions(inserts); + hmm.getNodes().get(node).setInsertEmissions(inserts); insertReader.close(); // stores state transition line in list @@ -191,99 +256,79 @@ public class HMMFile extends FileParse Scanner transitionReader = new Scanner(line); List transitions = new ArrayList<>(); transitions = fillList(transitionReader, NUMBER_OF_TRANSITIONS); - hmm.getNodes().get(i).setStateTransitions(transitions); + hmm.getNodes().get(node).setStateTransitions(transitions); transitionReader.close(); + line = input.readLine(); + node++; } } /** - * parses annotations on match emission line + * Parses the annotations on the match emission line. * * @param scanner - * scanner which is processing match emission line + * The scanner which is processing match emission line. * @param index - * index of node which is beign scanned + * The index of node which is being scanned. */ - public void parseAnnotations(Scanner scanner, int index) + void parseAnnotations(Scanner scanner, int index) { - if (hmm.mapIsActive()) + if (hmm.mapIsActive() && scanner.hasNext()) { int column; column = scanner.nextInt(); - hmm.getNodes().get(index).setAlignmentColumn(column); + hmm.getNodes().get(index).setAlignmentColumn(column - 1); + hmm.getNodeLookup().put(column - 1, index); } else { scanner.next(); } - char consensusR; - consensusR = charValue(scanner.next()); - hmm.getNodes().get(index).setConsensusResidue(consensusR); + if (scanner.hasNext()) + { + char consensusR; + consensusR = charValue(scanner.next()); + hmm.getNodes().get(index).setConsensusResidue(consensusR); + } + if (scanner.hasNext()) + { char reference; reference = charValue(scanner.next()); hmm.getNodes().get(index).setReferenceAnnotation(reference); + } - + if (scanner.hasNext()) + { char value; value = charValue(scanner.next()); hmm.getNodes().get(index).setMaskValue(value); - - char consensusS; - consensusS = charValue(scanner.next()); - hmm.getNodes().get(index).setConsensusStructure(consensusS); - } - - /** - * - * @param transition - * type of transition occuring - * @return index value representing position along stateTransition array. - */ - public Integer getTransitionType(String transition) - { - Integer index; - switch (transition) + } + if (scanner.hasNext()) { - case "mm": - index = 0; - break; - case "mi": - index = 1; - break; - case "md": - index = 2; - break; - case "im": - index = 3; - break; - case "ii": - index = 4; - break; - case "dm": - index = 5; - break; - case "dd": - index = 6; - break; - default: - index = null; + char consensusS; + consensusS = charValue(scanner.next()); + hmm.getNodes().get(index).setConsensusStructure(consensusS); } - return index; } + + /** + * Fills a list of doubles based on an input line. * * @param input - * scanner for line containing data to be transferred to list + * The scanner for the line containing the data to be transferred to + * the list. * @param numberOfElements - * number of elements in the list to be filled - * @return filled list + * The number of elements in the list to be filled. + * @return filled list Returns the list of doubles. + * @throws IOException */ - public static List fillList(Scanner input, - int numberOfElements) + static List fillList(Scanner input, + int numberOfElements) throws IOException { List list = new ArrayList<>(); for (int i = 0; i < numberOfElements; i++) @@ -293,41 +338,38 @@ public class HMMFile extends FileParse if (next.contains("*")) // state transitions to or from delete states // occasionally have values of -infinity. These // values are represented by an * in the .hmm - // file, and by a null value in the - // HiddenMarkovModel class + // file. { list.add(Double.NEGATIVE_INFINITY); } else { - list.add(Double.valueOf(next)); + double prob = Double.valueOf(next); + prob = Math.pow(Math.E, -prob); + list.add(prob); } } + if (list.size() < numberOfElements) + { + throw new IOException("Incomplete data"); + } return list; } - /** - * writes a HiddenMarkovModel to a file + * Returns a string to be added to the StringBuilder containing the entire + * output String. * - * @param exportLocation - * Filename, URL or Pasted String to write to - * @throws FileNotFoundException - * @throws UnsupportedEncodingException - * - **/ - - public void exportFile(String exportLocation) throws IOException - { - StringBuilder file = new StringBuilder(); - appendFileProperties(file); - appendModel(file); - - file.append("//"); - - } - - public String addData(int initialColumnSeparation, + * @param initialColumnSeparation + * The initial whitespace separation between the left side of the + * file and first character. + * @param columnSeparation + * The separation between subsequent data entries. + * @param data + * The list fo data to be added to the String. + * @return + */ + String addData(int initialColumnSeparation, int columnSeparation, List data) { String line = EMPTY; @@ -347,7 +389,13 @@ public class HMMFile extends FileParse return line; } - public static List charListToStringList(List list) + /** + * Converts list of characters into a list of Strings. + * + * @param list + * @return Returns the list of Strings. + */ + List charListToStringList(List list) { List strList = new ArrayList<>(); for (char value : list) @@ -358,20 +406,32 @@ public class HMMFile extends FileParse return strList; } - public static List doubleListToStringList(List list, - int noOfDecimals) + /** + * Converts a list of doubles into a list of Strings, rounded to the nearest + * 5th decimal place. + * + * @param list + * @param noOfDecimals + * @return + */ + List doubleListToStringList(List list) { List strList = new ArrayList<>(); for (double value : list) { String strValue; - if (value == Double.NEGATIVE_INFINITY) + if (value > 0) { - strValue = "*"; + strValue = String.format("%.5f", value); + + } + else if (value == -0.00000d) + { + strValue = "0.00000"; } else { - strValue = String.format("%.5f", value); + strValue = "*"; } strList.add(strValue); @@ -379,7 +439,13 @@ public class HMMFile extends FileParse return strList; } - public static List stringArrayToStringList(String[] array) + /** + * Converts a primitive array of Strings to a list of Strings. + * + * @param array + * @return + */ + List stringArrayToStringList(String[] array) { List list = new ArrayList<>(); for (String value : array) @@ -390,20 +456,19 @@ public class HMMFile extends FileParse return list; } - void appendModel(StringBuilder file) + /** + * Returns a string containing the model data. + */ + String getModelAsString() { + StringBuilder output = new StringBuilder(); String symbolLine = "HMM"; List charSymbols = hmm.getSymbols(); List strSymbols; strSymbols = charListToStringList(charSymbols); symbolLine += addData(11, 9, strSymbols); - file.append(symbolLine + NEW_LINE); - - String transitionTypeLine = ""; - List transitionTypes; - transitionTypes = stringArrayToStringList(hmm.getTransitionTypes()); - transitionTypeLine += addData(16, 9, transitionTypes); - file.append(transitionTypeLine + NEW_LINE); + output.append(symbolLine); + output.append(NL).append(TRANSITIONTYPELINE); int length = hmm.getLength(); @@ -421,153 +486,294 @@ public class HMMFile extends FileParse List strMatches; List doubleMatches; - doubleMatches = hmm.getNode(node).getMatchEmissions(); - strMatches = doubleListToStringList(doubleMatches, 5); + doubleMatches = convertListToLogSpace( + hmm.getNode(node).getMatchEmissions()); + strMatches = doubleListToStringList(doubleMatches); matchLine += addData(10, 9, strMatches); if (node != 0) { - matchLine += SPACE + hmm.getNodeAlignmentColumn(node); + matchLine += SPACE + (hmm.getNodeAlignmentColumn(node) + 1); matchLine += SPACE + hmm.getConsensusResidue(node); matchLine += SPACE + hmm.getReferenceAnnotation(node); - matchLine += SPACE + hmm.getMaskedValue(node); - matchLine += SPACE + hmm.getConsensusStructure(node); + if (hmm.getFileHeader().contains("HMMER3/f")) + { + matchLine += SPACE + hmm.getMaskedValue(node); + matchLine += SPACE + hmm.getConsensusStructure(node); + } } - file.append(matchLine + NEW_LINE); + output.append(NL).append(matchLine); String insertLine = EMPTY; List strInserts; List doubleInserts; - doubleInserts = hmm.getNode(node).getInsertEmissions(); - strInserts = doubleListToStringList(doubleInserts, 5); + doubleInserts = convertListToLogSpace( + hmm.getNode(node).getInsertEmissions()); + strInserts = doubleListToStringList(doubleInserts); insertLine += addData(17, 9, strInserts); - file.append(insertLine + NEW_LINE); + output.append(NL).append(insertLine); String transitionLine = EMPTY; List strTransitions; List doubleTransitions; - doubleTransitions = hmm.getNode(node).getStateTransitions(); - strTransitions = doubleListToStringList(doubleTransitions, 5); + doubleTransitions = convertListToLogSpace( + hmm.getNode(node).getStateTransitions()); + strTransitions = doubleListToStringList(doubleTransitions); transitionLine += addData(17, 9, strTransitions); - file.append(transitionLine + NEW_LINE); + output.append(NL).append(transitionLine); } + return output.toString(); } - void appendFileProperties(StringBuilder file) + /** + * Returns a String containing the HMM file properties + */ + String getFilePropertiesAsString() { + StringBuffer output = new StringBuffer(); String line; - file.append(fileHeader + NEW_LINE); + output.append(hmm.getFileHeader()); line = String.format("%-5s %1s", "NAME", hmm.getName()); - file.append((line + NEW_LINE)); + output.append(NL + line); if (hmm.getAccessionNumber() != null) { line = String.format("%-5s %1s", "ACC", hmm.getAccessionNumber()); - file.append((line + NEW_LINE)); + output.append(NL + line); } if (hmm.getDescription() != null) { line = String.format("%-5s %1s", "DESC", hmm.getDescription()); - file.append((line + NEW_LINE)); + output.append(NL + line); } line = String.format("%-5s %1s", "LENG", hmm.getLength()); - file.append((line + NEW_LINE)); + output.append(NL + line); if (hmm.getMaxInstanceLength() != null) { line = String.format("%-5s %1s", "MAXL", hmm.getMaxInstanceLength()); - file.append((line + NEW_LINE)); + output.append(NL + line); } line = String.format("%-5s %1s", "ALPH", hmm.getAlphabetType()); - file.append((line + NEW_LINE)); + output.append(NL + line); + boolean status; + String statusStr; + + status = hmm.referenceAnnotationIsActive(); + statusStr = HiddenMarkovModel.findStringFromBoolean(status); line = String.format("%-5s %1s", "RF", - hmm.getFileProperties().get("RF")); - file.append((line + NEW_LINE)); + statusStr); + output.append(NL + line); + status = hmm.maskValueIsActive(); + statusStr = HiddenMarkovModel.findStringFromBoolean(status); line = String.format("%-5s %1s", "MM", - hmm.getFileProperties().get("MM")); - file.append((line + NEW_LINE)); + statusStr); + output.append(NL + line); + status = hmm.consensusResidueIsActive(); + statusStr = HiddenMarkovModel.findStringFromBoolean(status); line = String.format("%-5s %1s", "CONS", - hmm.getFileProperties().get("CONS")); - file.append((line + NEW_LINE)); + statusStr); + output.append(NL + line); + status = hmm.consensusStructureIsActive(); + statusStr = HiddenMarkovModel.findStringFromBoolean(status); line = String.format("%-5s %1s", "CS", - hmm.getFileProperties().get("CS")); - file.append((line + NEW_LINE)); + statusStr); + output.append(NL + line); + status = hmm.mapIsActive(); + statusStr = HiddenMarkovModel.findStringFromBoolean(status); line = String.format("%-5s %1s", "MAP", - hmm.getFileProperties().get("MAP")); - file.append((line + NEW_LINE)); + statusStr); + output.append(NL + line); + if (hmm.getDate() != null) { line = String.format("%-5s %1s", "DATE", hmm.getDate()); - file.append((line + NEW_LINE)); + output.append(NL + line); } if (hmm.getNumberOfSequences() != null) { line = String.format("%-5s %1s", "NSEQ", hmm.getNumberOfSequences()); - file.append((line + NEW_LINE)); + output.append(NL + line); } if (hmm.getEffectiveNumberOfSequences() != null) { line = String.format("%-5s %1s", "EFFN", hmm.getEffectiveNumberOfSequences()); - file.append((line + NEW_LINE)); + output.append(NL + line); } if (hmm.getCheckSum() != null) { line = String.format("%-5s %1s", "CKSUM", hmm.getCheckSum()); - file.append((line + NEW_LINE)); + output.append(NL + line); } if (hmm.getGatheringThreshold() != null) { line = String.format("%-5s %1s", "GA", hmm.getGatheringThreshold()); - file.append((line + NEW_LINE)); + output.append(NL + line); } if (hmm.getTrustedCutoff() != null) { line = String.format("%-5s %1s", "TC", hmm.getTrustedCutoff()); - file.append((line + NEW_LINE)); + output.append(NL + line); } if (hmm.getNoiseCutoff() != null) { line = String.format("%-5s %1s", "NC", hmm.getNoiseCutoff()); - file.append((line + NEW_LINE)); + output.append(NL + line); } if (hmm.getMSV() != null) { line = String.format("%-19s %18s", "STATS LOCAL MSV", hmm.getMSV()); - file.append((line + NEW_LINE)); + output.append(NL + line); line = String.format("%-19s %18s", "STATS LOCAL VITERBI", hmm.getViterbi()); - file.append((line + NEW_LINE)); + output.append(NL + line); line = String.format("%-19s %18s", "STATS LOCAL FORWARD", hmm.getForward()); - file.append((line + NEW_LINE)); + output.append(NL + line); } + return output.toString(); } - - public static char charValue(String string) + /** + * Returns the char value of a single lettered String. + * + * @param string + * @return + */ + char charValue(String string) { char character; character = string.charAt(0); return character; + } + + @Override + public String print(SequenceI[] seqs, boolean jvsuffix) + { + if (seqs[0].getHMM() != null) + { + hmm = seqs[0].getHMM(); + } + return print(); + } + + /** + * Prints the .hmm file to a String. + * + * @return + */ + public String print() + { + StringBuffer output = new StringBuffer(); + output.append(getFilePropertiesAsString()); + output.append(NL); + output.append(getModelAsString()); + output.append(NL + "//"); + return output.toString(); + } + + /** + * Converts the probabilities contained in a list into log space. + * + * @param list + */ + List convertListToLogSpace(List list) + { + + List convertedList = new ArrayList<>(); + for (int i = 0; i < list.size(); i++) + { + double prob = list.get(i); + double logProb = -1 * Math.log(prob); + + convertedList.add(logProb); + } + return convertedList; + + + } + + /** + * Returns the HMM sequence produced by reading a .hmm file. + */ + @Override + public SequenceI[] getSeqsAsArray() + { + SequenceI hmmSeq = hmm.initHMMSequence(); + SequenceI[] seq = new SequenceI[1]; + seq[0] = hmmSeq; + return seq; + + } + + /** + * Fills symbol array and adds each symbol to an index lookup + * + * @param parser + * The scanner scanning the symbol line in the file. + */ + public void fillSymbols(Scanner parser) + { + int i = 0; + while (parser.hasNext()) + { + String strSymbol = parser.next(); + char[] symbol = strSymbol.toCharArray(); + hmm.getSymbols().add(symbol[0]); + hmm.setSymbolIndex(symbol[0], i); + i++; + } + } + + @Override + public void setNewlineString(String newLine) + { + NL = newLine; + } + + @Override + public void setExportSettings(AlignExportSettingI exportSettings) + { + + } + + @Override + public void configureForView(AlignmentViewPanel viewpanel) + { + + } + + @Override + public boolean hasWarningMessage() + { + return false; + } + + @Override + public String getWarningMessage() + { + return "warning message"; + } + }