From a6eac8873a084ad41c392ec27566f23258b0d026 Mon Sep 17 00:00:00 2001 From: TZVanaalten Date: Tue, 20 Jun 2017 17:01:38 +0100 Subject: [PATCH] restructure file exporter --- src/jalview/datamodel/EValueStatistic.java | 40 -- src/jalview/datamodel/HMMNode.java | 97 +++ src/jalview/datamodel/HiddenMarkovModel.java | 693 ++++++++++--------- src/jalview/io/HMMFile.java | 519 +++++++++------ test/jalview/datamodel/HiddenMarkovModelTest.java | 16 - test/jalview/io/HMMFileTest.java | 741 +++++++++------------ 6 files changed, 1107 insertions(+), 999 deletions(-) delete mode 100644 src/jalview/datamodel/EValueStatistic.java create mode 100644 src/jalview/datamodel/HMMNode.java delete mode 100644 test/jalview/datamodel/HiddenMarkovModelTest.java diff --git a/src/jalview/datamodel/EValueStatistic.java b/src/jalview/datamodel/EValueStatistic.java deleted file mode 100644 index 4641d2e..0000000 --- a/src/jalview/datamodel/EValueStatistic.java +++ /dev/null @@ -1,40 +0,0 @@ -package jalview.datamodel; - -/** - * bean which stores e-Value Statistics - * - * @author TZVanaalten - * - */ -public class EValueStatistic -{ - final String alignmentModeConfiguration; - - final double locationOfDistribution; - - final double slopeOfDistribution; - - public EValueStatistic(String configuration, double slope, - double location) - { - alignmentModeConfiguration = configuration; - locationOfDistribution = location; - slopeOfDistribution = slope; - } - - public String getAlignmentModeConfiguration() - { - return alignmentModeConfiguration; - } - - public double getLocationOfDistribution() - { - return locationOfDistribution; - } - - public double getSlopeOfDistribution() - { - return slopeOfDistribution; - } - -} diff --git a/src/jalview/datamodel/HMMNode.java b/src/jalview/datamodel/HMMNode.java new file mode 100644 index 0000000..30a6a8d --- /dev/null +++ b/src/jalview/datamodel/HMMNode.java @@ -0,0 +1,97 @@ +package jalview.datamodel; + +import java.util.ArrayList; +import java.util.List; + +/** + * stores data for each node in the hmm model + * @author TZVanaalten + * + */ +public class HMMNode +{ + //contains the match emissions for each symbol + List matchEmissions = new ArrayList<>(); + //contains the insert emissions for each symbol + List insertEmissions = new ArrayList<>(); + //contains the state transitions for each possible transition. These are bm, bi, bd, im, ii, dm and dd in order (0th position in + // the array indicates the probability of a bm transition) + List stateTransitions = new ArrayList<>(); + + //annotations + Integer alignmentColumn = null; + char consensusResidue; + char referenceAnnotation; + char maskValue; + char consensusStructure; + public List getMatchEmissions() + { + return matchEmissions; + } + + public void setMatchEmissions(List matchEmissionsL) + { + this.matchEmissions = matchEmissionsL; + } + public List getInsertEmissions() + { + return insertEmissions; + } + + public void setInsertEmissions(List insertEmissionsL) + { + this.insertEmissions = insertEmissionsL; + } + public List getStateTransitions() + { + return stateTransitions; + } + + public void setStateTransitions(List stateTransitionsL) + { + this.stateTransitions = stateTransitionsL; + } + + public Integer getAlignmentColumn() + { + return alignmentColumn; + } + public void setAlignmentColumn(int alignmentColumn) + { + this.alignmentColumn = alignmentColumn; + } + public char getConsensusResidue() + { + return consensusResidue; + } + public void setConsensusResidue(char consensusResidue) + { + this.consensusResidue = consensusResidue; + } + public char getReferenceAnnotation() + { + return referenceAnnotation; + } + public void setReferenceAnnotation(char referenceAnnotation) + { + this.referenceAnnotation = referenceAnnotation; + } + public char getMaskValue() + { + return maskValue; + } + public void setMaskValue(char maskValue) + { + this.maskValue = maskValue; + } + public char getConsensusStructure() + { + return consensusStructure; + } + public void setConsensusStructure(char consensusStructure) + { + this.consensusStructure = consensusStructure; + } +} + + diff --git a/src/jalview/datamodel/HiddenMarkovModel.java b/src/jalview/datamodel/HiddenMarkovModel.java index b863264..cd6490e 100644 --- a/src/jalview/datamodel/HiddenMarkovModel.java +++ b/src/jalview/datamodel/HiddenMarkovModel.java @@ -18,552 +18,603 @@ public class HiddenMarkovModel // only string value - use the getter methods. For example, to find the length // of theHMM, use getModelLength()to return an int value Map fileProperties = new HashMap<>(); + + //contains all of the symbols used in this model. The index of each symbol represents its lookup value + List symbols = new ArrayList<>(); - // contains the average emission probabilities for each symbol - List averageMatchStateEmissionProbabilities = new ArrayList<>(); + // contains information for each node in the model. The begin node is at index + // 0. Node 0 contains average emission probabilities for each symbol + List nodes = new ArrayList<>(); - // contains the probabilities of insert 0 emissions for each symbol - List insertZeroEmissions = new ArrayList<>(); + final String YES = "yes"; - // contains the probabilities of transitions from the begin state and insert - // state 0. These are bm, bi, bd, im, ii, dm and dd in order (0th position in - // the array indicates the probability of a bm transition) + final String NO = "no"; - List beginStateTransitions = new ArrayList<>(); + int numberOfSymbols; + + //keys for file properties hashmap + private final String NAME = "NAME"; - // contains the alignment column index for each node - List alignmentColumnIndexes = new ArrayList<>(); + private final String ACCESSION_NUMBER = "ACC"; - // contains all other annotations for each node. These can be the - // consensus(CONS), reference annotation(RF), mask value(MM) or consensus - // structure(CS) - List> annotations = new ArrayList<>(); + private final String DESCRIPTION = "DESC"; - // contains the match emission for each symbol at each node - List> matchEmissions = new ArrayList<>(); + private final String LENGTH = "LENG"; - // contains the insert emission for each symbol at each node - List> insertEmissions = new ArrayList<>(); + private final String MAX_LENGTH = "MAXL"; - // contains the state transition for each state transition. See - // beginStateTransitions field for transition possibilities. - List> stateTransitions = new ArrayList<>(); + private final String ALPHABET = "ALPH"; - // contains cutoffs and thresholds from PFAM - Map pfamData = new HashMap<>(); + private final String DATE = "DATE"; - // contains e-value statistic objects which contain the alignment mode - // configuration, and the slope and location of each distribution - Map eValueStatistics = new HashMap<>(); + private final String COMMAND_LOG = "COM"; - final String yes = "yes"; + private final String NUMBER_OF_SEQUENCES = "NSEQ"; - final String no = "no"; + private final String EFF_NUMBER_OF_SEQUENCES = "EFFN"; - List symbols = new ArrayList<>(); + private final String CHECK_SUM = "CKSUM"; - public List getBeginStateTransitions() - { - return beginStateTransitions; - } + private final String GATHERING_THRESHOLDS = "GA"; - public void setBeginStateTransitions(List beginStateTransitionsL) - { - this.beginStateTransitions = beginStateTransitionsL; - } + private final String TRUSTED_CUTOFFS = "TC"; - public List> getStateTransitions() - { - return stateTransitions; - } + private final String NOISE_CUTOFFS = "NC"; - public void setStateTransitions(List> stateTransitionsL) - { - this.stateTransitions = stateTransitionsL; - } + private final String STATISTICS = "STATS"; - public List getSymbols() - { - return symbols; - } + private final String COMPO = "COMPO"; + + private final String GATHERING_THRESHOLD = "GA"; - public void setSymbols(List symbolsL) - { - this.symbols = symbolsL; - } + private final String TRUSTED_CUTOFF = "TC"; - public List getAverageMatchStateEmissionProbabilities() - { - return averageMatchStateEmissionProbabilities; - } + private final String NOISE_CUTOFF = "NC"; - public void setAverageMatchStateEmissionProbabilities( - List averageMatchStateEmissionProbabilitiesL) - { - this.averageMatchStateEmissionProbabilities = averageMatchStateEmissionProbabilitiesL; - } + private final String VITERBI = "VITERBI"; + private final String MSV = "MSV"; - public List getInsertZeroEmissions() - { - return insertZeroEmissions; - } + private final String FORWARD = "FORWARD"; + + private final String MAP = "MAP"; + + private final String REFERENCE_ANNOTATION = "RF"; + + private final String CONSENSUS_RESIDUE = "CONS"; + + private final String CONSENSUS_STRUCTURE = "CS"; + + private final String MASKED_VALUE = "MM"; + + final static String[] TRANSITION_TYPES = new String[] { "m->m", "m->i", + "m->d", "i->m", "i->i", "d->m", "d->d" }; - public void setInsertZeroEmissions(List insertZeroEmissionsL) + public String getTransitionType(int index) { - this.insertZeroEmissions = insertZeroEmissionsL; + return TRANSITION_TYPES[index]; } - public List> getMatchEmissions() + public String[] getTransitionTypes() { - return matchEmissions; + return TRANSITION_TYPES; } - - public void setMatchEmissions(List> matchEmissionsL) + public char getSymbol(int index) { - this.matchEmissions = matchEmissionsL; + return getSymbols().get(index); } - - public List> getInsertEmissions() + public Map getFileProperties() { - return insertEmissions; + return fileProperties; } - public void setInsertEmissions(List> insertEmissionsL) + public HMMNode getNode(int nodeIndex) { - this.insertEmissions = insertEmissionsL; + return getNodes().get(nodeIndex); } - public void fillSymbols(String line) + + public void setSymbols(List symbolsL) { - Scanner scanner = new Scanner(line); - scanner.next(); - while (scanner.hasNext()) - { - symbols.add(scanner.next().charAt(0)); - } - scanner.close(); + this.symbols = symbolsL; } public String getName() { - return fileProperties.get("NAME"); + return fileProperties.get(NAME); } public String getAccessionNumber() { - return fileProperties.get("ACC"); + return fileProperties.get(ACCESSION_NUMBER); } public void setAccessionNumber(String value) { - fileProperties.put("ACC", value); + fileProperties.put(ACCESSION_NUMBER, value); } public String getDescription() { - return fileProperties.get("DESC"); + return fileProperties.get(DESCRIPTION); } public void setDescription(String value) { - fileProperties.put("DESC", value); + fileProperties.put(DESCRIPTION, value); } public Integer getLength() { - if (fileProperties.get("LENG") == null) + if (fileProperties.get(LENGTH) == null) { return null; } - return Integer.parseInt(fileProperties.get("LENG")); + return Integer.parseInt(fileProperties.get(LENGTH)); } public void setLength(int value) { - fileProperties.put("LENG", String.valueOf(value)); + fileProperties.put(LENGTH, String.valueOf(value)); } public Integer getMaxInstanceLength() { - if (fileProperties.get("MAXL") == null) + if (fileProperties.get(MAX_LENGTH) == null) { return null; } - return Integer.parseInt(fileProperties.get("MAXL")); + return Integer.parseInt(fileProperties.get(MAX_LENGTH)); } public void setMaxInstanceLength(int value) { - fileProperties.put("MAXL", String.valueOf(value)); + fileProperties.put(MAX_LENGTH, String.valueOf(value)); } // gets type of symbol alphabet - "amino", "DNA", "RNA" public String getAlphabetType() { - return fileProperties.get("ALPH"); + return fileProperties.get(ALPHABET); } public void setAlphabetType(String value) { - fileProperties.put("ALPH", value); + fileProperties.put(ALPHABET, value); } - // returns boolean indicating whether the reference annotation character field - // for each match state is valid or ignored - public boolean getReferenceAnnotationFlag() + // not sure whether to implement this with Date object + public String getDate() { - if (fileProperties.get("RF") != null) - { - if (fileProperties.get("RF").equals(yes)) - { - return true; - } - } - return false; + return fileProperties.get(DATE); } - public void setReferenceAnnotationFlag(boolean value) + public void setDate(String value) { - if (value) - { - fileProperties.put("RF", yes); - } - else - { - fileProperties.put("RF", no); - } - + fileProperties.put(DATE, value); } - // returns boolean indicating whether the model mask annotation character - // field - // for each match state is valid or ignored - public boolean getModelMaskedFlag() + // not sure whether to implement this + public String getCommandLineLog() { - if (fileProperties.get("MM") != null) - { - if (fileProperties.get("MM").equals(yes)) - { - return true; - } - } - return false; + return fileProperties.get(COMMAND_LOG); } - public void setModelMaskedFlag(boolean value) + public void setCommandLineLog(String value) { - if (value) - { - fileProperties.put("MM", yes); - } - else - { - fileProperties.put("MM", no); - } + fileProperties.put(COMMAND_LOG, value); } - // returns boolean indicating whether the consensus residue field - // for each match state is valid or ignored - public boolean getConsensusResidueAnnotationFlag() + // gets the number of sequences that the HMM was trained on + public Integer getNumberOfSequences() { - if (fileProperties.get("CONS") != null) + if (fileProperties.get(NUMBER_OF_SEQUENCES) == null) { - if (fileProperties.get("CONS").equals(yes)) - { - return true; - } + return null; } - return false; + return Integer.parseInt(fileProperties.get(NUMBER_OF_SEQUENCES)); } - public void setConsensusResidueeAnnotationFlag(boolean value) + public void setNumberOfSequences(int value) { - if (value) - { - fileProperties.put("CONS", yes); - } - else - { - fileProperties.put("CONS", no); - } + fileProperties.put(NUMBER_OF_SEQUENCES, String.valueOf(value)); } - // returns boolean indicating whether the consensus structure character field - // for each match state is valid or ignored - public boolean getConsensusStructureAnnotationFlag() + // gets the effective number determined during sequence weighting + public Double getEffectiveNumberOfSequences() { - if (fileProperties.get("CS") != null) + if (fileProperties.get(LENGTH) == null) { - if (fileProperties.get("CS").equals(yes)) - { - return true; - } + return null; } - return false; + return Double.parseDouble(fileProperties.get(EFF_NUMBER_OF_SEQUENCES)); } - public void setConsensusStructureAnnotationFlag(boolean value) + public void setEffectiveNumberOfSequences(double value) { - if (value) - { - fileProperties.put("CS", yes); - } - else - { - fileProperties.put("CS", no); - } + fileProperties.put(EFF_NUMBER_OF_SEQUENCES, String.valueOf(value)); } - // returns boolean indicating whether the model mask annotation character - // field - // for each match state is valid or ignored - public boolean getMapAnnotationFlag() + public Long getCheckSum() { - if (fileProperties.get("MAP") != null) + if (fileProperties.get(LENGTH) == null) { - if (fileProperties.get("MAP").equals(yes)) - { - return true; - } + return null; } - return false; + return Long.parseLong(fileProperties.get(CHECK_SUM)); } - public void setMapAnnotationFlag(boolean value) + public void setCheckSum(long value) { - if (value) - { - fileProperties.put("MAP", yes); - } - else - { - fileProperties.put("MAP", no); - } + fileProperties.put(CHECK_SUM, String.valueOf(value)); } - // not sure whether to implement this with Date object - public String getDate() + public List getNodes() { - return fileProperties.get("DATE"); + return nodes; } - public void setDate(String value) + public void setNodes(List nodes) { - fileProperties.put("DATE", value); + this.nodes = nodes; } - - // not sure whether to implement this - public String getCommandLineLog() + + /** + * gets the match emission at a node for a symbol + * @param nodeIndex + * position of node in model + * @param symbolIndex + * index of symbol being searched + * @return + * negative log probability of a match emission of the given symbol + */ + public double getMatchEmission(int nodeIndex, int symbolIndex) + { + double value = nodes.get(nodeIndex).getMatchEmissions().get(symbolIndex); + return value; + } + + /** + * gets the insert emission at a node for a symbol + * @param nodeIndex + * position of node in model + * @param symbolIndex + * index of symbol being searched + * @return + * negative log probability of an insert emission of the given symbol + */ + public double getInsertEmission(int nodeIndex, int symbolIndex) + { + double value = nodes.get(nodeIndex).getInsertEmissions().get(symbolIndex); + return value; + } + + /** + * gets the state transition at a node for a specific transition + * @param nodeIndex + * position of node in model + * @param transitionIndex + * index of stransition being searched + * @return + * negative log probability of a state transition of the given type + */ + public double getStateTransition(int nodeIndex, int transitionIndex) { - return fileProperties.get("COM"); + double value = nodes.get(nodeIndex).getStateTransitions() + .get(transitionIndex); + return value; } - - public void setCommandLineLog(String value) + + public Integer getNodeAlignmentColumn(int nodeIndex) { - fileProperties.put("COM", value); + Integer value = nodes.get(nodeIndex).getAlignmentColumn(); + return value; } - - // gets the number of sequences that the HMM was trained on - public Integer getSequenceNumber() + + public char getConsensusResidue(int nodeIndex) { - if (fileProperties.get("NSEQ") == null) - { - return null; - } - return Integer.parseInt(fileProperties.get("NSEQ")); + char value = nodes.get(nodeIndex).getConsensusResidue(); + return value; } - - public void setSequenceNumber(int value) + + public char getReferenceAnnotation(int nodeIndex) { - fileProperties.put("NSEQ", String.valueOf(value)); + char value = nodes.get(nodeIndex).getReferenceAnnotation(); + return value; } - - // gets the effective number determined during sequence weighting - public Double getEffectiveSequenceNumber() + + public char getMaskedValue(int nodeIndex) { - if (fileProperties.get("LENG") == null) - { - return null; - } - return Double.parseDouble(fileProperties.get("EFFN")); + char value = nodes.get(nodeIndex).getMaskValue(); + return value; } - - public void setEffectiveSequenceNumber(double value) + + public char getConsensusStructure(int nodeIndex) { - fileProperties.put("EFFN", String.valueOf(value)); + char value = nodes.get(nodeIndex).getConsensusStructure(); + return value; + } + + /** + * returns the average match emission for a given symbol + * @param symbolIndex + * index of symbol + * @return + * average negative log propbability of a match emission of the given symbol + */ + public double getAverageMatchEmission(int symbolIndex) + { + double value = nodes.get(0).getMatchEmissions().get(symbolIndex); + return value; } - public Long getCheckSum() + public int getNumberOfSymbols() { - if (fileProperties.get("LENG") == null) - { - return null; - } - return Long.parseLong(fileProperties.get("CKSUM")); + return numberOfSymbols; } - public void setCheckSum(long value) + public void setNumberOfSymbols(int numberOfSymbols) { - fileProperties.put("CKSUM", String.valueOf(value)); + this.numberOfSymbols = numberOfSymbols; } - public Double getGatheringThreshold1() + public List getSymbols() { - try - { - return pfamData.get("GA")[0]; - } catch (NullPointerException e) - { - return null; + return symbols; + } + + + /** + * fills symbol array and also finds numberOfSymbols + * + * @param parser + * scanner scanning symbol line in file + */ + public void fillSymbols(Scanner parser) + { + while (parser.hasNext()) + { + String strSymbol = parser.next(); + char[] symbol = strSymbol.toCharArray(); + symbols.add(symbol[0]); } + numberOfSymbols = symbols.size(); } - public void setPFAMData(String key, Double[] data) + /** + * adds file property + * + * @param key + * @param value + */ + public void addFileProperty(String key, String value) { - pfamData.put(key, data); + fileProperties.put(key, value); } - public Double getGatheringThreshold2() + public boolean referenceAnnotationIsActive() { - try - { - return pfamData.get("GA")[1]; - } catch (NullPointerException e) + String status; + status = fileProperties.get(REFERENCE_ANNOTATION); + if (status == null) { - return null; + return false; + } + switch (status) + { + case YES: + return true; + case NO: + return false; + default: + return false; } } - public Double getTrustedCutoff1() + public boolean maskValueIsActive() { - try - { - return pfamData.get("TC")[0]; - } catch (NullPointerException e) + String status; + status = fileProperties.get(MASKED_VALUE); + if (status == null) { - return null; + return false; + } + switch (status) + { + case YES: + return true; + case NO: + return false; + default: + return false; } } - public Double getTrustedCutoff2() + public boolean consensusResidueIsActive() { - try - { - return pfamData.get("TC")[1]; - } catch (NullPointerException e) + String status; + status = fileProperties.get(CONSENSUS_RESIDUE); + if (status == null) { - return null; + return false; + } + switch (status) + { + case YES: + return true; + case NO: + return false; + default: + return false; } } - public Double getNoiseCutoff1() + public boolean consensusStructureIsActive() { - try - { - return pfamData.get("NC")[0]; - } catch (NullPointerException e) + String status; + status = fileProperties.get(CONSENSUS_STRUCTURE); + if (status == null) { - return null; + return false; + } + switch (status) + { + case YES: + return true; + case NO: + return false; + default: + return false; } } - public Double getNoiseCutoff2() + public boolean mapIsActive() { - try - { - return pfamData.get("NC")[1]; - } catch (NullPointerException e) + String status; + status = fileProperties.get(MAP); + if (status == null) { - return null; + return false; + } + switch (status) + { + case YES: + return true; + case NO: + return false; + default: + return false; } } - public String getAlignmentModeConfiguration(String key) + public void setAlignmentColumn(int nodeIndex, int column) { - return eValueStatistics.get(key).alignmentModeConfiguration; + nodes.get(nodeIndex).setAlignmentColumn(column); } - public Double getSlopeOfDistribution(String scoreDistribution) + public void setReferenceAnnotation(int nodeIndex, char value) { - try - { - return eValueStatistics.get(scoreDistribution).slopeOfDistribution; - } catch (NullPointerException e) - { - return null; - } + nodes.get(nodeIndex).setReferenceAnnotation(value); } - public Double getLocationOfDistribution(String scoreDistribution) + public void setConsensusResidue(int nodeIndex, char value) { - try - { - return eValueStatistics.get(scoreDistribution).locationOfDistribution; - } catch (NullPointerException e) - { - return null; - } + nodes.get(nodeIndex).setConsensusResidue(value); } - public void addStatistic(String name, EValueStatistic stats) + public void setConsensusStructure(int nodeIndex, char value) { - eValueStatistics.put(name, stats); + nodes.get(nodeIndex).setConsensusStructure(value); } - /** - * public double getBeginStateTransitions(Character symbol) { return - * beginStateTransitions.get(symbol); } - **/ + public void setMaskValue(int nodeIndex, char value) + { + nodes.get(nodeIndex).setMaskValue(value); + } - public void put(String key, String value) + public String getGatheringThreshold() { - fileProperties.put(key, value); + String value; + value = fileProperties.get("GA"); + return value; } - public Map getEValueStatistics() + public String getNoiseCutoff() { - return eValueStatistics; + String value; + value = fileProperties.get("NC"); + return value; } - public void setEValueStatistics( - Map eValueStatisticsM) + public String getTrustedCutoff() { - this.eValueStatistics = eValueStatisticsM; + String value; + value = fileProperties.get("TC"); + return value; } - public List getAlignmentColumnIndexes() + public String getViterbi() { - return alignmentColumnIndexes; + String value; + value = fileProperties.get(VITERBI); + return value; } - public void setAlignmentColumnIndexes( - List alignmentColumnIndexesL) + public String getMSV() { - this.alignmentColumnIndexes = alignmentColumnIndexesL; + String value; + value = fileProperties.get(MSV); + return value; } - public List> getAnnotations() + public String getForward() { - return annotations; + String value; + value = fileProperties.get(FORWARD); + return value; } - public void setAnnotations(List> annotationsL) + public void setMAPStatus(boolean status) { - this.annotations = annotationsL; + if (status == true) + { + fileProperties.put(MAP, YES); + } + else + { + fileProperties.put(MAP, NO); + } } - public Map getFileProperties() + public void setReferenceAnnotationStatus(boolean status) { - return fileProperties; + if (status == true) + { + fileProperties.put(REFERENCE_ANNOTATION, YES); + } + else + { + fileProperties.put(REFERENCE_ANNOTATION, NO); + } + } + + public void setMaskedValueStatus(boolean status) + { + if (status == true) + { + fileProperties.put(MASKED_VALUE, YES); + } + else + { + fileProperties.put(MASKED_VALUE, NO); + } + } + + public void setConsensusResidueStatus(boolean status) + { + if (status == true) + { + fileProperties.put(CONSENSUS_RESIDUE, YES); + } + else + { + fileProperties.put(CONSENSUS_RESIDUE, NO); + } } - public void setFileProperties(Map fileProperties) + public void setConsensusStructureStatus(boolean status) { - this.fileProperties = fileProperties; + if (status == true) + { + fileProperties.put(CONSENSUS_STRUCTURE, YES); + } + else + { + fileProperties.put(CONSENSUS_STRUCTURE, NO); + } } } diff --git a/src/jalview/io/HMMFile.java b/src/jalview/io/HMMFile.java index 764db7f..7063fe9 100644 --- a/src/jalview/io/HMMFile.java +++ b/src/jalview/io/HMMFile.java @@ -1,6 +1,6 @@ package jalview.io; -import jalview.datamodel.EValueStatistic; +import jalview.datamodel.HMMNode; import jalview.datamodel.HiddenMarkovModel; import java.io.BufferedReader; @@ -8,14 +8,12 @@ import java.io.File; import java.io.FileNotFoundException; import java.io.FileReader; import java.io.IOException; -import java.io.PrintWriter; import java.io.UnsupportedEncodingException; import java.util.ArrayList; -import java.util.HashMap; import java.util.List; -import java.util.Map; import java.util.Scanner; + /** * reads in and writes out a HMMER standard file * @@ -28,18 +26,28 @@ public class HMMFile extends FileParse // HMM to store file data HiddenMarkovModel hmm = new HiddenMarkovModel(); + // Source of file String dataObject; - // number of symbols - int numberOfSymbols; - // number of possible transitions - final int NUMBER_OF_TRANSITIONS = 7; + final static int NUMBER_OF_TRANSITIONS = 7; + + final static String NEW_LINE = "\n"; + // file header String fileHeader; + int numberOfSymbols; + + final static String SPACE = " "; + + final static String COMPO = "COMPO"; + + final static String EMPTY = ""; + + /** * Constructor which contains model to be filled or exported * @@ -51,6 +59,16 @@ public class HMMFile extends FileParse dataObject = dataSource; } + public HiddenMarkovModel getHmm() + { + return hmm; + } + + public void setHmm(HiddenMarkovModel model) + { + this.hmm = model; + } + /** * reads data from HMM file * @@ -66,6 +84,16 @@ public class HMMFile extends FileParse } + public String getDataObject() + { + return dataObject; + } + + public void setDataObject(String value) + { + this.dataObject = value; + } + /** * imports file properties from hmm file * @@ -88,30 +116,17 @@ public class HMMFile extends FileParse // properties) { readingFile = false; - hmm.fillSymbols(line); - numberOfSymbols = hmm.getSymbols().size(); + hmm.fillSymbols(parser); + numberOfSymbols = hmm.getNumberOfSymbols(); } - else if ("STATS".equals(next)) // reads e-value stats into separate - // field - // on HMM object + else if ("STATS".equals(next)) { - readStats(parser); - } - else if ("GA".equals(next) || "TC".equals(next) - || "NC".equals(next)) // reads - // pfam - // data - // into - // separate - // field - // on - // HMM - // object - { - Double[] data = new Double[2]; - data[0] = parser.nextDouble(); - data[1] = parser.nextDouble(); - hmm.setPFAMData(next, data); + parser.next(); + String key; + String value; + key = parser.next(); + value = parser.next() + SPACE + SPACE + parser.next(); + hmm.addFileProperty(key, value); } else { @@ -119,9 +134,9 @@ public class HMMFile extends FileParse String value = parser.next(); while (parser.hasNext()) { - value = value + " " + parser.next(); + value = value + SPACE + parser.next(); } - hmm.put(key, value); + hmm.addFileProperty(key, value); } parser.close(); } @@ -135,31 +150,6 @@ public class HMMFile extends FileParse } /** - * creates a new EValueStatistic object to store stats - * - * @param parser - * Scanner which contains data for STATS line - * - */ - public void readStats(Scanner parser) - { - if (parser.hasNext()) - { - String name; - double slope; - double location; - String configuration; - - configuration = parser.next(); - name = parser.next(); - slope = parser.nextDouble(); - location = parser.nextDouble(); - hmm.addStatistic(name, - new EValueStatistic(configuration, slope, location)); - } - } - - /** * parses the model data from the hmm file * * @param input @@ -168,63 +158,46 @@ public class HMMFile extends FileParse */ public void parseModel(BufferedReader input) throws IOException { - - String line = input.readLine(); - Scanner scanner = new Scanner(line); - String next = scanner.next(); - if ("COMPO".equals(next)) // checks to and stores COMPO data if present + for (int i = 0; i < hmm.getLength() + 1; i++) { - for (int i = 0; i < numberOfSymbols; i++) - + hmm.getNodes().add(new HMMNode()); + String next; + String line; + line = input.readLine(); + Scanner matchReader = new Scanner(line); + next = matchReader.next(); + if (next.equals(COMPO) || i > 0) { - hmm.getAverageMatchStateEmissionProbabilities() - .add(scanner.nextDouble()); + // stores match emission line in list + List matches = new ArrayList<>(); + matches = fillList(matchReader, numberOfSymbols); + hmm.getNodes().get(i).setMatchEmissions(matches); + if (i > 0) + { + parseAnnotations(matchReader, i); + } } - } - scanner.close(); - parseBeginNodeData(input); - for (int i = 0; i < hmm.getLength(); i++) - { - Scanner matchReader = new Scanner(input.readLine()); - matchReader.nextInt(); // skips number indicating position in HMM - hmm.getMatchEmissions() - .add(fillList(matchReader, numberOfSymbols)); - parseAnnotations(matchReader, i); matchReader.close(); - Scanner insertReader = new Scanner(input.readLine()); - hmm.getInsertEmissions().add(fillList(insertReader, numberOfSymbols)); + // stores insert emission line in list + line = input.readLine(); + Scanner insertReader = new Scanner(line); + List inserts = new ArrayList<>(); + inserts = fillList(insertReader, numberOfSymbols); + hmm.getNodes().get(i).setInsertEmissions(inserts); insertReader.close(); - Scanner transitionReader = new Scanner(input.readLine()); - hmm.getStateTransitions() - .add(fillList(transitionReader, NUMBER_OF_TRANSITIONS)); + + // stores state transition line in list + line = input.readLine(); + Scanner transitionReader = new Scanner(line); + List transitions = new ArrayList<>(); + transitions = fillList(transitionReader, NUMBER_OF_TRANSITIONS); + hmm.getNodes().get(i).setStateTransitions(transitions); transitionReader.close(); } } /** - * parses the begin state transitions and insert 0 emissions - * - * @param input - * buffered reader used to read model - * @param currentline - * string contain all data on current line of buffered reader - * @throws IOException - */ - - public void parseBeginNodeData(BufferedReader input) - throws IOException - { - Scanner scanner = new Scanner(input.readLine()); - hmm.setInsertZeroEmissions(fillList(scanner, hmm.getSymbols().size())); - scanner.close(); - Scanner scannerTransitions = new Scanner(input.readLine()); - hmm.setBeginStateTransitions( - fillList(scannerTransitions, NUMBER_OF_TRANSITIONS)); - scannerTransitions.close(); - } - - /** * parses annotations on match emission line * * @param scanner @@ -234,20 +207,35 @@ public class HMMFile extends FileParse */ public void parseAnnotations(Scanner scanner, int index) { - if (hmm.getMapAnnotationFlag()) + if (hmm.mapIsActive()) { - hmm.getAlignmentColumnIndexes().add(scanner.nextInt()); + int column; + column = scanner.nextInt(); + hmm.getNodes().get(index).setAlignmentColumn(column); } else { scanner.next(); } - hmm.getAnnotations().add(new HashMap()); - hmm.getAnnotations().get(index).put("CONS", scanner.next().charAt(0)); - hmm.getAnnotations().get(index).put("RF", scanner.next().charAt(0)); - hmm.getAnnotations().get(index).put("MM", scanner.next().charAt(0)); - hmm.getAnnotations().get(index).put("CS", scanner.next().charAt(0)); + + char consensusR; + consensusR = charValue(scanner.next()); + hmm.getNodes().get(index).setConsensusResidue(consensusR); + + char reference; + reference = charValue(scanner.next()); + hmm.getNodes().get(index).setReferenceAnnotation(reference); + + + char value; + value = charValue(scanner.next()); + hmm.getNodes().get(index).setMaskValue(value); + + char consensusS; + consensusS = charValue(scanner.next()); + hmm.getNodes().get(index).setConsensusStructure(consensusS); } + /** * * @param transition @@ -298,17 +286,17 @@ public class HMMFile extends FileParse int numberOfElements) { List list = new ArrayList<>(); - String next; for (int i = 0; i < numberOfElements; i++) { - next = input.next(); + + String next = input.next(); if (next.contains("*")) // state transitions to or from delete states // occasionally have values of -infinity. These // values are represented by an * in the .hmm // file, and by a null value in the // HiddenMarkovModel class { - list.add(null); + list.add(Double.NEGATIVE_INFINITY); } else { @@ -318,111 +306,268 @@ public class HMMFile extends FileParse return list; } + /** - * writes a HiddenMarkovModel to a file. Needs mode work to make file more - * readable for humans (align columns) + * writes a HiddenMarkovModel to a file * * @param exportLocation * Filename, URL or Pasted String to write to * @throws FileNotFoundException * @throws UnsupportedEncodingException - */ - public void exportFile(String exportLocation) - throws FileNotFoundException, UnsupportedEncodingException + * + **/ + + public void exportFile(String exportLocation) throws IOException { - PrintWriter writer = new PrintWriter(exportLocation, "UTF-8"); - writer.println(fileHeader); - for (Map.Entry entry : hmm.getFileProperties() - .entrySet()) + StringBuilder file = new StringBuilder(); + appendFileProperties(file); + appendModel(file); + + file.append("//"); + + } + + public String addData(int initialColumnSeparation, + int columnSeparation, List data) + { + String line = EMPTY; + int index = 0; + for (String value : data) { - writer.println(entry.getKey() + " " + entry.getValue()); + if (index == 0) + { + line += String.format("%" + initialColumnSeparation + "s", value); + } + else + { + line += String.format("%" + columnSeparation + "s", value); + } + index++; } - writer.println( - "HMM" + " " + convertCharListToString(hmm.getSymbols())); - writer.println("m->m m->i m->d i->m i->i d->m d->d"); - if (false == hmm.getAverageMatchStateEmissionProbabilities().isEmpty()) + return line; + } + + public static List charListToStringList(List list) + { + List strList = new ArrayList<>(); + for (char value : list) { - writer.println("COMPO" + " " + convertDoubleListToString( - hmm.getAverageMatchStateEmissionProbabilities())); + String strValue = Character.toString(value); + strList.add(strValue); } - writer.println(convertDoubleListToString(hmm.getInsertZeroEmissions())); - writer.println( - convertDoubleListToString(hmm.getBeginStateTransitions())); + return strList; + } - for (Integer i = 0; i < hmm.getLength(); i++) + public static List doubleListToStringList(List list, + int noOfDecimals) + { + List strList = new ArrayList<>(); + for (double value : list) { - String matchEmissionLine = i.toString() + " "; // adds node index - matchEmissionLine += convertDoubleListToString( - hmm.getMatchEmissions().get(i)); // adds match emissions - matchEmissionLine += " " - + hmm.getAlignmentColumnIndexes().get(i).toString(); // adds MAP - // annotation - matchEmissionLine += " " - + hmm.getAnnotations().get(i).get("CONS").toString(); // adds CONS - // annotation - matchEmissionLine += " " - + hmm.getAnnotations().get(i).get("RF").toString(); // adds RF - // annotation - matchEmissionLine += " " - + hmm.getAnnotations().get(i).get("MM").toString(); // adds MM - // annotation - matchEmissionLine += " " - + hmm.getAnnotations().get(i).get("CS").toString(); // adds CS - // annotation - writer.println(matchEmissionLine); - - writer.println( - convertDoubleListToString(hmm.getInsertEmissions().get(i))); - writer.println( - convertDoubleListToString(hmm.getStateTransitions().get(i))); - } - writer.println("//"); + String strValue; + if (value == Double.NEGATIVE_INFINITY) + { + strValue = "*"; + } + else + { + strValue = String.format("%.5f", value); + } - writer.close(); + strList.add(strValue); + } + return strList; } - /** - * converts an list of characters to a string with items separated by spaces - * - * @param list - * character list to be converted - * @return string value of char list - */ - public String convertCharListToString(List list) + public static List stringArrayToStringList(String[] array) { - String string = ""; - for (Character item : list) + List list = new ArrayList<>(); + for (String value : array) { - string = string + item.toString() + " "; + list.add(value); } - return string; + return list; } - - /** - * converts an list of doubles to a string with items separated by spaces - * - * @param list - * double list to be converted - * @return string value of double list - */ - public String convertDoubleListToString(List list) + + void appendModel(StringBuilder file) { - String string = ""; - for (Double item : list) + String symbolLine = "HMM"; + List charSymbols = hmm.getSymbols(); + List strSymbols; + strSymbols = charListToStringList(charSymbols); + symbolLine += addData(11, 9, strSymbols); + file.append(symbolLine + NEW_LINE); + + String transitionTypeLine = ""; + List transitionTypes; + transitionTypes = stringArrayToStringList(hmm.getTransitionTypes()); + transitionTypeLine += addData(16, 9, transitionTypes); + file.append(transitionTypeLine + NEW_LINE); + + int length = hmm.getLength(); + + for (int node = 0; node <= length; node++) { - if (item != null) + String matchLine; + if (node == 0) { - string = string + item.toString() + " "; + matchLine = String.format("%7s", "COMPO"); } else { - string = string + "*" + " "; + matchLine = String.format("%7s", node); + } + + List strMatches; + List doubleMatches; + doubleMatches = hmm.getNode(node).getMatchEmissions(); + strMatches = doubleListToStringList(doubleMatches, 5); + matchLine += addData(10, 9, strMatches); + + + if (node != 0) + { + matchLine += SPACE + hmm.getNodeAlignmentColumn(node); + matchLine += SPACE + hmm.getConsensusResidue(node); + matchLine += SPACE + hmm.getReferenceAnnotation(node); + matchLine += SPACE + hmm.getMaskedValue(node); + matchLine += SPACE + hmm.getConsensusStructure(node); + } + file.append(matchLine + NEW_LINE); + + String insertLine = EMPTY; + List strInserts; + List doubleInserts; + doubleInserts = hmm.getNode(node).getInsertEmissions(); + strInserts = doubleListToStringList(doubleInserts, 5); + insertLine += addData(17, 9, strInserts); + + file.append(insertLine + NEW_LINE); + + String transitionLine = EMPTY; + List strTransitions; + List doubleTransitions; + doubleTransitions = hmm.getNode(node).getStateTransitions(); + strTransitions = doubleListToStringList(doubleTransitions, 5); + transitionLine += addData(17, 9, strTransitions); + + file.append(transitionLine + NEW_LINE); } + } + + void appendFileProperties(StringBuilder file) + { + String line; + + file.append(fileHeader + NEW_LINE); + + line = String.format("%-5s %1s", "NAME", hmm.getName()); + file.append((line + NEW_LINE)); - return string; + if (hmm.getAccessionNumber() != null) + { + line = String.format("%-5s %1s", "ACC", hmm.getAccessionNumber()); + file.append((line + NEW_LINE)); + } + + if (hmm.getDescription() != null) + { + line = String.format("%-5s %1s", "DESC", hmm.getDescription()); + file.append((line + NEW_LINE)); + } + line = String.format("%-5s %1s", "LENG", hmm.getLength()); + file.append((line + NEW_LINE)); + + if (hmm.getMaxInstanceLength() != null) + { + line = String.format("%-5s %1s", "MAXL", hmm.getMaxInstanceLength()); + file.append((line + NEW_LINE)); + } + line = String.format("%-5s %1s", "ALPH", hmm.getAlphabetType()); + file.append((line + NEW_LINE)); + + line = String.format("%-5s %1s", "RF", + hmm.getFileProperties().get("RF")); + file.append((line + NEW_LINE)); + + line = String.format("%-5s %1s", "MM", + hmm.getFileProperties().get("MM")); + file.append((line + NEW_LINE)); + + line = String.format("%-5s %1s", "CONS", + hmm.getFileProperties().get("CONS")); + file.append((line + NEW_LINE)); + + line = String.format("%-5s %1s", "CS", + hmm.getFileProperties().get("CS")); + file.append((line + NEW_LINE)); + + line = String.format("%-5s %1s", "MAP", + hmm.getFileProperties().get("MAP")); + file.append((line + NEW_LINE)); + + if (hmm.getDate() != null) + { + line = String.format("%-5s %1s", "DATE", hmm.getDate()); + file.append((line + NEW_LINE)); + } + if (hmm.getNumberOfSequences() != null) + { + line = String.format("%-5s %1s", "NSEQ", hmm.getNumberOfSequences()); + file.append((line + NEW_LINE)); + } + if (hmm.getEffectiveNumberOfSequences() != null) + { + line = String.format("%-5s %1s", "EFFN", + hmm.getEffectiveNumberOfSequences()); + file.append((line + NEW_LINE)); + } + if (hmm.getCheckSum() != null) + { + line = String.format("%-5s %1s", "CKSUM", hmm.getCheckSum()); + file.append((line + NEW_LINE)); + } + if (hmm.getGatheringThreshold() != null) + { + line = String.format("%-5s %1s", "GA", hmm.getGatheringThreshold()); + file.append((line + NEW_LINE)); + } + + if (hmm.getTrustedCutoff() != null) + { + line = String.format("%-5s %1s", "TC", hmm.getTrustedCutoff()); + file.append((line + NEW_LINE)); + } + if (hmm.getNoiseCutoff() != null) + { + line = String.format("%-5s %1s", "NC", hmm.getNoiseCutoff()); + file.append((line + NEW_LINE)); + } + if (hmm.getMSV() != null) + { + line = String.format("%-19s %18s", "STATS LOCAL MSV", hmm.getMSV()); + file.append((line + NEW_LINE)); + + line = String.format("%-19s %18s", "STATS LOCAL VITERBI", + hmm.getViterbi()); + file.append((line + NEW_LINE)); + + line = String.format("%-19s %18s", "STATS LOCAL FORWARD", + hmm.getForward()); + file.append((line + NEW_LINE)); + } + } + + + + public static char charValue(String string) + { + char character; + character = string.charAt(0); + return character; } } diff --git a/test/jalview/datamodel/HiddenMarkovModelTest.java b/test/jalview/datamodel/HiddenMarkovModelTest.java deleted file mode 100644 index ae1bf55..0000000 --- a/test/jalview/datamodel/HiddenMarkovModelTest.java +++ /dev/null @@ -1,16 +0,0 @@ -package jalview.datamodel; - -import org.testng.annotations.Test; - -public class HiddenMarkovModelTest -{ - HiddenMarkovModel hmm = new HiddenMarkovModel(); - - @Test - public void testGetGatheringThresholdGA1() - { - hmm.put("GA1", "10.1"); - // assertEquals(hmm.getGatheringThresholdGA1(), 10.1); - } - -} \ No newline at end of file diff --git a/test/jalview/io/HMMFileTest.java b/test/jalview/io/HMMFileTest.java index 5fef79b..9beaab0 100644 --- a/test/jalview/io/HMMFileTest.java +++ b/test/jalview/io/HMMFileTest.java @@ -3,6 +3,9 @@ package jalview.io; import static org.testng.Assert.assertEquals; import static org.testng.Assert.assertNull; +import jalview.datamodel.HMMNode; +import jalview.datamodel.HiddenMarkovModel; + import java.io.BufferedReader; import java.io.File; import java.io.FileReader; @@ -15,418 +18,233 @@ import org.testng.annotations.Test; public class HMMFileTest { - HMMFile testFile = new HMMFile("H:/HMMERFile.txt"); - - File file = new File("H:/HMMERFile.txt"); - - HMMFile testFile2 = new HMMFile("H:/EmptyFile.txt"); - - File file2 = new File("H:/EmptyFile.txt"); + HMMFile fn3 = new HMMFile("H:/fn3.hmm"); - HMMFile testFile3 = new HMMFile("H:/HMMERFile2.txt"); + HMMFile emptyFile = new HMMFile("H:/EmptyFile.hmm"); - File file3 = new File("H:/HMMERFile2.txt"); + HMMFile pKinase = new HMMFile("H:/Pkinase.hmm"); - HMMFile testFile4 = new HMMFile("H:/HMMERFile.txt"); - - File file4 = new File("H:/HMMERFile.txt"); + HMMFile made1 = new HMMFile("H:/MADE1.hmm"); @Test public void testParse() throws IOException { - HMMFile integrationTestFile = new HMMFile("H:/HMMTutorialExample.hmm"); - integrationTestFile.parse(); - - // file properties - assertEquals(integrationTestFile.hmm.getName(), "MADE1"); - assertEquals(integrationTestFile.hmm.getAccessionNumber(), - "DF0000629.2"); - assertEquals(integrationTestFile.hmm.getDescription(), - "MADE1 (MAriner Derived Element 1), a TcMar-Mariner DNA transposon"); - assertEquals(integrationTestFile.hmm.getLength().intValue(), 80); - assertEquals(integrationTestFile.hmm.getMaxInstanceLength().intValue(), - 426); - assertEquals(integrationTestFile.hmm.getAlphabetType(), "DNA"); - assertEquals(integrationTestFile.hmm.getReferenceAnnotationFlag(), - true); - assertEquals(integrationTestFile.hmm.getModelMaskedFlag(), false); - assertEquals( - integrationTestFile.hmm.getConsensusResidueAnnotationFlag(), + + pKinase.parse(); + + assertEquals(pKinase.hmm.getName(), "Pkinase"); + assertEquals(pKinase.hmm.getAccessionNumber(), "PF00069.17"); + assertEquals(pKinase.hmm.getDescription(), "Protein kinase domain"); + assertEquals(pKinase.hmm.getLength().intValue(), 260); + assertNull(pKinase.hmm.getMaxInstanceLength()); + assertEquals(pKinase.hmm.getAlphabetType(), "amino"); + assertEquals(pKinase.hmm.referenceAnnotationIsActive(), false); + assertEquals(pKinase.hmm.maskValueIsActive(), false); + assertEquals(pKinase.hmm.consensusResidueIsActive(), true); + assertEquals(pKinase.hmm.consensusStructureIsActive(), true); - assertEquals( - integrationTestFile.hmm.getConsensusStructureAnnotationFlag(), - false); - assertEquals(integrationTestFile.hmm.getMapAnnotationFlag(), true); - assertEquals(integrationTestFile.hmm.getDate(), - "Tue Feb 19 20:33:41 2013"); - assertNull(integrationTestFile.hmm.getCommandLineLog()); - assertEquals(integrationTestFile.hmm.getSequenceNumber().intValue(), - 1997); - assertEquals(integrationTestFile.hmm.getEffectiveSequenceNumber(), - 3.911818, 4d); - assertEquals(integrationTestFile.hmm.getCheckSum().longValue(), - 3015610723l); - assertNull(integrationTestFile.hmm.getGatheringThreshold1()); - assertNull(integrationTestFile.hmm.getGatheringThreshold2()); - assertNull(integrationTestFile.hmm.getTrustedCutoff1()); - assertNull(integrationTestFile.hmm.getTrustedCutoff2()); - assertNull(integrationTestFile.hmm.getNoiseCutoff1()); - assertNull(integrationTestFile.hmm.getNoiseCutoff2()); - assertEquals(integrationTestFile.hmm.getSlopeOfDistribution("MSV"), - -8.5786, 4d); - assertEquals(integrationTestFile.hmm.getSlopeOfDistribution("VITERBI"), - -9.3632, 4d); - assertEquals(integrationTestFile.hmm.getSlopeOfDistribution("FORWARD"), - -3.4823, 4d); - assertEquals(integrationTestFile.hmm.getLocationOfDistribution("MSV"), - 0.71858, 4d); - assertEquals( - integrationTestFile.hmm.getLocationOfDistribution("VITERBI"), - 0.71858, 4d); - assertEquals( - integrationTestFile.hmm.getLocationOfDistribution("FORWARD"), - 0.71858, 4d); + assertEquals(pKinase.hmm.mapIsActive(), true); + assertEquals(pKinase.hmm.getDate(), "Thu Jun 16 11:44:06 2011"); + assertNull(pKinase.hmm.getCommandLineLog()); + assertEquals(pKinase.hmm.getNumberOfSequences().intValue(), 54); + assertEquals(pKinase.hmm.getEffectiveNumberOfSequences(), 3.358521, 4d); + assertEquals(pKinase.hmm.getCheckSum().longValue(), 3106786190l); + assertEquals(pKinase.hmm.getGatheringThreshold(), "70.30 70.30"); + assertEquals(pKinase.hmm.getTrustedCutoff(), "70.30 70.30"); + assertEquals(pKinase.hmm.getNoiseCutoff(), "70.20 70.20"); List symbols = new ArrayList<>(); symbols.add('A'); symbols.add('C'); + symbols.add('D'); + symbols.add('E'); + symbols.add('F'); symbols.add('G'); + symbols.add('H'); + symbols.add('I'); + symbols.add('K'); + symbols.add('L'); + symbols.add('M'); + symbols.add('N'); + symbols.add('P'); + symbols.add('Q'); + symbols.add('R'); + symbols.add('S'); symbols.add('T'); - - assertEquals(integrationTestFile.hmm.getSymbols(), symbols); - - List averages = new ArrayList<>(); - averages.add(1.24257); - averages.add(1.59430); - averages.add(1.62906); - averages.add(1.16413); - - assertEquals(integrationTestFile.hmm - .getAverageMatchStateEmissionProbabilities(), averages); - - assertEquals(integrationTestFile.hmm.getInsertZeroEmissions().get(2), - 1.38629); - assertEquals(integrationTestFile.hmm.getInsertZeroEmissions().get(3), - 1.38629); - - assertEquals(integrationTestFile.hmm.getBeginStateTransitions().get(1), - 3.94183); - assertEquals(integrationTestFile.hmm.getBeginStateTransitions().get(4), - 0.26236); - - assertEquals(integrationTestFile.hmm.getMatchEmissions().get(1).get(1), - 2.37873); - assertEquals(integrationTestFile.hmm.getMatchEmissions().get(8).get(0), - 2.16916); - assertEquals(integrationTestFile.hmm.getMatchEmissions().get(12).get(2), - 2.32214); - assertEquals(integrationTestFile.hmm.getMatchEmissions().get(43).get(3), - 2.60783); - assertEquals(integrationTestFile.hmm.getMatchEmissions().get(54).get(2), - 2.46442); - assertEquals(integrationTestFile.hmm.getMatchEmissions().get(23).get(2), - 2.50691); - assertEquals(integrationTestFile.hmm.getMatchEmissions().get(56).get(1), - 2.32720); - assertEquals(integrationTestFile.hmm.getMatchEmissions().get(65).get(0), - 2.79349); - assertEquals(integrationTestFile.hmm.getMatchEmissions().get(21).get(0), - 2.54484); - assertEquals(integrationTestFile.hmm.getMatchEmissions().get(79).get(3), - 2.88183); - assertEquals(integrationTestFile.hmm.getMatchEmissions().get(76).get(3), - 1.84373); - - assertEquals(integrationTestFile.hmm.getInsertEmissions().get(23).get(0), - 1.35803); - assertEquals(integrationTestFile.hmm.getInsertEmissions().get(54).get(3), - 1.46331); - assertEquals(integrationTestFile.hmm.getInsertEmissions().get(65).get(3), - 1.39101); - assertEquals(integrationTestFile.hmm.getInsertEmissions().get(57).get(2), - 1.38112); - assertEquals(integrationTestFile.hmm.getInsertEmissions().get(42).get(1), - 1.58747); - assertEquals(integrationTestFile.hmm.getInsertEmissions().get(12).get(3), - 1.38740); - assertEquals(integrationTestFile.hmm.getInsertEmissions().get(6).get(1), - 1.38524); - assertEquals(integrationTestFile.hmm.getInsertEmissions().get(59).get(0), - 1.03649); - assertEquals(integrationTestFile.hmm.getInsertEmissions().get(78).get(0), - 1.38629); - assertEquals(integrationTestFile.hmm.getInsertEmissions().get(17).get(2), - 1.39937); - assertEquals(integrationTestFile.hmm.getInsertEmissions().get(0).get(2), - 1.38629); - - assertEquals( - integrationTestFile.hmm.getStateTransitions().get(13).get(1), - 4.02482); - assertEquals( - integrationTestFile.hmm.getStateTransitions().get(64).get(2), - 4.03073); - assertEquals( - integrationTestFile.hmm.getStateTransitions().get(45).get(6), - 0.42814); - assertEquals( - integrationTestFile.hmm.getStateTransitions().get(71).get(4), - 0.28542); - assertEquals( - integrationTestFile.hmm.getStateTransitions().get(32).get(5), - 1.18729); - assertEquals( - integrationTestFile.hmm.getStateTransitions().get(9).get(0), - 0.03536); - assertEquals( - integrationTestFile.hmm.getStateTransitions().get(0).get(3), - 1.46634); - assertEquals( - integrationTestFile.hmm.getStateTransitions().get(31).get(6), - 0.44749); - assertNull( - integrationTestFile.hmm.getStateTransitions().get(79).get(2)); - assertEquals( - integrationTestFile.hmm.getStateTransitions().get(3).get(1), - 4.05203); - assertEquals( - integrationTestFile.hmm.getStateTransitions().get(16).get(4), - 0.26771); - - assertEquals(integrationTestFile.hmm.getAlignmentColumnIndexes().get(0) - .intValue(), 1); - assertEquals(integrationTestFile.hmm.getAlignmentColumnIndexes().get(9) - .intValue(), 18); - assertEquals(integrationTestFile.hmm.getAlignmentColumnIndexes().get(12) - .intValue(), 28); - assertEquals(integrationTestFile.hmm.getAlignmentColumnIndexes().get(56) - .intValue(), 999); - assertEquals(integrationTestFile.hmm.getAlignmentColumnIndexes().get(79) - .intValue(), 1112); - - assertEquals(integrationTestFile.hmm.getAnnotations().get(0).get("RF") - .charValue(), 'x'); - assertEquals(integrationTestFile.hmm.getAnnotations().get(3).get("CS") - .charValue(), '-'); - assertEquals(integrationTestFile.hmm.getAnnotations().get(65) - .get("CONS").charValue(), 't'); - assertEquals(integrationTestFile.hmm.getAnnotations().get(23).get("MM") - .charValue(), '-'); - assertEquals(integrationTestFile.hmm.getAnnotations().get(56).get("MM") - .charValue(), '-'); - assertEquals(integrationTestFile.hmm.getAnnotations().get(76).get("RF") - .charValue(), 'x'); - assertEquals(integrationTestFile.hmm.getAnnotations().get(79) - .get("CONS").charValue(), 'a'); + symbols.add('V'); + symbols.add('W'); + symbols.add('Y'); + + assertEquals(pKinase.hmm.getSymbols(), symbols); + + assertEquals(pKinase.hmm.getMatchEmission(0, 19), 3.43274); + assertEquals(pKinase.hmm.getMatchEmission(12, 12), 4.33979); + assertEquals(pKinase.hmm.getMatchEmission(23, 7), 3.65600); + assertEquals(pKinase.hmm.getMatchEmission(54, 1), 4.76187); + assertEquals(pKinase.hmm.getMatchEmission(79, 0), 2.81579); + assertEquals(pKinase.hmm.getMatchEmission(100, 0), 1.86496); + assertEquals(pKinase.hmm.getMatchEmission(112, 14), 2.77179); + assertEquals(pKinase.hmm.getMatchEmission(143, 17), 5.10478); + assertEquals(pKinase.hmm.getMatchEmission(156, 4), 4.69372); + assertEquals(pKinase.hmm.getMatchEmission(178, 3), 2.52594); + assertEquals(pKinase.hmm.getMatchEmission(210, 2), 4.23598); + assertEquals(pKinase.hmm.getMatchEmission(260, 19), 3.81122); + + assertEquals(pKinase.hmm.getInsertEmission(2, 1), 4.42225); + assertEquals(pKinase.hmm.getInsertEmission(15, 6), 3.72501); + assertEquals(pKinase.hmm.getInsertEmission(22, 9), 2.69355); + assertEquals(pKinase.hmm.getInsertEmission(57, 2), 2.77519); + assertEquals(pKinase.hmm.getInsertEmission(62, 14), 2.89801); + assertEquals(pKinase.hmm.getInsertEmission(95, 17), 2.98532); + assertEquals(pKinase.hmm.getInsertEmission(105, 4), 3.46354); + assertEquals(pKinase.hmm.getInsertEmission(134, 1), 4.42225); + assertEquals(pKinase.hmm.getInsertEmission(143, 0), 2.68618); + assertEquals(pKinase.hmm.getInsertEmission(152, 16), 2.77519); + assertEquals(pKinase.hmm.getInsertEmission(203, 16), 2.77519); + assertEquals(pKinase.hmm.getInsertEmission(255, 12), 2.73739); + + assertEquals(pKinase.hmm.getStateTransition(0, 6), + Double.NEGATIVE_INFINITY); + assertEquals(pKinase.hmm.getStateTransition(3, 6), 0.95510); + assertEquals(pKinase.hmm.getStateTransition(29, 3), 0.61958); + assertEquals(pKinase.hmm.getStateTransition(46, 4), 0.77255); + assertEquals(pKinase.hmm.getStateTransition(53, 1), 5.01631); + assertEquals(pKinase.hmm.getStateTransition(79, 2), 5.73865); + assertEquals(pKinase.hmm.getStateTransition(101, 2), 5.73865); + assertEquals(pKinase.hmm.getStateTransition(120, 5), 0.48576); + assertEquals(pKinase.hmm.getStateTransition(146, 5), 0.70219); + assertEquals(pKinase.hmm.getStateTransition(169, 3), 1.23224); + assertEquals(pKinase.hmm.getStateTransition(209, 0), 0.01003); + assertEquals(pKinase.hmm.getStateTransition(243, 1), 5.01631); + + assertEquals(pKinase.hmm.getNodeAlignmentColumn(3).intValue(), 3); + assertEquals(pKinase.hmm.getReferenceAnnotation(7), '-'); + assertEquals(pKinase.hmm.getConsensusResidue(23), 't'); + assertEquals(pKinase.hmm.getMaskedValue(30), '-'); + assertEquals(pKinase.hmm.getConsensusStructure(56), 'S'); + + assertEquals(pKinase.hmm.getNodeAlignmentColumn(78).intValue(), 136); + assertEquals(pKinase.hmm.getReferenceAnnotation(93), '-'); + assertEquals(pKinase.hmm.getConsensusResidue(145), 'a'); + assertEquals(pKinase.hmm.getMaskedValue(183), '-'); + assertEquals(pKinase.hmm.getConsensusStructure(240), 'H'); } @Test public void testParseFileProperties() throws IOException { - FileReader fr = new FileReader(file); + FileReader fr = new FileReader(fn3.getDataObject()); BufferedReader br = new BufferedReader(fr); - testFile.parseFileProperties(br); + fn3.parseFileProperties(br); + HiddenMarkovModel testHMM = new HiddenMarkovModel(); + testHMM = fn3.getHmm(); br.close(); fr.close(); - assertEquals(testFile.hmm.getName(), "fn3"); - assertEquals(testFile.hmm.getAccessionNumber(), "PF00041.13"); - assertEquals(testFile.hmm.getDescription(), + + assertEquals(testHMM.getName(), "fn3"); + assertEquals(testHMM.getAccessionNumber(), "PF00041.13"); + assertEquals(testHMM.getDescription(), "Fibronectin type III domain"); - assertEquals(testFile.hmm.getLength().intValue(), 4); - assertNull(testFile.hmm.getMaxInstanceLength()); - assertEquals(testFile.hmm.getAlphabetType(), "amino"); - assertEquals(testFile.hmm.getReferenceAnnotationFlag(), false); - assertEquals(testFile.hmm.getModelMaskedFlag(), false); - assertEquals(testFile.hmm.getConsensusResidueAnnotationFlag(), true); - assertEquals(testFile.hmm.getConsensusStructureAnnotationFlag(), true); - assertEquals(testFile.hmm.getMapAnnotationFlag(), true); - assertEquals(testFile.hmm.getDate(), "Fri Feb 15 06:04:13 2013"); - assertNull(testFile.hmm.getCommandLineLog()); - assertEquals(testFile.hmm.getSequenceNumber().intValue(), 106); - assertEquals(testFile.hmm.getEffectiveSequenceNumber(), 11.415833, 4d); - assertEquals(testFile.hmm.getCheckSum().longValue(), 3564431818l); - assertEquals(testFile.hmm.getGatheringThreshold1(), 8.00, 2d); - assertEquals(testFile.hmm.getGatheringThreshold2(), 7.20, 2d); - assertEquals(testFile.hmm.getTrustedCutoff1(), 8.00, 2d); - assertEquals(testFile.hmm.getTrustedCutoff2(), 7.20, 2d); - assertEquals(testFile.hmm.getNoiseCutoff1(), 7.90, 2d); - assertEquals(testFile.hmm.getNoiseCutoff2(), 7.90, 2d); - assertEquals(testFile.hmm.getSlopeOfDistribution("MSV"), -9.4043, 4d); - assertEquals(testFile.hmm.getSlopeOfDistribution("VITERBI"), -9.7737, - 4d); - assertEquals(testFile.hmm.getSlopeOfDistribution("FORWARD"), -3.8341, - 4d); - assertEquals(testFile.hmm.getLocationOfDistribution("MSV"), 0.71847, - 4d); - assertEquals(testFile.hmm.getLocationOfDistribution("VITERBI"), 0.71847, - 4d); - assertEquals(testFile.hmm.getLocationOfDistribution("FORWARD"), 0.71847, - 4d); - - FileReader fr2 = new FileReader(file2); + assertEquals(testHMM.getLength().intValue(), 86); + assertNull(testHMM.getMaxInstanceLength()); + assertEquals(testHMM.getAlphabetType(), "amino"); + assertEquals(testHMM.referenceAnnotationIsActive(), false); + assertEquals(testHMM.maskValueIsActive(), false); + assertEquals(testHMM.consensusResidueIsActive(), true); + assertEquals(testHMM.consensusStructureIsActive(), true); + assertEquals(testHMM.mapIsActive(), true); + assertEquals(testHMM.getDate(), "Fri Jun 20 08:22:31 2014"); + assertNull(testHMM.getCommandLineLog()); + assertEquals(testHMM.getNumberOfSequences().intValue(), 106); + assertEquals(testHMM.getEffectiveNumberOfSequences(), 11.415833, 4d); + assertEquals(testHMM.getCheckSum().longValue(), 3564431818l); + assertEquals(testHMM.getGatheringThreshold(), "8.00 7.20"); + assertEquals(testHMM.getTrustedCutoff(), "8.00 7.20"); + assertEquals(testHMM.getNoiseCutoff(), "7.90 7.90"); + assertEquals(testHMM.getViterbi(), "-9.7737 0.71847"); + assertEquals(testHMM.getMSV(), "-9.4043 0.71847"); + assertEquals(testHMM.getForward(), "-3.8341 0.71847"); + + FileReader fr2 = new FileReader(emptyFile.getDataObject()); BufferedReader br2 = new BufferedReader(fr2); - testFile2.parseFileProperties(br2); + emptyFile.parseFileProperties(br2); + testHMM = emptyFile.getHmm(); br2.close(); fr2.close(); - assertNull(testFile2.hmm.getName()); - assertNull(testFile2.hmm.getAccessionNumber()); - assertNull(testFile2.hmm.getDescription()); - assertNull(testFile2.hmm.getLength()); - assertNull(testFile2.hmm.getMaxInstanceLength()); - assertNull(testFile2.hmm.getAlphabetType()); - assertEquals(testFile2.hmm.getReferenceAnnotationFlag(), false); - assertEquals(testFile2.hmm.getModelMaskedFlag(), false); - assertEquals(testFile2.hmm.getConsensusResidueAnnotationFlag(), false); - assertEquals(testFile2.hmm.getConsensusStructureAnnotationFlag(), + assertNull(testHMM.getName()); + assertNull(testHMM.getAccessionNumber()); + assertNull(testHMM.getDescription()); + assertNull(testHMM.getLength()); + assertNull(testHMM.getMaxInstanceLength()); + assertNull(testHMM.getAlphabetType()); + assertEquals(testHMM.referenceAnnotationIsActive(), false); + assertEquals(testHMM.maskValueIsActive(), false); + assertEquals(testHMM.consensusResidueIsActive(), false); + assertEquals(testHMM.consensusStructureIsActive(), false); - assertEquals(testFile2.hmm.getMapAnnotationFlag(), false); - assertNull(testFile2.hmm.getDate()); - assertNull(testFile2.hmm.getCommandLineLog()); - assertNull(testFile2.hmm.getSequenceNumber()); - assertNull(testFile2.hmm.getEffectiveSequenceNumber()); - assertNull(testFile2.hmm.getCheckSum()); - assertNull(testFile2.hmm.getGatheringThreshold1()); - assertNull(testFile2.hmm.getGatheringThreshold2()); - assertNull(testFile2.hmm.getTrustedCutoff1()); - assertNull(testFile2.hmm.getTrustedCutoff2()); - assertNull(testFile2.hmm.getNoiseCutoff1()); - assertNull(testFile2.hmm.getNoiseCutoff2()); - assertNull(testFile2.hmm.getSlopeOfDistribution("MSV")); - assertNull(testFile2.hmm.getSlopeOfDistribution("VITERBI")); - assertNull(testFile2.hmm.getSlopeOfDistribution("FORWARD")); - assertNull(testFile2.hmm.getLocationOfDistribution("MSV")); - assertNull(testFile2.hmm.getLocationOfDistribution("VITERBI")); - assertNull(testFile2.hmm.getLocationOfDistribution("FORWARD")); - - FileReader fr3 = new FileReader(file3); + assertEquals(testHMM.mapIsActive(), false); + assertNull(testHMM.getDate()); + assertNull(testHMM.getCommandLineLog()); + assertNull(testHMM.getNumberOfSequences()); + assertNull(testHMM.getEffectiveNumberOfSequences()); + assertNull(testHMM.getCheckSum()); + assertNull(testHMM.getGatheringThreshold()); + assertNull(testHMM.getGatheringThreshold()); + assertNull(testHMM.getTrustedCutoff()); + assertNull(testHMM.getTrustedCutoff()); + assertNull(testHMM.getNoiseCutoff()); + assertNull(testHMM.getNoiseCutoff()); + assertNull(testHMM.getViterbi()); + assertNull(testHMM.getMSV()); + assertNull(testHMM.getForward()); + + FileReader fr3 = new FileReader(made1.getDataObject()); BufferedReader br3 = new BufferedReader(fr3); - testFile3.parseFileProperties(br3); + made1.parseFileProperties(br3); + testHMM = made1.getHmm(); br3.close(); fr3.close(); - assertEquals(testFile3.hmm.getName(), "th4"); - assertEquals(testFile3.hmm.getAccessionNumber(), "PF99041.16"); - assertEquals(testFile3.hmm.getDescription(), - "Fibronectin type I domain"); - assertEquals(testFile3.hmm.getLength().intValue(), 10); - assertEquals(testFile3.hmm.getMaxInstanceLength().intValue(), 6); - assertEquals(testFile3.hmm.getAlphabetType(), "amino"); - assertEquals(testFile3.hmm.getReferenceAnnotationFlag(), true); - assertEquals(testFile3.hmm.getModelMaskedFlag(), false); - assertEquals(testFile3.hmm.getConsensusResidueAnnotationFlag(), false); - assertEquals(testFile3.hmm.getConsensusStructureAnnotationFlag(), - false); - assertEquals(testFile3.hmm.getMapAnnotationFlag(), false); - assertEquals(testFile3.hmm.getDate(), "Tue Jan 01 11:02:59 2000"); - assertEquals(testFile3.hmm.getCommandLineLog(), "this is the log"); - assertEquals(testFile3.hmm.getSequenceNumber().intValue(), 567); - assertEquals(testFile3.hmm.getEffectiveSequenceNumber(), 15.964683, 4d); - assertEquals(testFile3.hmm.getCheckSum().longValue(), 9485949654l); - assertEquals(testFile3.hmm.getGatheringThreshold1(), 6.40, 2d); - assertEquals(testFile3.hmm.getGatheringThreshold2(), 7.20, 2d); - assertEquals(testFile3.hmm.getTrustedCutoff1(), 2.40, 2d); - assertEquals(testFile3.hmm.getTrustedCutoff2(), 7.00, 2d); - assertNull(testFile3.hmm.getNoiseCutoff1()); - assertNull(testFile3.hmm.getNoiseCutoff2()); - assertNull(testFile3.hmm.getSlopeOfDistribution("MSV")); - assertNull(testFile3.hmm.getSlopeOfDistribution("VITERBI")); - assertNull(testFile3.hmm.getSlopeOfDistribution("FORWARD")); - assertNull(testFile3.hmm.getLocationOfDistribution("MSV")); - assertNull(testFile3.hmm.getLocationOfDistribution("VITERBI")); - assertNull(testFile3.hmm.getLocationOfDistribution("FORWARD")); - } - - /** - * @Test public void testParseModel() throws IOException { HiddenMarkovModel - * hmm = new HiddenMarkovModel(); HMMFile testFile = new HMMFile(hmm, - * "H:/HMMERFile.txt"); File file = new File("H:/HMMERFile.txt"); - * FileReader fr = new FileReader(file); BufferedReader br = new - * BufferedReader(fr); testFile.parseFileProperties(br); - * testFile.parseModel(br); br.close(); fr.close(); - * - * } - **/ - - @Test - public void testGetTransitionType() - { - - assertEquals(testFile.getTransitionType("mm").intValue(), 0); - assertEquals(testFile.getTransitionType("mi").intValue(), 1); - assertEquals(testFile.getTransitionType("md").intValue(), 2); - assertEquals(testFile.getTransitionType("im").intValue(), 3); - assertEquals(testFile.getTransitionType("ii").intValue(), 4); - assertEquals(testFile.getTransitionType("dm").intValue(), 5); - assertEquals(testFile.getTransitionType("dd").intValue(), 6); - assertNull(testFile.getTransitionType("df")); - - } - - @Test - public void testReadStats() - { - Scanner scanner = new Scanner("LOCAL MSV 5.6943 6.2313"); - testFile.readStats(scanner); - assertEquals(testFile.hmm.getEValueStatistics().get("MSV") - .getAlignmentModeConfiguration(), "LOCAL"); - assertEquals( - testFile.hmm.getEValueStatistics().get("MSV") - .getSlopeOfDistribution(), - 5.6943, 4d); - assertEquals(testFile.hmm.getEValueStatistics().get("MSV") - .getLocationOfDistribution(), 6.2313, 4d); - scanner.close(); - - Scanner scanner2 = new Scanner("GLOBAL VITERBI 3 -0.234"); - testFile.readStats(scanner2); - assertEquals(testFile.hmm.getEValueStatistics().get("VITERBI") - .getAlignmentModeConfiguration(), "GLOBAL"); - assertEquals(testFile.hmm.getEValueStatistics().get("VITERBI") - .getSlopeOfDistribution(), 3, 2d); - assertEquals(testFile.hmm.getEValueStatistics().get("VITERBI") - .getLocationOfDistribution(), -0.234, 4d); - scanner.close(); + assertEquals(testHMM.getName(), "MADE1"); + assertEquals(testHMM.getAccessionNumber(), "DF0000629.2"); + assertEquals(testHMM.getDescription(), + "MADE1 (MAriner Derived Element 1), a TcMar-Mariner DNA transposon"); + assertEquals(testHMM.getLength().intValue(), 80); + assertEquals(testHMM.getMaxInstanceLength().intValue(), 426); + assertEquals(testHMM.getAlphabetType(), "DNA"); + assertEquals(testHMM.referenceAnnotationIsActive(), true); + assertEquals(testHMM.maskValueIsActive(), false); + assertEquals(testHMM.consensusResidueIsActive(), true); + assertEquals(testHMM.consensusStructureIsActive(), false); + assertEquals(testHMM.mapIsActive(), true); + assertEquals(testHMM.getDate(), "Tue Feb 19 20:33:41 2013"); + assertNull(testHMM.getCommandLineLog()); + assertEquals(testHMM.getNumberOfSequences().intValue(), 1997); + assertEquals(testHMM.getEffectiveNumberOfSequences(), 3.911818, 4d); + assertEquals(testHMM.getCheckSum().longValue(), 3015610723l); + assertEquals(testHMM.getGatheringThreshold(), "2.324 4.234"); + assertEquals(testHMM.getTrustedCutoff(), "2.343 1.212"); + assertEquals(testHMM.getNoiseCutoff(), "2.354 5.456"); + assertEquals(testHMM.getViterbi(), "-9.3632 0.71858"); + assertEquals(testHMM.getMSV(), "-8.5786 0.71858"); + assertEquals(testHMM.getForward(), "-3.4823 0.71858"); } @Test - public void testParseBeginNodeData() throws IOException + public void testGetTransitionType() { - FileReader fr = new FileReader(file4); - BufferedReader br = new BufferedReader(fr); - for (int i = 0; i < 24; i++) - { - br.readLine(); // this is done to reach the begin node - // data in the file - } - testFile4.hmm.fillSymbols("HMM A B C D E F G H I"); - testFile4.parseBeginNodeData(br); - ArrayList emissions = new ArrayList<>(); - ArrayList transitions = new ArrayList<>(); - - emissions.add(2.68618); - emissions.add(4.42225); - emissions.add(2.77519); - emissions.add(2.73123); - emissions.add(3.46354); - emissions.add(2.40513); - emissions.add(3.72494); - emissions.add(3.29354); - emissions.add(3.61503); - - transitions.add(0.00338); - transitions.add(6.08833); - transitions.add(6.81068); - transitions.add(0.61958); - transitions.add(0.77255); - transitions.add(0.00000); - transitions.add(null); - - assertEquals(testFile4.hmm.getInsertZeroEmissions(), emissions); - assertEquals(testFile4.hmm.getBeginStateTransitions(), transitions); + assertEquals(fn3.getTransitionType("mm").intValue(), 0); + assertEquals(fn3.getTransitionType("mi").intValue(), 1); + assertEquals(fn3.getTransitionType("md").intValue(), 2); + assertEquals(fn3.getTransitionType("im").intValue(), 3); + assertEquals(fn3.getTransitionType("ii").intValue(), 4); + assertEquals(fn3.getTransitionType("dm").intValue(), 5); + assertEquals(fn3.getTransitionType("dd").intValue(), 6); + assertNull(fn3.getTransitionType("df")); } @@ -457,6 +275,7 @@ public class HMMFileTest { filledArray.add(35.3523645); filledArray.add(12345.3564); filledArray.add(1.4); + assertEquals(HMMFile.fillList(scanner2, 5), filledArray); scanner2.close(); @@ -465,81 +284,133 @@ public class HMMFileTest { @Test public void testParseModel() throws IOException { - FileReader fr = new FileReader(file); + FileReader fr = new FileReader(made1.getDataObject()); BufferedReader br = new BufferedReader(fr); - for (int i = 0; i < 23; i++) + HiddenMarkovModel testHMM = new HiddenMarkovModel(); + for (int i = 0; i < 24; i++) { - br.readLine(); // this is done to reach the begin node - // data in the file + br.readLine(); } + made1.parseModel(br); + testHMM = made1.getHmm(); + br.close(); + fr.close(); + + assertEquals(testHMM.getMatchEmission(0, 2), 1.62906); + assertEquals(testHMM.getMatchEmission(2, 1), 2.37873); + assertEquals(testHMM.getMatchEmission(12, 2), 2.61355); + assertEquals(testHMM.getMatchEmission(26, 0), 1.86925); + assertEquals(testHMM.getMatchEmission(32, 3), 2.58263); + assertEquals(testHMM.getMatchEmission(59, 3), 2.20507); + assertEquals(testHMM.getMatchEmission(63, 0), 0.41244); + assertEquals(testHMM.getMatchEmission(69, 1), 3.17398); + assertEquals(testHMM.getMatchEmission(76, 2), 2.65861); + + assertEquals(testHMM.getInsertEmission(0, 1), 1.38629); + assertEquals(testHMM.getInsertEmission(1, 2), 1.38629); + assertEquals(testHMM.getInsertEmission(31, 3), 1.28150); + assertEquals(testHMM.getInsertEmission(43, 0), 1.32290); + assertEquals(testHMM.getInsertEmission(48, 2), 1.52606); + assertEquals(testHMM.getInsertEmission(52, 1), 1.62259); + assertEquals(testHMM.getInsertEmission(67, 0), 1.38141); + assertEquals(testHMM.getInsertEmission(70, 3), 1.38629); + assertEquals(testHMM.getInsertEmission(80, 3), 1.38629); + + assertEquals(testHMM.getStateTransition(2, 0), 0.03725); + assertEquals(testHMM.getStateTransition(6, 1), 3.89715); + assertEquals(testHMM.getStateTransition(9, 3), 1.38021); + assertEquals(testHMM.getStateTransition(20, 4), 0.23815); + assertEquals(testHMM.getStateTransition(34, 6), 0.33363); + assertEquals(testHMM.getStateTransition(46, 5), 1.05474); + assertEquals(testHMM.getStateTransition(57, 6), 0.31164); + assertEquals(testHMM.getStateTransition(68, 2), 3.99242); + assertEquals(testHMM.getStateTransition(80, 6), + Double.NEGATIVE_INFINITY); - testFile.parseModel(br); - assertEquals(testFile.hmm.getMatchEmissions().get(0).get(0), 3.16986); - assertEquals(testFile.hmm.getMatchEmissions().get(0).get(3), 3.29953); - assertEquals(testFile.hmm.getMatchEmissions().get(1).get(2), 2.24744); - assertEquals(testFile.hmm.getMatchEmissions().get(1).get(8), 4.25623); - assertEquals(testFile.hmm.getMatchEmissions().get(2).get(5), 3.48010); - assertEquals(testFile.hmm.getMatchEmissions().get(2).get(6), 4.51877); - assertEquals(testFile.hmm.getMatchEmissions().get(3).get(4), 5.26587); - assertEquals(testFile.hmm.getMatchEmissions().get(3).get(8), 4.99111); - assertEquals(testFile.hmm.getInsertEmissions().get(0).get(3), 2.73088); - assertEquals(testFile.hmm.getInsertEmissions().get(0).get(6), 3.72505); - assertEquals(testFile.hmm.getInsertEmissions().get(1).get(2), 2.77519); - assertEquals(testFile.hmm.getInsertEmissions().get(1).get(8), 3.61503); - assertEquals(testFile.hmm.getInsertEmissions().get(2).get(0), 2.68618); - assertEquals(testFile.hmm.getInsertEmissions().get(2).get(8), 3.61503); - assertEquals(testFile.hmm.getInsertEmissions().get(3).get(2), 2.77519); - assertEquals(testFile.hmm.getInsertEmissions().get(3).get(3), 2.73123); } @Test public void testParseAnnotations() { - testFile4.hmm.setMapAnnotationFlag(true); - Scanner scanner = new Scanner("1 t - - -"); - testFile4.parseAnnotations(scanner, 0); - - assertEquals( - testFile4.hmm.getAlignmentColumnIndexes().get(0).intValue(), 1); - assertEquals( - testFile4.hmm.getAnnotations().get(0).get("CONS").charValue(), - 't'); - assertEquals( - testFile4.hmm.getAnnotations().get(0).get("RF").charValue(), - '-'); - assertEquals( - testFile4.hmm.getAnnotations().get(0).get("MM").charValue(), - '-'); - assertEquals( - testFile4.hmm.getAnnotations().get(0).get("CS").charValue(), - '-'); - - testFile4.hmm.setMapAnnotationFlag(false); - testFile4.hmm.getAlignmentColumnIndexes().clear(); - testFile4.hmm.getAnnotations().clear(); - Scanner scanner2 = new Scanner("- S g C Y"); - testFile4.parseAnnotations(scanner2, 0); - - assertEquals( - testFile4.hmm.getAnnotations().get(0).get("CONS").charValue(), - 'S'); - assertEquals( - testFile4.hmm.getAnnotations().get(0).get("RF").charValue(), - 'g'); - assertEquals( - testFile4.hmm.getAnnotations().get(0).get("MM").charValue(), - 'C'); - assertEquals( - testFile4.hmm.getAnnotations().get(0).get("CS").charValue(), - 'Y'); + HMMFile testFile = new HMMFile("H:/EmptyFile.hmm"); + testFile.hmm.getNodes().add(new HMMNode()); + testFile.hmm.getNodes().add(new HMMNode()); + + testFile.hmm.setConsensusResidueStatus(true); + testFile.hmm.setMAPStatus(true); + testFile.hmm.setReferenceAnnotationStatus(true); + testFile.hmm.setConsensusStructureStatus(true); + testFile.hmm.setMaskedValueStatus(true); + Scanner scanner = new Scanner("1345 t t t t"); + testFile.parseAnnotations(scanner, 0); + assertEquals(testFile.hmm.getNodeAlignmentColumn(0).intValue(), 1345); + assertEquals(testFile.hmm.getConsensusResidue(0), 't'); + assertEquals(testFile.hmm.getReferenceAnnotation(0), 't'); + assertEquals(testFile.hmm.getMaskedValue(0), 't'); + assertEquals(testFile.hmm.getConsensusStructure(0), 't'); + + scanner.close(); + + testFile.hmm.setConsensusResidueStatus(true); + testFile.hmm.setMAPStatus(false); + testFile.hmm.setReferenceAnnotationStatus(true); + testFile.hmm.setConsensusStructureStatus(false); + testFile.hmm.setMaskedValueStatus(false); + Scanner scanner2 = new Scanner("- y x - -"); + testFile.parseAnnotations(scanner2, 1); + assertNull(testFile.hmm.getNodeAlignmentColumn(1)); + assertEquals(testFile.hmm.getConsensusResidue(1), 'y'); + assertEquals(testFile.hmm.getReferenceAnnotation(1), 'x'); + assertEquals(testFile.hmm.getMaskedValue(1), '-'); + assertEquals(testFile.hmm.getConsensusStructure(1), '-'); + + scanner2.close(); } - @Test + @Test(priority = 2) public void testExportFile() throws IOException { - HMMFile exportTestFile = new HMMFile("H:/HMMTutorialExample.hmm"); - exportTestFile.parse(); - exportTestFile.exportFile("H:/WriteTestFile.hmm"); + File file = new File(fn3.getDataObject()); + FileReader fr = new FileReader(file); + BufferedReader br = new BufferedReader(fr); + for (int i = 0; i < 23; i++) + { + br.readLine(); + } + fn3.parseModel(br); + fn3.exportFile("H:/WriteFileTest.hmm"); + } + @Test(priority = 1) + public void testAppendFileProperties() + { + StringBuilder testBuilder = new StringBuilder(); + fn3.appendFileProperties(testBuilder); + Scanner testScanner = new Scanner(testBuilder.toString()); + + String[] expected = new String[] { "HMMER3/f [3.1b1 | May 2013]", + "NAME fn3", "ACC PF00041.13", + "DESC Fibronectin type III domain", "LENG 86", "ALPH amino", + "RF no", "MM no", "CONS yes", "CS yes", "MAP yes", + "DATE Fri Jun 20 08:22:31 2014", "NSEQ 106", "EFFN 11.415833", + "CKSUM 3564431818", "GA 8.00 7.20", "TC 8.00 7.20", + "NC 7.90 7.90", "STATS LOCAL MSV -9.4043 0.71847", + "STATS LOCAL VITERBI -9.7737 0.71847", + "STATS LOCAL FORWARD -3.8341 0.71847" }; + + for (String value : expected) + { + assertEquals(testScanner.nextLine(), value); + } + + testScanner.close(); + } + + public moveAheadBy(Scanner scanner, int nodeChange, int indexChange) + { + for (int y = 0; y < nodeChange; y++) + { + + } } } -- 1.7.10.2