+++ /dev/null
-package jalview.datamodel;
-
-/**
- * bean which stores e-Value Statistics
- *
- * @author TZVanaalten
- *
- */
-public class EValueStatistic
-{
- final String alignmentModeConfiguration;
-
- final double locationOfDistribution;
-
- final double slopeOfDistribution;
-
- public EValueStatistic(String configuration, double slope,
- double location)
- {
- alignmentModeConfiguration = configuration;
- locationOfDistribution = location;
- slopeOfDistribution = slope;
- }
-
- public String getAlignmentModeConfiguration()
- {
- return alignmentModeConfiguration;
- }
-
- public double getLocationOfDistribution()
- {
- return locationOfDistribution;
- }
-
- public double getSlopeOfDistribution()
- {
- return slopeOfDistribution;
- }
-
-}
--- /dev/null
+package jalview.datamodel;
+
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * stores data for each node in the hmm model
+ * @author TZVanaalten
+ *
+ */
+public class HMMNode
+{
+ // match emission values of this node, one entry for each symbol
+ List<Double> matchEmissions = new ArrayList<>();
+ // insert emission values of this node, one entry for each symbol
+ List<Double> insertEmissions = new ArrayList<>();
+ // state transition values of this node, one entry for each possible transition. These are bm, bi, bd, im, ii, dm and dd in order (0th position in
+ // the list holds the value for a bm transition)
+ List<Double> stateTransitions = new ArrayList<>();
+
+ // annotations; alignmentColumn stays null until setAlignmentColumn is called
+ Integer alignmentColumn = null;
+ char consensusResidue;
+ char referenceAnnotation;
+ char maskValue;
+ char consensusStructure;
+ /** returns the list of match emissions held by this node (not a copy) */
+ public List<Double> getMatchEmissions()
+ {
+ return matchEmissions;
+ }
+
+ /** replaces the list of match emissions held by this node */
+ public void setMatchEmissions(List<Double> matchEmissionsL)
+ {
+ this.matchEmissions = matchEmissionsL;
+ }
+ /** returns the list of insert emissions held by this node (not a copy) */
+ public List<Double> getInsertEmissions()
+ {
+ return insertEmissions;
+ }
+
+ /** replaces the list of insert emissions held by this node */
+ public void setInsertEmissions(List<Double> insertEmissionsL)
+ {
+ this.insertEmissions = insertEmissionsL;
+ }
+ /** returns the list of state transitions held by this node (not a copy) */
+ public List<Double> getStateTransitions()
+ {
+ return stateTransitions;
+ }
+
+ /** replaces the list of state transitions held by this node */
+ public void setStateTransitions(List<Double> stateTransitionsL)
+ {
+ this.stateTransitions = stateTransitionsL;
+ }
+
+ /** returns the alignment column of this node, or null if none was set */
+ public Integer getAlignmentColumn()
+ {
+ return alignmentColumn;
+ }
+ /** sets the alignment column of this node */
+ public void setAlignmentColumn(int alignmentColumn)
+ {
+ this.alignmentColumn = alignmentColumn;
+ }
+ /** returns the consensus residue annotation of this node */
+ public char getConsensusResidue()
+ {
+ return consensusResidue;
+ }
+ /** sets the consensus residue annotation of this node */
+ public void setConsensusResidue(char consensusResidue)
+ {
+ this.consensusResidue = consensusResidue;
+ }
+ /** returns the reference annotation of this node */
+ public char getReferenceAnnotation()
+ {
+ return referenceAnnotation;
+ }
+ /** sets the reference annotation of this node */
+ public void setReferenceAnnotation(char referenceAnnotation)
+ {
+ this.referenceAnnotation = referenceAnnotation;
+ }
+ /** returns the mask value annotation of this node */
+ public char getMaskValue()
+ {
+ return maskValue;
+ }
+ /** sets the mask value annotation of this node */
+ public void setMaskValue(char maskValue)
+ {
+ this.maskValue = maskValue;
+ }
+ /** returns the consensus structure annotation of this node */
+ public char getConsensusStructure()
+ {
+ return consensusStructure;
+ }
+ /** sets the consensus structure annotation of this node */
+ public void setConsensusStructure(char consensusStructure)
+ {
+ this.consensusStructure = consensusStructure;
+ }
+}
+
+
// only string value - use the getter methods. For example, to find the length
// of theHMM, use getModelLength()to return an int value
Map<String, String> fileProperties = new HashMap<>();
+
+ //contains all of the symbols used in this model. The index of each symbol represents its lookup value
+ List<Character> symbols = new ArrayList<>();
- // contains the average emission probabilities for each symbol
- List<Double> averageMatchStateEmissionProbabilities = new ArrayList<>();
+ // contains information for each node in the model. The begin node is at index
+ // 0. Node 0 contains average emission probabilities for each symbol
+ List<HMMNode> nodes = new ArrayList<>();
- // contains the probabilities of insert 0 emissions for each symbol
- List<Double> insertZeroEmissions = new ArrayList<>();
+ final String YES = "yes";
- // contains the probabilities of transitions from the begin state and insert
- // state 0. These are bm, bi, bd, im, ii, dm and dd in order (0th position in
- // the array indicates the probability of a bm transition)
+ final String NO = "no";
- List<Double> beginStateTransitions = new ArrayList<>();
+ int numberOfSymbols;
+
+ //keys for file properties hashmap
+ private final String NAME = "NAME";
- // contains the alignment column index for each node
- List<Integer> alignmentColumnIndexes = new ArrayList<>();
+ private final String ACCESSION_NUMBER = "ACC";
- // contains all other annotations for each node. These can be the
- // consensus(CONS), reference annotation(RF), mask value(MM) or consensus
- // structure(CS)
- List<HashMap<String, Character>> annotations = new ArrayList<>();
+ private final String DESCRIPTION = "DESC";
- // contains the match emission for each symbol at each node
- List<List<Double>> matchEmissions = new ArrayList<>();
+ private final String LENGTH = "LENG";
- // contains the insert emission for each symbol at each node
- List<List<Double>> insertEmissions = new ArrayList<>();
+ private final String MAX_LENGTH = "MAXL";
- // contains the state transition for each state transition. See
- // beginStateTransitions field for transition possibilities.
- List<List<Double>> stateTransitions = new ArrayList<>();
+ private final String ALPHABET = "ALPH";
- // contains cutoffs and thresholds from PFAM
- Map<String, Double[]> pfamData = new HashMap<>();
+ private final String DATE = "DATE";
- // contains e-value statistic objects which contain the alignment mode
- // configuration, and the slope and location of each distribution
- Map<String, EValueStatistic> eValueStatistics = new HashMap<>();
+ private final String COMMAND_LOG = "COM";
- final String yes = "yes";
+ private final String NUMBER_OF_SEQUENCES = "NSEQ";
- final String no = "no";
+ private final String EFF_NUMBER_OF_SEQUENCES = "EFFN";
- List<Character> symbols = new ArrayList<>();
+ private final String CHECK_SUM = "CKSUM";
- public List<Double> getBeginStateTransitions()
- {
- return beginStateTransitions;
- }
+ private final String GATHERING_THRESHOLDS = "GA";
- public void setBeginStateTransitions(List<Double> beginStateTransitionsL)
- {
- this.beginStateTransitions = beginStateTransitionsL;
- }
+ private final String TRUSTED_CUTOFFS = "TC";
- public List<List<Double>> getStateTransitions()
- {
- return stateTransitions;
- }
+ private final String NOISE_CUTOFFS = "NC";
- public void setStateTransitions(List<List<Double>> stateTransitionsL)
- {
- this.stateTransitions = stateTransitionsL;
- }
+ private final String STATISTICS = "STATS";
- public List<Character> getSymbols()
- {
- return symbols;
- }
+ private final String COMPO = "COMPO";
+
+ private final String GATHERING_THRESHOLD = "GA";
- public void setSymbols(List<Character> symbolsL)
- {
- this.symbols = symbolsL;
- }
+ private final String TRUSTED_CUTOFF = "TC";
- public List<Double> getAverageMatchStateEmissionProbabilities()
- {
- return averageMatchStateEmissionProbabilities;
- }
+ private final String NOISE_CUTOFF = "NC";
- public void setAverageMatchStateEmissionProbabilities(
- List<Double> averageMatchStateEmissionProbabilitiesL)
- {
- this.averageMatchStateEmissionProbabilities = averageMatchStateEmissionProbabilitiesL;
- }
+ private final String VITERBI = "VITERBI";
+ private final String MSV = "MSV";
- public List<Double> getInsertZeroEmissions()
- {
- return insertZeroEmissions;
- }
+ private final String FORWARD = "FORWARD";
+
+ private final String MAP = "MAP";
+
+ private final String REFERENCE_ANNOTATION = "RF";
+
+ private final String CONSENSUS_RESIDUE = "CONS";
+
+ private final String CONSENSUS_STRUCTURE = "CS";
+
+ private final String MASKED_VALUE = "MM";
+
+ final static String[] TRANSITION_TYPES = new String[] { "m->m", "m->i",
+ "m->d", "i->m", "i->i", "d->m", "d->d" };
- public void setInsertZeroEmissions(List<Double> insertZeroEmissionsL)
+ public String getTransitionType(int index)
{
- this.insertZeroEmissions = insertZeroEmissionsL;
+ return TRANSITION_TYPES[index];
}
- public List<List<Double>> getMatchEmissions()
+ public String[] getTransitionTypes()
{
- return matchEmissions;
+ return TRANSITION_TYPES;
}
-
- public void setMatchEmissions(List<List<Double>> matchEmissionsL)
+ public char getSymbol(int index)
{
- this.matchEmissions = matchEmissionsL;
+ return getSymbols().get(index);
}
-
- public List<List<Double>> getInsertEmissions()
+ public Map<String, String> getFileProperties()
{
- return insertEmissions;
+ return fileProperties;
}
- public void setInsertEmissions(List<List<Double>> insertEmissionsL)
+ public HMMNode getNode(int nodeIndex)
{
- this.insertEmissions = insertEmissionsL;
+ return getNodes().get(nodeIndex);
}
- public void fillSymbols(String line)
+
+ public void setSymbols(List<Character> symbolsL)
{
- Scanner scanner = new Scanner(line);
- scanner.next();
- while (scanner.hasNext())
- {
- symbols.add(scanner.next().charAt(0));
- }
- scanner.close();
+ this.symbols = symbolsL;
}
public String getName()
{
- return fileProperties.get("NAME");
+ return fileProperties.get(NAME);
}
public String getAccessionNumber()
{
- return fileProperties.get("ACC");
+ return fileProperties.get(ACCESSION_NUMBER);
}
public void setAccessionNumber(String value)
{
- fileProperties.put("ACC", value);
+ fileProperties.put(ACCESSION_NUMBER, value);
}
public String getDescription()
{
- return fileProperties.get("DESC");
+ return fileProperties.get(DESCRIPTION);
}
public void setDescription(String value)
{
- fileProperties.put("DESC", value);
+ fileProperties.put(DESCRIPTION, value);
}
public Integer getLength()
{
- if (fileProperties.get("LENG") == null)
+ if (fileProperties.get(LENGTH) == null)
{
return null;
}
- return Integer.parseInt(fileProperties.get("LENG"));
+ return Integer.parseInt(fileProperties.get(LENGTH));
}
public void setLength(int value)
{
- fileProperties.put("LENG", String.valueOf(value));
+ fileProperties.put(LENGTH, String.valueOf(value));
}
public Integer getMaxInstanceLength()
{
- if (fileProperties.get("MAXL") == null)
+ if (fileProperties.get(MAX_LENGTH) == null)
{
return null;
}
- return Integer.parseInt(fileProperties.get("MAXL"));
+ return Integer.parseInt(fileProperties.get(MAX_LENGTH));
}
public void setMaxInstanceLength(int value)
{
- fileProperties.put("MAXL", String.valueOf(value));
+ fileProperties.put(MAX_LENGTH, String.valueOf(value));
}
// gets type of symbol alphabet - "amino", "DNA", "RNA"
public String getAlphabetType()
{
- return fileProperties.get("ALPH");
+ return fileProperties.get(ALPHABET);
}
public void setAlphabetType(String value)
{
- fileProperties.put("ALPH", value);
+ fileProperties.put(ALPHABET, value);
}
- // returns boolean indicating whether the reference annotation character field
- // for each match state is valid or ignored
- public boolean getReferenceAnnotationFlag()
+ // not sure whether to implement this with Date object
+ public String getDate()
{
- if (fileProperties.get("RF") != null)
- {
- if (fileProperties.get("RF").equals(yes))
- {
- return true;
- }
- }
- return false;
+ return fileProperties.get(DATE);
}
- public void setReferenceAnnotationFlag(boolean value)
+ public void setDate(String value)
{
- if (value)
- {
- fileProperties.put("RF", yes);
- }
- else
- {
- fileProperties.put("RF", no);
- }
-
+ fileProperties.put(DATE, value);
}
- // returns boolean indicating whether the model mask annotation character
- // field
- // for each match state is valid or ignored
- public boolean getModelMaskedFlag()
+ // not sure whether to implement this
+ public String getCommandLineLog()
{
- if (fileProperties.get("MM") != null)
- {
- if (fileProperties.get("MM").equals(yes))
- {
- return true;
- }
- }
- return false;
+ return fileProperties.get(COMMAND_LOG);
}
- public void setModelMaskedFlag(boolean value)
+ public void setCommandLineLog(String value)
{
- if (value)
- {
- fileProperties.put("MM", yes);
- }
- else
- {
- fileProperties.put("MM", no);
- }
+ fileProperties.put(COMMAND_LOG, value);
}
- // returns boolean indicating whether the consensus residue field
- // for each match state is valid or ignored
- public boolean getConsensusResidueAnnotationFlag()
+ // gets the number of sequences that the HMM was trained on
+ public Integer getNumberOfSequences()
{
- if (fileProperties.get("CONS") != null)
+ if (fileProperties.get(NUMBER_OF_SEQUENCES) == null)
{
- if (fileProperties.get("CONS").equals(yes))
- {
- return true;
- }
+ return null;
}
- return false;
+ return Integer.parseInt(fileProperties.get(NUMBER_OF_SEQUENCES));
}
- public void setConsensusResidueeAnnotationFlag(boolean value)
+ public void setNumberOfSequences(int value)
{
- if (value)
- {
- fileProperties.put("CONS", yes);
- }
- else
- {
- fileProperties.put("CONS", no);
- }
+ fileProperties.put(NUMBER_OF_SEQUENCES, String.valueOf(value));
}
- // returns boolean indicating whether the consensus structure character field
- // for each match state is valid or ignored
- public boolean getConsensusStructureAnnotationFlag()
+ // gets the effective number determined during sequence weighting.
+ // Returns null when no EFFN property is stored. The presence check must
+ // test the same key that is parsed below; testing LENG here made the
+ // result depend on an unrelated property
+ public Double getEffectiveNumberOfSequences()
{
- if (fileProperties.get("CS") != null)
+ if (fileProperties.get(EFF_NUMBER_OF_SEQUENCES) == null)
{
- if (fileProperties.get("CS").equals(yes))
- {
- return true;
- }
+ return null;
}
- return false;
+ return Double.parseDouble(fileProperties.get(EFF_NUMBER_OF_SEQUENCES));
}
- public void setConsensusStructureAnnotationFlag(boolean value)
+ public void setEffectiveNumberOfSequences(double value)
{
- if (value)
- {
- fileProperties.put("CS", yes);
- }
- else
- {
- fileProperties.put("CS", no);
- }
+ fileProperties.put(EFF_NUMBER_OF_SEQUENCES, String.valueOf(value));
}
- // returns boolean indicating whether the model mask annotation character
- // field
- // for each match state is valid or ignored
- public boolean getMapAnnotationFlag()
+ // gets the checksum stored under the CKSUM property, or null when no
+ // checksum is stored. The presence check must test the same key that is
+ // parsed below; testing LENG here made the result depend on an unrelated
+ // property
+ public Long getCheckSum()
{
- if (fileProperties.get("MAP") != null)
+ if (fileProperties.get(CHECK_SUM) == null)
{
- if (fileProperties.get("MAP").equals(yes))
- {
- return true;
- }
+ return null;
}
- return false;
+ return Long.parseLong(fileProperties.get(CHECK_SUM));
}
- public void setMapAnnotationFlag(boolean value)
+ public void setCheckSum(long value)
{
- if (value)
- {
- fileProperties.put("MAP", yes);
- }
- else
- {
- fileProperties.put("MAP", no);
- }
+ fileProperties.put(CHECK_SUM, String.valueOf(value));
}
- // not sure whether to implement this with Date object
- public String getDate()
+ public List<HMMNode> getNodes()
{
- return fileProperties.get("DATE");
+ return nodes;
}
- public void setDate(String value)
+ public void setNodes(List<HMMNode> nodes)
{
- fileProperties.put("DATE", value);
+ this.nodes = nodes;
}
-
- // not sure whether to implement this
- public String getCommandLineLog()
+
+ /**
+ * gets the match emission at a node for a symbol
+ * @param nodeIndex
+ * position of node in model
+ * @param symbolIndex
+ * index of symbol being searched
+ * @return
+ * negative log probability of a match emission of the given symbol
+ */
+ public double getMatchEmission(int nodeIndex, int symbolIndex)
+ {
+ double value = nodes.get(nodeIndex).getMatchEmissions().get(symbolIndex);
+ return value;
+ }
+
+ /**
+ * gets the insert emission at a node for a symbol
+ * @param nodeIndex
+ * position of node in model
+ * @param symbolIndex
+ * index of symbol being searched
+ * @return
+ * negative log probability of an insert emission of the given symbol
+ */
+ public double getInsertEmission(int nodeIndex, int symbolIndex)
+ {
+ double value = nodes.get(nodeIndex).getInsertEmissions().get(symbolIndex);
+ return value;
+ }
+
+ /**
+ * gets the state transition at a node for a specific transition
+ * @param nodeIndex
+ * position of node in model
+ * @param transitionIndex
+ * index of transition being searched
+ * @return
+ * negative log probability of a state transition of the given type
+ */
+ public double getStateTransition(int nodeIndex, int transitionIndex)
{
- return fileProperties.get("COM");
+ double value = nodes.get(nodeIndex).getStateTransitions()
+ .get(transitionIndex);
+ return value;
}
-
- public void setCommandLineLog(String value)
+
+ public Integer getNodeAlignmentColumn(int nodeIndex)
{
- fileProperties.put("COM", value);
+ Integer value = nodes.get(nodeIndex).getAlignmentColumn();
+ return value;
}
-
- // gets the number of sequences that the HMM was trained on
- public Integer getSequenceNumber()
+
+ public char getConsensusResidue(int nodeIndex)
{
- if (fileProperties.get("NSEQ") == null)
- {
- return null;
- }
- return Integer.parseInt(fileProperties.get("NSEQ"));
+ char value = nodes.get(nodeIndex).getConsensusResidue();
+ return value;
}
-
- public void setSequenceNumber(int value)
+
+ public char getReferenceAnnotation(int nodeIndex)
{
- fileProperties.put("NSEQ", String.valueOf(value));
+ char value = nodes.get(nodeIndex).getReferenceAnnotation();
+ return value;
}
-
- // gets the effective number determined during sequence weighting
- public Double getEffectiveSequenceNumber()
+
+ public char getMaskedValue(int nodeIndex)
{
- if (fileProperties.get("LENG") == null)
- {
- return null;
- }
- return Double.parseDouble(fileProperties.get("EFFN"));
+ char value = nodes.get(nodeIndex).getMaskValue();
+ return value;
}
-
- public void setEffectiveSequenceNumber(double value)
+
+ public char getConsensusStructure(int nodeIndex)
{
- fileProperties.put("EFFN", String.valueOf(value));
+ char value = nodes.get(nodeIndex).getConsensusStructure();
+ return value;
+ }
+
+ /**
+ * returns the average match emission for a given symbol
+ * @param symbolIndex
+ * index of symbol
+ * @return
+ * average negative log probability of a match emission of the given symbol
+ */
+ public double getAverageMatchEmission(int symbolIndex)
+ {
+ double value = nodes.get(0).getMatchEmissions().get(symbolIndex);
+ return value;
}
- public Long getCheckSum()
+ public int getNumberOfSymbols()
{
- if (fileProperties.get("LENG") == null)
- {
- return null;
- }
- return Long.parseLong(fileProperties.get("CKSUM"));
+ return numberOfSymbols;
}
- public void setCheckSum(long value)
+ public void setNumberOfSymbols(int numberOfSymbols)
{
- fileProperties.put("CKSUM", String.valueOf(value));
+ this.numberOfSymbols = numberOfSymbols;
}
- public Double getGatheringThreshold1()
+ public List<Character> getSymbols()
{
- try
- {
- return pfamData.get("GA")[0];
- } catch (NullPointerException e)
- {
- return null;
+ return symbols;
+ }
+
+
+ /**
+ * fills symbol array and also finds numberOfSymbols
+ *
+ * @param parser
+ * scanner scanning symbol line in file
+ */
+ public void fillSymbols(Scanner parser)
+ {
+ while (parser.hasNext())
+ {
+ String strSymbol = parser.next();
+ char[] symbol = strSymbol.toCharArray();
+ symbols.add(symbol[0]);
}
+ numberOfSymbols = symbols.size();
}
- public void setPFAMData(String key, Double[] data)
+ /**
+ * adds file property
+ *
+ * @param key
+ * @param value
+ */
+ public void addFileProperty(String key, String value)
{
- pfamData.put(key, data);
+ fileProperties.put(key, value);
}
- public Double getGatheringThreshold2()
+ public boolean referenceAnnotationIsActive()
{
- try
- {
- return pfamData.get("GA")[1];
- } catch (NullPointerException e)
+ String status;
+ status = fileProperties.get(REFERENCE_ANNOTATION);
+ if (status == null)
{
- return null;
+ return false;
+ }
+ switch (status)
+ {
+ case YES:
+ return true;
+ case NO:
+ return false;
+ default:
+ return false;
}
}
- public Double getTrustedCutoff1()
+ public boolean maskValueIsActive()
{
- try
- {
- return pfamData.get("TC")[0];
- } catch (NullPointerException e)
+ String status;
+ status = fileProperties.get(MASKED_VALUE);
+ if (status == null)
{
- return null;
+ return false;
+ }
+ switch (status)
+ {
+ case YES:
+ return true;
+ case NO:
+ return false;
+ default:
+ return false;
}
}
- public Double getTrustedCutoff2()
+ public boolean consensusResidueIsActive()
{
- try
- {
- return pfamData.get("TC")[1];
- } catch (NullPointerException e)
+ String status;
+ status = fileProperties.get(CONSENSUS_RESIDUE);
+ if (status == null)
{
- return null;
+ return false;
+ }
+ switch (status)
+ {
+ case YES:
+ return true;
+ case NO:
+ return false;
+ default:
+ return false;
}
}
- public Double getNoiseCutoff1()
+ public boolean consensusStructureIsActive()
{
- try
- {
- return pfamData.get("NC")[0];
- } catch (NullPointerException e)
+ String status;
+ status = fileProperties.get(CONSENSUS_STRUCTURE);
+ if (status == null)
{
- return null;
+ return false;
+ }
+ switch (status)
+ {
+ case YES:
+ return true;
+ case NO:
+ return false;
+ default:
+ return false;
}
}
- public Double getNoiseCutoff2()
+ public boolean mapIsActive()
{
- try
- {
- return pfamData.get("NC")[1];
- } catch (NullPointerException e)
+ String status;
+ status = fileProperties.get(MAP);
+ if (status == null)
{
- return null;
+ return false;
+ }
+ switch (status)
+ {
+ case YES:
+ return true;
+ case NO:
+ return false;
+ default:
+ return false;
}
}
- public String getAlignmentModeConfiguration(String key)
+ public void setAlignmentColumn(int nodeIndex, int column)
{
- return eValueStatistics.get(key).alignmentModeConfiguration;
+ nodes.get(nodeIndex).setAlignmentColumn(column);
}
- public Double getSlopeOfDistribution(String scoreDistribution)
+ public void setReferenceAnnotation(int nodeIndex, char value)
{
- try
- {
- return eValueStatistics.get(scoreDistribution).slopeOfDistribution;
- } catch (NullPointerException e)
- {
- return null;
- }
+ nodes.get(nodeIndex).setReferenceAnnotation(value);
}
- public Double getLocationOfDistribution(String scoreDistribution)
+ public void setConsensusResidue(int nodeIndex, char value)
{
- try
- {
- return eValueStatistics.get(scoreDistribution).locationOfDistribution;
- } catch (NullPointerException e)
- {
- return null;
- }
+ nodes.get(nodeIndex).setConsensusResidue(value);
}
- public void addStatistic(String name, EValueStatistic stats)
+ public void setConsensusStructure(int nodeIndex, char value)
{
- eValueStatistics.put(name, stats);
+ nodes.get(nodeIndex).setConsensusStructure(value);
}
- /**
- * public double getBeginStateTransitions(Character symbol) { return
- * beginStateTransitions.get(symbol); }
- **/
+ public void setMaskValue(int nodeIndex, char value)
+ {
+ nodes.get(nodeIndex).setMaskValue(value);
+ }
- public void put(String key, String value)
+ public String getGatheringThreshold()
{
- fileProperties.put(key, value);
+ String value;
+ value = fileProperties.get("GA");
+ return value;
}
- public Map<String, EValueStatistic> getEValueStatistics()
+ public String getNoiseCutoff()
{
- return eValueStatistics;
+ String value;
+ value = fileProperties.get("NC");
+ return value;
}
- public void setEValueStatistics(
- Map<String, EValueStatistic> eValueStatisticsM)
+ public String getTrustedCutoff()
{
- this.eValueStatistics = eValueStatisticsM;
+ String value;
+ value = fileProperties.get("TC");
+ return value;
}
- public List<Integer> getAlignmentColumnIndexes()
+ public String getViterbi()
{
- return alignmentColumnIndexes;
+ String value;
+ value = fileProperties.get(VITERBI);
+ return value;
}
- public void setAlignmentColumnIndexes(
- List<Integer> alignmentColumnIndexesL)
+ public String getMSV()
{
- this.alignmentColumnIndexes = alignmentColumnIndexesL;
+ String value;
+ value = fileProperties.get(MSV);
+ return value;
}
- public List<HashMap<String, Character>> getAnnotations()
+ public String getForward()
{
- return annotations;
+ String value;
+ value = fileProperties.get(FORWARD);
+ return value;
}
- public void setAnnotations(List<HashMap<String, Character>> annotationsL)
+ public void setMAPStatus(boolean status)
{
- this.annotations = annotationsL;
+ if (status == true)
+ {
+ fileProperties.put(MAP, YES);
+ }
+ else
+ {
+ fileProperties.put(MAP, NO);
+ }
}
- public Map<String, String> getFileProperties()
+ public void setReferenceAnnotationStatus(boolean status)
{
- return fileProperties;
+ if (status == true)
+ {
+ fileProperties.put(REFERENCE_ANNOTATION, YES);
+ }
+ else
+ {
+ fileProperties.put(REFERENCE_ANNOTATION, NO);
+ }
+ }
+
+ public void setMaskedValueStatus(boolean status)
+ {
+ if (status == true)
+ {
+ fileProperties.put(MASKED_VALUE, YES);
+ }
+ else
+ {
+ fileProperties.put(MASKED_VALUE, NO);
+ }
+ }
+
+ public void setConsensusResidueStatus(boolean status)
+ {
+ if (status == true)
+ {
+ fileProperties.put(CONSENSUS_RESIDUE, YES);
+ }
+ else
+ {
+ fileProperties.put(CONSENSUS_RESIDUE, NO);
+ }
}
- public void setFileProperties(Map<String, String> fileProperties)
+ public void setConsensusStructureStatus(boolean status)
{
- this.fileProperties = fileProperties;
+ if (status == true)
+ {
+ fileProperties.put(CONSENSUS_STRUCTURE, YES);
+ }
+ else
+ {
+ fileProperties.put(CONSENSUS_STRUCTURE, NO);
+ }
}
}
package jalview.io;
-import jalview.datamodel.EValueStatistic;
+import jalview.datamodel.HMMNode;
import jalview.datamodel.HiddenMarkovModel;
import java.io.BufferedReader;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
-import java.io.PrintWriter;
import java.io.UnsupportedEncodingException;
import java.util.ArrayList;
-import java.util.HashMap;
import java.util.List;
-import java.util.Map;
import java.util.Scanner;
+
/**
* reads in and writes out a HMMER standard file
*
// HMM to store file data
HiddenMarkovModel hmm = new HiddenMarkovModel();
+
// Source of file
String dataObject;
- // number of symbols
- int numberOfSymbols;
-
// number of possible transitions
- final int NUMBER_OF_TRANSITIONS = 7;
+ final static int NUMBER_OF_TRANSITIONS = 7;
+
+ final static String NEW_LINE = "\n";
+
// file header
String fileHeader;
+ int numberOfSymbols;
+
+ final static String SPACE = " ";
+
+ final static String COMPO = "COMPO";
+
+ final static String EMPTY = "";
+
+
/**
* Constructor which contains model to be filled or exported
*
dataObject = dataSource;
}
+ public HiddenMarkovModel getHmm()
+ {
+ return hmm;
+ }
+
+ public void setHmm(HiddenMarkovModel model)
+ {
+ this.hmm = model;
+ }
+
/**
* reads data from HMM file
*
}
+ public String getDataObject()
+ {
+ return dataObject;
+ }
+
+ public void setDataObject(String value)
+ {
+ this.dataObject = value;
+ }
+
/**
* imports file properties from hmm file
*
// properties)
{
readingFile = false;
- hmm.fillSymbols(line);
- numberOfSymbols = hmm.getSymbols().size();
+ hmm.fillSymbols(parser);
+ numberOfSymbols = hmm.getNumberOfSymbols();
}
- else if ("STATS".equals(next)) // reads e-value stats into separate
- // field
- // on HMM object
+ else if ("STATS".equals(next))
{
- readStats(parser);
- }
- else if ("GA".equals(next) || "TC".equals(next)
- || "NC".equals(next)) // reads
- // pfam
- // data
- // into
- // separate
- // field
- // on
- // HMM
- // object
- {
- Double[] data = new Double[2];
- data[0] = parser.nextDouble();
- data[1] = parser.nextDouble();
- hmm.setPFAMData(next, data);
+ parser.next();
+ String key;
+ String value;
+ key = parser.next();
+ value = parser.next() + SPACE + SPACE + parser.next();
+ hmm.addFileProperty(key, value);
}
else
{
String value = parser.next();
while (parser.hasNext())
{
- value = value + " " + parser.next();
+ value = value + SPACE + parser.next();
}
- hmm.put(key, value);
+ hmm.addFileProperty(key, value);
}
parser.close();
}
}
/**
- * creates a new EValueStatistic object to store stats
- *
- * @param parser
- * Scanner which contains data for STATS line
- *
- */
- public void readStats(Scanner parser)
- {
- if (parser.hasNext())
- {
- String name;
- double slope;
- double location;
- String configuration;
-
- configuration = parser.next();
- name = parser.next();
- slope = parser.nextDouble();
- location = parser.nextDouble();
- hmm.addStatistic(name,
- new EValueStatistic(configuration, slope, location));
- }
- }
-
- /**
* parses the model data from the hmm file
*
* @param input
*/
public void parseModel(BufferedReader input) throws IOException
{
-
- String line = input.readLine();
- Scanner scanner = new Scanner(line);
- String next = scanner.next();
- if ("COMPO".equals(next)) // checks to and stores COMPO data if present
+ // one iteration per node; node 0 is the begin node, hence length + 1
+ for (int i = 0; i < hmm.getLength() + 1; i++)
{
- for (int i = 0; i < numberOfSymbols; i++)
-
+ hmm.getNodes().add(new HMMNode());
+ String next;
+ String line;
+ line = input.readLine();
+ Scanner matchReader = new Scanner(line);
+ next = matchReader.next();
+ // every node after node 0 starts with a match emission line; node 0 only
+ // has one when the optional COMPO line is present
+ boolean hasMatchLine = next.equals(COMPO) || i > 0;
+ if (hasMatchLine)
{
- hmm.getAverageMatchStateEmissionProbabilities()
- .add(scanner.nextDouble());
+ // stores match emission line in list
+ List<Double> matches = new ArrayList<>();
+ matches = fillList(matchReader, numberOfSymbols);
+ hmm.getNodes().get(i).setMatchEmissions(matches);
+ if (i > 0)
+ {
+ parseAnnotations(matchReader, i);
+ }
}
- }
- scanner.close();
- parseBeginNodeData(input);
- for (int i = 0; i < hmm.getLength(); i++)
- {
- Scanner matchReader = new Scanner(input.readLine());
- matchReader.nextInt(); // skips number indicating position in HMM
- hmm.getMatchEmissions()
- .add(fillList(matchReader, numberOfSymbols));
- parseAnnotations(matchReader, i);
matchReader.close();
- Scanner insertReader = new Scanner(input.readLine());
- hmm.getInsertEmissions().add(fillList(insertReader, numberOfSymbols));
+ // stores insert emission line in list. When node 0 has no COMPO line, the
+ // line read above already is its insert emission line, so it is re-scanned
+ // instead of reading (and misaligning on) the following line
+ if (hasMatchLine)
+ {
+ line = input.readLine();
+ }
+ Scanner insertReader = new Scanner(line);
+ List<Double> inserts = new ArrayList<>();
+ inserts = fillList(insertReader, numberOfSymbols);
+ hmm.getNodes().get(i).setInsertEmissions(inserts);
insertReader.close();
- Scanner transitionReader = new Scanner(input.readLine());
- hmm.getStateTransitions()
- .add(fillList(transitionReader, NUMBER_OF_TRANSITIONS));
+
+ // stores state transition line in list
+ line = input.readLine();
+ Scanner transitionReader = new Scanner(line);
+ List<Double> transitions = new ArrayList<>();
+ transitions = fillList(transitionReader, NUMBER_OF_TRANSITIONS);
+ hmm.getNodes().get(i).setStateTransitions(transitions);
transitionReader.close();
}
}
/**
- * parses the begin state transitions and insert 0 emissions
- *
- * @param input
- * buffered reader used to read model
- * @param currentline
- * string contain all data on current line of buffered reader
- * @throws IOException
- */
-
- public void parseBeginNodeData(BufferedReader input)
- throws IOException
- {
- Scanner scanner = new Scanner(input.readLine());
- hmm.setInsertZeroEmissions(fillList(scanner, hmm.getSymbols().size()));
- scanner.close();
- Scanner scannerTransitions = new Scanner(input.readLine());
- hmm.setBeginStateTransitions(
- fillList(scannerTransitions, NUMBER_OF_TRANSITIONS));
- scannerTransitions.close();
- }
-
- /**
* parses annotations on match emission line
*
* @param scanner
*/
public void parseAnnotations(Scanner scanner, int index)
{
- if (hmm.getMapAnnotationFlag())
+ // the first annotation token is the alignment column; it is only stored
+ // when the MAP property is active
+ if (hmm.mapIsActive())
{
- hmm.getAlignmentColumnIndexes().add(scanner.nextInt());
+ int column;
+ column = scanner.nextInt();
+ hmm.getNodes().get(index).setAlignmentColumn(column);
}
else
{
scanner.next();
}
- hmm.getAnnotations().add(new HashMap<String, Character>());
- hmm.getAnnotations().get(index).put("CONS", scanner.next().charAt(0));
- hmm.getAnnotations().get(index).put("RF", scanner.next().charAt(0));
- hmm.getAnnotations().get(index).put("MM", scanner.next().charAt(0));
- hmm.getAnnotations().get(index).put("CS", scanner.next().charAt(0));
+
+ // the next four tokens are read in fixed order: consensus residue,
+ // reference annotation, mask value, consensus structure.
+ // NOTE(review): charValue is defined outside this hunk; presumably it
+ // returns the first character of the token, as the removed code did
+ char consensusR;
+ consensusR = charValue(scanner.next());
+ hmm.getNodes().get(index).setConsensusResidue(consensusR);
+
+ char reference;
+ reference = charValue(scanner.next());
+ hmm.getNodes().get(index).setReferenceAnnotation(reference);
+
+
+ char value;
+ value = charValue(scanner.next());
+ hmm.getNodes().get(index).setMaskValue(value);
+
+ char consensusS;
+ consensusS = charValue(scanner.next());
+ hmm.getNodes().get(index).setConsensusStructure(consensusS);
}
+
/**
*
* @param transition
int numberOfElements)
{
List<Double> list = new ArrayList<>();
- String next;
for (int i = 0; i < numberOfElements; i++)
{
- next = input.next();
+
+ String next = input.next();
if (next.contains("*")) // state transitions to or from delete states
// occasionally have values of -infinity. These
// values are represented by an * in the .hmm
- // file, and by a null value in the
- // HiddenMarkovModel class
+ // file, and by Double.NEGATIVE_INFINITY in the
+ // HiddenMarkovModel class
{
- list.add(null);
+ list.add(Double.NEGATIVE_INFINITY);
}
else
{
return list;
}
+
/**
- * writes a HiddenMarkovModel to a file. Needs mode work to make file more
- * readable for humans (align columns)
+ * writes a HiddenMarkovModel to a file
*
* @param exportLocation
* Filename, URL or Pasted String to write to
* @throws FileNotFoundException
* @throws UnsupportedEncodingException
- */
- public void exportFile(String exportLocation)
- throws FileNotFoundException, UnsupportedEncodingException
+ *
+ **/
+
+ public void exportFile(String exportLocation) throws IOException
{
- PrintWriter writer = new PrintWriter(exportLocation, "UTF-8");
- writer.println(fileHeader);
- for (Map.Entry<String, String> entry : hmm.getFileProperties()
- .entrySet())
+ // Assemble the complete file text in memory first: the header
+ // properties, then the model, then the "//" line that ends the file.
+ // The terminator is followed by a line break, as it was when the
+ // previous implementation wrote it with println.
+ StringBuilder file = new StringBuilder();
+ appendFileProperties(file);
+ appendModel(file);
+ file.append("//").append(NEW_LINE);
+
+ // Write the assembled text to exportLocation. Without this step the
+ // method builds the text and discards it. UTF-8 is the encoding the
+ // previous PrintWriter-based implementation used. try-with-resources
+ // closes the writer even if the write fails, and any failure surfaces
+ // as an IOException instead of being swallowed.
+ try (java.io.Writer writer = new java.io.OutputStreamWriter(
+ new java.io.FileOutputStream(exportLocation), "UTF-8"))
+ {
+ writer.write(file.toString());
+ }
+ }
+
+ /**
+ * Formats a list of values as one line of right-aligned columns.
+ *
+ * @param initialColumnSeparation
+ * field width used for the first value
+ * @param columnSeparation
+ * field width used for every following value
+ * @param data
+ * values to format, in output order
+ * @return the formatted values concatenated onto EMPTY
+ */
+ public String addData(int initialColumnSeparation,
+ int columnSeparation, List<String> data)
+ {
+ StringBuilder row = new StringBuilder().append(EMPTY);
+ boolean firstColumn = true;
+ for (String value : data)
{
- writer.println(entry.getKey() + " " + entry.getValue());
+ // Only the first value uses the initial width.
+ int width = firstColumn ? initialColumnSeparation : columnSeparation;
+ row.append(String.format("%" + width + "s", value));
+ firstColumn = false;
}
- writer.println(
- "HMM" + " " + convertCharListToString(hmm.getSymbols()));
- writer.println("m->m m->i m->d i->m i->i d->m d->d");
- if (false == hmm.getAverageMatchStateEmissionProbabilities().isEmpty())
+ return row.toString();
+ }
+
+ /**
+ * Converts a list of characters into a list of one-character strings,
+ * preserving order.
+ *
+ * @param list
+ * characters to convert
+ * @return a new list holding the string form of each character
+ */
+ public static List<String> charListToStringList(List<Character> list)
+ {
+ List<String> symbols = new ArrayList<>();
+ for (Character symbol : list)
{
- writer.println("COMPO" + " " + convertDoubleListToString(
- hmm.getAverageMatchStateEmissionProbabilities()));
+ // Explicit unboxing: a null element fails with a
+ // NullPointerException.
+ symbols.add(String.valueOf(symbol.charValue()));
}
- writer.println(convertDoubleListToString(hmm.getInsertZeroEmissions()));
- writer.println(
- convertDoubleListToString(hmm.getBeginStateTransitions()));
+ return symbols;
+ }
- for (Integer i = 0; i < hmm.getLength(); i++)
+ /**
+ * Converts a list of doubles into their textual form for the .hmm file.
+ * Double.NEGATIVE_INFINITY is written as "*"; every other value is written
+ * in fixed-point notation with the requested number of decimals.
+ *
+ * @param list
+ * values to convert (elements must not be null)
+ * @param noOfDecimals
+ * number of digits after the decimal point (must not be negative)
+ * @return a new list holding the string form of each value, in order
+ */
+ public static List<String> doubleListToStringList(List<Double> list,
+ int noOfDecimals)
+ {
+ // Honour the requested precision instead of a hard-coded "%.5f".
+ String format = "%." + noOfDecimals + "f";
+ List<String> strList = new ArrayList<>();
+ for (double value : list)
{
- String matchEmissionLine = i.toString() + " "; // adds node index
- matchEmissionLine += convertDoubleListToString(
- hmm.getMatchEmissions().get(i)); // adds match emissions
- matchEmissionLine += " "
- + hmm.getAlignmentColumnIndexes().get(i).toString(); // adds MAP
- // annotation
- matchEmissionLine += " "
- + hmm.getAnnotations().get(i).get("CONS").toString(); // adds CONS
- // annotation
- matchEmissionLine += " "
- + hmm.getAnnotations().get(i).get("RF").toString(); // adds RF
- // annotation
- matchEmissionLine += " "
- + hmm.getAnnotations().get(i).get("MM").toString(); // adds MM
- // annotation
- matchEmissionLine += " "
- + hmm.getAnnotations().get(i).get("CS").toString(); // adds CS
- // annotation
- writer.println(matchEmissionLine);
-
- writer.println(
- convertDoubleListToString(hmm.getInsertEmissions().get(i)));
- writer.println(
- convertDoubleListToString(hmm.getStateTransitions().get(i)));
- }
- writer.println("//");
+ String strValue;
+ if (value == Double.NEGATIVE_INFINITY)
+ {
+ strValue = "*";
+ }
+ else
+ {
+ // Locale.ROOT keeps '.' as the decimal separator on every machine;
+ // the default locale may use ',' instead.
+ strValue = String.format(java.util.Locale.ROOT, format, value);
+ }
- writer.close();
+ strList.add(strValue);
+ }
+ return strList;
}
- /**
- * converts an list of characters to a string with items separated by spaces
- *
- * @param list
- * character list to be converted
- * @return string value of char list
- */
- public String convertCharListToString(List<Character> list)
+ /**
+ * Copies the elements of a string array into a new list, preserving order.
+ *
+ * @param array
+ * strings to copy
+ * @return a new list holding the array's elements
+ */
+ public static List<String> stringArrayToStringList(String[] array)
{
- String string = "";
- for (Character item : list)
+ List<String> values = new ArrayList<>(array.length);
+ for (int i = 0; i < array.length; i++)
{
- string = string + item.toString() + " ";
+ values.add(array[i]);
}
- return string;
+ return values;
}
-
- /**
- * converts an list of doubles to a string with items separated by spaces
- *
- * @param list
- * double list to be converted
- * @return string value of double list
- */
- public String convertDoubleListToString(List<Double> list)
+
+ /**
+ * Appends the model section to the given builder: the "HMM" symbol header
+ * line, the transition-type header line and then, for every node from 0 to
+ * hmm.getLength() inclusive, a match emission line, an insert emission line
+ * and a state transition line.
+ *
+ * @param file
+ * builder the lines are appended to
+ */
+ void appendModel(StringBuilder file)
{
- String string = "";
- for (Double item : list)
+ // header line: "HMM" followed by one column per symbol
+ String symbolLine = "HMM";
+ List<Character> charSymbols = hmm.getSymbols();
+ List<String> strSymbols;
+ strSymbols = charListToStringList(charSymbols);
+ symbolLine += addData(11, 9, strSymbols);
+ file.append(symbolLine + NEW_LINE);
+
+ // second header line: one column per transition type
+ String transitionTypeLine = "";
+ List<String> transitionTypes;
+ transitionTypes = stringArrayToStringList(hmm.getTransitionTypes());
+ transitionTypeLine += addData(16, 9, transitionTypes);
+ file.append(transitionTypeLine + NEW_LINE);
+
+ int length = hmm.getLength();
+
+ // node 0 is included, so length + 1 nodes are written
+ for (int node = 0; node <= length; node++)
{
- if (item != null)
+ // the match line starts with "COMPO" for node 0, otherwise with the
+ // node number
+ String matchLine;
+ if (node == 0)
{
- string = string + item.toString() + " ";
+ matchLine = String.format("%7s", "COMPO");
}
else
{
- string = string + "*" + " ";
+ matchLine = String.format("%7s", node);
+ }
+
+ List<String> strMatches;
+ List<Double> doubleMatches;
+ doubleMatches = hmm.getNode(node).getMatchEmissions();
+ strMatches = doubleListToStringList(doubleMatches, 5);
+ matchLine += addData(10, 9, strMatches);
+
+
+ // annotation columns are written for every node except node 0
+ if (node != 0)
+ {
+ matchLine += SPACE + hmm.getNodeAlignmentColumn(node);
+ matchLine += SPACE + hmm.getConsensusResidue(node);
+ matchLine += SPACE + hmm.getReferenceAnnotation(node);
+ matchLine += SPACE + hmm.getMaskedValue(node);
+ matchLine += SPACE + hmm.getConsensusStructure(node);
+
}
+ file.append(matchLine + NEW_LINE);
+
+ // insert emission line for this node
+ String insertLine = EMPTY;
+ List<String> strInserts;
+ List<Double> doubleInserts;
+ doubleInserts = hmm.getNode(node).getInsertEmissions();
+ strInserts = doubleListToStringList(doubleInserts, 5);
+ insertLine += addData(17, 9, strInserts);
+
+ file.append(insertLine + NEW_LINE);
+
+ // state transition line for this node
+ String transitionLine = EMPTY;
+ List<String> strTransitions;
+ List<Double> doubleTransitions;
+ doubleTransitions = hmm.getNode(node).getStateTransitions();
+ strTransitions = doubleListToStringList(doubleTransitions, 5);
+ transitionLine += addData(17, 9, strTransitions);
+
+ file.append(transitionLine + NEW_LINE);
}
+ }
+
+ /**
+ * Appends the header section to the given builder: the file header line
+ * followed by one "KEY value" line per property. NAME, LENG, ALPH, RF, MM,
+ * CONS, CS and MAP are always written; the other properties are written
+ * only when their getter returns a non-null value. The three STATS LOCAL
+ * lines are written together when hmm.getMSV() is non-null.
+ *
+ * @param file
+ * builder the lines are appended to
+ */
+ void appendFileProperties(StringBuilder file)
+ {
+ final String propertyFormat = "%-5s %1s";
+ final String statsFormat = "%-19s %18s";
+
+ file.append(fileHeader + NEW_LINE);
+
+ appendFormattedLine(file, propertyFormat, "NAME", hmm.getName());
- return string;
+ if (hmm.getAccessionNumber() != null)
+ {
+ appendFormattedLine(file, propertyFormat, "ACC",
+ hmm.getAccessionNumber());
+ }
+ if (hmm.getDescription() != null)
+ {
+ appendFormattedLine(file, propertyFormat, "DESC",
+ hmm.getDescription());
+ }
+ appendFormattedLine(file, propertyFormat, "LENG", hmm.getLength());
+ if (hmm.getMaxInstanceLength() != null)
+ {
+ appendFormattedLine(file, propertyFormat, "MAXL",
+ hmm.getMaxInstanceLength());
+ }
+ appendFormattedLine(file, propertyFormat, "ALPH",
+ hmm.getAlphabetType());
+
+ // annotation flags are written exactly as stored in the file properties
+ appendFormattedLine(file, propertyFormat, "RF",
+ hmm.getFileProperties().get("RF"));
+ appendFormattedLine(file, propertyFormat, "MM",
+ hmm.getFileProperties().get("MM"));
+ appendFormattedLine(file, propertyFormat, "CONS",
+ hmm.getFileProperties().get("CONS"));
+ appendFormattedLine(file, propertyFormat, "CS",
+ hmm.getFileProperties().get("CS"));
+ appendFormattedLine(file, propertyFormat, "MAP",
+ hmm.getFileProperties().get("MAP"));
+
+ if (hmm.getDate() != null)
+ {
+ appendFormattedLine(file, propertyFormat, "DATE", hmm.getDate());
+ }
+ if (hmm.getNumberOfSequences() != null)
+ {
+ appendFormattedLine(file, propertyFormat, "NSEQ",
+ hmm.getNumberOfSequences());
+ }
+ if (hmm.getEffectiveNumberOfSequences() != null)
+ {
+ appendFormattedLine(file, propertyFormat, "EFFN",
+ hmm.getEffectiveNumberOfSequences());
+ }
+ if (hmm.getCheckSum() != null)
+ {
+ appendFormattedLine(file, propertyFormat, "CKSUM",
+ hmm.getCheckSum());
+ }
+ if (hmm.getGatheringThreshold() != null)
+ {
+ appendFormattedLine(file, propertyFormat, "GA",
+ hmm.getGatheringThreshold());
+ }
+ if (hmm.getTrustedCutoff() != null)
+ {
+ appendFormattedLine(file, propertyFormat, "TC",
+ hmm.getTrustedCutoff());
+ }
+ if (hmm.getNoiseCutoff() != null)
+ {
+ appendFormattedLine(file, propertyFormat, "NC",
+ hmm.getNoiseCutoff());
+ }
+
+ // the presence of the MSV statistic gates all three STATS lines
+ if (hmm.getMSV() != null)
+ {
+ appendFormattedLine(file, statsFormat, "STATS LOCAL MSV",
+ hmm.getMSV());
+ appendFormattedLine(file, statsFormat, "STATS LOCAL VITERBI",
+ hmm.getViterbi());
+ appendFormattedLine(file, statsFormat, "STATS LOCAL FORWARD",
+ hmm.getForward());
+ }
+ }
+
+ /**
+ * Formats key and value with the given two-argument format string and
+ * appends the result, followed by NEW_LINE, to the builder.
+ */
+ private void appendFormattedLine(StringBuilder file, String format,
+ String key, Object value)
+ {
+ file.append(String.format(format, key, value) + NEW_LINE);
+ }
+
+
+
+ /**
+ * Returns the first character of the given string.
+ *
+ * @param string
+ * text to read from; must contain at least one character
+ * @return the character at index 0
+ */
+ public static char charValue(String string)
+ {
+ return string.charAt(0);
}
}
+++ /dev/null
-package jalview.datamodel;
-
-import org.testng.annotations.Test;
-
-public class HiddenMarkovModelTest
-{
- HiddenMarkovModel hmm = new HiddenMarkovModel();
-
- @Test
- public void testGetGatheringThresholdGA1()
- {
- hmm.put("GA1", "10.1");
- // assertEquals(hmm.getGatheringThresholdGA1(), 10.1);
- }
-
-}
\ No newline at end of file
import static org.testng.Assert.assertEquals;
import static org.testng.Assert.assertNull;
+import jalview.datamodel.HMMNode;
+import jalview.datamodel.HiddenMarkovModel;
+
import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
public class HMMFileTest {
- HMMFile testFile = new HMMFile("H:/HMMERFile.txt");
-
- File file = new File("H:/HMMERFile.txt");
-
- HMMFile testFile2 = new HMMFile("H:/EmptyFile.txt");
-
- File file2 = new File("H:/EmptyFile.txt");
+ // Parser fixtures, one per sample .hmm file.
+ // NOTE(review): these are absolute paths on drive H:, so the tests can
+ // only find the files on a machine that has them there - consider
+ // moving the samples into the project's test resources.
+ HMMFile fn3 = new HMMFile("H:/fn3.hmm");
- HMMFile testFile3 = new HMMFile("H:/HMMERFile2.txt");
+ HMMFile emptyFile = new HMMFile("H:/EmptyFile.hmm");
- File file3 = new File("H:/HMMERFile2.txt");
+ HMMFile pKinase = new HMMFile("H:/Pkinase.hmm");
- HMMFile testFile4 = new HMMFile("H:/HMMERFile.txt");
-
- File file4 = new File("H:/HMMERFile.txt");
+ HMMFile made1 = new HMMFile("H:/MADE1.hmm");
@Test
public void testParse() throws IOException
{
- HMMFile integrationTestFile = new HMMFile("H:/HMMTutorialExample.hmm");
- integrationTestFile.parse();
-
- // file properties
- assertEquals(integrationTestFile.hmm.getName(), "MADE1");
- assertEquals(integrationTestFile.hmm.getAccessionNumber(),
- "DF0000629.2");
- assertEquals(integrationTestFile.hmm.getDescription(),
- "MADE1 (MAriner Derived Element 1), a TcMar-Mariner DNA transposon");
- assertEquals(integrationTestFile.hmm.getLength().intValue(), 80);
- assertEquals(integrationTestFile.hmm.getMaxInstanceLength().intValue(),
- 426);
- assertEquals(integrationTestFile.hmm.getAlphabetType(), "DNA");
- assertEquals(integrationTestFile.hmm.getReferenceAnnotationFlag(),
- true);
- assertEquals(integrationTestFile.hmm.getModelMaskedFlag(), false);
- assertEquals(
- integrationTestFile.hmm.getConsensusResidueAnnotationFlag(),
+
+ // parses the whole Pkinase sample file, then spot-checks the header
+ // properties, the symbol list and individual node values
+ pKinase.parse();
+
+ assertEquals(pKinase.hmm.getName(), "Pkinase");
+ assertEquals(pKinase.hmm.getAccessionNumber(), "PF00069.17");
+ assertEquals(pKinase.hmm.getDescription(), "Protein kinase domain");
+ assertEquals(pKinase.hmm.getLength().intValue(), 260);
+ assertNull(pKinase.hmm.getMaxInstanceLength());
+ assertEquals(pKinase.hmm.getAlphabetType(), "amino");
+ assertEquals(pKinase.hmm.referenceAnnotationIsActive(), false);
+ assertEquals(pKinase.hmm.maskValueIsActive(), false);
+ assertEquals(pKinase.hmm.consensusResidueIsActive(), true);
+ assertEquals(pKinase.hmm.consensusStructureIsActive(),
true);
- assertEquals(
- integrationTestFile.hmm.getConsensusStructureAnnotationFlag(),
- false);
- assertEquals(integrationTestFile.hmm.getMapAnnotationFlag(), true);
- assertEquals(integrationTestFile.hmm.getDate(),
- "Tue Feb 19 20:33:41 2013");
- assertNull(integrationTestFile.hmm.getCommandLineLog());
- assertEquals(integrationTestFile.hmm.getSequenceNumber().intValue(),
- 1997);
- assertEquals(integrationTestFile.hmm.getEffectiveSequenceNumber(),
- 3.911818, 4d);
- assertEquals(integrationTestFile.hmm.getCheckSum().longValue(),
- 3015610723l);
- assertNull(integrationTestFile.hmm.getGatheringThreshold1());
- assertNull(integrationTestFile.hmm.getGatheringThreshold2());
- assertNull(integrationTestFile.hmm.getTrustedCutoff1());
- assertNull(integrationTestFile.hmm.getTrustedCutoff2());
- assertNull(integrationTestFile.hmm.getNoiseCutoff1());
- assertNull(integrationTestFile.hmm.getNoiseCutoff2());
- assertEquals(integrationTestFile.hmm.getSlopeOfDistribution("MSV"),
- -8.5786, 4d);
- assertEquals(integrationTestFile.hmm.getSlopeOfDistribution("VITERBI"),
- -9.3632, 4d);
- assertEquals(integrationTestFile.hmm.getSlopeOfDistribution("FORWARD"),
- -3.4823, 4d);
- assertEquals(integrationTestFile.hmm.getLocationOfDistribution("MSV"),
- 0.71858, 4d);
- assertEquals(
- integrationTestFile.hmm.getLocationOfDistribution("VITERBI"),
- 0.71858, 4d);
- assertEquals(
- integrationTestFile.hmm.getLocationOfDistribution("FORWARD"),
- 0.71858, 4d);
+ assertEquals(pKinase.hmm.mapIsActive(), true);
+ assertEquals(pKinase.hmm.getDate(), "Thu Jun 16 11:44:06 2011");
+ assertNull(pKinase.hmm.getCommandLineLog());
+ assertEquals(pKinase.hmm.getNumberOfSequences().intValue(), 54);
+ // The third argument is an absolute tolerance, not a number of decimal
+ // places: 4d would accept any value within 4.0 of the expected one.
+ assertEquals(pKinase.hmm.getEffectiveNumberOfSequences(), 3.358521,
+ 0.0001d);
+ assertEquals(pKinase.hmm.getCheckSum().longValue(), 3106786190l);
+ assertEquals(pKinase.hmm.getGatheringThreshold(), "70.30 70.30");
+ assertEquals(pKinase.hmm.getTrustedCutoff(), "70.30 70.30");
+ assertEquals(pKinase.hmm.getNoiseCutoff(), "70.20 70.20");
List<Character> symbols = new ArrayList<>();
symbols.add('A');
symbols.add('C');
+ symbols.add('D');
+ symbols.add('E');
+ symbols.add('F');
symbols.add('G');
+ symbols.add('H');
+ symbols.add('I');
+ symbols.add('K');
+ symbols.add('L');
+ symbols.add('M');
+ symbols.add('N');
+ symbols.add('P');
+ symbols.add('Q');
+ symbols.add('R');
+ symbols.add('S');
symbols.add('T');
-
- assertEquals(integrationTestFile.hmm.getSymbols(), symbols);
-
- List<Double> averages = new ArrayList<>();
- averages.add(1.24257);
- averages.add(1.59430);
- averages.add(1.62906);
- averages.add(1.16413);
-
- assertEquals(integrationTestFile.hmm
- .getAverageMatchStateEmissionProbabilities(), averages);
-
- assertEquals(integrationTestFile.hmm.getInsertZeroEmissions().get(2),
- 1.38629);
- assertEquals(integrationTestFile.hmm.getInsertZeroEmissions().get(3),
- 1.38629);
-
- assertEquals(integrationTestFile.hmm.getBeginStateTransitions().get(1),
- 3.94183);
- assertEquals(integrationTestFile.hmm.getBeginStateTransitions().get(4),
- 0.26236);
-
- assertEquals(integrationTestFile.hmm.getMatchEmissions().get(1).get(1),
- 2.37873);
- assertEquals(integrationTestFile.hmm.getMatchEmissions().get(8).get(0),
- 2.16916);
- assertEquals(integrationTestFile.hmm.getMatchEmissions().get(12).get(2),
- 2.32214);
- assertEquals(integrationTestFile.hmm.getMatchEmissions().get(43).get(3),
- 2.60783);
- assertEquals(integrationTestFile.hmm.getMatchEmissions().get(54).get(2),
- 2.46442);
- assertEquals(integrationTestFile.hmm.getMatchEmissions().get(23).get(2),
- 2.50691);
- assertEquals(integrationTestFile.hmm.getMatchEmissions().get(56).get(1),
- 2.32720);
- assertEquals(integrationTestFile.hmm.getMatchEmissions().get(65).get(0),
- 2.79349);
- assertEquals(integrationTestFile.hmm.getMatchEmissions().get(21).get(0),
- 2.54484);
- assertEquals(integrationTestFile.hmm.getMatchEmissions().get(79).get(3),
- 2.88183);
- assertEquals(integrationTestFile.hmm.getMatchEmissions().get(76).get(3),
- 1.84373);
-
- assertEquals(integrationTestFile.hmm.getInsertEmissions().get(23).get(0),
- 1.35803);
- assertEquals(integrationTestFile.hmm.getInsertEmissions().get(54).get(3),
- 1.46331);
- assertEquals(integrationTestFile.hmm.getInsertEmissions().get(65).get(3),
- 1.39101);
- assertEquals(integrationTestFile.hmm.getInsertEmissions().get(57).get(2),
- 1.38112);
- assertEquals(integrationTestFile.hmm.getInsertEmissions().get(42).get(1),
- 1.58747);
- assertEquals(integrationTestFile.hmm.getInsertEmissions().get(12).get(3),
- 1.38740);
- assertEquals(integrationTestFile.hmm.getInsertEmissions().get(6).get(1),
- 1.38524);
- assertEquals(integrationTestFile.hmm.getInsertEmissions().get(59).get(0),
- 1.03649);
- assertEquals(integrationTestFile.hmm.getInsertEmissions().get(78).get(0),
- 1.38629);
- assertEquals(integrationTestFile.hmm.getInsertEmissions().get(17).get(2),
- 1.39937);
- assertEquals(integrationTestFile.hmm.getInsertEmissions().get(0).get(2),
- 1.38629);
-
- assertEquals(
- integrationTestFile.hmm.getStateTransitions().get(13).get(1),
- 4.02482);
- assertEquals(
- integrationTestFile.hmm.getStateTransitions().get(64).get(2),
- 4.03073);
- assertEquals(
- integrationTestFile.hmm.getStateTransitions().get(45).get(6),
- 0.42814);
- assertEquals(
- integrationTestFile.hmm.getStateTransitions().get(71).get(4),
- 0.28542);
- assertEquals(
- integrationTestFile.hmm.getStateTransitions().get(32).get(5),
- 1.18729);
- assertEquals(
- integrationTestFile.hmm.getStateTransitions().get(9).get(0),
- 0.03536);
- assertEquals(
- integrationTestFile.hmm.getStateTransitions().get(0).get(3),
- 1.46634);
- assertEquals(
- integrationTestFile.hmm.getStateTransitions().get(31).get(6),
- 0.44749);
- assertNull(
- integrationTestFile.hmm.getStateTransitions().get(79).get(2));
- assertEquals(
- integrationTestFile.hmm.getStateTransitions().get(3).get(1),
- 4.05203);
- assertEquals(
- integrationTestFile.hmm.getStateTransitions().get(16).get(4),
- 0.26771);
-
- assertEquals(integrationTestFile.hmm.getAlignmentColumnIndexes().get(0)
- .intValue(), 1);
- assertEquals(integrationTestFile.hmm.getAlignmentColumnIndexes().get(9)
- .intValue(), 18);
- assertEquals(integrationTestFile.hmm.getAlignmentColumnIndexes().get(12)
- .intValue(), 28);
- assertEquals(integrationTestFile.hmm.getAlignmentColumnIndexes().get(56)
- .intValue(), 999);
- assertEquals(integrationTestFile.hmm.getAlignmentColumnIndexes().get(79)
- .intValue(), 1112);
-
- assertEquals(integrationTestFile.hmm.getAnnotations().get(0).get("RF")
- .charValue(), 'x');
- assertEquals(integrationTestFile.hmm.getAnnotations().get(3).get("CS")
- .charValue(), '-');
- assertEquals(integrationTestFile.hmm.getAnnotations().get(65)
- .get("CONS").charValue(), 't');
- assertEquals(integrationTestFile.hmm.getAnnotations().get(23).get("MM")
- .charValue(), '-');
- assertEquals(integrationTestFile.hmm.getAnnotations().get(56).get("MM")
- .charValue(), '-');
- assertEquals(integrationTestFile.hmm.getAnnotations().get(76).get("RF")
- .charValue(), 'x');
- assertEquals(integrationTestFile.hmm.getAnnotations().get(79)
- .get("CONS").charValue(), 'a');
+ symbols.add('V');
+ symbols.add('W');
+ symbols.add('Y');
+
+ assertEquals(pKinase.hmm.getSymbols(), symbols);
+
+ assertEquals(pKinase.hmm.getMatchEmission(0, 19), 3.43274);
+ assertEquals(pKinase.hmm.getMatchEmission(12, 12), 4.33979);
+ assertEquals(pKinase.hmm.getMatchEmission(23, 7), 3.65600);
+ assertEquals(pKinase.hmm.getMatchEmission(54, 1), 4.76187);
+ assertEquals(pKinase.hmm.getMatchEmission(79, 0), 2.81579);
+ assertEquals(pKinase.hmm.getMatchEmission(100, 0), 1.86496);
+ assertEquals(pKinase.hmm.getMatchEmission(112, 14), 2.77179);
+ assertEquals(pKinase.hmm.getMatchEmission(143, 17), 5.10478);
+ assertEquals(pKinase.hmm.getMatchEmission(156, 4), 4.69372);
+ assertEquals(pKinase.hmm.getMatchEmission(178, 3), 2.52594);
+ assertEquals(pKinase.hmm.getMatchEmission(210, 2), 4.23598);
+ assertEquals(pKinase.hmm.getMatchEmission(260, 19), 3.81122);
+
+ assertEquals(pKinase.hmm.getInsertEmission(2, 1), 4.42225);
+ assertEquals(pKinase.hmm.getInsertEmission(15, 6), 3.72501);
+ assertEquals(pKinase.hmm.getInsertEmission(22, 9), 2.69355);
+ assertEquals(pKinase.hmm.getInsertEmission(57, 2), 2.77519);
+ assertEquals(pKinase.hmm.getInsertEmission(62, 14), 2.89801);
+ assertEquals(pKinase.hmm.getInsertEmission(95, 17), 2.98532);
+ assertEquals(pKinase.hmm.getInsertEmission(105, 4), 3.46354);
+ assertEquals(pKinase.hmm.getInsertEmission(134, 1), 4.42225);
+ assertEquals(pKinase.hmm.getInsertEmission(143, 0), 2.68618);
+ assertEquals(pKinase.hmm.getInsertEmission(152, 16), 2.77519);
+ assertEquals(pKinase.hmm.getInsertEmission(203, 16), 2.77519);
+ assertEquals(pKinase.hmm.getInsertEmission(255, 12), 2.73739);
+
+ assertEquals(pKinase.hmm.getStateTransition(0, 6),
+ Double.NEGATIVE_INFINITY);
+ assertEquals(pKinase.hmm.getStateTransition(3, 6), 0.95510);
+ assertEquals(pKinase.hmm.getStateTransition(29, 3), 0.61958);
+ assertEquals(pKinase.hmm.getStateTransition(46, 4), 0.77255);
+ assertEquals(pKinase.hmm.getStateTransition(53, 1), 5.01631);
+ assertEquals(pKinase.hmm.getStateTransition(79, 2), 5.73865);
+ assertEquals(pKinase.hmm.getStateTransition(101, 2), 5.73865);
+ assertEquals(pKinase.hmm.getStateTransition(120, 5), 0.48576);
+ assertEquals(pKinase.hmm.getStateTransition(146, 5), 0.70219);
+ assertEquals(pKinase.hmm.getStateTransition(169, 3), 1.23224);
+ assertEquals(pKinase.hmm.getStateTransition(209, 0), 0.01003);
+ assertEquals(pKinase.hmm.getStateTransition(243, 1), 5.01631);
+
+ assertEquals(pKinase.hmm.getNodeAlignmentColumn(3).intValue(), 3);
+ assertEquals(pKinase.hmm.getReferenceAnnotation(7), '-');
+ assertEquals(pKinase.hmm.getConsensusResidue(23), 't');
+ assertEquals(pKinase.hmm.getMaskedValue(30), '-');
+ assertEquals(pKinase.hmm.getConsensusStructure(56), 'S');
+
+ assertEquals(pKinase.hmm.getNodeAlignmentColumn(78).intValue(), 136);
+ assertEquals(pKinase.hmm.getReferenceAnnotation(93), '-');
+ assertEquals(pKinase.hmm.getConsensusResidue(145), 'a');
+ assertEquals(pKinase.hmm.getMaskedValue(183), '-');
+ assertEquals(pKinase.hmm.getConsensusStructure(240), 'H');
}
@Test
public void testParseFileProperties() throws IOException
{
- FileReader fr = new FileReader(file);
+ // 1) fn3 sample file: every header property is checked
+ FileReader fr = new FileReader(fn3.getDataObject());
BufferedReader br = new BufferedReader(fr);
- testFile.parseFileProperties(br);
+ fn3.parseFileProperties(br);
+ HiddenMarkovModel testHMM = fn3.getHmm();
br.close();
fr.close();
- assertEquals(testFile.hmm.getName(), "fn3");
- assertEquals(testFile.hmm.getAccessionNumber(), "PF00041.13");
- assertEquals(testFile.hmm.getDescription(),
+
+ assertEquals(testHMM.getName(), "fn3");
+ assertEquals(testHMM.getAccessionNumber(), "PF00041.13");
+ assertEquals(testHMM.getDescription(),
"Fibronectin type III domain");
- assertEquals(testFile.hmm.getLength().intValue(), 4);
- assertNull(testFile.hmm.getMaxInstanceLength());
- assertEquals(testFile.hmm.getAlphabetType(), "amino");
- assertEquals(testFile.hmm.getReferenceAnnotationFlag(), false);
- assertEquals(testFile.hmm.getModelMaskedFlag(), false);
- assertEquals(testFile.hmm.getConsensusResidueAnnotationFlag(), true);
- assertEquals(testFile.hmm.getConsensusStructureAnnotationFlag(), true);
- assertEquals(testFile.hmm.getMapAnnotationFlag(), true);
- assertEquals(testFile.hmm.getDate(), "Fri Feb 15 06:04:13 2013");
- assertNull(testFile.hmm.getCommandLineLog());
- assertEquals(testFile.hmm.getSequenceNumber().intValue(), 106);
- assertEquals(testFile.hmm.getEffectiveSequenceNumber(), 11.415833, 4d);
- assertEquals(testFile.hmm.getCheckSum().longValue(), 3564431818l);
- assertEquals(testFile.hmm.getGatheringThreshold1(), 8.00, 2d);
- assertEquals(testFile.hmm.getGatheringThreshold2(), 7.20, 2d);
- assertEquals(testFile.hmm.getTrustedCutoff1(), 8.00, 2d);
- assertEquals(testFile.hmm.getTrustedCutoff2(), 7.20, 2d);
- assertEquals(testFile.hmm.getNoiseCutoff1(), 7.90, 2d);
- assertEquals(testFile.hmm.getNoiseCutoff2(), 7.90, 2d);
- assertEquals(testFile.hmm.getSlopeOfDistribution("MSV"), -9.4043, 4d);
- assertEquals(testFile.hmm.getSlopeOfDistribution("VITERBI"), -9.7737,
- 4d);
- assertEquals(testFile.hmm.getSlopeOfDistribution("FORWARD"), -3.8341,
- 4d);
- assertEquals(testFile.hmm.getLocationOfDistribution("MSV"), 0.71847,
- 4d);
- assertEquals(testFile.hmm.getLocationOfDistribution("VITERBI"), 0.71847,
- 4d);
- assertEquals(testFile.hmm.getLocationOfDistribution("FORWARD"), 0.71847,
- 4d);
-
- FileReader fr2 = new FileReader(file2);
+ assertEquals(testHMM.getLength().intValue(), 86);
+ assertNull(testHMM.getMaxInstanceLength());
+ assertEquals(testHMM.getAlphabetType(), "amino");
+ assertEquals(testHMM.referenceAnnotationIsActive(), false);
+ assertEquals(testHMM.maskValueIsActive(), false);
+ assertEquals(testHMM.consensusResidueIsActive(), true);
+ assertEquals(testHMM.consensusStructureIsActive(), true);
+ assertEquals(testHMM.mapIsActive(), true);
+ assertEquals(testHMM.getDate(), "Fri Jun 20 08:22:31 2014");
+ assertNull(testHMM.getCommandLineLog());
+ assertEquals(testHMM.getNumberOfSequences().intValue(), 106);
+ // The third argument is an absolute tolerance, not a number of decimal
+ // places: 4d would accept any value within 4.0 of the expected one.
+ assertEquals(testHMM.getEffectiveNumberOfSequences(), 11.415833,
+ 0.0001d);
+ assertEquals(testHMM.getCheckSum().longValue(), 3564431818l);
+ assertEquals(testHMM.getGatheringThreshold(), "8.00 7.20");
+ assertEquals(testHMM.getTrustedCutoff(), "8.00 7.20");
+ assertEquals(testHMM.getNoiseCutoff(), "7.90 7.90");
+ assertEquals(testHMM.getViterbi(), "-9.7737 0.71847");
+ assertEquals(testHMM.getMSV(), "-9.4043 0.71847");
+ assertEquals(testHMM.getForward(), "-3.8341 0.71847");
+
+ // 2) empty sample file: every property stays unset
+ FileReader fr2 = new FileReader(emptyFile.getDataObject());
BufferedReader br2 = new BufferedReader(fr2);
- testFile2.parseFileProperties(br2);
+ emptyFile.parseFileProperties(br2);
+ testHMM = emptyFile.getHmm();
br2.close();
fr2.close();
- assertNull(testFile2.hmm.getName());
- assertNull(testFile2.hmm.getAccessionNumber());
- assertNull(testFile2.hmm.getDescription());
- assertNull(testFile2.hmm.getLength());
- assertNull(testFile2.hmm.getMaxInstanceLength());
- assertNull(testFile2.hmm.getAlphabetType());
- assertEquals(testFile2.hmm.getReferenceAnnotationFlag(), false);
- assertEquals(testFile2.hmm.getModelMaskedFlag(), false);
- assertEquals(testFile2.hmm.getConsensusResidueAnnotationFlag(), false);
- assertEquals(testFile2.hmm.getConsensusStructureAnnotationFlag(),
+ assertNull(testHMM.getName());
+ assertNull(testHMM.getAccessionNumber());
+ assertNull(testHMM.getDescription());
+ assertNull(testHMM.getLength());
+ assertNull(testHMM.getMaxInstanceLength());
+ assertNull(testHMM.getAlphabetType());
+ assertEquals(testHMM.referenceAnnotationIsActive(), false);
+ assertEquals(testHMM.maskValueIsActive(), false);
+ assertEquals(testHMM.consensusResidueIsActive(), false);
+ assertEquals(testHMM.consensusStructureIsActive(),
false);
- assertEquals(testFile2.hmm.getMapAnnotationFlag(), false);
- assertNull(testFile2.hmm.getDate());
- assertNull(testFile2.hmm.getCommandLineLog());
- assertNull(testFile2.hmm.getSequenceNumber());
- assertNull(testFile2.hmm.getEffectiveSequenceNumber());
- assertNull(testFile2.hmm.getCheckSum());
- assertNull(testFile2.hmm.getGatheringThreshold1());
- assertNull(testFile2.hmm.getGatheringThreshold2());
- assertNull(testFile2.hmm.getTrustedCutoff1());
- assertNull(testFile2.hmm.getTrustedCutoff2());
- assertNull(testFile2.hmm.getNoiseCutoff1());
- assertNull(testFile2.hmm.getNoiseCutoff2());
- assertNull(testFile2.hmm.getSlopeOfDistribution("MSV"));
- assertNull(testFile2.hmm.getSlopeOfDistribution("VITERBI"));
- assertNull(testFile2.hmm.getSlopeOfDistribution("FORWARD"));
- assertNull(testFile2.hmm.getLocationOfDistribution("MSV"));
- assertNull(testFile2.hmm.getLocationOfDistribution("VITERBI"));
- assertNull(testFile2.hmm.getLocationOfDistribution("FORWARD"));
-
- FileReader fr3 = new FileReader(file3);
+ assertEquals(testHMM.mapIsActive(), false);
+ assertNull(testHMM.getDate());
+ assertNull(testHMM.getCommandLineLog());
+ assertNull(testHMM.getNumberOfSequences());
+ assertNull(testHMM.getEffectiveNumberOfSequences());
+ assertNull(testHMM.getCheckSum());
+ // each cutoff is a single string property, so one check per getter
+ assertNull(testHMM.getGatheringThreshold());
+ assertNull(testHMM.getTrustedCutoff());
+ assertNull(testHMM.getNoiseCutoff());
+ assertNull(testHMM.getViterbi());
+ assertNull(testHMM.getMSV());
+ assertNull(testHMM.getForward());
+
+ // 3) MADE1 sample file: DNA alphabet with a maximum instance length
+ FileReader fr3 = new FileReader(made1.getDataObject());
BufferedReader br3 = new BufferedReader(fr3);
- testFile3.parseFileProperties(br3);
+ made1.parseFileProperties(br3);
+ testHMM = made1.getHmm();
br3.close();
fr3.close();
- assertEquals(testFile3.hmm.getName(), "th4");
- assertEquals(testFile3.hmm.getAccessionNumber(), "PF99041.16");
- assertEquals(testFile3.hmm.getDescription(),
- "Fibronectin type I domain");
- assertEquals(testFile3.hmm.getLength().intValue(), 10);
- assertEquals(testFile3.hmm.getMaxInstanceLength().intValue(), 6);
- assertEquals(testFile3.hmm.getAlphabetType(), "amino");
- assertEquals(testFile3.hmm.getReferenceAnnotationFlag(), true);
- assertEquals(testFile3.hmm.getModelMaskedFlag(), false);
- assertEquals(testFile3.hmm.getConsensusResidueAnnotationFlag(), false);
- assertEquals(testFile3.hmm.getConsensusStructureAnnotationFlag(),
- false);
- assertEquals(testFile3.hmm.getMapAnnotationFlag(), false);
- assertEquals(testFile3.hmm.getDate(), "Tue Jan 01 11:02:59 2000");
- assertEquals(testFile3.hmm.getCommandLineLog(), "this is the log");
- assertEquals(testFile3.hmm.getSequenceNumber().intValue(), 567);
- assertEquals(testFile3.hmm.getEffectiveSequenceNumber(), 15.964683, 4d);
- assertEquals(testFile3.hmm.getCheckSum().longValue(), 9485949654l);
- assertEquals(testFile3.hmm.getGatheringThreshold1(), 6.40, 2d);
- assertEquals(testFile3.hmm.getGatheringThreshold2(), 7.20, 2d);
- assertEquals(testFile3.hmm.getTrustedCutoff1(), 2.40, 2d);
- assertEquals(testFile3.hmm.getTrustedCutoff2(), 7.00, 2d);
- assertNull(testFile3.hmm.getNoiseCutoff1());
- assertNull(testFile3.hmm.getNoiseCutoff2());
- assertNull(testFile3.hmm.getSlopeOfDistribution("MSV"));
- assertNull(testFile3.hmm.getSlopeOfDistribution("VITERBI"));
- assertNull(testFile3.hmm.getSlopeOfDistribution("FORWARD"));
- assertNull(testFile3.hmm.getLocationOfDistribution("MSV"));
- assertNull(testFile3.hmm.getLocationOfDistribution("VITERBI"));
- assertNull(testFile3.hmm.getLocationOfDistribution("FORWARD"));
- }
-
- /**
- * @Test public void testParseModel() throws IOException { HiddenMarkovModel
- * hmm = new HiddenMarkovModel(); HMMFile testFile = new HMMFile(hmm,
- * "H:/HMMERFile.txt"); File file = new File("H:/HMMERFile.txt");
- * FileReader fr = new FileReader(file); BufferedReader br = new
- * BufferedReader(fr); testFile.parseFileProperties(br);
- * testFile.parseModel(br); br.close(); fr.close();
- *
- * }
- **/
-
- @Test
- public void testGetTransitionType()
- {
-
- assertEquals(testFile.getTransitionType("mm").intValue(), 0);
- assertEquals(testFile.getTransitionType("mi").intValue(), 1);
- assertEquals(testFile.getTransitionType("md").intValue(), 2);
- assertEquals(testFile.getTransitionType("im").intValue(), 3);
- assertEquals(testFile.getTransitionType("ii").intValue(), 4);
- assertEquals(testFile.getTransitionType("dm").intValue(), 5);
- assertEquals(testFile.getTransitionType("dd").intValue(), 6);
- assertNull(testFile.getTransitionType("df"));
-
- }
-
- @Test
- public void testReadStats()
- {
- Scanner scanner = new Scanner("LOCAL MSV 5.6943 6.2313");
- testFile.readStats(scanner);
- assertEquals(testFile.hmm.getEValueStatistics().get("MSV")
- .getAlignmentModeConfiguration(), "LOCAL");
- assertEquals(
- testFile.hmm.getEValueStatistics().get("MSV")
- .getSlopeOfDistribution(),
- 5.6943, 4d);
- assertEquals(testFile.hmm.getEValueStatistics().get("MSV")
- .getLocationOfDistribution(), 6.2313, 4d);
- scanner.close();
-
- Scanner scanner2 = new Scanner("GLOBAL VITERBI 3 -0.234");
- testFile.readStats(scanner2);
- assertEquals(testFile.hmm.getEValueStatistics().get("VITERBI")
- .getAlignmentModeConfiguration(), "GLOBAL");
- assertEquals(testFile.hmm.getEValueStatistics().get("VITERBI")
- .getSlopeOfDistribution(), 3, 2d);
- assertEquals(testFile.hmm.getEValueStatistics().get("VITERBI")
- .getLocationOfDistribution(), -0.234, 4d);
- scanner.close();
+ assertEquals(testHMM.getName(), "MADE1");
+ assertEquals(testHMM.getAccessionNumber(), "DF0000629.2");
+ assertEquals(testHMM.getDescription(),
+ "MADE1 (MAriner Derived Element 1), a TcMar-Mariner DNA transposon");
+ assertEquals(testHMM.getLength().intValue(), 80);
+ assertEquals(testHMM.getMaxInstanceLength().intValue(), 426);
+ assertEquals(testHMM.getAlphabetType(), "DNA");
+ assertEquals(testHMM.referenceAnnotationIsActive(), true);
+ assertEquals(testHMM.maskValueIsActive(), false);
+ assertEquals(testHMM.consensusResidueIsActive(), true);
+ assertEquals(testHMM.consensusStructureIsActive(), false);
+ assertEquals(testHMM.mapIsActive(), true);
+ assertEquals(testHMM.getDate(), "Tue Feb 19 20:33:41 2013");
+ assertNull(testHMM.getCommandLineLog());
+ assertEquals(testHMM.getNumberOfSequences().intValue(), 1997);
+ // absolute tolerance again; see the note on the fn3 assertion
+ assertEquals(testHMM.getEffectiveNumberOfSequences(), 3.911818,
+ 0.0001d);
+ assertEquals(testHMM.getCheckSum().longValue(), 3015610723l);
+ assertEquals(testHMM.getGatheringThreshold(), "2.324 4.234");
+ assertEquals(testHMM.getTrustedCutoff(), "2.343 1.212");
+ assertEquals(testHMM.getNoiseCutoff(), "2.354 5.456");
+ assertEquals(testHMM.getViterbi(), "-9.3632 0.71858");
+ assertEquals(testHMM.getMSV(), "-8.5786 0.71858");
+ assertEquals(testHMM.getForward(), "-3.4823 0.71858");
}
+ /**
+ * Verifies the index that getTransitionType reports for each two-letter
+ * transition code, and that an unrecognised code yields null.
+ */
@Test
- public void testParseBeginNodeData() throws IOException
+ public void testGetTransitionType()
{
- FileReader fr = new FileReader(file4);
- BufferedReader br = new BufferedReader(fr);
- for (int i = 0; i < 24; i++)
- {
- br.readLine(); // this is done to reach the begin node
- // data in the file
- }
- testFile4.hmm.fillSymbols("HMM A B C D E F G H I");
- testFile4.parseBeginNodeData(br);
- ArrayList<Double> emissions = new ArrayList<>();
- ArrayList<Double> transitions = new ArrayList<>();
-
- emissions.add(2.68618);
- emissions.add(4.42225);
- emissions.add(2.77519);
- emissions.add(2.73123);
- emissions.add(3.46354);
- emissions.add(2.40513);
- emissions.add(3.72494);
- emissions.add(3.29354);
- emissions.add(3.61503);
-
- transitions.add(0.00338);
- transitions.add(6.08833);
- transitions.add(6.81068);
- transitions.add(0.61958);
- transitions.add(0.77255);
- transitions.add(0.00000);
- transitions.add(null);
-
- assertEquals(testFile4.hmm.getInsertZeroEmissions(), emissions);
- assertEquals(testFile4.hmm.getBeginStateTransitions(), transitions);
+ // the position of a code in this array is the index expected for it
+ String[] transitionCodes = { "mm", "mi", "md", "im", "ii", "dm", "dd" };
+ for (int expected = 0; expected < transitionCodes.length; expected++)
+ {
+ assertEquals(
+ fn3.getTransitionType(transitionCodes[expected]).intValue(),
+ expected);
+ }
+ // a code outside the table is reported as null
+ assertNull(fn3.getTransitionType("df"));
}
filledArray.add(35.3523645);
filledArray.add(12345.3564);
filledArray.add(1.4);
+
assertEquals(HMMFile.fillList(scanner2, 5), filledArray);
scanner2.close();
+ /**
+ * Parses the model section of the file behind made1 and spot-checks match
+ * emissions, insert emissions and state transitions of the resulting model.
+ *
+ * @throws IOException
+ * if the file cannot be opened or read
+ */
@Test
public void testParseModel() throws IOException
{
- FileReader fr = new FileReader(file);
+ FileReader fr = new FileReader(made1.getDataObject());
BufferedReader br = new BufferedReader(fr);
- for (int i = 0; i < 23; i++)
+ HiddenMarkovModel testHMM;
+ try
+ {
+ // skip the first 24 lines so the reader sits where parseModel starts
+ for (int i = 0; i < 24; i++)
{
- br.readLine(); // this is done to reach the begin node
- // data in the file
+ br.readLine();
}
+ made1.parseModel(br);
+ testHMM = made1.getHmm();
+ }
+ finally
+ {
+ // runs even when reading or parsing throws; closing the
+ // BufferedReader also closes the wrapped FileReader
+ br.close();
+ }
+
+ assertEquals(testHMM.getMatchEmission(0, 2), 1.62906);
+ assertEquals(testHMM.getMatchEmission(2, 1), 2.37873);
+ assertEquals(testHMM.getMatchEmission(12, 2), 2.61355);
+ assertEquals(testHMM.getMatchEmission(26, 0), 1.86925);
+ assertEquals(testHMM.getMatchEmission(32, 3), 2.58263);
+ assertEquals(testHMM.getMatchEmission(59, 3), 2.20507);
+ assertEquals(testHMM.getMatchEmission(63, 0), 0.41244);
+ assertEquals(testHMM.getMatchEmission(69, 1), 3.17398);
+ assertEquals(testHMM.getMatchEmission(76, 2), 2.65861);
+
+ assertEquals(testHMM.getInsertEmission(0, 1), 1.38629);
+ assertEquals(testHMM.getInsertEmission(1, 2), 1.38629);
+ assertEquals(testHMM.getInsertEmission(31, 3), 1.28150);
+ assertEquals(testHMM.getInsertEmission(43, 0), 1.32290);
+ assertEquals(testHMM.getInsertEmission(48, 2), 1.52606);
+ assertEquals(testHMM.getInsertEmission(52, 1), 1.62259);
+ assertEquals(testHMM.getInsertEmission(67, 0), 1.38141);
+ assertEquals(testHMM.getInsertEmission(70, 3), 1.38629);
+ assertEquals(testHMM.getInsertEmission(80, 3), 1.38629);
+
+ assertEquals(testHMM.getStateTransition(2, 0), 0.03725);
+ assertEquals(testHMM.getStateTransition(6, 1), 3.89715);
+ assertEquals(testHMM.getStateTransition(9, 3), 1.38021);
+ assertEquals(testHMM.getStateTransition(20, 4), 0.23815);
+ assertEquals(testHMM.getStateTransition(34, 6), 0.33363);
+ assertEquals(testHMM.getStateTransition(46, 5), 1.05474);
+ assertEquals(testHMM.getStateTransition(57, 6), 0.31164);
+ assertEquals(testHMM.getStateTransition(68, 2), 3.99242);
+ assertEquals(testHMM.getStateTransition(80, 6),
+ Double.NEGATIVE_INFINITY);
- testFile.parseModel(br);
- assertEquals(testFile.hmm.getMatchEmissions().get(0).get(0), 3.16986);
- assertEquals(testFile.hmm.getMatchEmissions().get(0).get(3), 3.29953);
- assertEquals(testFile.hmm.getMatchEmissions().get(1).get(2), 2.24744);
- assertEquals(testFile.hmm.getMatchEmissions().get(1).get(8), 4.25623);
- assertEquals(testFile.hmm.getMatchEmissions().get(2).get(5), 3.48010);
- assertEquals(testFile.hmm.getMatchEmissions().get(2).get(6), 4.51877);
- assertEquals(testFile.hmm.getMatchEmissions().get(3).get(4), 5.26587);
- assertEquals(testFile.hmm.getMatchEmissions().get(3).get(8), 4.99111);
- assertEquals(testFile.hmm.getInsertEmissions().get(0).get(3), 2.73088);
- assertEquals(testFile.hmm.getInsertEmissions().get(0).get(6), 3.72505);
- assertEquals(testFile.hmm.getInsertEmissions().get(1).get(2), 2.77519);
- assertEquals(testFile.hmm.getInsertEmissions().get(1).get(8), 3.61503);
- assertEquals(testFile.hmm.getInsertEmissions().get(2).get(0), 2.68618);
- assertEquals(testFile.hmm.getInsertEmissions().get(2).get(8), 3.61503);
- assertEquals(testFile.hmm.getInsertEmissions().get(3).get(2), 2.77519);
- assertEquals(testFile.hmm.getInsertEmissions().get(3).get(3), 2.73123);
}
+ /**
+ * Exercises parseAnnotations on two annotation lines: one parsed with every
+ * annotation flag switched on, and one parsed with the MAP, consensus
+ * structure and masked value flags switched off.
+ */
@Test
public void testParseAnnotations()
{
- testFile4.hmm.setMapAnnotationFlag(true);
- Scanner scanner = new Scanner("1 t - - -");
- testFile4.parseAnnotations(scanner, 0);
-
- assertEquals(
- testFile4.hmm.getAlignmentColumnIndexes().get(0).intValue(), 1);
- assertEquals(
- testFile4.hmm.getAnnotations().get(0).get("CONS").charValue(),
- 't');
- assertEquals(
- testFile4.hmm.getAnnotations().get(0).get("RF").charValue(),
- '-');
- assertEquals(
- testFile4.hmm.getAnnotations().get(0).get("MM").charValue(),
- '-');
- assertEquals(
- testFile4.hmm.getAnnotations().get(0).get("CS").charValue(),
- '-');
-
- testFile4.hmm.setMapAnnotationFlag(false);
- testFile4.hmm.getAlignmentColumnIndexes().clear();
- testFile4.hmm.getAnnotations().clear();
- Scanner scanner2 = new Scanner("- S g C Y");
- testFile4.parseAnnotations(scanner2, 0);
-
- assertEquals(
- testFile4.hmm.getAnnotations().get(0).get("CONS").charValue(),
- 'S');
- assertEquals(
- testFile4.hmm.getAnnotations().get(0).get("RF").charValue(),
- 'g');
- assertEquals(
- testFile4.hmm.getAnnotations().get(0).get("MM").charValue(),
- 'C');
- assertEquals(
- testFile4.hmm.getAnnotations().get(0).get("CS").charValue(),
- 'Y');
+ // NOTE(review): hard-coded H: drive path - confirm it exists wherever
+ // this test is run
+ HMMFile testFile = new HMMFile("H:/EmptyFile.hmm");
+ // one node per parseAnnotations call below (node indexes 0 and 1)
+ testFile.hmm.getNodes().add(new HMMNode());
+ testFile.hmm.getNodes().add(new HMMNode());
+
+ // first line: every annotation flag on, parsed into node 0
+ testFile.hmm.setConsensusResidueStatus(true);
+ testFile.hmm.setMAPStatus(true);
+ testFile.hmm.setReferenceAnnotationStatus(true);
+ testFile.hmm.setConsensusStructureStatus(true);
+ testFile.hmm.setMaskedValueStatus(true);
+ Scanner scanner = new Scanner("1345 t t t t");
+ testFile.parseAnnotations(scanner, 0);
+ assertEquals(testFile.hmm.getNodeAlignmentColumn(0).intValue(), 1345);
+ assertEquals(testFile.hmm.getConsensusResidue(0), 't');
+ assertEquals(testFile.hmm.getReferenceAnnotation(0), 't');
+ assertEquals(testFile.hmm.getMaskedValue(0), 't');
+ assertEquals(testFile.hmm.getConsensusStructure(0), 't');
+
+ scanner.close();
+
+ // second line: only the consensus residue and reference annotation flags
+ // stay on, parsed into node 1
+ testFile.hmm.setConsensusResidueStatus(true);
+ testFile.hmm.setMAPStatus(false);
+ testFile.hmm.setReferenceAnnotationStatus(true);
+ testFile.hmm.setConsensusStructureStatus(false);
+ testFile.hmm.setMaskedValueStatus(false);
+ Scanner scanner2 = new Scanner("- y x - -");
+ testFile.parseAnnotations(scanner2, 1);
+ // with MAP off, no alignment column is recorded for the node
+ assertNull(testFile.hmm.getNodeAlignmentColumn(1));
+ assertEquals(testFile.hmm.getConsensusResidue(1), 'y');
+ assertEquals(testFile.hmm.getReferenceAnnotation(1), 'x');
+ assertEquals(testFile.hmm.getMaskedValue(1), '-');
+ assertEquals(testFile.hmm.getConsensusStructure(1), '-');
+
+ scanner2.close();
}
- @Test
+ /**
+ * Parses the model section of the file behind fn3 and writes the result out
+ * through exportFile.
+ *
+ * NOTE(review): the test makes no assertions about the exported file, and
+ * the output path is a hard-coded H: drive location - confirm both are
+ * intended.
+ *
+ * @throws IOException
+ * if the source file cannot be read or the export fails
+ */
+ @Test(priority = 2)
public void testExportFile() throws IOException
{
- HMMFile exportTestFile = new HMMFile("H:/HMMTutorialExample.hmm");
- exportTestFile.parse();
- exportTestFile.exportFile("H:/WriteTestFile.hmm");
+ File file = new File(fn3.getDataObject());
+ // try-with-resources closes the reader (and the FileReader it wraps)
+ // even when reading or parsing throws
+ try (BufferedReader br = new BufferedReader(new FileReader(file)))
+ {
+ // skip the first 23 lines so the reader sits where parseModel starts
+ for (int i = 0; i < 23; i++)
+ {
+ br.readLine();
+ }
+ fn3.parseModel(br);
+ }
+ fn3.exportFile("H:/WriteFileTest.hmm");
+ }
+ /**
+ * Checks, line by line, the text that appendFileProperties writes into a
+ * StringBuilder for fn3.
+ */
+ @Test(priority = 1)
+ public void testAppendFileProperties()
+ {
+ String[] expectedLines = new String[] {
+ "HMMER3/f [3.1b1 | May 2013]",
+ "NAME fn3",
+ "ACC PF00041.13",
+ "DESC Fibronectin type III domain",
+ "LENG 86",
+ "ALPH amino",
+ "RF no",
+ "MM no",
+ "CONS yes",
+ "CS yes",
+ "MAP yes",
+ "DATE Fri Jun 20 08:22:31 2014",
+ "NSEQ 106",
+ "EFFN 11.415833",
+ "CKSUM 3564431818",
+ "GA 8.00 7.20",
+ "TC 8.00 7.20",
+ "NC 7.90 7.90",
+ "STATS LOCAL MSV -9.4043 0.71847",
+ "STATS LOCAL VITERBI -9.7737 0.71847",
+ "STATS LOCAL FORWARD -3.8341 0.71847" };
+
+ StringBuilder properties = new StringBuilder();
+ fn3.appendFileProperties(properties);
+
+ // walk the generated text one line at a time, in the expected order
+ Scanner lineReader = new Scanner(properties.toString());
+ for (int i = 0; i < expectedLines.length; i++)
+ {
+ assertEquals(lineReader.nextLine(), expectedLines[i]);
+ }
+
+ lineReader.close();
+ }
+
+ /**
+ * Placeholder helper. At present it only iterates nodeChange times without
+ * doing any work; neither scanner nor indexChange is used yet.
+ *
+ * @param scanner
+ * currently unused
+ * @param nodeChange
+ * number of (empty) loop iterations performed
+ * @param indexChange
+ * currently unused
+ */
+ public void moveAheadBy(Scanner scanner, int nodeChange, int indexChange)
+ {
+ for (int y = 0; y < nodeChange; y++)
+ {
+ // TODO: loop body has not been written yet
+ }
}
}