public class HMMFile extends AlignFile
implements AlignmentFileReaderI, AlignmentFileWriterI
{
+ private static final String TERMINATOR = "//";
+
/*
* keys to data in HMM file, used to store as properties of the HiddenMarkovModel
*/
- private static final String HMM = "HMM";
+ public static final String HMM = "HMM";
public static final String NAME = "NAME";
public static final String ALPHABET = "ALPH";
- private static final String ALPH_AMINO = "amino";
-
- private static final String ALPH_DNA = "DNA";
-
- private static final String ALPH_RNA = "RNA";
-
- private static final String ALPHABET_AMINO = "ACDEFGHIKLMNPQRSTVWY";
-
- private static final String ALPHABET_DNA = "ACGT";
-
- private static final String ALPHABET_RNA = "ACGU";
-
public static final String DATE = "DATE";
public static final String COMMAND_LOG = "COM";
public static final String MASKED_VALUE = "MM";
+ private static final String ALPH_AMINO = "amino";
+
+ private static final String ALPH_DNA = "DNA";
+
+ private static final String ALPH_RNA = "RNA";
+
+ private static final String ALPHABET_AMINO = "ACDEFGHIKLMNPQRSTVWY";
+
+ private static final String ALPHABET_DNA = "ACGT";
+
+ private static final String ALPHABET_RNA = "ACGU";
+
private static final int NUMBER_OF_TRANSITIONS = 7;
private static final String SPACE = " ";
}
/**
- * Parses the model data from the HMMER3 file
+ * Parses the model data from the HMMER3 file. The input buffer should be
+ * positioned at the (optional) COMPO line if there is one, else at the insert
+ * emissions line for the BEGIN node of the model. Lines are read until the
+ * terminator line or the end of the input is reached.
*
* @param input
* @throws IOException
+ * if a read fails, or if the input ends part-way through a node (that
+ * is, before its insert emissions line or its state transitions line)
*/
void parseModel(BufferedReader input) throws IOException
{
- boolean first = true;
- // specification says there must always be an HMM header
- // and one more header which is skipped here
+ /*
+ * specification says there must always be an HMM header and one more
+ * header (guide headings); per the contract documented above, the reader
+ * is already positioned past both, so nothing is skipped here
+ */
+ int nodeNo = 0;
String line = input.readLine();
- while (!"//".equals(line))
+ while (line != null && !TERMINATOR.equals(line))
{
HMMNode node = new HMMNode();
hmm.addNode(node);
- Scanner matchReader = new Scanner(line);
- String next = matchReader.next();
- if (next.equals(COMPO) || !first)
+ Scanner scanner = new Scanner(line);
+ String next = scanner.next();
+
+ /*
+ * expect COMPO (optional) for average match emissions
+ * or a node number followed by node's match emissions
+ */
+ if (COMPO.equals(next) || nodeNo > 0)
{
- // stores match emission line in list
- double[] matches = parseDoubles(matchReader, numberOfSymbols);
+ /*
+ * parse match emissions
+ */
+ double[] matches = parseDoubles(scanner, numberOfSymbols);
node.setMatchEmissions(matches);
- if (!first)
+ if (!COMPO.equals(next))
{
- // TODO handle files with no column map (make our own)
- int column = parseAnnotations(matchReader, node);
- hmm.setAlignmentColumn(node, column - 1);
+ int column = parseAnnotations(scanner, node);
+ if (column == 0)
+ {
+ /*
+ * no MAP annotation provided, just number off from 0 (begin node)
+ */
+ column = nodeNo;
+ }
+ hmm.setAlignmentColumn(node, column - 1); // node 1 <==> column 0
}
+ line = input.readLine();
}
- matchReader.close();
- // stores insert emission line in list
- line = input.readLine();
- Scanner insertReader = new Scanner(line);
- double[] inserts = parseDoubles(insertReader, numberOfSymbols);
+ scanner.close();
+
+ /*
+ * parse insert emissions; a null line here means the file was truncated
+ * after the match emissions line, so report that rather than letting
+ * new Scanner(null) fail with a NullPointerException
+ */
+ if (line == null)
+ {
+ throw new IOException(
+ "Unexpected end of HMM file: missing insert emissions line for node "
+ + nodeNo);
+ }
+ scanner = new Scanner(line);
+ double[] inserts = parseDoubles(scanner, numberOfSymbols);
node.setInsertEmissions(inserts);
- insertReader.close();
+ scanner.close();
- // stores state transition line in list
+ /*
+ * parse state transitions (same truncation check as for insert emissions)
+ */
line = input.readLine();
- Scanner transitionReader = new Scanner(line);
- double[] transitions = parseDoubles(transitionReader,
+ if (line == null)
+ {
+ throw new IOException(
+ "Unexpected end of HMM file: missing state transitions line for node "
+ + nodeNo);
+ }
+ scanner = new Scanner(line);
+ double[] transitions = parseDoubles(scanner,
NUMBER_OF_TRANSITIONS);
node.setStateTransitions(transitions);
- transitionReader.close();
+ scanner.close();
line = input.readLine();
- first = false;
+ nodeNo++;
}
}
* HMM counts columns from 1, convert to base 0 for Jalview
*/
int column = 0;
- if (hmm.getBooleanProperty(MAP) && scanner.hasNext())
- {
- column = scanner.nextInt();
- node.setAlignmentColumn(column - 1);
- }
- else
+ String value;
+ if (scanner.hasNext())
{
- scanner.next();
+ value = scanner.next();
+ if (!"-".equals(value))
+ {
+ try
+ {
+ column = Integer.parseInt(value);
+ node.setAlignmentColumn(column - 1);
+ } catch (NumberFormatException e)
+ {
+ // not a number: leave column as 0, which is returned to the caller
+ }
+ }
}
/*
- * hmm consensus residue if provided, else -
+ * hmm consensus residue if provided, else '-'
*/
if (scanner.hasNext())
{
- char consensusR;
- consensusR = charValue(scanner.next());
- node.setConsensusResidue(consensusR);
+ node.setConsensusResidue(scanner.next().charAt(0));
}
/*
- * RF reference annotation, if provided, else -
+ * RF reference annotation, if provided, else '-'
*/
if (scanner.hasNext())
{
- char reference;
- reference = charValue(scanner.next());
- node.setReferenceAnnotation(reference);
+ node.setReferenceAnnotation(scanner.next().charAt(0));
}
/*
- * 'm' for masked position, if provided, else -
+ * 'm' for masked position, if provided, else '-'
*/
if (scanner.hasNext())
{
- char value;
- value = charValue(scanner.next());
- node.setMaskValue(value);
+ node.setMaskValue(scanner.next().charAt(0));
}
/*
- * structure consensus symbol, if provided, else -
+ * structure consensus symbol, if provided, else '-'
*/
if (scanner.hasNext())
{
- char consensusS;
- consensusS = charValue(scanner.next());
- node.setConsensusStructure(consensusS);
+ node.setConsensusStructure(scanner.next().charAt(0));
}
return column;
for (int nodeNo = 0; nodeNo <= length; nodeNo++)
{
String matchLine = String.format("%7s",
- nodeNo == 0 ? "COMPO" : Integer.toString(nodeNo));
+ nodeNo == 0 ? COMPO : Integer.toString(nodeNo));
double[] doubleMatches = convertToLogSpace(
hmm.getNode(nodeNo).getMatchEmissions());
if (hmm.getMSV() != null)
{
- output.append(String.format("%n%-19s %18s", "STATS LOCAL MSV",
- hmm.getMSV()));
+ format = "%n%-19s %18s";
+ output.append(String.format(format, "STATS LOCAL MSV", hmm.getMSV()));
- output.append(String.format("%n%-19s %18s", "STATS LOCAL VITERBI",
+ output.append(String.format(format, "STATS LOCAL VITERBI",
hmm.getViterbi()));
- output.append(String.format("%n%-19s %18s", "STATS LOCAL FORWARD",
+ output.append(String.format(format, "STATS LOCAL FORWARD",
hmm.getForward()));
}
}
}
}
- /**
- * Returns the char value of a single lettered String.
- *
- * @param string
- * @return
- */
- char charValue(String string)
- {
- char character;
- character = string.charAt(0);
- return character;
- }
-
+ /**
+ * If the first of the given sequences has an HMM, makes that the model held
+ * by this object; then returns the result of {@link #print()}. A null or
+ * empty array, or a null first element, leaves the current model unchanged.
+ *
+ * @param sequences
+ * only the first element is inspected
+ * @param jvsuffix
+ * not used by this method
+ * @return the value returned by print()
+ */
@Override
- public String print(SequenceI[] seqs, boolean jvsuffix)
+ public String print(SequenceI[] sequences, boolean jvsuffix)
{
- if (seqs[0].getHMM() != null)
+ /*
+ * guard against no sequences being supplied, so that we fall through to
+ * printing the current model instead of failing on sequences[0]
+ */
+ if (sequences != null && sequences.length > 0 && sequences[0] != null
+ && sequences[0].getHMM() != null)
{
- hmm = seqs[0].getHMM();
+ hmm = sequences[0].getHMM();
}
return print();
}
appendProperties(output);
output.append(NL);
appendModelAsString(output);
- output.append(NL + "//");
+ output.append(NL).append(TERMINATOR).append(NL);
return output.toString();
}