src/jalview/datamodel/HiddenMarkovModel.java

   1 package jalview.datamodel;
   2
   3 import jalview.schemes.ResidueProperties;
   4
   5 import java.util.ArrayList;
   6 import java.util.HashMap;
   7 import java.util.List;
   8 import java.util.Map;
   9 import java.util.Scanner;
  10
  11 /**
  12  * Data structure which stores a hidden Markov model. Currently contains file
  13  * properties as well, not sure whether these should be transferred to the
  14  * HMMFile class
  15  *
  16  * @author TZVanaalten
  17  *
  18  */
  19 public class HiddenMarkovModel
  20 {
  21
  22
  23   // Stores file properties. Do not directly access this field as it contains
  24   // only string value - use the getter methods. For example, to find the length
  25   // of theHMM, use getModelLength()to return an int value
  26   Map<String, String> fileProperties = new HashMap<>();
  27
  28   // contains all of the symbols used in this model. The index of each symbol
  29   // represents its lookup value
  30   List<Character> symbols = new ArrayList<>();
  31
  32   // contains information for each node in the model. The begin node is at index
  33   // 0. Node 0 contains average emission probabilities for each symbol
  34   List<HMMNode> nodes = new ArrayList<>();
  35
  36   // contains the HMM node for each alignment column
  37   Map<Integer, Integer> nodeLookup = new HashMap<>();
  38
  39   // contains the symbol index for each symbol
  40   Map<Character, Integer> symbolIndexLookup = new HashMap<>();
  41
  42   final static String YES = "yes";
  43
  44   final static String NO = "no";
  45
  46   int numberOfSymbols;
  47
  48   // keys for file properties hashmap
  49   private final String NAME = "NAME";
  50
  51   private final String ACCESSION_NUMBER = "ACC";
  52
  53   private final String DESCRIPTION = "DESC";
  54
  55   private final String LENGTH = "LENG";
  56
  57   private final String MAX_LENGTH = "MAXL";
  58
  59   private final String ALPHABET = "ALPH";
  60
  61   private final String DATE = "DATE";
  62
  63   private final String COMMAND_LOG = "COM";
  64
  65   private final String NUMBER_OF_SEQUENCES = "NSEQ";
  66
  67   private final String EFF_NUMBER_OF_SEQUENCES = "EFFN";
  68
  69   private final String CHECK_SUM = "CKSUM";
  70
  71   private final String GATHERING_THRESHOLDS = "GA";
  72
  73   private final String TRUSTED_CUTOFFS = "TC";
  74
  75   private final String NOISE_CUTOFFS = "NC";
  76
  77   private final String STATISTICS = "STATS";
  78
  79   private final String COMPO = "COMPO";
  80
  81   private final String GATHERING_THRESHOLD = "GA";
  82
  83   private final String TRUSTED_CUTOFF = "TC";
  84
  85   private final String NOISE_CUTOFF = "NC";
  86
  87   private final String VITERBI = "VITERBI";
  88
  89   private final String MSV = "MSV";
  90
  91   private final String FORWARD = "FORWARD";
  92
  93   private final String MAP = "MAP";
  94
  95   private final String REFERENCE_ANNOTATION = "RF";
  96
  97   private final String CONSENSUS_RESIDUE = "CONS";
  98
  99   private final String CONSENSUS_STRUCTURE = "CS";
 100
 101   private final String MASKED_VALUE = "MM";
 102
 103   public static final int MATCHTOMATCH = 0;
 104
 105   public static final int MATCHTOINSERT = 1;
 106
 107   public static final int MATCHTODELETE = 2;
 108
 109   public static final int INSERTTOMATCH = 3;
 110
 111   public static final int INSERTTOINSERT = 4;
 112
 113   public static final int DELETETOMATCH = 5;
 114
 115   public static final int DELETETODELETE = 6;
 116
 117   /**
 118    * Returns the map containing the matches between nodes and alignment column
 119    * indexes.
 120    *
 121    * @return
 122    *
 123    */
 124   public Map<Integer, Integer> getNodeLookup()
 125   {
 126     return nodeLookup;
 127   }
 128
 129   /**
 130    * Returns the list of symbols used in this hidden Markov model.
 131    *
 132    * @return
 133    */
 134   public List<Character> getSymbols()
 135   {
 136     return symbols;
 137   }
 138
 139   /**
 140    * Returns the file properties.
 141    *
 142    * @return
 143    */
 144   public Map<String, String> getFileProperties()
 145   {
 146     return fileProperties;
 147   }
 148
 149   /**
 150    * Gets the node in the hidden Markov model at the specified position.
 151    *
 152    * @param nodeIndex
 153    *          The index of the node requested. Node 0 optionally contains the
 154    *          average match emission probabilities across the entire model, and
 155    *          always contains the insert emission probabilities and state
 156    *          transition probabilities for the begin node. Node 1 contains the
 157    *          first node in the HMM that can correspond to a column in the
 158    *          alignment.
 159    * @return
 160    */
 161   public HMMNode getNode(int nodeIndex)
 162   {
 163     return getNodes().get(nodeIndex);
 164   }
 165
 166   /**
 167    * Sets the list of symbols used in the hidden Markov model to the list
 168    * specified.
 169    *
 170    * @param symbolsL
 171    *          The list of symbols to which the current list is to be changed.
 172    *
 173    */
 174   public void setSymbols(List<Character> symbolsL)
 175   {
 176     this.symbols = symbolsL;
 177   }
 178
 179   /**
 180    * Returns the name of the sequence alignment on which the HMM is based.
 181    *
 182    * @return
 183    */
 184   public String getName()
 185   {
 186     return fileProperties.get(NAME);
 187   }
 188
 189   /**
 190    * Returns the accession number.
 191    * @return
 192    */
 193   public String getAccessionNumber()
 194   {
 195     return fileProperties.get(ACCESSION_NUMBER);
 196   }
 197
 198   /**
 199    * Returns a description of the sequence alignment on which the hidden Markov
 200    * model is based.
 201    *
 202    * @return
 203    */
 204   public String getDescription()
 205   {
 206     return fileProperties.get(DESCRIPTION);
 207   }
 208
 209   /**
 210    * Returns the length of the hidden Markov model.
 211    *
 212    * @return
 213    */
 214   public Integer getLength()
 215   {
 216     if (fileProperties.get(LENGTH) == null)
 217     {
 218       return null;
 219     }
 220     return Integer.parseInt(fileProperties.get(LENGTH));
 221   }
 222
 223   /**
 224    * Returns the max instance length within the hidden Markov model.
 225    *
 226    * @return
 227    */
 228   public Integer getMaxInstanceLength()
 229   {
 230     if (fileProperties.get(MAX_LENGTH) == null)
 231     {
 232       return null;
 233     }
 234     return Integer.parseInt(fileProperties.get(MAX_LENGTH));
 235   }
 236
 237   /**
 238    * Returns the type of symbol alphabet - "amino", "DNA", "RNA" are the
 239    * options. Other alphabets may be added.
 240    *
 241    * @return
 242    */
 243   public String getAlphabetType()
 244   {
 245     return fileProperties.get(ALPHABET);
 246   }
 247
 248   /**
 249    * Returns the date as a String.
 250    *
 251    * @return
 252    */
 253   public String getDate()
 254   {
 255     return fileProperties.get(DATE);
 256   }
 257
 258   /**
 259    * Returns the command line log.
 260    *
 261    * @return
 262    */
 263   public String getCommandLineLog()
 264   {
 265     return fileProperties.get(COMMAND_LOG);
 266   }
 267
 268   /**
 269    * Returns the number of sequences on which the HMM was trained.
 270    *
 271    * @return
 272    */
 273   public Integer getNumberOfSequences()
 274   {
 275     if (fileProperties.get(NUMBER_OF_SEQUENCES) == null)
 276     {
 277       return null;
 278     }
 279     return Integer.parseInt(fileProperties.get(NUMBER_OF_SEQUENCES));
 280   }
 281
 282   /**
 283    * Returns the effective number of sequences on which the HMM was based.
 284    *
 285    * @param value
 286    */
 287   public Double getEffectiveNumberOfSequences()
 288   {
 289     if (fileProperties.get(LENGTH) == null)
 290     {
 291       return null;
 292     }
 293     return Double.parseDouble(fileProperties.get(EFF_NUMBER_OF_SEQUENCES));
 294   }
 295
 296   /**
 297    * Returns the checksum.
 298    *
 299    * @return
 300    */
 301   public Long getCheckSum()
 302   {
 303     if (fileProperties.get(LENGTH) == null)
 304     {
 305       return null;
 306     }
 307     return Long.parseLong(fileProperties.get(CHECK_SUM));
 308   }
 309
 310   /**
 311    * Returns the list of nodes in this HMM.
 312    *
 313    * @return
 314    */
 315   public List<HMMNode> getNodes()
 316   {
 317     return nodes;
 318   }
 319
 320   /**
 321    * Sets the list of nodes in this HMM to the given list.
 322    *
 323    * @param nodes
 324    *          The list of nodes to which the current list of nodes is being
 325    *          changed.
 326    */
 327   public void setNodes(List<HMMNode> nodes)
 328   {
 329     this.nodes = nodes;
 330   }
 331
 332   /**
 333    * Gets the match emission probability for a given symbol at a column in the
 334    * alignment.
 335    *
 336    * @param alignColumn
 337    *          The index of the alignment column, starting at index 0. Index 0
 338    *          usually corresponds to index 1 in the HMM.
 339    * @param symbol
 340    *          The symbol for which the desired probability is being requested.
 341    * @return
 342    *
 343    */
 344   public Double getMatchEmissionProbability(int alignColumn, char symbol)
 345   {
 346     int symbolIndex;
 347     int nodeIndex;
 348     Double probability;
 349     if (!symbolIndexLookup.containsKey(symbol))
 350     {
 351       return 0d;
 352     }
 353     symbolIndex = symbolIndexLookup.get(symbol);
 354     if (nodeLookup.containsKey(alignColumn + 1))
 355     {
 356       nodeIndex = nodeLookup.get(alignColumn + 1);
 357       probability = getNode(nodeIndex).getMatchEmissions().get(symbolIndex);
 358       return probability;
 359     }
 360     else
 361     {
 362       return 0d;
 363     }
 364
 365   }
 366
 367   /**
 368    * Gets the insert emission probability for a given symbol at a column in the
 369    * alignment.
 370    *
 371    * @param alignColumn
 372    *          The index of the alignment column, starting at index 0. Index 0
 373    *          usually corresponds to index 1 in the HMM.
 374    * @param symbol
 375    *          The symbol for which the desired probability is being requested.
 376    * @return
 377    *
 378    */
 379   public Double getInsertEmissionProbability(int alignColumn, char symbol)
 380   {
 381     int symbolIndex;
 382     int nodeIndex;
 383     Double probability;
 384     if (!symbolIndexLookup.containsKey(symbol))
 385     {
 386       return 0d;
 387     }
 388     symbolIndex = symbolIndexLookup.get(symbol);
 389     if (nodeLookup.containsKey(alignColumn + 1))
 390     {
 391       nodeIndex = nodeLookup.get(alignColumn + 1);
 392       probability = getNode(nodeIndex).getInsertEmissions()
 393               .get(symbolIndex);
 394       return probability;
 395     }
 396     else
 397     {
 398       return 0d;
 399     }
 400
 401   }
 402
 403   /**
 404    * Gets the state transition probability for a given symbol at a column in the
 405    * alignment.
 406    *
 407    * @param alignColumn
 408    *          The index of the alignment column, starting at index 0. Index 0
 409    *          usually corresponds to index 1 in the HMM.
 410    * @param symbol
 411    *          The symbol for which the desired probability is being requested.
 412    * @return
 413    *
 414    */
 415   public Double getStateTransitionProbability(int alignColumn,
 416           int transition)
 417   {
 418     int transitionIndex;
 419     int nodeIndex;
 420     Double probability;
 421     if (nodeLookup.containsKey(alignColumn + 1))
 422     {
 423       nodeIndex = nodeLookup.get(alignColumn + 1);
 424       probability = getNode(nodeIndex).getStateTransitions()
 425               .get(transition);
 426       return probability;
 427     }
 428     else
 429     {
 430       return 0d;
 431     }
 432
 433   }
 434
 435   /**
 436    * Returns the alignment column linked to the node at the given index.
 437    *
 438    * @param nodeIndex
 439    *          The index of the node, starting from index 1. Index 0 is the begin
 440    *          node, which does not correspond to a column in the alignment.
 441    * @return
 442    */
 443   public Integer getNodeAlignmentColumn(int nodeIndex)
 444   {
 445     Integer value = nodes.get(nodeIndex).getAlignmentColumn();
 446     return value - 1;
 447   }
 448
 449   /**
 450    * Returns the consensus residue at the specified node.
 451    *
 452    * @param nodeIndex
 453    *          The index of the specified node.
 454    * @return
 455    */
 456   public char getConsensusResidue(int nodeIndex)
 457   {
 458    char value = nodes.get(nodeIndex).getConsensusResidue();
 459    return value;
 460   }
 461
 462   /**
 463    * Returns the consensus at a given alignment column.
 464    *
 465    * @param columnIndex
 466    *          The index of the column in the alignment for which the consensus
 467    *          is desired. The list of columns starts at index 0.
 468    * @return
 469    */
 470   public char getConsensusAtAlignColumn(int columnIndex)
 471   {
 472     char value;
 473     Integer index = findNodeIndex(columnIndex);
 474     if (index == null)
 475     {
 476       return '-';
 477     }
 478     value = getNodes().get(index).getConsensusResidue();
 479     return value;
 480   }
 481
 482   /**
 483    * Returns the reference annotation at the specified node.
 484    *
 485    * @param nodeIndex
 486    *          The index of the specified node.
 487    * @return
 488    */
 489   public char getReferenceAnnotation(int nodeIndex)
 490   {
 491    char value = nodes.get(nodeIndex).getReferenceAnnotation();
 492    return value;
 493   }
 494
 495   /**
 496    * Returns the mask value at the specified node.
 497    *
 498    * @param nodeIndex
 499    *          The index of the specified node.
 500    * @return
 501    */
 502   public char getMaskedValue(int nodeIndex)
 503   {
 504    char value = nodes.get(nodeIndex).getMaskValue();
 505    return value;
 506   }
 507
 508   /**
 509    * Returns the consensus structure at the specified node.
 510    *
 511    * @param nodeIndex
 512    *          The index of the specified node.
 513    * @return
 514    */
 515   public char getConsensusStructure(int nodeIndex)
 516   {
 517    char value = nodes.get(nodeIndex).getConsensusStructure();
 518    return value;
 519   }
 520
 521   /**
 522    * Returns the average match emission probability for a given symbol
 523    *
 524    * @param symbolIndex
 525    *          The index of the symbol.
 526    * @return
 527    *
 528    */
 529   public double getAverageMatchEmission(int symbolIndex)
 530   {
 531     double value = nodes.get(0).getMatchEmissions().get(symbolIndex);
 532     return value;
 533   }
 534
 535   /**
 536    * Returns the number of symbols in the alphabet used in this HMM.
 537    *
 538    * @return
 539    */
 540   public int getNumberOfSymbols()
 541   {
 542     return numberOfSymbols;
 543   }
 544
 545   /**
 546    * Fills symbol array and whilst doing so, updates the value of the number of
 547    * symbols.
 548    *
 549    * @param parser
 550    *          The scanner scanning the symbol line in the file.
 551    */
 552   public void fillSymbols(Scanner parser)
 553   {
 554     int i = 0;
 555     while (parser.hasNext())
 556     {
 557       String strSymbol = parser.next();
 558       char[] symbol = strSymbol.toCharArray();
 559       symbols.add(symbol[0]);
 560       symbolIndexLookup.put(symbol[0], i);
 561       i++;
 562     }
 563     numberOfSymbols = symbols.size();
 564   }
 565
 566   /**
 567    * Adds a file property.
 568    *
 569    * @param key
 570    * @param value
 571    */
 572   public void addFileProperty(String key, String value)
 573   {
 574     fileProperties.put(key, value);
 575   }
 576
 577   /**
 578    * Returns a boolean indicating whether the reference annotation is active.
 579    *
 580    * @return
 581    */
 582   public boolean referenceAnnotationIsActive()
 583   {
 584     String status;
 585     status = fileProperties.get(REFERENCE_ANNOTATION);
 586     if (status == null)
 587     {
 588       return false;
 589     }
 590     switch (status)
 591     {
 592     case YES:
 593       return true;
 594     case NO:
 595       return false;
 596     default:
 597       return false;
 598     }
 599
 600   }
 601
 602   /**
 603    * Returns a boolean indicating whether the mask value annotation is active.
 604    *
 605    * @return
 606    */
 607   public boolean maskValueIsActive()
 608   {
 609     String status;
 610     status = fileProperties.get(MASKED_VALUE);
 611     if (status == null)
 612     {
 613       return false;
 614     }
 615     switch (status)
 616     {
 617     case YES:
 618       return true;
 619     case NO:
 620       return false;
 621     default:
 622       return false;
 623     }
 624
 625   }
 626
 627   /**
 628    * Returns a boolean indicating whether the consensus residue annotation is
 629    * active.
 630    *
 631    * @return
 632    */
 633   public boolean consensusResidueIsActive()
 634   {
 635     String status;
 636     status = fileProperties.get(CONSENSUS_RESIDUE);
 637     if (status == null)
 638     {
 639       return false;
 640     }
 641     switch (status)
 642     {
 643     case YES:
 644       return true;
 645     case NO:
 646       return false;
 647     default:
 648       return false;
 649     }
 650
 651   }
 652
 653   /**
 654    * Returns a boolean indicating whether the consensus structure annotation is
 655    * active.
 656    *
 657    * @return
 658    */
 659   public boolean consensusStructureIsActive()
 660   {
 661     String status;
 662     status = fileProperties.get(CONSENSUS_STRUCTURE);
 663     if (status == null)
 664     {
 665       return false;
 666     }
 667     switch (status)
 668     {
 669     case YES:
 670       return true;
 671     case NO:
 672       return false;
 673     default:
 674       return false;
 675     }
 676
 677   }
 678
 679   /**
 680    * Returns a boolean indicating whether the MAP annotation is active.
 681    *
 682    * @return
 683    */
 684   public boolean mapIsActive()
 685   {
 686     String status;
 687     status = fileProperties.get(MAP);
 688     if (status == null)
 689     {
 690       return false;
 691     }
 692     switch (status)
 693     {
 694     case YES:
 695       return true;
 696     case NO:
 697       return false;
 698     default:
 699       return false;
 700     }
 701
 702   }
 703
 704   /**
 705    * Sets the alignment column of the specified node.
 706    *
 707    * @param nodeIndex
 708    *
 709    * @param column
 710    *
 711    */
 712   public void setAlignmentColumn(int nodeIndex, int column)
 713   {
 714     nodes.get(nodeIndex).setAlignmentColumn(column);
 715   }
 716
 717   /**
 718    * Sets the reference annotation at a given node.
 719    *
 720    * @param nodeIndex
 721    * @param value
 722    */
 723   public void setReferenceAnnotation(int nodeIndex, char value)
 724   {
 725     nodes.get(nodeIndex).setReferenceAnnotation(value);
 726   }
 727
 728   /**
 729    * Sets the consensus residue at a given node.
 730    *
 731    * @param nodeIndex
 732    * @param value
 733    */
 734   public void setConsensusResidue(int nodeIndex, char value)
 735   {
 736     nodes.get(nodeIndex).setConsensusResidue(value);
 737   }
 738
 739   /**
 740    * Sets the consensus structure at a given node.
 741    *
 742    * @param nodeIndex
 743    * @param value
 744    */
 745   public void setConsensusStructure(int nodeIndex, char value)
 746   {
 747     nodes.get(nodeIndex).setConsensusStructure(value);
 748   }
 749
 750   /**
 751    * Sets the mask value at a given node.
 752    *
 753    * @param nodeIndex
 754    * @param value
 755    */
 756   public void setMaskValue(int nodeIndex, char value)
 757   {
 758     nodes.get(nodeIndex).setMaskValue(value);
 759   }
 760
 761   /**
 762    * Temporary implementation, should not be used.
 763    *
 764    * @return
 765    */
 766   public String getGatheringThreshold()
 767   {
 768     String value;
 769     value = fileProperties.get("GA");
 770     return value;
 771   }
 772
 773   /**
 774    * Temporary implementation, should not be used.
 775    *
 776    * @return
 777    */
 778   public String getNoiseCutoff()
 779   {
 780     String value;
 781     value = fileProperties.get("NC");
 782     return value;
 783   }
 784
 785   /**
 786    * Temporary implementation, should not be used.
 787    *
 788    * @return
 789    */
 790   public String getTrustedCutoff()
 791   {
 792     String value;
 793     value = fileProperties.get("TC");
 794     return value;
 795   }
 796
 797   /**
 798    * Temporary implementation, should not be used.
 799    *
 800    * @return
 801    */
 802   public String getViterbi()
 803   {
 804     String value;
 805     value = fileProperties.get(VITERBI);
 806     return value;
 807   }
 808
 809   /**
 810    * Temporary implementation, should not be used.
 811    *
 812    * @return
 813    */
 814   public String getMSV()
 815   {
 816     String value;
 817     value = fileProperties.get(MSV);
 818     return value;
 819   }
 820
 821   /**
 822    * Temporary implementation, should not be used.
 823    *
 824    * @return
 825    */
 826   public String getForward()
 827   {
 828     String value;
 829     value = fileProperties.get(FORWARD);
 830     return value;
 831   }
 832
 833   /**
 834    * Sets the activation status of the MAP annotation.
 835    *
 836    * @param status
 837    */
 838   public void setMAPStatus(boolean status)
 839   {
 840     fileProperties.put(MAP, status ? YES : NO);
 841   }
 842
 843   /**
 844    * Sets the activation status of the reference annotation.
 845    *
 846    * @param status
 847    */
 848   public void setReferenceAnnotationStatus(boolean status)
 849   {
 850     fileProperties.put(REFERENCE_ANNOTATION, status ? YES : NO);
 851   }
 852
 853   /**
 854    * Sets the activation status of the mask value annotation.
 855    *
 856    * @param status
 857    */
 858   public void setMaskedValueStatus(boolean status)
 859   {
 860     fileProperties.put(MASKED_VALUE, status ? YES : NO);
 861   }
 862
 863   /**
 864    * Sets the activation status of the consensus residue annotation.
 865    *
 866    * @param status
 867    */
 868   public void setConsensusResidueStatus(boolean status)
 869   {
 870     fileProperties.put(CONSENSUS_RESIDUE, status ? YES : NO);
 871   }
 872
 873   /**
 874    * Sets the activation status of the consensus structure annotation.
 875    *
 876    * @param status
 877    */
 878   public void setConsensusStructureStatus(boolean status)
 879   {
 880     fileProperties.put(CONSENSUS_STRUCTURE, status ? YES : NO);
 881   }
 882
 883   /**
 884    * Finds the index of the node in a hidden Markov model based on the column in
 885    * the alignment
 886    *
 887    * @param alignmentColumn
 888    *          The index of the column in the alignment, with the indexes
 889    *          starting from 0.
 890    */
 891
 892   public Integer findNodeIndex(int alignmentColumn)
 893   {
 894     Integer index;
 895     index = nodeLookup.get(alignmentColumn + 1);
 896     return index;
 897   }
 898
 899   /**
 900    * Finds the String values of a boolean. "yes" for true and "no" for false.
 901    *
 902    * @param value
 903    * @return
 904    */
 905   public static String findStringFromBoolean(boolean value)
 906   {
 907     if (value)
 908     {
 909       return YES;
 910     }
 911     else
 912     {
 913       return NO;
 914     }
 915   }
 916
 917   /**
 918    * Creates the HMM Logo alignment annotation, and populates it with
 919    * information content data.
 920    *
 921    * @return The alignment annotation.
 922    */
 923   public AlignmentAnnotation createAnnotation(int length)
 924   {
 925     Annotation[] annotations = new Annotation[length];
 926     float max = 0f;
 927     for (int alignPos = 0; alignPos < length; alignPos++)
 928     {
 929       Float content = getInformationContent(alignPos);
 930       if (content > max)
 931       {
 932         max = content;
 933       }
 934
 935       Character cons;
 936       cons = getConsensusAtAlignColumn(alignPos);
 937       cons = Character.toUpperCase(cons);
 938
 939       String description = String.format("%.3f", content);
 940       description += " bits";
 941       annotations[alignPos] = new Annotation(cons.toString(), description,
 942               ' ',
 943               content);
 944
 945     }
 946     AlignmentAnnotation annotation = new AlignmentAnnotation(
 947             "Information Content",
 948             "The information content of each column, measured in bits",
 949             annotations,
 950             0f, max, AlignmentAnnotation.BAR_GRAPH);
 951     return annotation;
 952   }
 953
 954   /**
 955    * Returns the information content at a specified column.
 956    *
 957    * @param column
 958    *          Index of the column, starting from 0.
 959    * @return
 960    */
 961   public float getInformationContent(int column)
 962   {
 963     float informationContent = 0f;
 964
 965     for (char symbol : symbols)
 966     {
 967       float freq = 0f;
 968       if ("amino".equals(getAlphabetType()))
 969       {
 970         freq = ResidueProperties.aminoBackgroundFrequencies.get(symbol);
 971       }
 972       if ("DNA".equals(getAlphabetType()))
 973       {
 974         freq = ResidueProperties.dnaBackgroundFrequencies.get(symbol);
 975       }
 976       if ("RNA".equals(getAlphabetType()))
 977       {
 978         freq = ResidueProperties.rnaBackgroundFrequencies
 979                 .get(symbol);
 980       }
 981       Double hmmProb = getMatchEmissionProbability(column, symbol);
 982       float prob = hmmProb.floatValue();
 983       informationContent += prob * (Math.log(prob / freq) / Math.log(2));
 984
 985     }
 986
 987     return informationContent;
 988   }
 989
 990   /**
 991    * Returns the consensus sequence based on the most probable symbol at each
 992    * position. The sequence is adjusted to match the length of the existing
 993    * sequence alignment. Gap characters are used as padding.
 994    *
 995    * @param length
 996    *          The length of the longest sequence in the existing alignment.
 997    * @return
 998    */
 999   public Sequence getConsensusSequence(int length)
1000   {
1001     int start;
1002     int end;
1003     int modelLength;
1004     start = getNodeAlignmentColumn(1);
1005     modelLength = getLength();
1006     end = getNodeAlignmentColumn(modelLength);
1007     char[] sequence = new char[length];
1008     for (int index = 0; index < length; index++)
1009     {
1010       Character character;
1011       if (consensusResidueIsActive())
1012       {
1013         character = getConsensusAtAlignColumn(index);
1014       }
1015       else
1016       {
1017         character = findConsensusCharacter(index);
1018       }
1019       if (character == null || character == '-')
1020       {
1021         sequence[index] = '-';
1022       }
1023       else
1024       {
1025         sequence[index] = Character.toUpperCase(character);
1026       }
1027       }
1028
1029
1030     Sequence seq = new Sequence("HMM CONSENSUS", sequence, start, end);
1031     return seq;
1032   }
1033
1034   /**
1035    * Finds the most probable character at a column in an alignment based on the
1036    * HMM.
1037    *
1038    * @param nodeIndex
1039    *          The index of the node.
1040    * @return
1041    */
1042   Character findConsensusCharacter(int column)
1043   {
1044     Character mostLikely = null;
1045     double highestProb = 0;
1046     for (char character : symbols)
1047     {
1048       Double prob = getMatchEmissionProbability(column, character);
1049       if (prob > highestProb)
1050       {
1051         highestProb = prob;
1052         mostLikely = character;
1053       }
1054     }
1055     return mostLikely;
1056   }
1057 }
1058