src/jalview/datamodel/HiddenMarkovModel.java

   1 package jalview.datamodel;
   2
   3 import jalview.schemes.ResidueProperties;
   4
   5 import java.util.ArrayList;
   6 import java.util.HashMap;
   7 import java.util.List;
   8 import java.util.Map;
   9 import java.util.Scanner;
  10
  11 /**
  12  * Data structure which stores a hidden Markov model. Currently contains file
  13  * properties as well, not sure whether these should be transferred to the
  14  * HMMFile class
  15  *
  16  * @author TZVanaalten
  17  *
  18  */
  19 public class HiddenMarkovModel
  20 {
  21
  22
  23   // Stores file properties. Do not directly access this field as it contains
  24   // only string value - use the getter methods. For example, to find the length
  25   // of theHMM, use getModelLength()to return an int value
  26   Map<String, String> fileProperties = new HashMap<>();
  27
  28   // contains all of the symbols used in this model. The index of each symbol
  29   // represents its lookup value
  30   List<Character> symbols = new ArrayList<>();
  31
  32   // contains information for each node in the model. The begin node is at index
  33   // 0. Node 0 contains average emission probabilities for each symbol
  34   List<HMMNode> nodes = new ArrayList<>();
  35
  36   // contains the HMM node for each alignment column
  37   Map<Integer, Integer> nodeLookup = new HashMap<>();
  38
  39   // contains the symbol index for each symbol
  40   Map<Character, Integer> symbolIndexLookup = new HashMap<>();
  41
  42   final static String YES = "yes";
  43
  44   final static String NO = "no";
  45
  46   int numberOfSymbols;
  47
  48   // keys for file properties hashmap
  49   private final String NAME = "NAME";
  50
  51   private final String ACCESSION_NUMBER = "ACC";
  52
  53   private final String DESCRIPTION = "DESC";
  54
  55   private final String LENGTH = "LENG";
  56
  57   private final String MAX_LENGTH = "MAXL";
  58
  59   private final String ALPHABET = "ALPH";
  60
  61   private final String DATE = "DATE";
  62
  63   private final String COMMAND_LOG = "COM";
  64
  65   private final String NUMBER_OF_SEQUENCES = "NSEQ";
  66
  67   private final String EFF_NUMBER_OF_SEQUENCES = "EFFN";
  68
  69   private final String CHECK_SUM = "CKSUM";
  70
  71   private final String GATHERING_THRESHOLDS = "GA";
  72
  73   private final String TRUSTED_CUTOFFS = "TC";
  74
  75   private final String NOISE_CUTOFFS = "NC";
  76
  77   private final String STATISTICS = "STATS";
  78
  79   private final String COMPO = "COMPO";
  80
  81   private final String GATHERING_THRESHOLD = "GA";
  82
  83   private final String TRUSTED_CUTOFF = "TC";
  84
  85   private final String NOISE_CUTOFF = "NC";
  86
  87   private final String VITERBI = "VITERBI";
  88
  89   private final String MSV = "MSV";
  90
  91   private final String FORWARD = "FORWARD";
  92
  93   private final String MAP = "MAP";
  94
  95   private final String REFERENCE_ANNOTATION = "RF";
  96
  97   private final String CONSENSUS_RESIDUE = "CONS";
  98
  99   private final String CONSENSUS_STRUCTURE = "CS";
 100
 101   private final String MASKED_VALUE = "MM";
 102
 103   public static final int MATCHTOMATCH = 0;
 104
 105   public static final int MATCHTOINSERT = 1;
 106
 107   public static final int MATCHTODELETE = 2;
 108
 109   public static final int INSERTTOMATCH = 3;
 110
 111   public static final int INSERTTOINSERT = 4;
 112
 113   public static final int DELETETOMATCH = 5;
 114
 115   public static final int DELETETODELETE = 6;
 116
 117   /**
 118    * Returns the map containing the matches between nodes and alignment column
 119    * indexes.
 120    *
 121    * @return
 122    *
 123    */
 124   public Map<Integer, Integer> getNodeLookup()
 125   {
 126     return nodeLookup;
 127   }
 128
 129   /**
 130    * Returns the list of symbols used in this hidden Markov model.
 131    *
 132    * @return
 133    */
 134   public List<Character> getSymbols()
 135   {
 136     return symbols;
 137   }
 138
 139   /**
 140    * Returns the file properties.
 141    *
 142    * @return
 143    */
 144   public Map<String, String> getFileProperties()
 145   {
 146     return fileProperties;
 147   }
 148
 149   /**
 150    * Gets the node in the hidden Markov model at the specified position.
 151    *
 152    * @param nodeIndex
 153    *          The index of the node requested. Node 0 optionally contains the
 154    *          average match emission probabilities across the entire model, and
 155    *          always contains the insert emission probabilities and state
 156    *          transition probabilities for the begin node. Node 1 contains the
 157    *          first node in the HMM that can correspond to a column in the
 158    *          alignment.
 159    * @return
 160    */
 161   public HMMNode getNode(int nodeIndex)
 162   {
 163     return getNodes().get(nodeIndex);
 164   }
 165
 166   /**
 167    * Sets the list of symbols used in the hidden Markov model to the list
 168    * specified.
 169    *
 170    * @param symbolsL
 171    *          The list of symbols to which the current list is to be changed.
 172    *
 173    */
 174   public void setSymbols(List<Character> symbolsL)
 175   {
 176     this.symbols = symbolsL;
 177   }
 178
 179   /**
 180    * Returns the name of the sequence alignment on which the HMM is based.
 181    *
 182    * @return
 183    */
 184   public String getName()
 185   {
 186     return fileProperties.get(NAME);
 187   }
 188
 189   /**
 190    * Returns the accession number.
 191    * @return
 192    */
 193   public String getAccessionNumber()
 194   {
 195     return fileProperties.get(ACCESSION_NUMBER);
 196   }
 197
 198   /**
 199    * Returns a description of the sequence alignment on which the hidden Markov
 200    * model is based.
 201    *
 202    * @return
 203    */
 204   public String getDescription()
 205   {
 206     return fileProperties.get(DESCRIPTION);
 207   }
 208
 209   /**
 210    * Returns the length of the hidden Markov model.
 211    *
 212    * @return
 213    */
 214   public Integer getLength()
 215   {
 216     if (fileProperties.get(LENGTH) == null)
 217     {
 218       return null;
 219     }
 220     return Integer.parseInt(fileProperties.get(LENGTH));
 221   }
 222
 223   /**
 224    * Returns the max instance length within the hidden Markov model.
 225    *
 226    * @return
 227    */
 228   public Integer getMaxInstanceLength()
 229   {
 230     if (fileProperties.get(MAX_LENGTH) == null)
 231     {
 232       return null;
 233     }
 234     return Integer.parseInt(fileProperties.get(MAX_LENGTH));
 235   }
 236
 237   /**
 238    * Returns the type of symbol alphabet - "amino", "DNA", "RNA" are the
 239    * options. Other alphabets may be added.
 240    *
 241    * @return
 242    */
 243   public String getAlphabetType()
 244   {
 245     return fileProperties.get(ALPHABET);
 246   }
 247
 248   /**
 249    * Returns the date as a String.
 250    *
 251    * @return
 252    */
 253   public String getDate()
 254   {
 255     return fileProperties.get(DATE);
 256   }
 257
 258   /**
 259    * Returns the command line log.
 260    *
 261    * @return
 262    */
 263   public String getCommandLineLog()
 264   {
 265     return fileProperties.get(COMMAND_LOG);
 266   }
 267
 268   /**
 269    * Returns the number of sequences on which the HMM was trained.
 270    *
 271    * @return
 272    */
 273   public Integer getNumberOfSequences()
 274   {
 275     if (fileProperties.get(NUMBER_OF_SEQUENCES) == null)
 276     {
 277       return null;
 278     }
 279     return Integer.parseInt(fileProperties.get(NUMBER_OF_SEQUENCES));
 280   }
 281
 282   /**
 283    * Returns the effective number of sequences on which the HMM was based.
 284    *
 285    * @param value
 286    */
 287   public Double getEffectiveNumberOfSequences()
 288   {
 289     if (fileProperties.get(LENGTH) == null)
 290     {
 291       return null;
 292     }
 293     return Double.parseDouble(fileProperties.get(EFF_NUMBER_OF_SEQUENCES));
 294   }
 295
 296   /**
 297    * Returns the checksum.
 298    *
 299    * @return
 300    */
 301   public Long getCheckSum()
 302   {
 303     if (fileProperties.get(LENGTH) == null)
 304     {
 305       return null;
 306     }
 307     return Long.parseLong(fileProperties.get(CHECK_SUM));
 308   }
 309
 310   /**
 311    * Returns the list of nodes in this HMM.
 312    *
 313    * @return
 314    */
 315   public List<HMMNode> getNodes()
 316   {
 317     return nodes;
 318   }
 319
 320   /**
 321    * Sets the list of nodes in this HMM to the given list.
 322    *
 323    * @param nodes
 324    *          The list of nodes to which the current list of nodes is being
 325    *          changed.
 326    */
 327   public void setNodes(List<HMMNode> nodes)
 328   {
 329     this.nodes = nodes;
 330   }
 331
 332   /**
 333    * Gets the match emission probability for a given symbol at a column in the
 334    * alignment.
 335    *
 336    * @param alignColumn
 337    *          The index of the alignment column, starting at index 0. Index 0
 338    *          usually corresponds to index 1 in the HMM.
 339    * @param symbol
 340    *          The symbol for which the desired probability is being requested.
 341    * @return
 342    *
 343    */
 344   public Double getMatchEmissionProbability(int alignColumn, char symbol)
 345   {
 346     int symbolIndex;
 347     int nodeIndex;
 348     Double probability;
 349     if (!symbolIndexLookup.containsKey(symbol))
 350     {
 351       return 0d;
 352     }
 353     symbolIndex = symbolIndexLookup.get(symbol);
 354     if (nodeLookup.containsKey(alignColumn + 1))
 355     {
 356       nodeIndex = nodeLookup.get(alignColumn + 1);
 357       probability = getNode(nodeIndex).getMatchEmissions().get(symbolIndex);
 358       return probability;
 359     }
 360     else
 361     {
 362       return 0d;
 363     }
 364
 365   }
 366
 367   /**
 368    * Gets the insert emission probability for a given symbol at a column in the
 369    * alignment.
 370    *
 371    * @param alignColumn
 372    *          The index of the alignment column, starting at index 0. Index 0
 373    *          usually corresponds to index 1 in the HMM.
 374    * @param symbol
 375    *          The symbol for which the desired probability is being requested.
 376    * @return
 377    *
 378    */
 379   public Double getInsertEmissionProbability(int alignColumn, char symbol)
 380   {
 381     int symbolIndex;
 382     int nodeIndex;
 383     Double probability;
 384     if (!symbolIndexLookup.containsKey(symbol))
 385     {
 386       return 0d;
 387     }
 388     symbolIndex = symbolIndexLookup.get(symbol);
 389     if (nodeLookup.containsKey(alignColumn + 1))
 390     {
 391       nodeIndex = nodeLookup.get(alignColumn + 1);
 392       probability = getNode(nodeIndex).getInsertEmissions()
 393               .get(symbolIndex);
 394       return probability;
 395     }
 396     else
 397     {
 398       return 0d;
 399     }
 400
 401   }
 402
 403   /**
 404    * Gets the state transition probability for a given symbol at a column in the
 405    * alignment.
 406    *
 407    * @param alignColumn
 408    *          The index of the alignment column, starting at index 0. Index 0
 409    *          usually corresponds to index 1 in the HMM.
 410    * @param symbol
 411    *          The symbol for which the desired probability is being requested.
 412    * @return
 413    *
 414    */
 415   public Double getStateTransitionProbability(int alignColumn,
 416           int transition)
 417   {
 418     int transitionIndex;
 419     int nodeIndex;
 420     Double probability;
 421     if (nodeLookup.containsKey(alignColumn + 1))
 422     {
 423       nodeIndex = nodeLookup.get(alignColumn + 1);
 424       probability = getNode(nodeIndex).getStateTransitions()
 425               .get(transition);
 426       return probability;
 427     }
 428     else
 429     {
 430       return 0d;
 431     }
 432
 433   }
 434
 435   /**
 436    * Returns the alignment column linked to the node at the given index.
 437    *
 438    * @param nodeIndex
 439    *          The index of the node, starting from index 1. Index 0 is the begin
 440    *          node, which does not correspond to a column in the alignment.
 441    * @return
 442    */
 443   public Integer getNodeAlignmentColumn(int nodeIndex)
 444   {
 445     Integer value = nodes.get(nodeIndex).getAlignmentColumn();
 446     return value - 1;
 447   }
 448
 449   /**
 450    * Returns the consensus residue at the specified node.
 451    *
 452    * @param nodeIndex
 453    *          The index of the specified node.
 454    * @return
 455    */
 456   public char getConsensusResidue(int nodeIndex)
 457   {
 458    char value = nodes.get(nodeIndex).getConsensusResidue();
 459    return value;
 460   }
 461
 462   /**
 463    * Returns the consensus at a given alignment column.
 464    *
 465    * @param columnIndex
 466    *          The index of the column in the alignment for which the consensus
 467    *          is desired. The list of columns starts at index 0.
 468    * @return
 469    */
 470   public char getConsensusAtAlignColumn(int columnIndex)
 471   {
 472     char mostLikely = '-';
 473     if (consensusResidueIsActive())
 474     {
 475
 476     Integer index = findNodeIndex(columnIndex);
 477     if (index == null)
 478     {
 479       return '-';
 480     }
 481       mostLikely = getNodes().get(index).getConsensusResidue();
 482       return mostLikely;
 483     }
 484     else
 485     {
 486       double highestProb = 0;
 487       for (char character : symbols)
 488       {
 489         Double prob = getMatchEmissionProbability(columnIndex, character);
 490         if (prob > highestProb)
 491         {
 492           highestProb = prob;
 493           mostLikely = character;
 494         }
 495       }
 496       return mostLikely;
 497     }
 498
 499   }
 500
 501   /**
 502    * Returns the reference annotation at the specified node.
 503    *
 504    * @param nodeIndex
 505    *          The index of the specified node.
 506    * @return
 507    */
 508   public char getReferenceAnnotation(int nodeIndex)
 509   {
 510    char value = nodes.get(nodeIndex).getReferenceAnnotation();
 511    return value;
 512   }
 513
 514   /**
 515    * Returns the mask value at the specified node.
 516    *
 517    * @param nodeIndex
 518    *          The index of the specified node.
 519    * @return
 520    */
 521   public char getMaskedValue(int nodeIndex)
 522   {
 523    char value = nodes.get(nodeIndex).getMaskValue();
 524    return value;
 525   }
 526
 527   /**
 528    * Returns the consensus structure at the specified node.
 529    *
 530    * @param nodeIndex
 531    *          The index of the specified node.
 532    * @return
 533    */
 534   public char getConsensusStructure(int nodeIndex)
 535   {
 536    char value = nodes.get(nodeIndex).getConsensusStructure();
 537    return value;
 538   }
 539
 540   /**
 541    * Returns the average match emission probability for a given symbol
 542    *
 543    * @param symbolIndex
 544    *          The index of the symbol.
 545    * @return
 546    *
 547    */
 548   public double getAverageMatchEmission(int symbolIndex)
 549   {
 550     double value = nodes.get(0).getMatchEmissions().get(symbolIndex);
 551     return value;
 552   }
 553
 554   /**
 555    * Returns the number of symbols in the alphabet used in this HMM.
 556    *
 557    * @return
 558    */
 559   public int getNumberOfSymbols()
 560   {
 561     return numberOfSymbols;
 562   }
 563
 564   /**
 565    * Fills symbol array and whilst doing so, updates the value of the number of
 566    * symbols.
 567    *
 568    * @param parser
 569    *          The scanner scanning the symbol line in the file.
 570    */
 571   public void fillSymbols(Scanner parser)
 572   {
 573     int i = 0;
 574     while (parser.hasNext())
 575     {
 576       String strSymbol = parser.next();
 577       char[] symbol = strSymbol.toCharArray();
 578       symbols.add(symbol[0]);
 579       symbolIndexLookup.put(symbol[0], i);
 580       i++;
 581     }
 582     numberOfSymbols = symbols.size();
 583   }
 584
 585   /**
 586    * Adds a file property.
 587    *
 588    * @param key
 589    * @param value
 590    */
 591   public void addFileProperty(String key, String value)
 592   {
 593     fileProperties.put(key, value);
 594   }
 595
 596   /**
 597    * Returns a boolean indicating whether the reference annotation is active.
 598    *
 599    * @return
 600    */
 601   public boolean referenceAnnotationIsActive()
 602   {
 603     String status;
 604     status = fileProperties.get(REFERENCE_ANNOTATION);
 605     if (status == null)
 606     {
 607       return false;
 608     }
 609     switch (status)
 610     {
 611     case YES:
 612       return true;
 613     case NO:
 614       return false;
 615     default:
 616       return false;
 617     }
 618
 619   }
 620
 621   /**
 622    * Returns a boolean indicating whether the mask value annotation is active.
 623    *
 624    * @return
 625    */
 626   public boolean maskValueIsActive()
 627   {
 628     String status;
 629     status = fileProperties.get(MASKED_VALUE);
 630     if (status == null)
 631     {
 632       return false;
 633     }
 634     switch (status)
 635     {
 636     case YES:
 637       return true;
 638     case NO:
 639       return false;
 640     default:
 641       return false;
 642     }
 643
 644   }
 645
 646   /**
 647    * Returns a boolean indicating whether the consensus residue annotation is
 648    * active.
 649    *
 650    * @return
 651    */
 652   public boolean consensusResidueIsActive()
 653   {
 654     String status;
 655     status = fileProperties.get(CONSENSUS_RESIDUE);
 656     if (status == null)
 657     {
 658       return false;
 659     }
 660     switch (status)
 661     {
 662     case YES:
 663       return true;
 664     case NO:
 665       return false;
 666     default:
 667       return false;
 668     }
 669
 670   }
 671
 672   /**
 673    * Returns a boolean indicating whether the consensus structure annotation is
 674    * active.
 675    *
 676    * @return
 677    */
 678   public boolean consensusStructureIsActive()
 679   {
 680     String status;
 681     status = fileProperties.get(CONSENSUS_STRUCTURE);
 682     if (status == null)
 683     {
 684       return false;
 685     }
 686     switch (status)
 687     {
 688     case YES:
 689       return true;
 690     case NO:
 691       return false;
 692     default:
 693       return false;
 694     }
 695
 696   }
 697
 698   /**
 699    * Returns a boolean indicating whether the MAP annotation is active.
 700    *
 701    * @return
 702    */
 703   public boolean mapIsActive()
 704   {
 705     String status;
 706     status = fileProperties.get(MAP);
 707     if (status == null)
 708     {
 709       return false;
 710     }
 711     switch (status)
 712     {
 713     case YES:
 714       return true;
 715     case NO:
 716       return false;
 717     default:
 718       return false;
 719     }
 720
 721   }
 722
 723   /**
 724    * Sets the alignment column of the specified node.
 725    *
 726    * @param nodeIndex
 727    *
 728    * @param column
 729    *
 730    */
 731   public void setAlignmentColumn(int nodeIndex, int column)
 732   {
 733     nodes.get(nodeIndex).setAlignmentColumn(column);
 734   }
 735
 736   /**
 737    * Sets the reference annotation at a given node.
 738    *
 739    * @param nodeIndex
 740    * @param value
 741    */
 742   public void setReferenceAnnotation(int nodeIndex, char value)
 743   {
 744     nodes.get(nodeIndex).setReferenceAnnotation(value);
 745   }
 746
 747   /**
 748    * Sets the consensus residue at a given node.
 749    *
 750    * @param nodeIndex
 751    * @param value
 752    */
 753   public void setConsensusResidue(int nodeIndex, char value)
 754   {
 755     nodes.get(nodeIndex).setConsensusResidue(value);
 756   }
 757
 758   /**
 759    * Sets the consensus structure at a given node.
 760    *
 761    * @param nodeIndex
 762    * @param value
 763    */
 764   public void setConsensusStructure(int nodeIndex, char value)
 765   {
 766     nodes.get(nodeIndex).setConsensusStructure(value);
 767   }
 768
 769   /**
 770    * Sets the mask value at a given node.
 771    *
 772    * @param nodeIndex
 773    * @param value
 774    */
 775   public void setMaskValue(int nodeIndex, char value)
 776   {
 777     nodes.get(nodeIndex).setMaskValue(value);
 778   }
 779
 780   /**
 781    * Temporary implementation, should not be used.
 782    *
 783    * @return
 784    */
 785   public String getGatheringThreshold()
 786   {
 787     String value;
 788     value = fileProperties.get("GA");
 789     return value;
 790   }
 791
 792   /**
 793    * Temporary implementation, should not be used.
 794    *
 795    * @return
 796    */
 797   public String getNoiseCutoff()
 798   {
 799     String value;
 800     value = fileProperties.get("NC");
 801     return value;
 802   }
 803
 804   /**
 805    * Temporary implementation, should not be used.
 806    *
 807    * @return
 808    */
 809   public String getTrustedCutoff()
 810   {
 811     String value;
 812     value = fileProperties.get("TC");
 813     return value;
 814   }
 815
 816   /**
 817    * Temporary implementation, should not be used.
 818    *
 819    * @return
 820    */
 821   public String getViterbi()
 822   {
 823     String value;
 824     value = fileProperties.get(VITERBI);
 825     return value;
 826   }
 827
 828   /**
 829    * Temporary implementation, should not be used.
 830    *
 831    * @return
 832    */
 833   public String getMSV()
 834   {
 835     String value;
 836     value = fileProperties.get(MSV);
 837     return value;
 838   }
 839
 840   /**
 841    * Temporary implementation, should not be used.
 842    *
 843    * @return
 844    */
 845   public String getForward()
 846   {
 847     String value;
 848     value = fileProperties.get(FORWARD);
 849     return value;
 850   }
 851
 852   /**
 853    * Sets the activation status of the MAP annotation.
 854    *
 855    * @param status
 856    */
 857   public void setMAPStatus(boolean status)
 858   {
 859     fileProperties.put(MAP, status ? YES : NO);
 860   }
 861
 862   /**
 863    * Sets the activation status of the reference annotation.
 864    *
 865    * @param status
 866    */
 867   public void setReferenceAnnotationStatus(boolean status)
 868   {
 869     fileProperties.put(REFERENCE_ANNOTATION, status ? YES : NO);
 870   }
 871
 872   /**
 873    * Sets the activation status of the mask value annotation.
 874    *
 875    * @param status
 876    */
 877   public void setMaskedValueStatus(boolean status)
 878   {
 879     fileProperties.put(MASKED_VALUE, status ? YES : NO);
 880   }
 881
 882   /**
 883    * Sets the activation status of the consensus residue annotation.
 884    *
 885    * @param status
 886    */
 887   public void setConsensusResidueStatus(boolean status)
 888   {
 889     fileProperties.put(CONSENSUS_RESIDUE, status ? YES : NO);
 890   }
 891
 892   /**
 893    * Sets the activation status of the consensus structure annotation.
 894    *
 895    * @param status
 896    */
 897   public void setConsensusStructureStatus(boolean status)
 898   {
 899     fileProperties.put(CONSENSUS_STRUCTURE, status ? YES : NO);
 900   }
 901
 902   /**
 903    * Finds the index of the node in a hidden Markov model based on the column in
 904    * the alignment
 905    *
 906    * @param alignmentColumn
 907    *          The index of the column in the alignment, with the indexes
 908    *          starting from 0.
 909    */
 910
 911   public Integer findNodeIndex(int alignmentColumn)
 912   {
 913     Integer index;
 914     index = nodeLookup.get(alignmentColumn + 1);
 915     return index;
 916   }
 917
 918   /**
 919    * Finds the String values of a boolean. "yes" for true and "no" for false.
 920    *
 921    * @param value
 922    * @return
 923    */
 924   public static String findStringFromBoolean(boolean value)
 925   {
 926     if (value)
 927     {
 928       return YES;
 929     }
 930     else
 931     {
 932       return NO;
 933     }
 934   }
 935
 936   /**
 937    * Creates the HMM Logo alignment annotation, and populates it with
 938    * information content data.
 939    *
 940    * @return The alignment annotation.
 941    */
 942   public AlignmentAnnotation createAnnotation(int length)
 943   {
 944     Annotation[] annotations = new Annotation[length];
 945     float max = 0f;
 946     for (int alignPos = 0; alignPos < length; alignPos++)
 947     {
 948       Float content = getInformationContent(alignPos);
 949       if (content > max)
 950       {
 951         max = content;
 952       }
 953
 954       Character cons;
 955
 956       cons = getConsensusAtAlignColumn(alignPos);
 957
 958       cons = Character.toUpperCase(cons);
 959
 960       String description = String.format("%.3f", content);
 961       description += " bits";
 962       annotations[alignPos] = new Annotation(cons.toString(), description,
 963               ' ',
 964               content);
 965
 966     }
 967     AlignmentAnnotation annotation = new AlignmentAnnotation(
 968             "Information Content",
 969             "The information content of each column, measured in bits",
 970             annotations,
 971             0f, max, AlignmentAnnotation.BAR_GRAPH);
 972     annotation.setHMM(this);
 973     return annotation;
 974   }
 975
 976   /**
 977    * Returns the information content at a specified column.
 978    *
 979    * @param column
 980    *          Index of the column, starting from 0.
 981    * @return
 982    */
 983   public float getInformationContent(int column)
 984   {
 985     float informationContent = 0f;
 986
 987     for (char symbol : symbols)
 988     {
 989       float freq = 0f;
 990       if ("amino".equals(getAlphabetType()))
 991       {
 992         freq = ResidueProperties.aminoBackgroundFrequencies.get(symbol);
 993       }
 994       if ("DNA".equals(getAlphabetType()))
 995       {
 996         freq = ResidueProperties.dnaBackgroundFrequencies.get(symbol);
 997       }
 998       if ("RNA".equals(getAlphabetType()))
 999       {
1000         freq = ResidueProperties.rnaBackgroundFrequencies
1001                 .get(symbol);
1002       }
1003       Double hmmProb = getMatchEmissionProbability(column, symbol);
1004       float prob = hmmProb.floatValue();
1005       informationContent += prob * (Math.log(prob / freq) / Math.log(2));
1006
1007     }
1008
1009     return informationContent;
1010   }
1011
1012   /**
1013    * Returns the consensus sequence based on the most probable symbol at each
1014    * position. The sequence is adjusted to match the length of the existing
1015    * sequence alignment. Gap characters are used as padding.
1016    *
1017    * @param length
1018    *          The length of the longest sequence in the existing alignment.
1019    * @return
1020    */
1021   public Sequence getConsensusSequence(int length)
1022   {
1023     int start;
1024     int end;
1025     int modelLength;
1026     start = getNodeAlignmentColumn(1);
1027     modelLength = getLength();
1028     end = getNodeAlignmentColumn(modelLength);
1029     char[] sequence = new char[length];
1030     for (int index = 0; index < length; index++)
1031     {
1032       Character character;
1033
1034         character = getConsensusAtAlignColumn(index);
1035
1036       if (character == null || character == '-')
1037       {
1038         sequence[index] = '-';
1039       }
1040       else
1041       {
1042         sequence[index] = Character.toUpperCase(character);
1043       }
1044       }
1045
1046
1047     Sequence seq = new Sequence("HMM CONSENSUS", sequence, start, end);
1048     return seq;
1049   }
1050
1051
1052   /**
1053    * Maps the nodes of the hidden Markov model to the reference annotation.
1054    */
1055   public void mapToReferenceAnnotation(AlignmentAnnotation alAnnotation)
1056   {
1057     Annotation[] annots = alAnnotation.annotations;
1058     {
1059       int nodeIndex = 0;
1060       for (int col = 0; col < annots.length; col++)
1061       {
1062         String character = annots[col].displayCharacter;
1063         if ("x".equals(character) || "X".equals(character))
1064         {
1065           nodeIndex++;
1066           if (nodeIndex < nodes.size())
1067           {
1068             nodes.get(nodeIndex).setAlignmentColumn(col + 1);
1069             nodeLookup.put(col + 1, nodeIndex);
1070           }
1071           else
1072           {
1073             System.out.println(
1074                     "The reference annotation contains more consensus columns than the hidden Markov model");
1075             break;
1076           }
1077         }
1078         else
1079         {
1080           nodeLookup.remove(col + 1);
1081         }
1082       }
1083
1084     }
1085   }
1086
1087   public AlignmentI initPlaceholder(AlignmentI alignment)
1088   {
1089     int length = alignment.getWidth();
1090     Sequence consensus = getConsensusSequence(length);
1091     consensus.setHMM(this);
1092     SequenceI[] consensusArr = new Sequence[] { consensus };
1093     AlignmentI newAlignment = new Alignment(consensusArr);
1094     newAlignment.append(alignment);
1095     return newAlignment;
1096   }
1097
1098 }
1099