src/jalview/datamodel/HiddenMarkovModel.java

   1 package jalview.datamodel;
   2
   3 import jalview.gui.AlignFrame;
   4
   5 import java.util.ArrayList;
   6 import java.util.HashMap;
   7 import java.util.List;
   8 import java.util.Map;
   9 import java.util.Scanner;
  10
  11 /**
  12  * Data structure which stores a hidden Markov model. Currently contains file
  13  * properties as well, not sure whether these should be transferred to the
  14  * HMMFile class
  15  *
  16  * @author TZVanaalten
  17  *
  18  */
  19 public class HiddenMarkovModel
  20 {
  21
  22
  23   // Stores file properties. Do not directly access this field as it contains
  24   // only string value - use the getter methods. For example, to find the length
  25   // of theHMM, use getModelLength()to return an int value
  26   Map<String, String> fileProperties = new HashMap<>();
  27
  28   // contains all of the symbols used in this model. The index of each symbol
  29   // represents its lookup value
  30   List<Character> symbols = new ArrayList<>();
  31
  32   // contains information for each node in the model. The begin node is at index
  33   // 0. Node 0 contains average emission probabilities for each symbol
  34   List<HMMNode> nodes = new ArrayList<>();
  35
  36   // contains the HMM node for each alignment column, alignment columns start at
  37   // index 0;
  38   Map<Integer, Integer> nodeLookup = new HashMap<>();
  39
  40   // contains the symbol index for each symbol
  41   Map<Character, Integer> symbolIndexLookup = new HashMap<>();
  42
  43   final static String YES = "yes";
  44
  45   final static String NO = "no";
  46
  47   int numberOfSymbols;
  48
  49   // keys for file properties hashmap
  50   private final String NAME = "NAME";
  51
  52   private final String ACCESSION_NUMBER = "ACC";
  53
  54   private final String DESCRIPTION = "DESC";
  55
  56   private final String LENGTH = "LENG";
  57
  58   private final String MAX_LENGTH = "MAXL";
  59
  60   private final String ALPHABET = "ALPH";
  61
  62   private final String DATE = "DATE";
  63
  64   private final String COMMAND_LOG = "COM";
  65
  66   private final String NUMBER_OF_SEQUENCES = "NSEQ";
  67
  68   private final String EFF_NUMBER_OF_SEQUENCES = "EFFN";
  69
  70   private final String CHECK_SUM = "CKSUM";
  71
  72   private final String GATHERING_THRESHOLDS = "GA";
  73
  74   private final String TRUSTED_CUTOFFS = "TC";
  75
  76   private final String NOISE_CUTOFFS = "NC";
  77
  78   private final String STATISTICS = "STATS";
  79
  80   private final String COMPO = "COMPO";
  81
  82   private final String GATHERING_THRESHOLD = "GA";
  83
  84   private final String TRUSTED_CUTOFF = "TC";
  85
  86   private final String NOISE_CUTOFF = "NC";
  87
  88   private final String VITERBI = "VITERBI";
  89
  90   private final String MSV = "MSV";
  91
  92   private final String FORWARD = "FORWARD";
  93
  94   private final String MAP = "MAP";
  95
  96   private final String REFERENCE_ANNOTATION = "RF";
  97
  98   private final String CONSENSUS_RESIDUE = "CONS";
  99
 100   private final String CONSENSUS_STRUCTURE = "CS";
 101
 102   private final String MASKED_VALUE = "MM";
 103
 104   public static final int MATCHTOMATCH = 0;
 105
 106   public static final int MATCHTOINSERT = 1;
 107
 108   public static final int MATCHTODELETE = 2;
 109
 110   public static final int INSERTTOMATCH = 3;
 111
 112   public static final int INSERTTOINSERT = 4;
 113
 114   public static final int DELETETOMATCH = 5;
 115
 116   public static final int DELETETODELETE = 6;
 117
 118   String fileHeader;
 119
 120   public HiddenMarkovModel()
 121   {
 122
 123   }
 124
 125   public HiddenMarkovModel(HiddenMarkovModel hmm)
 126   {
 127     super();
 128     this.fileProperties = new HashMap<>(hmm.fileProperties);
 129     this.symbols = new ArrayList<>(hmm.symbols);
 130     this.nodes = new ArrayList<>(hmm.nodes);
 131     this.nodeLookup = new HashMap<>(hmm.nodeLookup);
 132     this.symbolIndexLookup = new HashMap<>(
 133             hmm.symbolIndexLookup);
 134     this.numberOfSymbols = hmm.numberOfSymbols;
 135     this.fileHeader = new String(hmm.fileHeader);
 136   }
 137
 138   /**
 139    * Gets the file header of the .hmm file this model came from.
 140    *
 141    * @return
 142    */
 143   public String getFileHeader()
 144   {
 145     return fileHeader;
 146   }
 147
 148   /**
 149    * Sets the file header of this model.
 150    *
 151    * @param header
 152    */
 153   public void setFileHeader(String header)
 154   {
 155     fileHeader = header;
 156   }
 157
 158   /**
 159    * Returns the map containing the matches between nodes and alignment column
 160    * indexes.
 161    *
 162    * @return
 163    *
 164    */
 165   public Map<Integer, Integer> getNodeLookup()
 166   {
 167     return nodeLookup;
 168   }
 169
 170   /**
 171    * Returns the list of symbols used in this hidden Markov model.
 172    *
 173    * @return
 174    */
 175   public List<Character> getSymbols()
 176   {
 177     return symbols;
 178   }
 179
 180   /**
 181    * Returns the file properties.
 182    *
 183    * @return
 184    */
 185   public Map<String, String> getFileProperties()
 186   {
 187     return fileProperties;
 188   }
 189
 190   /**
 191    * Gets the node in the hidden Markov model at the specified position.
 192    *
 193    * @param nodeIndex
 194    *          The index of the node requested. Node 0 optionally contains the
 195    *          average match emission probabilities across the entire model, and
 196    *          always contains the insert emission probabilities and state
 197    *          transition probabilities for the begin node. Node 1 contains the
 198    *          first node in the HMM that can correspond to a column in the
 199    *          alignment.
 200    * @return
 201    */
 202   public HMMNode getNode(int nodeIndex)
 203   {
 204     return getNodes().get(nodeIndex);
 205   }
 206
 207   /**
 208    * Sets the list of symbols used in the hidden Markov model to the list
 209    * specified.
 210    *
 211    * @param symbolsL
 212    *          The list of symbols to which the current list is to be changed.
 213    *
 214    */
 215   public void setSymbols(List<Character> symbolsL)
 216   {
 217     this.symbols = symbolsL;
 218   }
 219
 220   /**
 221    * Returns the name of the sequence alignment on which the HMM is based.
 222    *
 223    * @return
 224    */
 225   public String getName()
 226   {
 227     return fileProperties.get(NAME);
 228   }
 229
 230   /**
 231    * Returns the accession number.
 232    * @return
 233    */
 234   public String getAccessionNumber()
 235   {
 236     return fileProperties.get(ACCESSION_NUMBER);
 237   }
 238
 239   /**
 240    * Returns a description of the sequence alignment on which the hidden Markov
 241    * model is based.
 242    *
 243    * @return
 244    */
 245   public String getDescription()
 246   {
 247     return fileProperties.get(DESCRIPTION);
 248   }
 249
 250   /**
 251    * Returns the length of the hidden Markov model.
 252    *
 253    * @return
 254    */
 255   public Integer getLength()
 256   {
 257     if (fileProperties.get(LENGTH) == null)
 258     {
 259       return null;
 260     }
 261     return Integer.parseInt(fileProperties.get(LENGTH));
 262   }
 263
 264   /**
 265    * Returns the max instance length within the hidden Markov model.
 266    *
 267    * @return
 268    */
 269   public Integer getMaxInstanceLength()
 270   {
 271     if (fileProperties.get(MAX_LENGTH) == null)
 272     {
 273       return null;
 274     }
 275     return Integer.parseInt(fileProperties.get(MAX_LENGTH));
 276   }
 277
 278   /**
 279    * Returns the type of symbol alphabet - "amino", "DNA", "RNA" are the
 280    * options. Other alphabets may be added.
 281    *
 282    * @return
 283    */
 284   public String getAlphabetType()
 285   {
 286     return fileProperties.get(ALPHABET);
 287   }
 288
 289   /**
 290    * Returns the date as a String.
 291    *
 292    * @return
 293    */
 294   public String getDate()
 295   {
 296     return fileProperties.get(DATE);
 297   }
 298
 299   /**
 300    * Returns the command line log.
 301    *
 302    * @return
 303    */
 304   public String getCommandLineLog()
 305   {
 306     return fileProperties.get(COMMAND_LOG);
 307   }
 308
 309   /**
 310    * Returns the number of sequences on which the HMM was trained.
 311    *
 312    * @return
 313    */
 314   public Integer getNumberOfSequences()
 315   {
 316     if (fileProperties.get(NUMBER_OF_SEQUENCES) == null)
 317     {
 318       return null;
 319     }
 320     return Integer.parseInt(fileProperties.get(NUMBER_OF_SEQUENCES));
 321   }
 322
 323   /**
 324    * Returns the effective number of sequences on which the HMM was based.
 325    *
 326    * @param value
 327    */
 328   public Double getEffectiveNumberOfSequences()
 329   {
 330     if (fileProperties.get(LENGTH) == null)
 331     {
 332       return null;
 333     }
 334     return Double.parseDouble(fileProperties.get(EFF_NUMBER_OF_SEQUENCES));
 335   }
 336
 337   /**
 338    * Returns the checksum.
 339    *
 340    * @return
 341    */
 342   public Long getCheckSum()
 343   {
 344     if (fileProperties.get(LENGTH) == null)
 345     {
 346       return null;
 347     }
 348     return Long.parseLong(fileProperties.get(CHECK_SUM));
 349   }
 350
 351   /**
 352    * Returns the list of nodes in this HMM.
 353    *
 354    * @return
 355    */
 356   public List<HMMNode> getNodes()
 357   {
 358     return nodes;
 359   }
 360
 361   /**
 362    * Sets the list of nodes in this HMM to the given list.
 363    *
 364    * @param nodes
 365    *          The list of nodes to which the current list of nodes is being
 366    *          changed.
 367    */
 368   public void setNodes(List<HMMNode> nodes)
 369   {
 370     this.nodes = nodes;
 371   }
 372
 373   /**
 374    * Gets the match emission probability for a given symbol at a column in the
 375    * alignment.
 376    *
 377    * @param alignColumn
 378    *          The index of the alignment column, starting at index 0. Index 0
 379    *          usually corresponds to index 1 in the HMM.
 380    * @param symbol
 381    *          The symbol for which the desired probability is being requested.
 382    * @return
 383    *
 384    */
 385   public Double getMatchEmissionProbability(int alignColumn, char symbol)
 386   {
 387     int symbolIndex;
 388     int nodeIndex;
 389     Double probability;
 390     if (!symbolIndexLookup.containsKey(symbol))
 391     {
 392       return 0d;
 393     }
 394     symbolIndex = symbolIndexLookup.get(symbol);
 395     if (nodeLookup.containsKey(alignColumn))
 396     {
 397       nodeIndex = nodeLookup.get(alignColumn);
 398       probability = getNode(nodeIndex).getMatchEmissions().get(symbolIndex);
 399       return probability;
 400     }
 401     else
 402     {
 403       return 0d;
 404     }
 405
 406   }
 407
 408   /**
 409    * Gets the insert emission probability for a given symbol at a column in the
 410    * alignment.
 411    *
 412    * @param alignColumn
 413    *          The index of the alignment column, starting at index 0. Index 0
 414    *          usually corresponds to index 1 in the HMM.
 415    * @param symbol
 416    *          The symbol for which the desired probability is being requested.
 417    * @return
 418    *
 419    */
 420   public Double getInsertEmissionProbability(int alignColumn, char symbol)
 421   {
 422     int symbolIndex;
 423     int nodeIndex;
 424     Double probability;
 425     if (!symbolIndexLookup.containsKey(symbol))
 426     {
 427       return 0d;
 428     }
 429     symbolIndex = symbolIndexLookup.get(symbol);
 430     if (nodeLookup.containsKey(alignColumn))
 431     {
 432       nodeIndex = nodeLookup.get(alignColumn);
 433       probability = getNode(nodeIndex).getInsertEmissions()
 434               .get(symbolIndex);
 435       return probability;
 436     }
 437     else
 438     {
 439       return 0d;
 440     }
 441
 442   }
 443
 444   /**
 445    * Gets the state transition probability for a given symbol at a column in the
 446    * alignment.
 447    *
 448    * @param alignColumn
 449    *          The index of the alignment column, starting at index 0. Index 0
 450    *          usually corresponds to index 1 in the HMM.
 451    * @param symbol
 452    *          The symbol for which the desired probability is being requested.
 453    * @return
 454    *
 455    */
 456   public Double getStateTransitionProbability(int alignColumn,
 457           int transition)
 458   {
 459     int transitionIndex;
 460     int nodeIndex;
 461     Double probability;
 462     if (nodeLookup.containsKey(alignColumn))
 463     {
 464       nodeIndex = nodeLookup.get(alignColumn);
 465       probability = getNode(nodeIndex).getStateTransitions()
 466               .get(transition);
 467       return probability;
 468     }
 469     else
 470     {
 471       return 0d;
 472     }
 473
 474   }
 475
 476   /**
 477    * Returns the alignment column linked to the node at the given index.
 478    *
 479    * @param nodeIndex
 480    *          The index of the node, starting from index 1. Index 0 is the begin
 481    *          node, which does not correspond to a column in the alignment.
 482    * @return
 483    */
 484   public Integer getNodeAlignmentColumn(int nodeIndex)
 485   {
 486     Integer value = nodes.get(nodeIndex).getAlignmentColumn();
 487     return value;
 488   }
 489
 490   /**
 491    * Returns the consensus residue at the specified node.
 492    *
 493    * @param nodeIndex
 494    *          The index of the specified node.
 495    * @return
 496    */
 497   public char getConsensusResidue(int nodeIndex)
 498   {
 499    char value = nodes.get(nodeIndex).getConsensusResidue();
 500    return value;
 501   }
 502
 503   /**
 504    * Returns the consensus at a given alignment column.
 505    *
 506    * @param columnIndex
 507    *          The index of the column in the alignment for which the consensus
 508    *          is desired. The list of columns starts at index 0.
 509    * @return
 510    */
 511   public char getConsensusAtAlignColumn(int columnIndex)
 512   {
 513     char mostLikely = '-';
 514     if (consensusResidueIsActive())
 515     {
 516
 517     Integer index = findNodeIndex(columnIndex);
 518     if (index == null)
 519     {
 520       return '-';
 521     }
 522       mostLikely = getNodes().get(index).getConsensusResidue();
 523       return mostLikely;
 524     }
 525     else
 526     {
 527       double highestProb = 0;
 528       for (char character : symbols)
 529       {
 530         Double prob = getMatchEmissionProbability(columnIndex, character);
 531         if (prob > highestProb)
 532         {
 533           highestProb = prob;
 534           mostLikely = character;
 535         }
 536       }
 537       return mostLikely;
 538     }
 539
 540   }
 541
 542   /**
 543    * Returns the reference annotation at the specified node.
 544    *
 545    * @param nodeIndex
 546    *          The index of the specified node.
 547    * @return
 548    */
 549   public char getReferenceAnnotation(int nodeIndex)
 550   {
 551    char value = nodes.get(nodeIndex).getReferenceAnnotation();
 552    return value;
 553   }
 554
 555   /**
 556    * Returns the mask value at the specified node.
 557    *
 558    * @param nodeIndex
 559    *          The index of the specified node.
 560    * @return
 561    */
 562   public char getMaskedValue(int nodeIndex)
 563   {
 564    char value = nodes.get(nodeIndex).getMaskValue();
 565    return value;
 566   }
 567
 568   /**
 569    * Returns the consensus structure at the specified node.
 570    *
 571    * @param nodeIndex
 572    *          The index of the specified node.
 573    * @return
 574    */
 575   public char getConsensusStructure(int nodeIndex)
 576   {
 577    char value = nodes.get(nodeIndex).getConsensusStructure();
 578    return value;
 579   }
 580
 581   /**
 582    * Returns the average match emission probability for a given symbol
 583    *
 584    * @param symbolIndex
 585    *          The index of the symbol.
 586    * @return
 587    *
 588    */
 589   public double getAverageMatchEmission(int symbolIndex)
 590   {
 591     double value = nodes.get(0).getMatchEmissions().get(symbolIndex);
 592     return value;
 593   }
 594
 595   /**
 596    * Returns the number of symbols in the alphabet used in this HMM.
 597    *
 598    * @return
 599    */
 600   public int getNumberOfSymbols()
 601   {
 602     return numberOfSymbols;
 603   }
 604
 605   /**
 606    * Fills symbol array and whilst doing so, updates the value of the number of
 607    * symbols.
 608    *
 609    * @param parser
 610    *          The scanner scanning the symbol line in the file.
 611    */
 612   public void fillSymbols(Scanner parser)
 613   {
 614     int i = 0;
 615     while (parser.hasNext())
 616     {
 617       String strSymbol = parser.next();
 618       char[] symbol = strSymbol.toCharArray();
 619       symbols.add(symbol[0]);
 620       symbolIndexLookup.put(symbol[0], i);
 621       i++;
 622     }
 623     numberOfSymbols = symbols.size();
 624   }
 625
 626   /**
 627    * Adds a file property.
 628    *
 629    * @param key
 630    * @param value
 631    */
 632   public void addFileProperty(String key, String value)
 633   {
 634     fileProperties.put(key, value);
 635   }
 636
 637   /**
 638    * Returns a boolean indicating whether the reference annotation is active.
 639    *
 640    * @return
 641    */
 642   public boolean referenceAnnotationIsActive()
 643   {
 644     String status;
 645     status = fileProperties.get(REFERENCE_ANNOTATION);
 646     if (status == null)
 647     {
 648       return false;
 649     }
 650     switch (status)
 651     {
 652     case YES:
 653       return true;
 654     case NO:
 655       return false;
 656     default:
 657       return false;
 658     }
 659
 660   }
 661
 662   /**
 663    * Returns a boolean indicating whether the mask value annotation is active.
 664    *
 665    * @return
 666    */
 667   public boolean maskValueIsActive()
 668   {
 669     String status;
 670     status = fileProperties.get(MASKED_VALUE);
 671     if (status == null)
 672     {
 673       return false;
 674     }
 675     switch (status)
 676     {
 677     case YES:
 678       return true;
 679     case NO:
 680       return false;
 681     default:
 682       return false;
 683     }
 684
 685   }
 686
 687   /**
 688    * Returns a boolean indicating whether the consensus residue annotation is
 689    * active.
 690    *
 691    * @return
 692    */
 693   public boolean consensusResidueIsActive()
 694   {
 695     String status;
 696     status = fileProperties.get(CONSENSUS_RESIDUE);
 697     if (status == null)
 698     {
 699       return false;
 700     }
 701     switch (status)
 702     {
 703     case YES:
 704       return true;
 705     case NO:
 706       return false;
 707     default:
 708       return false;
 709     }
 710
 711   }
 712
 713   /**
 714    * Returns a boolean indicating whether the consensus structure annotation is
 715    * active.
 716    *
 717    * @return
 718    */
 719   public boolean consensusStructureIsActive()
 720   {
 721     String status;
 722     status = fileProperties.get(CONSENSUS_STRUCTURE);
 723     if (status == null)
 724     {
 725       return false;
 726     }
 727     switch (status)
 728     {
 729     case YES:
 730       return true;
 731     case NO:
 732       return false;
 733     default:
 734       return false;
 735     }
 736
 737   }
 738
 739   /**
 740    * Returns a boolean indicating whether the MAP annotation is active.
 741    *
 742    * @return
 743    */
 744   public boolean mapIsActive()
 745   {
 746     String status;
 747     status = fileProperties.get(MAP);
 748     if (status == null)
 749     {
 750       return false;
 751     }
 752     switch (status)
 753     {
 754     case YES:
 755       return true;
 756     case NO:
 757       return false;
 758     default:
 759       return false;
 760     }
 761
 762   }
 763
 764   /**
 765    * Sets the alignment column of the specified node.
 766    *
 767    * @param nodeIndex
 768    *
 769    * @param column
 770    *
 771    */
 772   public void setAlignmentColumn(int nodeIndex, int column)
 773   {
 774     nodes.get(nodeIndex).setAlignmentColumn(column);
 775   }
 776
 777   /**
 778    * Sets the reference annotation at a given node.
 779    *
 780    * @param nodeIndex
 781    * @param value
 782    */
 783   public void setReferenceAnnotation(int nodeIndex, char value)
 784   {
 785     nodes.get(nodeIndex).setReferenceAnnotation(value);
 786   }
 787
 788   /**
 789    * Sets the consensus residue at a given node.
 790    *
 791    * @param nodeIndex
 792    * @param value
 793    */
 794   public void setConsensusResidue(int nodeIndex, char value)
 795   {
 796     nodes.get(nodeIndex).setConsensusResidue(value);
 797   }
 798
 799   /**
 800    * Sets the consensus structure at a given node.
 801    *
 802    * @param nodeIndex
 803    * @param value
 804    */
 805   public void setConsensusStructure(int nodeIndex, char value)
 806   {
 807     nodes.get(nodeIndex).setConsensusStructure(value);
 808   }
 809
 810   /**
 811    * Sets the mask value at a given node.
 812    *
 813    * @param nodeIndex
 814    * @param value
 815    */
 816   public void setMaskValue(int nodeIndex, char value)
 817   {
 818     nodes.get(nodeIndex).setMaskValue(value);
 819   }
 820
 821   /**
 822    * Temporary implementation, should not be used.
 823    *
 824    * @return
 825    */
 826   public String getGatheringThreshold()
 827   {
 828     String value;
 829     value = fileProperties.get("GA");
 830     return value;
 831   }
 832
 833   /**
 834    * Temporary implementation, should not be used.
 835    *
 836    * @return
 837    */
 838   public String getNoiseCutoff()
 839   {
 840     String value;
 841     value = fileProperties.get("NC");
 842     return value;
 843   }
 844
 845   /**
 846    * Temporary implementation, should not be used.
 847    *
 848    * @return
 849    */
 850   public String getTrustedCutoff()
 851   {
 852     String value;
 853     value = fileProperties.get("TC");
 854     return value;
 855   }
 856
 857   /**
 858    * Temporary implementation, should not be used.
 859    *
 860    * @return
 861    */
 862   public String getViterbi()
 863   {
 864     String value;
 865     value = fileProperties.get(VITERBI);
 866     return value;
 867   }
 868
 869   /**
 870    * Temporary implementation, should not be used.
 871    *
 872    * @return
 873    */
 874   public String getMSV()
 875   {
 876     String value;
 877     value = fileProperties.get(MSV);
 878     return value;
 879   }
 880
 881   /**
 882    * Temporary implementation, should not be used.
 883    *
 884    * @return
 885    */
 886   public String getForward()
 887   {
 888     String value;
 889     value = fileProperties.get(FORWARD);
 890     return value;
 891   }
 892
 893   /**
 894    * Sets the activation status of the MAP annotation.
 895    *
 896    * @param status
 897    */
 898   public void setMAPStatus(boolean status)
 899   {
 900     fileProperties.put(MAP, status ? YES : NO);
 901   }
 902
 903   /**
 904    * Sets the activation status of the reference annotation.
 905    *
 906    * @param status
 907    */
 908   public void setReferenceAnnotationStatus(boolean status)
 909   {
 910     fileProperties.put(REFERENCE_ANNOTATION, status ? YES : NO);
 911   }
 912
 913   /**
 914    * Sets the activation status of the mask value annotation.
 915    *
 916    * @param status
 917    */
 918   public void setMaskedValueStatus(boolean status)
 919   {
 920     fileProperties.put(MASKED_VALUE, status ? YES : NO);
 921   }
 922
 923   /**
 924    * Sets the activation status of the consensus residue annotation.
 925    *
 926    * @param status
 927    */
 928   public void setConsensusResidueStatus(boolean status)
 929   {
 930     fileProperties.put(CONSENSUS_RESIDUE, status ? YES : NO);
 931   }
 932
 933   /**
 934    * Sets the activation status of the consensus structure annotation.
 935    *
 936    * @param status
 937    */
 938   public void setConsensusStructureStatus(boolean status)
 939   {
 940     fileProperties.put(CONSENSUS_STRUCTURE, status ? YES : NO);
 941   }
 942
 943   /**
 944    * Finds the index of the node in a hidden Markov model based on the column in
 945    * the alignment
 946    *
 947    * @param alignmentColumn
 948    *          The index of the column in the alignment, with the indexes
 949    *          starting from 0.
 950    */
 951
 952   public Integer findNodeIndex(int alignmentColumn)
 953   {
 954     Integer index;
 955     index = nodeLookup.get(alignmentColumn);
 956     return index;
 957   }
 958
 959   /**
 960    * Finds the String values of a boolean. "yes" for true and "no" for false.
 961    *
 962    * @param value
 963    * @return
 964    */
 965   public static String findStringFromBoolean(boolean value)
 966   {
 967     if (value)
 968     {
 969       return YES;
 970     }
 971     else
 972     {
 973       return NO;
 974     }
 975   }
 976
 977
 978
 979   /**
 980    * Returns the consensus sequence based on the most probable symbol at each
 981    * position. The sequence is adjusted to match the length of the existing
 982    * sequence alignment. Gap characters are used as padding.
 983    *
 984    * @param length
 985    *          The length of the longest sequence in the existing alignment.
 986    * @return
 987    */
 988   public Sequence getConsensusSequence(int length)
 989   {
 990     int start;
 991     int end;
 992     int modelLength;
 993     start = getNodeAlignmentColumn(1);
 994     modelLength = getLength();
 995     end = getNodeAlignmentColumn(modelLength);
 996     char[] sequence = new char[length];
 997     for (int index = 0; index < length; index++)
 998     {
 999       Character character;
1000
1001         character = getConsensusAtAlignColumn(index);
1002
1003       if (character == null || character == '-')
1004       {
1005         sequence[index] = '-';
1006       }
1007       else
1008       {
1009         sequence[index] = Character.toUpperCase(character);
1010       }
1011       }
1012
1013
1014     Sequence seq = new Sequence(getName() + "_HMM", sequence, start, end);
1015     return seq;
1016   }
1017
1018
1019   /**
1020    * Maps the nodes of the hidden Markov model to the reference annotation and
1021    * then deletes this annotation.
1022    */
1023   public void mapToReferenceAnnotation(AlignFrame af, SequenceI seq)
1024   {
1025     AlignmentAnnotation annotArray[] = af.getViewport().getAlignment()
1026             .getAlignmentAnnotation();
1027
1028     AlignmentAnnotation reference = null;
1029     for (AlignmentAnnotation annot : annotArray)
1030     {
1031       if (annot.label.contains("Reference"))
1032       {
1033         reference = annot;
1034       }
1035     }
1036
1037     if (reference == null)
1038     {
1039       return;
1040     }
1041
1042     mapToReferenceAnnotation(reference, seq);
1043     af.getViewport().getAlignment().deleteAnnotation(reference);
1044   }
1045
1046   public void mapToReferenceAnnotation(AlignmentAnnotation reference,
1047           SequenceI seq)
1048   {
1049     HiddenMarkovModel hmm = seq.getHMM();
1050     Annotation[] annots = reference.annotations;
1051     {
1052       int nodeIndex = 0;
1053       for (int col = 0; col < annots.length; col++)
1054       {
1055         String character = annots[col].displayCharacter;
1056         if ("x".equals(character) || "X".equals(character))
1057         {
1058           nodeIndex++;
1059           if (nodeIndex < hmm.getNodes().size())
1060           {
1061             HMMNode node = hmm.getNode(nodeIndex);
1062             int alignPos = getNodeAlignmentColumn(nodeIndex);
1063             char seqCharacter = seq.getCharAt(alignPos);
1064             if (alignPos >= seq.getLength() || col >= seq.getLength())
1065             {
1066               seq.insertCharAt(seq.getLength(),
1067                       (alignPos + 1) - seq.getLength(),
1068                       '-');
1069             }
1070             seq.getSequence()[alignPos] = '-';
1071             seq.getSequence()[col] = seqCharacter;
1072             node.setAlignmentColumn(col);
1073             hmm.nodeLookup.put(col, nodeIndex);
1074           }
1075           else
1076           {
1077             System.out.println(
1078                     "The reference annotation contains more consensus columns than the hidden Markov model");
1079             break;
1080           }
1081         }
1082         else
1083         {
1084           hmm.nodeLookup.remove(col);
1085         }
1086       }
1087
1088     }
1089
1090   }
1091
1092   public void mapToReferenceAnnotation(AlignmentAnnotation reference)
1093   {
1094     Annotation[] annots = reference.annotations;
1095     {
1096       int nodeIndex = 0;
1097       for (int col = 0; col < annots.length; col++)
1098       {
1099         String character = annots[col].displayCharacter;
1100         if ("x".equals(character) || "X".equals(character))
1101         {
1102           nodeIndex++;
1103           if (nodeIndex < nodes.size())
1104           {
1105             HMMNode node = nodes.get(nodeIndex);
1106             node.setAlignmentColumn(col + 1);
1107             nodeLookup.put(col, nodeIndex);
1108           }
1109           else
1110           {
1111             System.out.println(
1112                     "The reference annotation contains more consensus columns than the hidden Markov model");
1113             break;
1114           }
1115         }
1116         else
1117         {
1118           nodeLookup.remove(col);
1119         }
1120       }
1121
1122     }
1123
1124   }
1125
1126   public SequenceI initHMMSequence(AlignFrame af, int position)
1127   {
1128     AlignmentI alignment = af.getViewport().getAlignment();
1129     int length = alignment.getWidth();
1130     Sequence consensus = getConsensusSequence(length);
1131     consensus.setIsHMMConsensusSequence(true);
1132     consensus.setHMM(this);
1133     SequenceI[] consensusArr = new Sequence[] { consensus };
1134     alignment.getSequences().add(position, consensus);
1135     return consensus;
1136   }
1137
1138
1139 }
1140