1 /* -*- mode: c; tab-width: 4; c-basic-offset: 4; indent-tabs-mode: nil -*- */
3 /*********************************************************************
4 * Clustal Omega - Multiple sequence alignment
6 * Copyright (C) 2010 University College Dublin
8 * Clustal-Omega is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License as
10 * published by the Free Software Foundation; either version 2 of the
11 * License, or (at your option) any later version.
13 * This file is part of Clustal-Omega.
15 ********************************************************************/
18 * RCS $Id: tree.c 230 2011-04-09 15:37:50Z andreas $
31 #include "muscle_upgma.h"
36 * @brief Creates a UPGMA guide tree. This is a frontend function to
37 * the ported Muscle UPGMA code ().
40 * created upgma tree. will be allocated here. use FreeMuscleTree()
43 * pointer to nseq sequence names
47 * optional: if non-NULL, tree will be written to this files
49 * @see FreeMuscleTree()
54 GuideTreeUpgma(tree_t **tree, char **labels,
55 symmatrix_t *distmat, char *ftree)
57 linkage_t linkage = LINKAGE_AVG;
61 if (NULL == (fp=fopen(ftree, "w"))) {
62 Log(&rLog, LOG_ERROR, "Couldn't open tree-file '%s' for writing. Skipping", ftree);
64 /* fp NULL is handled later */
67 (*tree) = (tree_t *) CKMALLOC(1 * sizeof(tree_t));
68 MuscleUpgma2((*tree), distmat, linkage, labels);
70 if (rLog.iLogLevelEnabled <= LOG_DEBUG) {
71 Log(&rLog, LOG_DEBUG, "tree logging...");
72 LogTree((*tree), LogGetFP(&rLog, LOG_DEBUG));
76 MuscleTreeToFile(fp, (*tree));
77 Log(&rLog, LOG_INFO, "Guide tree written to %s", ftree);
81 /*** end: guidetree_upgma ***/
90 * created upgma tree. will be allocated here. use FreeMuscleTree()
95 * @return non-zero on error
99 GuideTreeFromFile(tree_t **tree, mseq_t *mseq, char *ftree)
104 (*tree) = (tree_t *) CKMALLOC(1 * sizeof(tree_t));
105 if (MuscleTreeFromFile((*tree), ftree)!=0) {
106 Log(&rLog, LOG_ERROR, "%s", "MuscleTreeFromFile failed");
110 /* Make sure tree is rooted */
111 if (!IsRooted((*tree))) {
112 Log(&rLog, LOG_ERROR, "User tree must be rooted");
116 if ((int)GetLeafCount((*tree)) != mseq->nseqs) {
117 Log(&rLog, LOG_ERROR, "User tree does not match input sequences");
121 /* compare tree labels and sequence names and set leaf-ids */
122 iNodeCount = GetNodeCount((*tree));
123 for (iNodeIndex = 0; iNodeIndex < iNodeCount; ++iNodeIndex) {
127 if (!IsLeaf(iNodeIndex, (*tree)))
129 LeafName = GetLeafName(iNodeIndex, (*tree));
131 if ((iSeqIndex=FindSeqName(LeafName, mseq))==-1) {
132 Log(&rLog, LOG_ERROR, "Label '%s' in tree could not be found in sequence names", LeafName);
136 SetLeafId((*tree), iNodeIndex, iSeqIndex);
139 if (rLog.iLogLevelEnabled <= LOG_DEBUG) {
140 Log(&rLog, LOG_DEBUG, "tree logging...");
141 LogTree((*tree), LogGetFP(&rLog, LOG_DEBUG));
146 /*** end: GuideTreeFromFile() ***/
152 * @brief Depth first traversal of tree, i.e. leaf nodes (sequences)
153 * will be visited first. Order can be used to guide progressive
156 * @param[out] piOrderLR_p
157 * order in which left/right nodes (profiles) are to be aligned.
158 * allocated here; caller must free.
160 * The tree to traverse; has to be rooted
162 * corresponding multiple sequence structure
166 TraverseTree(int **piOrderLR_p,
167 tree_t *tree, mseq_t *mseq)
169 int tree_nodeindex = 0;
174 assert(IsRooted(tree));
176 /* allocate memory for node/profile alignment order;
177 * for every node allocate DIFF_NODE (3) int (1 left, 1 right, 1 parent)
179 *piOrderLR_p = (int *)CKCALLOC(DIFF_NODE * GetNodeCount(tree), sizeof(int));
181 /* Log(&rLog, LOG_FORCED_DEBUG, "print tree->m_iNodeCount=%d", tree->m_iNodeCount); */
184 tree_nodeindex = FirstDepthFirstNode(tree);
185 /*LOG_DEBUG("Starting with treenodeindex = %d", tree_nodeindex);*/
190 if (IsLeaf(tree_nodeindex, tree)) {
191 int leafid = GetLeafId(tree_nodeindex, tree);
192 if (leafid >= mseq->nseqs)
193 Log(&rLog, LOG_FATAL, "Sequence index out of range during tree traversal (leafid=%d nseqs=%d)",
194 leafid, mseq->nseqs);
196 /* this is a leaf node,
197 * indicate this by registering same leafid for left/right
200 (*piOrderLR_p)[DIFF_NODE*order_index+LEFT_NODE] = leafid;
201 (*piOrderLR_p)[DIFF_NODE*order_index+RGHT_NODE] = leafid;
202 (*piOrderLR_p)[DIFF_NODE*order_index+PRNT_NODE] = tree_nodeindex;
204 Log(&rLog, LOG_DEBUG, "Tree traversal: Visited leaf-node %d (leaf-id %d = Seq '%s')",
205 tree_nodeindex, leafid, mseq->sqinfo[leafid].name);
212 merge_nodeindex = tree_nodeindex;
213 left = GetLeft(tree_nodeindex, tree);
214 right = GetRight(tree_nodeindex, tree);
216 /* this is not a leaf node but a merge node,
217 * register left node (even) and right node (odd)
219 (*piOrderLR_p)[DIFF_NODE*order_index+LEFT_NODE] = left;
220 (*piOrderLR_p)[DIFF_NODE*order_index+RGHT_NODE] = right;
221 (*piOrderLR_p)[DIFF_NODE*order_index+PRNT_NODE] = merge_nodeindex;
223 Log(&rLog, LOG_DEBUG, "Tree traversal: Visited non-leaf node %d with siblings %d (L) and %d (R)",
224 merge_nodeindex, left, right);
226 tree_nodeindex = NextDepthFirstNode(tree_nodeindex, tree);
230 } while (NULL_NEIGHBOR != tree_nodeindex);
234 /*** end: TraverseTree ***/