new version of muscle 3.8.31
[jabaws.git] / binaries / src / muscle / makerootmsa.cpp
1 #include "muscle.h"\r
2 #include "tree.h"\r
3 #include "seqvect.h"\r
4 #include "profile.h"\r
5 #include "msa.h"\r
6 #include "pwpath.h"\r
7 #include "estring.h"\r
8 \r
9 #define TRACE           0\r
10 #define VALIDATE        0\r
11 \r
12 static void PathSeq(const Seq &s, const PWPath &Path, bool bRight, Seq &sOut)\r
13         {\r
14         short *esA;\r
15         short *esB;\r
16         PathToEstrings(Path, &esA, &esB);\r
17 \r
18         const unsigned uSeqLength = s.Length();\r
19         const unsigned uEdgeCount = Path.GetEdgeCount();\r
20 \r
21         sOut.Clear();\r
22         sOut.SetName(s.GetName());\r
23         unsigned uPos = 0;\r
24         for (unsigned uEdgeIndex = 0; uEdgeIndex < uEdgeCount; ++uEdgeIndex)\r
25                 {\r
26                 const PWEdge &Edge = Path.GetEdge(uEdgeIndex);\r
27                 char cType = Edge.cType;\r
28                 if (bRight)\r
29                         {\r
30                         if (cType == 'I')\r
31                                 cType = 'D';\r
32                         else if (cType == 'D')\r
33                                 cType = 'I';\r
34                         }\r
35                 switch (cType)\r
36                         {\r
37                 case 'M':\r
38                         sOut.AppendChar(s[uPos++]);\r
39                         break;\r
40                 case 'D':\r
41                         sOut.AppendChar('-');\r
42                         break;\r
43                 case 'I':\r
44                         sOut.AppendChar(s[uPos++]);\r
45                         break;\r
46                 default:\r
47                         Quit("PathSeq, invalid edge type %c", cType);\r
48                         }\r
49                 }\r
50         }\r
51 \r
52 #if     VALIDATE\r
53 \r
54 static void MakeRootSeq(const Seq &s, const Tree &GuideTree, unsigned uLeafNodeIndex,\r
55   const ProgNode Nodes[], Seq &sRoot)\r
56         {\r
57         sRoot.Copy(s);\r
58         unsigned uNodeIndex = uLeafNodeIndex;\r
59         for (;;)\r
60                 {\r
61                 unsigned uParent = GuideTree.GetParent(uNodeIndex);\r
62                 if (NULL_NEIGHBOR == uParent)\r
63                         break;\r
64                 bool bRight = (GuideTree.GetLeft(uParent) == uNodeIndex);\r
65                 uNodeIndex = uParent;\r
66                 const PWPath &Path = Nodes[uNodeIndex].m_Path;\r
67                 Seq sTmp;\r
68                 PathSeq(sRoot, Path, bRight, sTmp);\r
69                 sTmp.SetId(0);\r
70                 sRoot.Copy(sTmp);\r
71                 }\r
72         }\r
73 \r
74 #endif  // VALIDATE\r
75 \r
76 static short *MakeRootSeqE(const Seq &s, const Tree &GuideTree, unsigned uLeafNodeIndex,\r
77   const ProgNode Nodes[], Seq &sRoot, short *Estring1, short *Estring2)\r
78         {\r
79         short *EstringCurr = Estring1;\r
80         short *EstringNext = Estring2;\r
81 \r
82         const unsigned uSeqLength = s.Length();\r
83         EstringCurr[0] = uSeqLength;\r
84         EstringCurr[1] = 0;\r
85 \r
86         unsigned uNodeIndex = uLeafNodeIndex;\r
87         for (;;)\r
88                 {\r
89                 unsigned uParent = GuideTree.GetParent(uNodeIndex);\r
90                 if (NULL_NEIGHBOR == uParent)\r
91                         break;\r
92                 bool bRight = (GuideTree.GetLeft(uParent) == uNodeIndex);\r
93                 uNodeIndex = uParent;\r
94                 const PWPath &Path = Nodes[uNodeIndex].m_Path;\r
95                 const short *EstringNode = bRight ?\r
96                   Nodes[uNodeIndex].m_EstringL : Nodes[uNodeIndex].m_EstringR;\r
97 \r
98                 MulEstrings(EstringCurr, EstringNode, EstringNext);\r
99 #if     TRACE\r
100                 Log("\n");\r
101                 Log("Curr=");\r
102                 LogEstring(EstringCurr);\r
103                 Log("\n");\r
104                 Log("Node=");\r
105                 LogEstring(EstringNode);\r
106                 Log("\n");\r
107                 Log("Prod=");\r
108                 LogEstring(EstringNext);\r
109                 Log("\n");\r
110 #endif\r
111                 short *EstringTmp = EstringNext;\r
112                 EstringNext = EstringCurr;\r
113                 EstringCurr = EstringTmp;\r
114                 }\r
115         EstringOp(EstringCurr, s, sRoot);\r
116 \r
117 #if     TRACE\r
118         Log("Root estring=");\r
119         LogEstring(EstringCurr);\r
120         Log("\n");\r
121         Log("Root seq=");\r
122         sRoot.LogMe();\r
123 #endif\r
124         return EstringCurr;\r
125         }\r
126 \r
127 static unsigned GetFirstNodeIndex(const Tree &tree)\r
128         {\r
129         if (g_bStable)\r
130                 return 0;\r
131         return tree.FirstDepthFirstNode();\r
132         }\r
133 \r
134 static unsigned GetNextNodeIndex(const Tree &tree, unsigned uPrevNodeIndex)\r
135         {\r
136         if (g_bStable)\r
137                 {\r
138                 const unsigned uNodeCount = tree.GetNodeCount();\r
139                 unsigned uNodeIndex = uPrevNodeIndex;\r
140                 for (;;)\r
141                         {\r
142                         ++uNodeIndex;\r
143                         if (uNodeIndex >= uNodeCount)\r
144                                 return NULL_NEIGHBOR;\r
145                         if (tree.IsLeaf(uNodeIndex))\r
146                                 return uNodeIndex;\r
147                         }\r
148                 }\r
149         unsigned uNodeIndex = uPrevNodeIndex;\r
150         for (;;)\r
151                 {\r
152                 uNodeIndex = tree.NextDepthFirstNode(uNodeIndex);\r
153                 if (NULL_NEIGHBOR == uNodeIndex || tree.IsLeaf(uNodeIndex))\r
154                         return uNodeIndex;\r
155                 }\r
156         }\r
157 \r
158 void MakeRootMSA(const SeqVect &v, const Tree &GuideTree, ProgNode Nodes[],\r
159   MSA &a)\r
160         {\r
161 #if     TRACE\r
162         Log("MakeRootMSA Tree=");\r
163         GuideTree.LogMe();\r
164 #endif\r
165         const unsigned uSeqCount = v.GetSeqCount();\r
166         unsigned uColCount = uInsane;\r
167         unsigned uSeqIndex = 0;\r
168         const unsigned uTreeNodeCount = GuideTree.GetNodeCount();\r
169         const unsigned uRootNodeIndex = GuideTree.GetRootNodeIndex();\r
170         const PWPath &RootPath = Nodes[uRootNodeIndex].m_Path;\r
171         const unsigned uRootColCount = RootPath.GetEdgeCount();\r
172         const unsigned uEstringSize = uRootColCount + 1;\r
173         short *Estring1 = new short[uEstringSize];\r
174         short *Estring2 = new short[uEstringSize];\r
175         SetProgressDesc("Root alignment");\r
176 \r
177         unsigned uTreeNodeIndex = GetFirstNodeIndex(GuideTree);\r
178         do\r
179                 {\r
180                 Progress(uSeqIndex, uSeqCount);\r
181 \r
182                 unsigned uId = GuideTree.GetLeafId(uTreeNodeIndex);\r
183                 const Seq &s = *(v[uId]);\r
184 \r
185                 Seq sRootE;\r
186                 short *es = MakeRootSeqE(s, GuideTree, uTreeNodeIndex, Nodes, sRootE,\r
187                   Estring1, Estring2);\r
188                 Nodes[uTreeNodeIndex].m_EstringL = EstringNewCopy(es);\r
189 \r
190 #if     VALIDATE\r
191                 Seq sRoot;\r
192                 MakeRootSeq(s, GuideTree, uTreeNodeIndex, Nodes, sRoot);\r
193                 if (!sRoot.Eq(sRootE))\r
194                         {\r
195                         Log("sRoot=");\r
196                         sRoot.LogMe();\r
197                         Log("sRootE=");\r
198                         sRootE.LogMe();\r
199                         Quit("Root seqs differ");\r
200                         }\r
201 #if     TRACE\r
202                 Log("MakeRootSeq=\n");\r
203                 sRoot.LogMe();\r
204 #endif\r
205 #endif\r
206 \r
207                 if (uInsane == uColCount)\r
208                         {\r
209                         uColCount = sRootE.Length();\r
210                         a.SetSize(uSeqCount, uColCount);\r
211                         }\r
212                 else\r
213                         {\r
214                         assert(uColCount == sRootE.Length());\r
215                         }\r
216                 a.SetSeqName(uSeqIndex, s.GetName());\r
217                 a.SetSeqId(uSeqIndex, uId);\r
218                 for (unsigned uColIndex = 0; uColIndex < uColCount; ++uColIndex)\r
219                         a.SetChar(uSeqIndex, uColIndex, sRootE[uColIndex]);\r
220                 ++uSeqIndex;\r
221 \r
222                 uTreeNodeIndex = GetNextNodeIndex(GuideTree, uTreeNodeIndex);\r
223                 }\r
224         while (NULL_NEIGHBOR != uTreeNodeIndex);\r
225 \r
226         delete[] Estring1;\r
227         delete[] Estring2;\r
228 \r
229         ProgressStepsDone();\r
230         assert(uSeqIndex == uSeqCount);\r
231         }\r