9c3e0f442a0c2229926840aa34f7c125399dc488
[jabaws.git] / binaries / src / muscle / makerootmsa.cpp
1 #include "muscle.h"\r
2 #include "tree.h"\r
3 #include "seqvect.h"\r
4 #include "profile.h"\r
5 #include "msa.h"\r
6 #include "pwpath.h"\r
7 #include "estring.h"\r
8 \r
9 #define TRACE           0\r
10 #define VALIDATE        0\r
11 \r
12 static void PathSeq(const Seq &s, const PWPath &Path, bool bRight, Seq &sOut)\r
13         {\r
14         short *esA;\r
15         short *esB;\r
16         PathToEstrings(Path, &esA, &esB);\r
17 \r
18         const unsigned uSeqLength = s.Length();\r
19         const unsigned uEdgeCount = Path.GetEdgeCount();\r
20 \r
21         sOut.Clear();\r
22         sOut.SetName(s.GetName());\r
23         unsigned uPos = 0;\r
24         for (unsigned uEdgeIndex = 0; uEdgeIndex < uEdgeCount; ++uEdgeIndex)\r
25                 {\r
26                 const PWEdge &Edge = Path.GetEdge(uEdgeIndex);\r
27                 char cType = Edge.cType;\r
28                 if (bRight)\r
29                         {\r
30                         if (cType == 'I')\r
31                                 cType = 'D';\r
32                         else if (cType == 'D')\r
33                                 cType = 'I';\r
34                         }\r
35                 switch (cType)\r
36                         {\r
37                 case 'M':\r
38                         sOut.AppendChar(s[uPos++]);\r
39                         break;\r
40                 case 'D':\r
41                         sOut.AppendChar('-');\r
42                         break;\r
43                 case 'I':\r
44                         sOut.AppendChar(s[uPos++]);\r
45                         break;\r
46                 default:\r
47                         Quit("PathSeq, invalid edge type %c", cType);\r
48                         }\r
49                 }\r
50         }\r
51 \r
52 #if     VALIDATE\r
53 \r
54 static void MakeRootSeq(const Seq &s, const Tree &GuideTree, unsigned uLeafNodeIndex,\r
55   const ProgNode Nodes[], Seq &sRoot)\r
56         {\r
57         sRoot.Copy(s);\r
58         unsigned uNodeIndex = uLeafNodeIndex;\r
59         for (;;)\r
60                 {\r
61                 unsigned uParent = GuideTree.GetParent(uNodeIndex);\r
62                 if (NULL_NEIGHBOR == uParent)\r
63                         break;\r
64                 bool bRight = (GuideTree.GetLeft(uParent) == uNodeIndex);\r
65                 uNodeIndex = uParent;\r
66                 const PWPath &Path = Nodes[uNodeIndex].m_Path;\r
67                 Seq sTmp;\r
68                 PathSeq(sRoot, Path, bRight, sTmp);\r
69                 sRoot.Copy(sTmp);\r
70                 }\r
71         }\r
72 \r
73 #endif  // VALIDATE\r
74 \r
75 static short *MakeRootSeqE(const Seq &s, const Tree &GuideTree, unsigned uLeafNodeIndex,\r
76   const ProgNode Nodes[], Seq &sRoot, short *Estring1, short *Estring2)\r
77         {\r
78         short *EstringCurr = Estring1;\r
79         short *EstringNext = Estring2;\r
80 \r
81         const unsigned uSeqLength = s.Length();\r
82         EstringCurr[0] = uSeqLength;\r
83         EstringCurr[1] = 0;\r
84 \r
85         unsigned uNodeIndex = uLeafNodeIndex;\r
86         for (;;)\r
87                 {\r
88                 unsigned uParent = GuideTree.GetParent(uNodeIndex);\r
89                 if (NULL_NEIGHBOR == uParent)\r
90                         break;\r
91                 bool bRight = (GuideTree.GetLeft(uParent) == uNodeIndex);\r
92                 uNodeIndex = uParent;\r
93                 const PWPath &Path = Nodes[uNodeIndex].m_Path;\r
94                 const short *EstringNode = bRight ?\r
95                   Nodes[uNodeIndex].m_EstringL : Nodes[uNodeIndex].m_EstringR;\r
96 \r
97                 MulEstrings(EstringCurr, EstringNode, EstringNext);\r
98 #if     TRACE\r
99                 Log("\n");\r
100                 Log("Curr=");\r
101                 LogEstring(EstringCurr);\r
102                 Log("\n");\r
103                 Log("Node=");\r
104                 LogEstring(EstringNode);\r
105                 Log("\n");\r
106                 Log("Prod=");\r
107                 LogEstring(EstringNext);\r
108                 Log("\n");\r
109 #endif\r
110                 short *EstringTmp = EstringNext;\r
111                 EstringNext = EstringCurr;\r
112                 EstringCurr = EstringTmp;\r
113                 }\r
114         EstringOp(EstringCurr, s, sRoot);\r
115 \r
116 #if     TRACE\r
117         Log("Root estring=");\r
118         LogEstring(EstringCurr);\r
119         Log("\n");\r
120         Log("Root seq=");\r
121         sRoot.LogMe();\r
122 #endif\r
123         return EstringCurr;\r
124         }\r
125 \r
126 static unsigned GetFirstNodeIndex(const Tree &tree)\r
127         {\r
128         if (g_bStable)\r
129                 return 0;\r
130         return tree.FirstDepthFirstNode();\r
131         }\r
132 \r
133 static unsigned GetNextNodeIndex(const Tree &tree, unsigned uPrevNodeIndex)\r
134         {\r
135         if (g_bStable)\r
136                 {\r
137                 const unsigned uNodeCount = tree.GetNodeCount();\r
138                 unsigned uNodeIndex = uPrevNodeIndex;\r
139                 for (;;)\r
140                         {\r
141                         ++uNodeIndex;\r
142                         if (uNodeIndex >= uNodeCount)\r
143                                 return NULL_NEIGHBOR;\r
144                         if (tree.IsLeaf(uNodeIndex))\r
145                                 return uNodeIndex;\r
146                         }\r
147                 }\r
148         unsigned uNodeIndex = uPrevNodeIndex;\r
149         for (;;)\r
150                 {\r
151                 uNodeIndex = tree.NextDepthFirstNode(uNodeIndex);\r
152                 if (NULL_NEIGHBOR == uNodeIndex || tree.IsLeaf(uNodeIndex))\r
153                         return uNodeIndex;\r
154                 }\r
155         }\r
156 \r
157 void MakeRootMSA(const SeqVect &v, const Tree &GuideTree, ProgNode Nodes[],\r
158   MSA &a)\r
159         {\r
160 #if     TRACE\r
161         Log("MakeRootMSA Tree=");\r
162         GuideTree.LogMe();\r
163 #endif\r
164         const unsigned uSeqCount = v.GetSeqCount();\r
165         unsigned uColCount = uInsane;\r
166         unsigned uSeqIndex = 0;\r
167         const unsigned uTreeNodeCount = GuideTree.GetNodeCount();\r
168         const unsigned uRootNodeIndex = GuideTree.GetRootNodeIndex();\r
169         const PWPath &RootPath = Nodes[uRootNodeIndex].m_Path;\r
170         const unsigned uRootColCount = RootPath.GetEdgeCount();\r
171         const unsigned uEstringSize = uRootColCount + 1;\r
172         short *Estring1 = new short[uEstringSize];\r
173         short *Estring2 = new short[uEstringSize];\r
174         SetProgressDesc("Root alignment");\r
175 \r
176         unsigned uTreeNodeIndex = GetFirstNodeIndex(GuideTree);\r
177         do\r
178                 {\r
179                 Progress(uSeqIndex, uSeqCount);\r
180 \r
181                 unsigned uId = GuideTree.GetLeafId(uTreeNodeIndex);\r
182                 const Seq &s = *(v[uId]);\r
183 \r
184                 Seq sRootE;\r
185                 short *es = MakeRootSeqE(s, GuideTree, uTreeNodeIndex, Nodes, sRootE,\r
186                   Estring1, Estring2);\r
187                 Nodes[uTreeNodeIndex].m_EstringL = EstringNewCopy(es);\r
188 \r
189 #if     VALIDATE\r
190                 Seq sRoot;\r
191                 MakeRootSeq(s, GuideTree, uTreeNodeIndex, Nodes, sRoot);\r
192                 if (!sRoot.Eq(sRootE))\r
193                         {\r
194                         Log("sRoot=");\r
195                         sRoot.LogMe();\r
196                         Log("sRootE=");\r
197                         sRootE.LogMe();\r
198                         Quit("Root seqs differ");\r
199                         }\r
200 #endif\r
201 \r
202 #if     TRACE\r
203                 Log("MakeRootSeq=\n");\r
204                 sRoot.LogMe();\r
205 #endif\r
206                 if (uInsane == uColCount)\r
207                         {\r
208                         uColCount = sRootE.Length();\r
209                         a.SetSize(uSeqCount, uColCount);\r
210                         }\r
211                 else\r
212                         {\r
213                         assert(uColCount == sRootE.Length());\r
214                         }\r
215                 a.SetSeqName(uSeqIndex, s.GetName());\r
216                 a.SetSeqId(uSeqIndex, uId);\r
217                 for (unsigned uColIndex = 0; uColIndex < uColCount; ++uColIndex)\r
218                         a.SetChar(uSeqIndex, uColIndex, sRootE[uColIndex]);\r
219                 ++uSeqIndex;\r
220 \r
221                 uTreeNodeIndex = GetNextNodeIndex(GuideTree, uTreeNodeIndex);\r
222                 }\r
223         while (NULL_NEIGHBOR != uTreeNodeIndex);\r
224 \r
225         delete[] Estring1;\r
226         delete[] Estring2;\r
227 \r
228         ProgressStepsDone();\r
229         assert(uSeqIndex == uSeqCount);\r
230         }\r