Next version of JABA
[jabaws.git] / binaries / src / muscle / aligngivenpathsw.cpp
1 #include "muscle.h"\r
2 #include "msa.h"\r
3 #include "pwpath.h"\r
4 #include "profile.h"\r
5 \r
6 #define TRACE   0\r
7 \r
8 static void AppendDelete(const MSA &msaA, unsigned &uColIndexA,\r
9   unsigned uSeqCountA, unsigned uSeqCountB, MSA &msaCombined,\r
10   unsigned &uColIndexCombined)\r
11         {\r
12 #if     TRACE\r
13         Log("AppendDelete ColIxA=%u ColIxCmb=%u\n",\r
14           uColIndexA, uColIndexCombined);\r
15 #endif\r
16         for (unsigned uSeqIndexA = 0; uSeqIndexA < uSeqCountA; ++uSeqIndexA)\r
17                 {\r
18                 char c = msaA.GetChar(uSeqIndexA, uColIndexA);\r
19                 msaCombined.SetChar(uSeqIndexA, uColIndexCombined, c);\r
20                 }\r
21 \r
22         for (unsigned uSeqIndexB = 0; uSeqIndexB < uSeqCountB; ++uSeqIndexB)\r
23                 msaCombined.SetChar(uSeqCountA + uSeqIndexB, uColIndexCombined, '-');\r
24 \r
25         ++uColIndexCombined;\r
26         ++uColIndexA;\r
27         }\r
28 \r
29 static void AppendInsert(const MSA &msaB, unsigned &uColIndexB,\r
30   unsigned uSeqCountA, unsigned uSeqCountB, MSA &msaCombined,\r
31   unsigned &uColIndexCombined)\r
32         {\r
33 #if     TRACE\r
34         Log("AppendInsert ColIxB=%u ColIxCmb=%u\n",\r
35           uColIndexB, uColIndexCombined);\r
36 #endif\r
37         for (unsigned uSeqIndexA = 0; uSeqIndexA < uSeqCountA; ++uSeqIndexA)\r
38                 msaCombined.SetChar(uSeqIndexA, uColIndexCombined, '-');\r
39 \r
40         for (unsigned uSeqIndexB = 0; uSeqIndexB < uSeqCountB; ++uSeqIndexB)\r
41                 {\r
42                 char c = msaB.GetChar(uSeqIndexB, uColIndexB);\r
43                 msaCombined.SetChar(uSeqCountA + uSeqIndexB, uColIndexCombined, c);\r
44                 }\r
45 \r
46         ++uColIndexCombined;\r
47         ++uColIndexB;\r
48         }\r
49 \r
50 static void AppendUnalignedTerminals(const MSA &msaA, unsigned &uColIndexA, unsigned uColCountA,\r
51   const MSA &msaB, unsigned &uColIndexB, unsigned uColCountB, unsigned uSeqCountA,\r
52   unsigned uSeqCountB, MSA &msaCombined, unsigned &uColIndexCombined)\r
53         {\r
54 #if     TRACE\r
55         Log("AppendUnalignedTerminals ColIxA=%u ColIxB=%u ColIxCmb=%u\n",\r
56           uColIndexA, uColIndexB, uColIndexCombined);\r
57 #endif\r
58         const unsigned uLengthA = msaA.GetColCount();\r
59         const unsigned uLengthB = msaB.GetColCount();\r
60 \r
61         unsigned uNewColCount = uColCountA;\r
62         if (uColCountB > uNewColCount)\r
63                 uNewColCount = uColCountB;\r
64 \r
65         for (unsigned n = 0; n < uColCountA; ++n)\r
66                 {\r
67                 for (unsigned uSeqIndexA = 0; uSeqIndexA < uSeqCountA; ++uSeqIndexA)\r
68                         {\r
69                         char c = msaA.GetChar(uSeqIndexA, uColIndexA + n);\r
70                         c = UnalignChar(c);\r
71                         msaCombined.SetChar(uSeqIndexA, uColIndexCombined + n, c);\r
72                         }\r
73                 }\r
74         for (unsigned n = uColCountA; n < uNewColCount; ++n)\r
75                 {\r
76                 for (unsigned uSeqIndexA = 0; uSeqIndexA < uSeqCountA; ++uSeqIndexA)\r
77                         msaCombined.SetChar(uSeqIndexA, uColIndexCombined + n, '.');\r
78                 }\r
79 \r
80         for (unsigned n = 0; n < uColCountB; ++n)\r
81                 {\r
82                 for (unsigned uSeqIndexB = 0; uSeqIndexB < uSeqCountB; ++uSeqIndexB)\r
83                         {\r
84                         char c = msaB.GetChar(uSeqIndexB, uColIndexB + n);\r
85                         c = UnalignChar(c);\r
86                         msaCombined.SetChar(uSeqCountA + uSeqIndexB, uColIndexCombined + n, c);\r
87                         }\r
88                 }\r
89         for (unsigned n = uColCountB; n < uNewColCount; ++n)\r
90                 {\r
91                 for (unsigned uSeqIndexB = 0; uSeqIndexB < uSeqCountB; ++uSeqIndexB)\r
92                         msaCombined.SetChar(uSeqCountA + uSeqIndexB, uColIndexCombined + n, '.');\r
93                 }\r
94 \r
95         uColIndexCombined += uNewColCount;\r
96         uColIndexA += uColCountA;\r
97         uColIndexB += uColCountB;\r
98         }\r
99 \r
100 static void AppendMatch(const MSA &msaA, unsigned &uColIndexA, const MSA &msaB,\r
101   unsigned &uColIndexB, unsigned uSeqCountA, unsigned uSeqCountB,\r
102   MSA &msaCombined, unsigned &uColIndexCombined)\r
103         {\r
104 #if     TRACE\r
105         Log("AppendMatch ColIxA=%u ColIxB=%u ColIxCmb=%u\n",\r
106           uColIndexA, uColIndexB, uColIndexCombined);\r
107 #endif\r
108 \r
109         for (unsigned uSeqIndexA = 0; uSeqIndexA < uSeqCountA; ++uSeqIndexA)\r
110                 {\r
111                 char c = msaA.GetChar(uSeqIndexA, uColIndexA);\r
112                 msaCombined.SetChar(uSeqIndexA, uColIndexCombined, c);\r
113                 }\r
114 \r
115         for (unsigned uSeqIndexB = 0; uSeqIndexB < uSeqCountB; ++uSeqIndexB)\r
116                 {\r
117                 char c = msaB.GetChar(uSeqIndexB, uColIndexB);\r
118                 msaCombined.SetChar(uSeqCountA + uSeqIndexB, uColIndexCombined, c);\r
119                 }\r
120 \r
121         ++uColIndexA;\r
122         ++uColIndexB;\r
123         ++uColIndexCombined;\r
124         }\r
125 \r
126 void AlignTwoMSAsGivenPathSW(const PWPath &Path, const MSA &msaA, const MSA &msaB,\r
127   MSA &msaCombined)\r
128         {\r
129         msaCombined.Clear();\r
130 \r
131 #if     TRACE\r
132         Log("AlignTwoMSAsGivenPathSW\n");\r
133         Log("Template A:\n");\r
134         msaA.LogMe();\r
135         Log("Template B:\n");\r
136         msaB.LogMe();\r
137 #endif\r
138 \r
139         const unsigned uColCountA = msaA.GetColCount();\r
140         const unsigned uColCountB = msaB.GetColCount();\r
141 \r
142         const unsigned uSeqCountA = msaA.GetSeqCount();\r
143         const unsigned uSeqCountB = msaB.GetSeqCount();\r
144 \r
145         msaCombined.SetSeqCount(uSeqCountA + uSeqCountB);\r
146 \r
147 // Copy sequence names into combined MSA\r
148         for (unsigned uSeqIndexA = 0; uSeqIndexA < uSeqCountA; ++uSeqIndexA)\r
149                 {\r
150                 msaCombined.SetSeqName(uSeqIndexA, msaA.GetSeqName(uSeqIndexA));\r
151                 msaCombined.SetSeqId(uSeqIndexA, msaA.GetSeqId(uSeqIndexA));\r
152                 }\r
153 \r
154         for (unsigned uSeqIndexB = 0; uSeqIndexB < uSeqCountB; ++uSeqIndexB)\r
155                 {\r
156                 msaCombined.SetSeqName(uSeqCountA + uSeqIndexB, msaB.GetSeqName(uSeqIndexB));\r
157                 msaCombined.SetSeqId(uSeqCountA + uSeqIndexB, msaB.GetSeqId(uSeqIndexB));\r
158                 }\r
159 \r
160         unsigned uColIndexA = 0;\r
161         unsigned uColIndexB = 0;\r
162         unsigned uColIndexCombined = 0;\r
163         const unsigned uEdgeCount = Path.GetEdgeCount();\r
164         for (unsigned uEdgeIndex = 0; uEdgeIndex < uEdgeCount; ++uEdgeIndex)\r
165                 {\r
166                 const PWEdge &Edge = Path.GetEdge(uEdgeIndex);\r
167 #if     TRACE\r
168                 Log("\nEdge %u %c%u.%u\n",\r
169                   uEdgeIndex,\r
170                   Edge.cType,\r
171                   Edge.uPrefixLengthA,\r
172                   Edge.uPrefixLengthB);\r
173 #endif\r
174                 const char cType = Edge.cType;\r
175                 const unsigned uPrefixLengthA = Edge.uPrefixLengthA;\r
176                 unsigned uColCountA = 0;\r
177                 if (uPrefixLengthA > 0)\r
178                         {\r
179                         const unsigned uNodeIndexA = uPrefixLengthA - 1;\r
180                         const unsigned uTplColIndexA = uNodeIndexA;\r
181                         if (uTplColIndexA > uColIndexA)\r
182                                 uColCountA = uTplColIndexA - uColIndexA;\r
183                         }\r
184 \r
185                 const unsigned uPrefixLengthB = Edge.uPrefixLengthB;\r
186                 unsigned uColCountB = 0;\r
187                 if (uPrefixLengthB > 0)\r
188                         {\r
189                         const unsigned uNodeIndexB = uPrefixLengthB - 1;\r
190                         const unsigned uTplColIndexB = uNodeIndexB;\r
191                         if (uTplColIndexB > uColIndexB)\r
192                                 uColCountB = uTplColIndexB - uColIndexB;\r
193                         }\r
194 \r
195                 AppendUnalignedTerminals(msaA, uColIndexA, uColCountA, msaB, uColIndexB, uColCountB,\r
196                   uSeqCountA, uSeqCountB, msaCombined, uColIndexCombined);\r
197 \r
198                 switch (cType)\r
199                         {\r
200                 case 'M':\r
201                         {\r
202                         assert(uPrefixLengthA > 0);\r
203                         assert(uPrefixLengthB > 0);\r
204                         const unsigned uColA = uPrefixLengthA - 1;\r
205                         const unsigned uColB = uPrefixLengthB - 1;\r
206                         assert(uColIndexA == uColA);\r
207                         assert(uColIndexB == uColB);\r
208                         AppendMatch(msaA, uColIndexA, msaB, uColIndexB, uSeqCountA, uSeqCountB,\r
209                           msaCombined, uColIndexCombined);\r
210                         break;\r
211                         }\r
212                 case 'D':\r
213                         {\r
214                         assert(uPrefixLengthA > 0);\r
215                         const unsigned uColA = uPrefixLengthA - 1;\r
216                         assert(uColIndexA == uColA);\r
217                         AppendDelete(msaA, uColIndexA, uSeqCountA, uSeqCountB, msaCombined, uColIndexCombined);\r
218                         break;\r
219                         }\r
220                 case 'I':\r
221                         {\r
222                         assert(uPrefixLengthB > 0);\r
223                         const unsigned uColB = uPrefixLengthB - 1;\r
224                         assert(uColIndexB == uColB);\r
225                         AppendInsert(msaB, uColIndexB, uSeqCountA, uSeqCountB, msaCombined, uColIndexCombined);\r
226                         break;\r
227                         }\r
228                 default:\r
229                         assert(false);\r
230                         }\r
231                 }\r
232         unsigned uInsertColCountA = uColCountA - uColIndexA;\r
233         unsigned uInsertColCountB = uColCountB - uColIndexB;\r
234 \r
235         AppendUnalignedTerminals(msaA, uColIndexA, uInsertColCountA, msaB, uColIndexB,\r
236           uInsertColCountB, uSeqCountA, uSeqCountB, msaCombined, uColIndexCombined);\r
237         }\r