Next version of JABA
[jabaws.git] / binaries / src / muscle / pwpath.cpp
1 #include "muscle.h"\r
2 #include "pwpath.h"\r
3 #include "seq.h"\r
4 #include "textfile.h"\r
5 #include "msa.h"\r
6 \r
7 PWPath::PWPath()\r
8         {\r
9         m_uArraySize = 0;\r
10         m_uEdgeCount = 0;\r
11         m_Edges = 0;\r
12         }\r
13 \r
14 PWPath::~PWPath()\r
15         {\r
16         Clear();\r
17         }\r
18 \r
19 void PWPath::Clear()\r
20         {\r
21         delete[] m_Edges;\r
22         m_Edges = 0;\r
23         m_uArraySize = 0;\r
24         m_uEdgeCount = 0;\r
25         }\r
26 \r
27 void PWPath::ExpandPath(unsigned uAdditionalEdgeCount)\r
28         {\r
29         PWEdge *OldPath = m_Edges;\r
30         unsigned uEdgeCount = m_uArraySize + uAdditionalEdgeCount;\r
31 \r
32         m_Edges = new PWEdge[uEdgeCount];\r
33         m_uArraySize = uEdgeCount;\r
34         if (m_uEdgeCount > 0)\r
35                 memcpy(m_Edges, OldPath, m_uEdgeCount*sizeof(PWEdge));\r
36         delete[] OldPath;\r
37         }\r
38 \r
39 void PWPath::AppendEdge(const PWEdge &Edge)\r
40         {\r
41         if (0 == m_uArraySize || m_uEdgeCount + 1 == m_uArraySize)\r
42                 ExpandPath(200);\r
43 \r
44         m_Edges[m_uEdgeCount] = Edge;\r
45         ++m_uEdgeCount;\r
46         }\r
47 \r
48 void PWPath::AppendEdge(char cType, unsigned uPrefixLengthA, unsigned uPrefixLengthB)\r
49         {\r
50         PWEdge e;\r
51         e.uPrefixLengthA = uPrefixLengthA;\r
52         e.uPrefixLengthB = uPrefixLengthB;\r
53         e.cType = cType;\r
54         AppendEdge(e);\r
55         }\r
56 \r
57 void PWPath::PrependEdge(const PWEdge &Edge)\r
58         {\r
59         if (0 == m_uArraySize || m_uEdgeCount + 1 == m_uArraySize)\r
60                 ExpandPath(1000);\r
61         if (m_uEdgeCount > 0)\r
62                 memmove(m_Edges + 1, m_Edges, sizeof(PWEdge)*m_uEdgeCount);\r
63         m_Edges[0] = Edge;\r
64         ++m_uEdgeCount;\r
65         }\r
66 \r
67 const PWEdge &PWPath::GetEdge(unsigned uEdgeIndex) const\r
68         {\r
69         assert(uEdgeIndex < m_uEdgeCount);\r
70         return m_Edges[uEdgeIndex];\r
71         }\r
72 \r
73 void PWPath::Validate() const\r
74         {\r
75         const unsigned uEdgeCount = GetEdgeCount();\r
76         if (0 == uEdgeCount)\r
77                 return;\r
78         const PWEdge &FirstEdge = GetEdge(0);\r
79         const PWEdge &LastEdge = GetEdge(uEdgeCount - 1);\r
80         unsigned uStartA = FirstEdge.uPrefixLengthA;\r
81         unsigned uStartB = FirstEdge.uPrefixLengthB;\r
82         if (FirstEdge.cType != 'I')\r
83                 --uStartA;\r
84         if (FirstEdge.cType != 'D')\r
85                 --uStartB;\r
86 \r
87         unsigned uPrefixLengthA = FirstEdge.uPrefixLengthA;\r
88         unsigned uPrefixLengthB = FirstEdge.uPrefixLengthB;\r
89         for (unsigned uEdgeIndex = 1; uEdgeIndex < uEdgeCount; ++uEdgeIndex)\r
90                 {\r
91                 const PWEdge &Edge = GetEdge(uEdgeIndex);\r
92                 switch (Edge.cType)\r
93                         {\r
94                 case 'M':\r
95                         if (uPrefixLengthA + 1 != Edge.uPrefixLengthA)\r
96                                 Quit("PWPath::Validate MA %u", uPrefixLengthA);\r
97                         if (uPrefixLengthB + 1 != Edge.uPrefixLengthB)\r
98                                 Quit("PWPath::Validate MB %u", uPrefixLengthB);\r
99                         ++uPrefixLengthA;\r
100                         ++uPrefixLengthB;\r
101                         break;\r
102                 case 'D':\r
103                         if (uPrefixLengthA + 1 != Edge.uPrefixLengthA)\r
104                                 Quit("PWPath::Validate DA %u", uPrefixLengthA);\r
105                         if (uPrefixLengthB != Edge.uPrefixLengthB)\r
106                                 Quit("PWPath::Validate DB %u", uPrefixLengthB);\r
107                         ++uPrefixLengthA;\r
108                         break;\r
109                 case 'I':\r
110                         if (uPrefixLengthA != Edge.uPrefixLengthA)\r
111                                 Quit("PWPath::Validate IA %u", uPrefixLengthA);\r
112                         if (uPrefixLengthB + 1 != Edge.uPrefixLengthB)\r
113                                 Quit("PWPath::Validate IB %u", uPrefixLengthB);\r
114                         ++uPrefixLengthB;\r
115                         break;\r
116                         }\r
117                 }\r
118         }\r
119 \r
120 void PWPath::LogMe() const\r
121         {\r
122         for (unsigned uEdgeIndex = 0; uEdgeIndex < GetEdgeCount(); ++uEdgeIndex)\r
123                 {\r
124                 const PWEdge &Edge = GetEdge(uEdgeIndex);\r
125                 if (uEdgeIndex > 0)\r
126                         Log(" ");\r
127                 Log("%c%d.%d",\r
128                   Edge.cType,\r
129                   Edge.uPrefixLengthA,\r
130                   Edge.uPrefixLengthB);\r
131                 if ((uEdgeIndex > 0 && uEdgeIndex%10 == 0) ||\r
132                  uEdgeIndex == GetEdgeCount() - 1)\r
133                         Log("\n");\r
134                 }\r
135         }\r
136 \r
137 void PWPath::Copy(const PWPath &Path)\r
138         {\r
139         Clear();\r
140         const unsigned uEdgeCount = Path.GetEdgeCount();\r
141         for (unsigned uEdgeIndex = 0; uEdgeIndex < uEdgeCount; ++uEdgeIndex)\r
142                 {\r
143                 const PWEdge &Edge = Path.GetEdge(uEdgeIndex);\r
144                 AppendEdge(Edge);\r
145                 }\r
146         }\r
147 \r
148 void PWPath::FromMSAPair(const MSA &msaA, const MSA &msaB)\r
149         {\r
150         const unsigned uColCount = msaA.GetColCount();\r
151         if (uColCount != msaB.GetColCount())\r
152                 Quit("PWPath::FromMSAPair, lengths differ");\r
153 \r
154         Clear();\r
155 \r
156         unsigned uPrefixLengthA = 0;\r
157         unsigned uPrefixLengthB = 0;\r
158         for (unsigned uColIndex = 0; uColIndex < uColCount; ++uColIndex)\r
159                 {\r
160                 bool bIsGapA = msaA.IsGapColumn(uColIndex);\r
161                 bool bIsGapB = msaB.IsGapColumn(uColIndex);\r
162 \r
163                 PWEdge Edge;\r
164                 char cType;\r
165                 if (!bIsGapA && !bIsGapB)\r
166                         {\r
167                         cType = 'M';\r
168                         ++uPrefixLengthA;\r
169                         ++uPrefixLengthB;\r
170                         }\r
171                 else if (bIsGapA && !bIsGapB)\r
172                         {\r
173                         cType = 'I';\r
174                         ++uPrefixLengthB;\r
175                         }\r
176                 else if (!bIsGapA && bIsGapB)\r
177                         {\r
178                         cType = 'D';\r
179                         ++uPrefixLengthA;\r
180                         }\r
181                 else\r
182                         {\r
183                         assert(bIsGapB && bIsGapA);\r
184                         continue;\r
185                         }\r
186 \r
187                 Edge.cType = cType;\r
188                 Edge.uPrefixLengthA = uPrefixLengthA;\r
189                 Edge.uPrefixLengthB = uPrefixLengthB;\r
190                 AppendEdge(Edge);\r
191                 }\r
192         }\r
193 \r
194 // Very similar to HMMPath::FromFile, should consolidate.\r
195 void PWPath::FromFile(TextFile &File)\r
196         {\r
197         Clear();\r
198         char szToken[1024];\r
199         File.GetTokenX(szToken, sizeof(szToken));\r
200         if (0 != strcmp(szToken, "Path"))\r
201                 Quit("Invalid path file (Path)");\r
202 \r
203         File.GetTokenX(szToken, sizeof(szToken));\r
204         if (0 != strcmp(szToken, "edges"))\r
205                 Quit("Invalid path file (edges)");\r
206 \r
207         File.GetTokenX(szToken, sizeof(szToken));\r
208         if (!IsValidInteger(szToken))\r
209                 Quit("Invalid path file (edges value)");\r
210 \r
211         const unsigned uEdgeCount = (unsigned) atoi(szToken);\r
212         unsigned uEdgeIndex = 0;\r
213         for (unsigned uEdgeIndex = 0; uEdgeIndex < uEdgeCount; ++uEdgeIndex)\r
214                 {\r
215         // index\r
216                 File.GetTokenX(szToken, sizeof(szToken));\r
217                 if (!IsValidInteger(szToken))\r
218                         Quit("Invalid path file, invalid index '%s'", szToken);\r
219                 unsigned n = (unsigned) atoi(szToken);\r
220                 if (n != uEdgeIndex)\r
221                         Quit("Invalid path file, expecting edge %u got %u", uEdgeIndex, n);\r
222 \r
223         // type\r
224                 File.GetTokenX(szToken, sizeof(szToken));\r
225                 if (1 != strlen(szToken))\r
226                         Quit("Invalid path file, expecting state, got '%s'", szToken);\r
227                 const char cType = szToken[0];\r
228                 if ('M' != cType && 'D' != cType && cType != 'I' && 'S' != cType)\r
229                         Quit("Invalid path file, expecting state, got '%c'", cType);\r
230 \r
231         // prefix length A\r
232                 File.GetTokenX(szToken, sizeof(szToken));\r
233                 if (!IsValidInteger(szToken))\r
234                         Quit("Invalid path file, bad prefix length A '%s'", szToken);\r
235                 const unsigned uPrefixLengthA = (unsigned) atoi(szToken);\r
236 \r
237         // prefix length B\r
238                 File.GetTokenX(szToken, sizeof(szToken));\r
239                 if (!IsValidInteger(szToken))\r
240                         Quit("Invalid path file, bad prefix length B '%s'", szToken);\r
241                 const unsigned uPrefixLengthB = (unsigned) atoi(szToken);\r
242 \r
243                 PWEdge Edge;\r
244                 Edge.cType = cType;\r
245                 Edge.uPrefixLengthA = uPrefixLengthA;\r
246                 Edge.uPrefixLengthB = uPrefixLengthB;\r
247                 AppendEdge(Edge);\r
248                 }\r
249         File.GetTokenX(szToken, sizeof(szToken));\r
250         if (0 != strcmp(szToken, "//"))\r
251                 Quit("Invalid path file (//)");\r
252         }\r
253 \r
254 void PWPath::ToFile(TextFile &File) const\r
255         {\r
256         const unsigned uEdgeCount = GetEdgeCount();\r
257 \r
258         File.PutString("Path\n");\r
259         File.PutFormat("edges %u\n", uEdgeCount);\r
260         for (unsigned uEdgeIndex = 0; uEdgeIndex < uEdgeCount; ++uEdgeIndex)\r
261                 {\r
262                 const PWEdge &Edge = GetEdge(uEdgeIndex);\r
263                 File.PutFormat("%u %c %u %u\n",\r
264                   uEdgeIndex,\r
265                   Edge.cType,\r
266                   Edge.uPrefixLengthA,\r
267                   Edge.uPrefixLengthB);\r
268                 }\r
269         File.PutString("//\n");\r
270         }\r
271 \r
272 void PWPath::AssertEqual(const PWPath &Path) const\r
273         {\r
274         const unsigned uEdgeCount = GetEdgeCount();\r
275         if (uEdgeCount != Path.GetEdgeCount())\r
276                 {\r
277                 Log("PWPath::AssertEqual, this=\n");\r
278                 LogMe();\r
279                 Log("\nOther path=\n");\r
280                 Path.LogMe();\r
281                 Log("\n");\r
282                 Quit("PWPath::AssertEqual, Edge count different %u %u\n",\r
283                   uEdgeCount, Path.GetEdgeCount());\r
284                 }\r
285 \r
286         for (unsigned uEdgeIndex = 0; uEdgeIndex < uEdgeCount; ++uEdgeIndex)\r
287                 {\r
288                 const PWEdge &e1 = GetEdge(uEdgeIndex);\r
289                 const PWEdge &e2 = Path.GetEdge(uEdgeIndex);\r
290                 if (e1.cType != e2.cType || e1.uPrefixLengthA != e2.uPrefixLengthA ||\r
291                   e1.uPrefixLengthB != e2.uPrefixLengthB)\r
292                         {\r
293                         Log("PWPath::AssertEqual, this=\n");\r
294                         LogMe();\r
295                         Log("\nOther path=\n");\r
296                         Path.LogMe();\r
297                         Log("\n");\r
298                         Log("This edge %c%u.%u, other edge %c%u.%u\n",\r
299                           e1.cType, e1.uPrefixLengthA, e1.uPrefixLengthB,\r
300                           e2.cType, e2.uPrefixLengthA, e2.uPrefixLengthB);\r
301                         Quit("PWPath::AssertEqual, edge %u different\n", uEdgeIndex);\r
302                         }\r
303                 }\r
304         }\r
305 \r
306 bool PWPath::Equal(const PWPath &Path) const\r
307         {\r
308         const unsigned uEdgeCount = GetEdgeCount();\r
309         if (uEdgeCount != Path.GetEdgeCount())\r
310                 return false;\r
311 \r
312         for (unsigned uEdgeIndex = 0; uEdgeIndex < uEdgeCount; ++uEdgeIndex)\r
313                 {\r
314                 const PWEdge &e1 = GetEdge(uEdgeIndex);\r
315                 const PWEdge &e2 = Path.GetEdge(uEdgeIndex);\r
316                 if (e1.cType != e2.cType || e1.uPrefixLengthA != e2.uPrefixLengthA ||\r
317                   e1.uPrefixLengthB != e2.uPrefixLengthB)\r
318                         return false;\r
319                 }\r
320         return true;\r
321         }\r
322 \r
323 unsigned PWPath::GetMatchCount() const\r
324         {\r
325         unsigned uMatchCount = 0;\r
326         const unsigned uEdgeCount = GetEdgeCount();\r
327         for (unsigned uEdgeIndex = 0; uEdgeIndex < uEdgeCount; ++uEdgeIndex)\r
328                 {\r
329                 const PWEdge &e = GetEdge(uEdgeIndex);\r
330                 if ('M' == e.cType)\r
331                         ++uMatchCount;\r
332                 }\r
333         return uMatchCount;\r
334         }\r
335 \r
336 unsigned PWPath::GetInsertCount() const\r
337         {\r
338         unsigned uInsertCount = 0;\r
339         const unsigned uEdgeCount = GetEdgeCount();\r
340         for (unsigned uEdgeIndex = 0; uEdgeIndex < uEdgeCount; ++uEdgeIndex)\r
341                 {\r
342                 const PWEdge &e = GetEdge(uEdgeIndex);\r
343                 if ('I' == e.cType)\r
344                         ++uInsertCount;\r
345                 }\r
346         return uInsertCount;\r
347         }\r
348 \r
349 unsigned PWPath::GetDeleteCount() const\r
350         {\r
351         unsigned uDeleteCount = 0;\r
352         const unsigned uEdgeCount = GetEdgeCount();\r
353         for (unsigned uEdgeIndex = 0; uEdgeIndex < uEdgeCount; ++uEdgeIndex)\r
354                 {\r
355                 const PWEdge &e = GetEdge(uEdgeIndex);\r
356                 if ('D' == e.cType)\r
357                         ++uDeleteCount;\r
358                 }\r
359         return uDeleteCount;\r
360         }\r
361 \r
362 void PWPath::FromStr(const char Str[])\r
363         {\r
364         Clear();\r
365         unsigned uPrefixLengthA = 0;\r
366         unsigned uPrefixLengthB = 0;\r
367         while (char c = *Str++)\r
368                 {\r
369                 switch (c)\r
370                         {\r
371                 case 'M':\r
372                         ++uPrefixLengthA;\r
373                         ++uPrefixLengthB;\r
374                         break;\r
375                 case 'D':\r
376                         ++uPrefixLengthA;\r
377                         break;\r
378                 case 'I':\r
379                         ++uPrefixLengthB;\r
380                         break;\r
381                 default:\r
382                         Quit("PWPath::FromStr, invalid state %c", c);\r
383                         }\r
384                 AppendEdge(c, uPrefixLengthA, uPrefixLengthB);\r
385                 }\r
386         }\r