Next version of JABA
[jabaws.git] / binaries / src / muscle / finddiagsn.cpp
1 #include "muscle.h"\r
2 #include "profile.h"\r
3 #include "diaglist.h"\r
4 \r
5 #define TRACE   0\r
6 \r
7 #define pow4(i) (1 << (2*i))    // 4^i = 2^(2*i)\r
8 const unsigned K = 7;\r
9 const unsigned KTUPS = pow4(K);\r
10 static unsigned TuplePos[KTUPS];\r
11 \r
12 static char *TupleToStr(int t)\r
13         {\r
14         static char s[K];\r
15 \r
16         for (int i = 0; i < K; ++i)\r
17                 {\r
18                 unsigned Letter = (t/(pow4(i)))%4;\r
19                 assert(Letter >= 0 && Letter < 4);\r
20                 s[K-i-1] = LetterToChar(Letter);\r
21                 }\r
22 \r
23         return s;\r
24         }\r
25 \r
26 static unsigned GetTuple(const ProfPos *PP, unsigned uPos)\r
27         {\r
28         unsigned t = 0;\r
29 \r
30         for (unsigned i = 0; i < K; ++i)\r
31                 {\r
32                 const unsigned uLetter = PP[uPos+i].m_uResidueGroup;\r
33                 if (RESIDUE_GROUP_MULTIPLE == uLetter)\r
34                         return EMPTY;\r
35                 t = t*4 + uLetter;\r
36                 }\r
37 \r
38         return t;\r
39         }\r
40 \r
41 void FindDiagsNuc(const ProfPos *PX, unsigned uLengthX, const ProfPos *PY,\r
42   unsigned uLengthY, DiagList &DL)\r
43         {\r
44         if (ALPHA_DNA != g_Alpha && ALPHA_RNA != g_Alpha)\r
45                 Quit("FindDiagsNuc: requires nucleo alphabet");\r
46 \r
47         DL.Clear();\r
48 \r
49 // 16 is arbitrary slop, no principled reason for this.\r
50         if (uLengthX < K + 16 || uLengthY < K + 16)\r
51                 return;\r
52 \r
53 // Set A to shorter profile, B to longer\r
54         const ProfPos *PA;\r
55         const ProfPos *PB;\r
56         unsigned uLengthA;\r
57         unsigned uLengthB;\r
58         bool bSwap;\r
59         if (uLengthX < uLengthY)\r
60                 {\r
61                 bSwap = false;\r
62                 PA = PX;\r
63                 PB = PY;\r
64                 uLengthA = uLengthX;\r
65                 uLengthB = uLengthY;\r
66                 }\r
67         else\r
68                 {\r
69                 bSwap = true;\r
70                 PA = PY;\r
71                 PB = PX;\r
72                 uLengthA = uLengthY;\r
73                 uLengthB = uLengthX;\r
74                 }\r
75 \r
76 #if     TRACE\r
77         Log("FindDiagsNuc(LengthA=%d LengthB=%d\n", uLengthA, uLengthB);\r
78 #endif\r
79 \r
80 // Build tuple map for the longer profile, B\r
81         if (uLengthB < K)\r
82                 Quit("FindDiags: profile too short");\r
83 \r
84         memset(TuplePos, EMPTY, sizeof(TuplePos));\r
85 \r
86         for (unsigned uPos = 0; uPos < uLengthB - K; ++uPos)\r
87                 {\r
88                 const unsigned uTuple = GetTuple(PB, uPos);\r
89                 if (EMPTY == uTuple)\r
90                         continue;\r
91                 TuplePos[uTuple] = uPos;\r
92                 }\r
93 \r
94 // Find matches\r
95         for (unsigned uPosA = 0; uPosA < uLengthA - K; ++uPosA)\r
96                 {\r
97                 const unsigned uTuple = GetTuple(PA, uPosA);\r
98                 if (EMPTY == uTuple)\r
99                         continue;\r
100                 const unsigned uPosB = TuplePos[uTuple];\r
101                 if (EMPTY == uPosB)\r
102                         continue;\r
103 \r
104         // This tuple is found in both profiles\r
105                 unsigned uStartPosA = uPosA;\r
106                 unsigned uStartPosB = uPosB;\r
107 \r
108         // Try to extend the match forwards\r
109                 unsigned uEndPosA = uPosA + K - 1;\r
110                 unsigned uEndPosB = uPosB + K - 1;\r
111                 for (;;)\r
112                         {\r
113                         if (uLengthA - 1 == uEndPosA || uLengthB - 1 == uEndPosB)\r
114                                 break;\r
115                         const unsigned uAAGroupA = PA[uEndPosA+1].m_uResidueGroup;\r
116                         if (RESIDUE_GROUP_MULTIPLE == uAAGroupA)\r
117                                 break;\r
118                         const unsigned uAAGroupB = PB[uEndPosB+1].m_uResidueGroup;\r
119                         if (RESIDUE_GROUP_MULTIPLE == uAAGroupB)\r
120                                 break;\r
121                         if (uAAGroupA != uAAGroupB)\r
122                                 break;\r
123                         ++uEndPosA;\r
124                         ++uEndPosB;\r
125                         }\r
126                 uPosA = uEndPosA;\r
127 \r
128 #if     TRACE\r
129                 {\r
130                 Log("Match: A %4u-%4u   ", uStartPosA, uEndPosA);\r
131                 for (unsigned n = uStartPosA; n <= uEndPosA; ++n)\r
132                         Log("%c", LetterToChar(PA[n].m_uResidueGroup));\r
133                 Log("\n");\r
134                 Log("       B %4u-%4u   ", uStartPosB, uEndPosB);\r
135                 for (unsigned n = uStartPosB; n <= uEndPosB; ++n)\r
136                         Log("%c", LetterToChar(PB[n].m_uResidueGroup));\r
137                 Log("\n");\r
138                 }\r
139 #endif\r
140 \r
141                 const unsigned uLength = uEndPosA - uStartPosA + 1;\r
142                 assert(uEndPosB - uStartPosB + 1 == uLength);\r
143 \r
144                 if (uLength >= g_uMinDiagLength)\r
145                         {\r
146                         if (bSwap)\r
147                                 DL.Add(uStartPosB, uStartPosA, uLength);\r
148                         else\r
149                                 DL.Add(uStartPosA, uStartPosB, uLength);\r
150                         }\r
151                 }\r
152         }\r