Next version of JABA
[jabaws.git] / binaries / src / muscle / scoregaps.cpp
1 #include "muscle.h"\r
2 #include "msa.h"\r
3 #include "objscore.h"\r
4 \r
5 #define TRACE   0\r
6 \r
7 struct GAPINFO\r
8         {\r
9         GAPINFO *Next;\r
10         unsigned Start;\r
11         unsigned End;\r
12         };\r
13 \r
14 static GAPINFO **g_Gaps;\r
15 static GAPINFO *g_FreeList;\r
16 static unsigned g_MaxSeqCount;\r
17 static unsigned g_MaxColCount;\r
18 static unsigned g_ColCount;\r
19 static bool *g_ColDiff;\r
20 \r
21 static GAPINFO *NewGapInfo()\r
22         {\r
23         if (0 == g_FreeList)\r
24                 {\r
25                 const int NEWCOUNT = 256;\r
26                 GAPINFO *NewList = new GAPINFO[NEWCOUNT];\r
27                 g_FreeList = &NewList[0];\r
28                 for (int i = 0; i < NEWCOUNT-1; ++i)\r
29                         NewList[i].Next = &NewList[i+1];\r
30                 NewList[NEWCOUNT-1].Next = 0;\r
31                 }\r
32         GAPINFO *GI = g_FreeList;\r
33         g_FreeList = g_FreeList->Next;\r
34         return GI;\r
35         }\r
36 \r
37 static void FreeGapInfo(GAPINFO *GI)\r
38         {\r
39         GI->Next = g_FreeList;\r
40         g_FreeList = GI;\r
41         }\r
42 \r
43 // TODO: This could be much faster, no need to look\r
44 // at all columns.\r
45 static void FindIntersectingGaps(const MSA &msa, unsigned SeqIndex)\r
46         {\r
47         const unsigned ColCount = msa.GetColCount();\r
48         bool InGap = false;\r
49         bool Intersects = false;\r
50         unsigned Start = uInsane;\r
51         for (unsigned Col = 0; Col <= ColCount; ++Col)\r
52                 {\r
53                 bool Gap = ((Col != ColCount) && msa.IsGap(SeqIndex, Col));\r
54                 if (Gap)\r
55                         {\r
56                         if (!InGap)\r
57                                 {\r
58                                 InGap = true;\r
59                                 Start = Col;\r
60                                 }\r
61                         if (g_ColDiff[Col])\r
62                                 Intersects = true;\r
63                         }\r
64                 else if (InGap)\r
65                         {\r
66                         InGap = false;\r
67                         if (Intersects)\r
68                                 {\r
69                                 GAPINFO *GI = NewGapInfo();\r
70                                 GI->Start = Start;\r
71                                 GI->End = Col - 1;\r
72                                 GI->Next = g_Gaps[SeqIndex];\r
73                                 g_Gaps[SeqIndex] = GI;\r
74                                 }\r
75                         Intersects = false;\r
76                         }\r
77                 }\r
78         }\r
79 \r
80 static SCORE Penalty(unsigned Length, bool Term)\r
81         {\r
82         if (0 == Length)\r
83                 return 0;\r
84         SCORE s1 = g_scoreGapOpen + g_scoreGapExtend*(Length - 1);\r
85 #if     DOUBLE_AFFINE\r
86         SCORE s2 = g_scoreGapOpen2 + g_scoreGapExtend2*(Length - 1);\r
87         if (s1 > s2)\r
88                 return s1;\r
89         return s2;\r
90 #else\r
91         return s1;\r
92 #endif\r
93         }\r
94 \r
95 //static SCORE ScorePair(unsigned Seq1, unsigned Seq2)\r
96 //      {\r
97 //#if   TRACE\r
98 //      {\r
99 //      Log("ScorePair(%d,%d)\n", Seq1, Seq2);\r
100 //      Log("Gaps seq 1: ");\r
101 //      for (GAPINFO *GI = g_Gaps[Seq1]; GI; GI = GI->Next)\r
102 //              Log(" %d-%d", GI->Start, GI->End);\r
103 //      Log("\n");\r
104 //      Log("Gaps seq 2: ");\r
105 //      for (GAPINFO *GI = g_Gaps[Seq2]; GI; GI = GI->Next)\r
106 //              Log(" %d-%d", GI->Start, GI->End);\r
107 //      Log("\n");\r
108 //      }\r
109 //#endif\r
110 //      return 0;\r
111 //      }\r
112 \r
113 SCORE ScoreGaps(const MSA &msa, const unsigned DiffCols[], unsigned DiffColCount)\r
114         {\r
115 #if     TRACE\r
116         {\r
117         Log("ScoreGaps\n");\r
118         Log("DiffCols ");\r
119         for (unsigned i = 0; i < DiffColCount; ++i)\r
120                 Log(" %u", DiffCols[i]);\r
121         Log("\n");\r
122         Log("msa=\n");\r
123         msa.LogMe();\r
124         Log("\n");\r
125         }\r
126 #endif\r
127         const unsigned SeqCount = msa.GetSeqCount();\r
128         const unsigned ColCount = msa.GetColCount();\r
129         g_ColCount = ColCount;\r
130 \r
131         if (SeqCount > g_MaxSeqCount)\r
132                 {\r
133                 delete[] g_Gaps;\r
134                 g_MaxSeqCount = SeqCount + 256;\r
135                 g_Gaps = new GAPINFO *[g_MaxSeqCount];\r
136                 }\r
137         memset(g_Gaps, 0, SeqCount*sizeof(GAPINFO *));\r
138 \r
139         if (ColCount > g_MaxColCount)\r
140                 {\r
141                 delete[] g_ColDiff;\r
142                 g_MaxColCount = ColCount + 256;\r
143                 g_ColDiff = new bool[g_MaxColCount];\r
144                 }\r
145 \r
146         memset(g_ColDiff, 0, g_ColCount*sizeof(bool));\r
147         for (unsigned i = 0; i < DiffColCount; ++i)\r
148                 {\r
149                 unsigned Col = DiffCols[i];\r
150                 assert(Col < ColCount);\r
151                 g_ColDiff[Col] = true;\r
152                 }\r
153 \r
154         for (unsigned SeqIndex = 0; SeqIndex < SeqCount; ++SeqIndex)\r
155                 FindIntersectingGaps(msa, SeqIndex);\r
156 \r
157 #if     TRACE\r
158         {\r
159         Log("\n");\r
160         Log("Intersecting gaps:\n");\r
161         Log("      ");\r
162         for (unsigned Col = 0; Col < ColCount; ++Col)\r
163                 Log("%c", g_ColDiff[Col] ? '*' : ' ');\r
164         Log("\n");\r
165         Log("      ");\r
166         for (unsigned Col = 0; Col < ColCount; ++Col)\r
167                 Log("%d", Col%10);\r
168         Log("\n");\r
169         for (unsigned Seq = 0; Seq < SeqCount; ++Seq)\r
170                 {\r
171                 Log("%3d:  ", Seq);\r
172                 for (unsigned Col = 0; Col < ColCount; ++Col)\r
173                         Log("%c", msa.GetChar(Seq, Col));\r
174                 Log("  :: ");\r
175                 for (GAPINFO *GI = g_Gaps[Seq]; GI; GI = GI->Next)\r
176                         Log(" (%d,%d)", GI->Start, GI->End);\r
177                 Log("  >%s\n", msa.GetSeqName(Seq));\r
178                 }\r
179         Log("\n");\r
180         }\r
181 #endif\r
182 \r
183         SCORE Score = 0;\r
184         for (unsigned Seq1 = 0; Seq1 < SeqCount; ++Seq1)\r
185                 {\r
186                 const WEIGHT w1 = msa.GetSeqWeight(Seq1);\r
187                 for (unsigned Seq2 = Seq1 + 1; Seq2 < SeqCount; ++Seq2)\r
188                         {\r
189                         const WEIGHT w2 = msa.GetSeqWeight(Seq2);\r
190 //                      const SCORE Pair = ScorePair(Seq1, Seq2);\r
191                         const SCORE Pair = ScoreSeqPairGaps(msa, Seq1, msa, Seq2);\r
192                         Score += w1*w2*Pair;\r
193 #if     TRACE\r
194                         Log("Seq1=%u Seq2=%u ScorePair=%.4g w1=%.4g w2=%.4g Sum=%.4g\n",\r
195                           Seq1, Seq2, Pair, w1, w2, Score);\r
196 #endif\r
197                         }\r
198                 }\r
199 \r
200         return Score;\r
201         }\r