Change Eclipse configuration
[jabaws.git] / website / archive / binaries / mac / src / muscle / refinew.cpp
1 #include "muscle.h"\r
2 #include "msa.h"\r
3 #include "seqvect.h"\r
4 #include "textfile.h"\r
5 \r
6 #define MEMDEBUG        0\r
7 \r
8 #if     MEMDEBUG\r
9 #include <crtdbg.h>\r
10 #endif\r
11 \r
12 void MUSCLE(SeqVect &v, MSA &msaOut);\r
13 \r
14 // Append msa2 at the end of msa1\r
15 void AppendMSA(MSA &msa1, const MSA &msa2)\r
16         {\r
17         const unsigned uSeqCount = msa1.GetSeqCount();\r
18 \r
19         const unsigned uColCount1 = msa1.GetColCount();\r
20         const unsigned uColCount2 = msa2.GetColCount();\r
21 \r
22         const unsigned uColCountCat = uColCount1 + uColCount2;\r
23 \r
24         for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)\r
25                 {\r
26                 unsigned uId = msa1.GetSeqId(uSeqIndex);\r
27                 unsigned uSeqIndex2;\r
28                 bool bFound = msa2.GetSeqIndex(uId, &uSeqIndex2);\r
29                 if (bFound)\r
30                         {\r
31                         for (unsigned uColIndex = 0; uColIndex < uColCount2; ++uColIndex)\r
32                                 {\r
33                                 const char c = msa2.GetChar(uSeqIndex2, uColIndex);\r
34                                 msa1.SetChar(uSeqIndex, uColCount1 + uColIndex, c);\r
35                                 }\r
36                         }\r
37                 else\r
38                         {\r
39                         for (unsigned uColIndex = 0; uColIndex < uColCount2; ++uColIndex)\r
40                                 msa1.SetChar(uSeqIndex, uColCount1 + uColIndex, '-');\r
41                         }\r
42                 }\r
43         }\r
44 \r
45 static void SeqFromMSACols(const MSA &msa, unsigned uSeqIndex, unsigned uColFrom,\r
46   unsigned uColTo, Seq &s)\r
47         {\r
48         s.Clear();\r
49         s.SetName(msa.GetSeqName(uSeqIndex));\r
50         s.SetId(msa.GetSeqId(uSeqIndex));\r
51         for (unsigned uColIndex = uColFrom; uColIndex <= uColTo; ++uColIndex)\r
52                 {\r
53                 char c = msa.GetChar(uSeqIndex, uColIndex);\r
54                 if (!IsGapChar(c))\r
55                         s.AppendChar(c);\r
56                 }\r
57         }\r
58 \r
59 static void SeqVectFromMSACols(const MSA &msa, unsigned uColFrom, unsigned uColTo,\r
60   SeqVect &v)\r
61         {\r
62         v.Clear();\r
63         const unsigned uSeqCount = msa.GetSeqCount();\r
64         for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)\r
65                 {\r
66                 Seq s;\r
67                 SeqFromMSACols(msa, uSeqIndex, uColFrom, uColTo, s);\r
68                 v.AppendSeq(s);\r
69                 }\r
70         }\r
71 \r
72 void RefineW(const MSA &msaIn, MSA &msaOut)\r
73         {\r
74         const unsigned uSeqCount = msaIn.GetSeqCount();\r
75         const unsigned uColCount = msaIn.GetColCount();\r
76 \r
77 // Reserve same nr seqs, 20% more cols\r
78         const unsigned uReserveColCount = (uColCount*120)/100;\r
79         msaOut.SetSize(uSeqCount, uReserveColCount);\r
80 \r
81         for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)\r
82                 {\r
83                 msaOut.SetSeqName(uSeqIndex, msaIn.GetSeqName(uSeqIndex));\r
84                 msaOut.SetSeqId(uSeqIndex, msaIn.GetSeqId(uSeqIndex));\r
85                 }\r
86 \r
87         const unsigned uWindowCount = (uColCount + g_uRefineWindow - 1)/g_uRefineWindow;\r
88         if (0 == g_uWindowTo)\r
89                 g_uWindowTo = uWindowCount - 1;\r
90 \r
91 #if     MEMDEBUG\r
92         _CrtSetBreakAlloc(1560);\r
93 #endif\r
94 \r
95         if (g_uWindowOffset > 0)\r
96                 {\r
97                 MSA msaTmp;\r
98                 MSAFromColRange(msaIn, 0, g_uWindowOffset, msaOut);\r
99                 }\r
100 \r
101         fprintf(stderr, "\n");\r
102         for (unsigned uWindowIndex = g_uWindowFrom; uWindowIndex <= g_uWindowTo; ++uWindowIndex)\r
103                 {\r
104                 fprintf(stderr, "Window %d of %d    \r", uWindowIndex, uWindowCount);\r
105                 const unsigned uColFrom = g_uWindowOffset + uWindowIndex*g_uRefineWindow;\r
106                 unsigned uColTo = uColFrom + g_uRefineWindow - 1;\r
107                 if (uColTo >= uColCount)\r
108                         uColTo = uColCount - 1;\r
109                 assert(uColTo >= uColFrom);\r
110 \r
111                 SeqVect v;\r
112                 SeqVectFromMSACols(msaIn, uColFrom, uColTo, v);\r
113 \r
114 #if     MEMDEBUG\r
115                 _CrtMemState s1;\r
116                 _CrtMemCheckpoint(&s1);\r
117 #endif\r
118 \r
119                 MSA msaTmp;\r
120                 MUSCLE(v, msaTmp);\r
121                 AppendMSA(msaOut, msaTmp);\r
122                 if (uWindowIndex == g_uSaveWindow)\r
123                         {\r
124                         MSA msaInTmp;\r
125                         unsigned uOutCols = msaOut.GetColCount();\r
126                         unsigned un = uColTo - uColFrom + 1;\r
127                         MSAFromColRange(msaIn, uColFrom, un, msaInTmp);\r
128 \r
129                         char fn[256];\r
130                         sprintf(fn, "win%d_inaln.tmp", uWindowIndex);\r
131                         TextFile fIn(fn, true);\r
132                         msaInTmp.ToFile(fIn);\r
133 \r
134                         sprintf(fn, "win%d_inseqs.tmp", uWindowIndex);\r
135                         TextFile fv(fn, true);\r
136                         v.ToFile(fv);\r
137 \r
138                         sprintf(fn, "win%d_outaln.tmp", uWindowIndex);\r
139                         TextFile fOut(fn, true);\r
140                         msaTmp.ToFile(fOut);\r
141                         }\r
142 \r
143 #if     MEMDEBUG\r
144                 void FreeDPMemSPN();\r
145                 FreeDPMemSPN();\r
146 \r
147                 _CrtMemState s2;\r
148                 _CrtMemCheckpoint(&s2);\r
149 \r
150                 _CrtMemState s;\r
151                 _CrtMemDifference(&s, &s1, &s2);\r
152 \r
153                 _CrtMemDumpStatistics(&s);\r
154                 _CrtMemDumpAllObjectsSince(&s1);\r
155                 exit(1);\r
156 #endif\r
157 //#if   DEBUG\r
158 //              AssertMSAEqIgnoreCaseAndGaps(msaInTmp, msaTmp);\r
159 //#endif\r
160                 }\r
161         fprintf(stderr, "\n");\r
162 \r
163 //      AssertMSAEqIgnoreCaseAndGaps(msaIn, msaOut);//@@uncomment!\r
164         }\r
165 \r
166 void DoRefineW()\r
167         {\r
168         SetOutputFileName(g_pstrOutFileName);\r
169         SetInputFileName(g_pstrInFileName);\r
170         SetStartTime();\r
171 \r
172         SetMaxIters(g_uMaxIters);\r
173         SetSeqWeightMethod(g_SeqWeight1);\r
174 \r
175         TextFile fileIn(g_pstrInFileName);\r
176         MSA msa;\r
177         msa.FromFile(fileIn);\r
178 \r
179         const unsigned uSeqCount = msa.GetSeqCount();\r
180         if (0 == uSeqCount)\r
181                 Quit("No sequences in input file");\r
182 \r
183         MSA::SetIdCount(uSeqCount);\r
184 \r
185 // Initialize sequence ids.\r
186 // From this point on, ids must somehow propogate from here.\r
187         for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)\r
188                 msa.SetSeqId(uSeqIndex, uSeqIndex);\r
189         SetMuscleInputMSA(msa);\r
190 \r
191         ALPHA Alpha = ALPHA_Undefined;\r
192         switch (g_SeqType)\r
193                 {\r
194         case SEQTYPE_Auto:\r
195                 Alpha = msa.GuessAlpha();\r
196                 break;\r
197 \r
198         case SEQTYPE_Protein:\r
199                 Alpha = ALPHA_Amino;\r
200                 break;\r
201 \r
202         case SEQTYPE_DNA:\r
203                 Alpha = ALPHA_DNA;\r
204                 break;\r
205 \r
206         case SEQTYPE_RNA:\r
207                 Alpha = ALPHA_RNA;\r
208                 break;\r
209 \r
210         default:\r
211                 Quit("Invalid SeqType");\r
212                 }\r
213         SetAlpha(Alpha);\r
214         msa.FixAlpha();\r
215 \r
216         if (ALPHA_DNA == Alpha || ALPHA_RNA == Alpha)\r
217                 SetPPScore(PPSCORE_SPN);\r
218 \r
219         MSA msaOut;\r
220         RefineW(msa, msaOut);\r
221 \r
222 //      ValidateMuscleIds(msa);\r
223 \r
224 //      TextFile fileOut(g_pstrOutFileName, true);\r
225 //      msaOut.ToFile(fileOut);\r
226         MuscleOutput(msaOut);\r
227         }\r