4 #include "textfile.h"
\r
12 void MUSCLE(SeqVect &v, MSA &msaOut);
\r
14 // Append msa2 at the end of msa1
\r
15 void AppendMSA(MSA &msa1, const MSA &msa2)
\r
17 const unsigned uSeqCount = msa1.GetSeqCount();
\r
19 const unsigned uColCount1 = msa1.GetColCount();
\r
20 const unsigned uColCount2 = msa2.GetColCount();
\r
22 const unsigned uColCountCat = uColCount1 + uColCount2;
\r
24 for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
\r
26 unsigned uId = msa1.GetSeqId(uSeqIndex);
\r
27 unsigned uSeqIndex2;
\r
28 bool bFound = msa2.GetSeqIndex(uId, &uSeqIndex2);
\r
31 for (unsigned uColIndex = 0; uColIndex < uColCount2; ++uColIndex)
\r
33 const char c = msa2.GetChar(uSeqIndex2, uColIndex);
\r
34 msa1.SetChar(uSeqIndex, uColCount1 + uColIndex, c);
\r
39 for (unsigned uColIndex = 0; uColIndex < uColCount2; ++uColIndex)
\r
40 msa1.SetChar(uSeqIndex, uColCount1 + uColIndex, '-');
\r
45 static void SeqFromMSACols(const MSA &msa, unsigned uSeqIndex, unsigned uColFrom,
\r
46 unsigned uColTo, Seq &s)
\r
49 s.SetName(msa.GetSeqName(uSeqIndex));
\r
50 s.SetId(msa.GetSeqId(uSeqIndex));
\r
51 for (unsigned uColIndex = uColFrom; uColIndex <= uColTo; ++uColIndex)
\r
53 char c = msa.GetChar(uSeqIndex, uColIndex);
\r
59 static void SeqVectFromMSACols(const MSA &msa, unsigned uColFrom, unsigned uColTo,
\r
63 const unsigned uSeqCount = msa.GetSeqCount();
\r
64 for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
\r
67 SeqFromMSACols(msa, uSeqIndex, uColFrom, uColTo, s);
\r
72 void RefineW(const MSA &msaIn, MSA &msaOut)
\r
74 const unsigned uSeqCount = msaIn.GetSeqCount();
\r
75 const unsigned uColCount = msaIn.GetColCount();
\r
77 // Reserve same nr seqs, 20% more cols
\r
78 const unsigned uReserveColCount = (uColCount*120)/100;
\r
79 msaOut.SetSize(uSeqCount, uReserveColCount);
\r
81 for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
\r
83 msaOut.SetSeqName(uSeqIndex, msaIn.GetSeqName(uSeqIndex));
\r
84 msaOut.SetSeqId(uSeqIndex, msaIn.GetSeqId(uSeqIndex));
\r
87 const unsigned uWindowCount = (uColCount + g_uRefineWindow - 1)/g_uRefineWindow;
\r
88 if (0 == g_uWindowTo)
\r
89 g_uWindowTo = uWindowCount - 1;
\r
92 _CrtSetBreakAlloc(1560);
\r
95 if (g_uWindowOffset > 0)
\r
98 MSAFromColRange(msaIn, 0, g_uWindowOffset, msaOut);
\r
101 fprintf(stderr, "\n");
\r
102 for (unsigned uWindowIndex = g_uWindowFrom; uWindowIndex <= g_uWindowTo; ++uWindowIndex)
\r
104 fprintf(stderr, "Window %d of %d \r", uWindowIndex, uWindowCount);
\r
105 const unsigned uColFrom = g_uWindowOffset + uWindowIndex*g_uRefineWindow;
\r
106 unsigned uColTo = uColFrom + g_uRefineWindow - 1;
\r
107 if (uColTo >= uColCount)
\r
108 uColTo = uColCount - 1;
\r
109 assert(uColTo >= uColFrom);
\r
112 SeqVectFromMSACols(msaIn, uColFrom, uColTo, v);
\r
116 _CrtMemCheckpoint(&s1);
\r
121 AppendMSA(msaOut, msaTmp);
\r
122 if (uWindowIndex == g_uSaveWindow)
\r
125 unsigned uOutCols = msaOut.GetColCount();
\r
126 unsigned un = uColTo - uColFrom + 1;
\r
127 MSAFromColRange(msaIn, uColFrom, un, msaInTmp);
\r
130 sprintf(fn, "win%d_inaln.tmp", uWindowIndex);
\r
131 TextFile fIn(fn, true);
\r
132 msaInTmp.ToFile(fIn);
\r
134 sprintf(fn, "win%d_inseqs.tmp", uWindowIndex);
\r
135 TextFile fv(fn, true);
\r
138 sprintf(fn, "win%d_outaln.tmp", uWindowIndex);
\r
139 TextFile fOut(fn, true);
\r
140 msaTmp.ToFile(fOut);
\r
144 void FreeDPMemSPN();
\r
148 _CrtMemCheckpoint(&s2);
\r
151 _CrtMemDifference(&s, &s1, &s2);
\r
153 _CrtMemDumpStatistics(&s);
\r
154 _CrtMemDumpAllObjectsSince(&s1);
\r
158 // AssertMSAEqIgnoreCaseAndGaps(msaInTmp, msaTmp);
\r
161 fprintf(stderr, "\n");
\r
163 // AssertMSAEqIgnoreCaseAndGaps(msaIn, msaOut);//@@uncomment!
\r
168 SetOutputFileName(g_pstrOutFileName);
\r
169 SetInputFileName(g_pstrInFileName);
\r
172 SetMaxIters(g_uMaxIters);
\r
173 SetSeqWeightMethod(g_SeqWeight1);
\r
175 TextFile fileIn(g_pstrInFileName);
\r
177 msa.FromFile(fileIn);
\r
179 const unsigned uSeqCount = msa.GetSeqCount();
\r
180 if (0 == uSeqCount)
\r
181 Quit("No sequences in input file");
\r
183 MSA::SetIdCount(uSeqCount);
\r
185 // Initialize sequence ids.
\r
186 // From this point on, ids must somehow propogate from here.
\r
187 for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
\r
188 msa.SetSeqId(uSeqIndex, uSeqIndex);
\r
189 SetMuscleInputMSA(msa);
\r
191 ALPHA Alpha = ALPHA_Undefined;
\r
195 Alpha = msa.GuessAlpha();
\r
198 case SEQTYPE_Protein:
\r
199 Alpha = ALPHA_Amino;
\r
211 Quit("Invalid SeqType");
\r
216 if (ALPHA_DNA == Alpha || ALPHA_RNA == Alpha)
\r
217 SetPPScore(PPSCORE_SPN);
\r
220 RefineW(msa, msaOut);
\r
222 // ValidateMuscleIds(msa);
\r
224 // TextFile fileOut(g_pstrOutFileName, true);
\r
225 // msaOut.ToFile(fileOut);
\r
226 MuscleOutput(msaOut);
\r