--- /dev/null
+#include "muscle.h"\r
+#include "msa.h"\r
+#include "seqvect.h"\r
+#include "textfile.h"\r
+\r
+#define MEMDEBUG 0\r
+\r
+#if MEMDEBUG\r
+#include <crtdbg.h>\r
+#endif\r
+\r
+void MUSCLE(SeqVect &v, MSA &msaOut);\r
+\r
+// Append msa2 at the end of msa1\r
+void AppendMSA(MSA &msa1, const MSA &msa2)\r
+ {\r
+ const unsigned uSeqCount = msa1.GetSeqCount();\r
+\r
+ const unsigned uColCount1 = msa1.GetColCount();\r
+ const unsigned uColCount2 = msa2.GetColCount();\r
+\r
+ const unsigned uColCountCat = uColCount1 + uColCount2;\r
+\r
+ for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)\r
+ {\r
+ unsigned uId = msa1.GetSeqId(uSeqIndex);\r
+ unsigned uSeqIndex2;\r
+ bool bFound = msa2.GetSeqIndex(uId, &uSeqIndex2);\r
+ if (bFound)\r
+ {\r
+ for (unsigned uColIndex = 0; uColIndex < uColCount2; ++uColIndex)\r
+ {\r
+ const char c = msa2.GetChar(uSeqIndex2, uColIndex);\r
+ msa1.SetChar(uSeqIndex, uColCount1 + uColIndex, c);\r
+ }\r
+ }\r
+ else\r
+ {\r
+ for (unsigned uColIndex = 0; uColIndex < uColCount2; ++uColIndex)\r
+ msa1.SetChar(uSeqIndex, uColCount1 + uColIndex, '-');\r
+ }\r
+ }\r
+ }\r
+\r
+static void SeqFromMSACols(const MSA &msa, unsigned uSeqIndex, unsigned uColFrom,\r
+ unsigned uColTo, Seq &s)\r
+ {\r
+ s.Clear();\r
+ s.SetName(msa.GetSeqName(uSeqIndex));\r
+ s.SetId(msa.GetSeqId(uSeqIndex));\r
+ for (unsigned uColIndex = uColFrom; uColIndex <= uColTo; ++uColIndex)\r
+ {\r
+ char c = msa.GetChar(uSeqIndex, uColIndex);\r
+ if (!IsGapChar(c))\r
+ s.AppendChar(c);\r
+ }\r
+ }\r
+\r
+static void SeqVectFromMSACols(const MSA &msa, unsigned uColFrom, unsigned uColTo,\r
+ SeqVect &v)\r
+ {\r
+ v.Clear();\r
+ const unsigned uSeqCount = msa.GetSeqCount();\r
+ for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)\r
+ {\r
+ Seq s;\r
+ SeqFromMSACols(msa, uSeqIndex, uColFrom, uColTo, s);\r
+ v.AppendSeq(s);\r
+ }\r
+ }\r
+\r
+void RefineW(const MSA &msaIn, MSA &msaOut)\r
+ {\r
+ const unsigned uSeqCount = msaIn.GetSeqCount();\r
+ const unsigned uColCount = msaIn.GetColCount();\r
+\r
+// Reserve same nr seqs, 20% more cols\r
+ const unsigned uReserveColCount = (uColCount*120)/100;\r
+ msaOut.SetSize(uSeqCount, uReserveColCount);\r
+\r
+ for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)\r
+ {\r
+ msaOut.SetSeqName(uSeqIndex, msaIn.GetSeqName(uSeqIndex));\r
+ msaOut.SetSeqId(uSeqIndex, msaIn.GetSeqId(uSeqIndex));\r
+ }\r
+\r
+ const unsigned uWindowCount = (uColCount + g_uRefineWindow - 1)/g_uRefineWindow;\r
+ if (0 == g_uWindowTo)\r
+ g_uWindowTo = uWindowCount - 1;\r
+\r
+#if MEMDEBUG\r
+ _CrtSetBreakAlloc(1560);\r
+#endif\r
+\r
+ if (g_uWindowOffset > 0)\r
+ {\r
+ MSA msaTmp;\r
+ MSAFromColRange(msaIn, 0, g_uWindowOffset, msaOut);\r
+ }\r
+\r
+ fprintf(stderr, "\n");\r
+ for (unsigned uWindowIndex = g_uWindowFrom; uWindowIndex <= g_uWindowTo; ++uWindowIndex)\r
+ {\r
+ fprintf(stderr, "Window %d of %d \r", uWindowIndex, uWindowCount);\r
+ const unsigned uColFrom = g_uWindowOffset + uWindowIndex*g_uRefineWindow;\r
+ unsigned uColTo = uColFrom + g_uRefineWindow - 1;\r
+ if (uColTo >= uColCount)\r
+ uColTo = uColCount - 1;\r
+ assert(uColTo >= uColFrom);\r
+\r
+ SeqVect v;\r
+ SeqVectFromMSACols(msaIn, uColFrom, uColTo, v);\r
+\r
+#if MEMDEBUG\r
+ _CrtMemState s1;\r
+ _CrtMemCheckpoint(&s1);\r
+#endif\r
+\r
+ MSA msaTmp;\r
+ MUSCLE(v, msaTmp);\r
+ AppendMSA(msaOut, msaTmp);\r
+ if (uWindowIndex == g_uSaveWindow)\r
+ {\r
+ MSA msaInTmp;\r
+ unsigned uOutCols = msaOut.GetColCount();\r
+ unsigned un = uColTo - uColFrom + 1;\r
+ MSAFromColRange(msaIn, uColFrom, un, msaInTmp);\r
+\r
+ char fn[256];\r
+ sprintf(fn, "win%d_inaln.tmp", uWindowIndex);\r
+ TextFile fIn(fn, true);\r
+ msaInTmp.ToFile(fIn);\r
+\r
+ sprintf(fn, "win%d_inseqs.tmp", uWindowIndex);\r
+ TextFile fv(fn, true);\r
+ v.ToFile(fv);\r
+\r
+ sprintf(fn, "win%d_outaln.tmp", uWindowIndex);\r
+ TextFile fOut(fn, true);\r
+ msaTmp.ToFile(fOut);\r
+ }\r
+\r
+#if MEMDEBUG\r
+ void FreeDPMemSPN();\r
+ FreeDPMemSPN();\r
+\r
+ _CrtMemState s2;\r
+ _CrtMemCheckpoint(&s2);\r
+\r
+ _CrtMemState s;\r
+ _CrtMemDifference(&s, &s1, &s2);\r
+\r
+ _CrtMemDumpStatistics(&s);\r
+ _CrtMemDumpAllObjectsSince(&s1);\r
+ exit(1);\r
+#endif\r
+//#if DEBUG\r
+// AssertMSAEqIgnoreCaseAndGaps(msaInTmp, msaTmp);\r
+//#endif\r
+ }\r
+ fprintf(stderr, "\n");\r
+\r
+// AssertMSAEqIgnoreCaseAndGaps(msaIn, msaOut);//@@uncomment!\r
+ }\r
+\r
+void DoRefineW()\r
+ {\r
+ SetOutputFileName(g_pstrOutFileName);\r
+ SetInputFileName(g_pstrInFileName);\r
+ SetStartTime();\r
+\r
+ SetMaxIters(g_uMaxIters);\r
+ SetSeqWeightMethod(g_SeqWeight1);\r
+\r
+ TextFile fileIn(g_pstrInFileName);\r
+ MSA msa;\r
+ msa.FromFile(fileIn);\r
+\r
+ const unsigned uSeqCount = msa.GetSeqCount();\r
+ if (0 == uSeqCount)\r
+ Quit("No sequences in input file");\r
+\r
+ MSA::SetIdCount(uSeqCount);\r
+\r
+// Initialize sequence ids.\r
+// From this point on, ids must somehow propogate from here.\r
+ for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)\r
+ msa.SetSeqId(uSeqIndex, uSeqIndex);\r
+ SetMuscleInputMSA(msa);\r
+\r
+ ALPHA Alpha = ALPHA_Undefined;\r
+ switch (g_SeqType)\r
+ {\r
+ case SEQTYPE_Auto:\r
+ Alpha = msa.GuessAlpha();\r
+ break;\r
+\r
+ case SEQTYPE_Protein:\r
+ Alpha = ALPHA_Amino;\r
+ break;\r
+\r
+ case SEQTYPE_DNA:\r
+ Alpha = ALPHA_DNA;\r
+ break;\r
+\r
+ case SEQTYPE_RNA:\r
+ Alpha = ALPHA_RNA;\r
+ break;\r
+\r
+ default:\r
+ Quit("Invalid SeqType");\r
+ }\r
+ SetAlpha(Alpha);\r
+ msa.FixAlpha();\r
+\r
+ if (ALPHA_DNA == Alpha || ALPHA_RNA == Alpha)\r
+ SetPPScore(PPSCORE_SPN);\r
+\r
+ MSA msaOut;\r
+ RefineW(msa, msaOut);\r
+\r
+// ValidateMuscleIds(msa);\r
+\r
+// TextFile fileOut(g_pstrOutFileName, true);\r
+// msaOut.ToFile(fileOut);\r
+ MuscleOutput(msaOut);\r
+ }\r