+++ /dev/null
-#include "muscle.h"\r
-#include "msa.h"\r
-#include "seqvect.h"\r
-#include "textfile.h"\r
-\r
-#define MEMDEBUG 0\r
-\r
-#if MEMDEBUG\r
-#include <crtdbg.h>\r
-#endif\r
-\r
-void MUSCLE(SeqVect &v, MSA &msaOut);\r
-\r
-// Append msa2 at the end of msa1\r
-void AppendMSA(MSA &msa1, const MSA &msa2)\r
- {\r
- const unsigned uSeqCount = msa1.GetSeqCount();\r
-\r
- const unsigned uColCount1 = msa1.GetColCount();\r
- const unsigned uColCount2 = msa2.GetColCount();\r
-\r
- const unsigned uColCountCat = uColCount1 + uColCount2;\r
-\r
- for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)\r
- {\r
- unsigned uId = msa1.GetSeqId(uSeqIndex);\r
- unsigned uSeqIndex2;\r
- bool bFound = msa2.GetSeqIndex(uId, &uSeqIndex2);\r
- if (bFound)\r
- {\r
- for (unsigned uColIndex = 0; uColIndex < uColCount2; ++uColIndex)\r
- {\r
- const char c = msa2.GetChar(uSeqIndex2, uColIndex);\r
- msa1.SetChar(uSeqIndex, uColCount1 + uColIndex, c);\r
- }\r
- }\r
- else\r
- {\r
- for (unsigned uColIndex = 0; uColIndex < uColCount2; ++uColIndex)\r
- msa1.SetChar(uSeqIndex, uColCount1 + uColIndex, '-');\r
- }\r
- }\r
- }\r
-\r
-static void SeqFromMSACols(const MSA &msa, unsigned uSeqIndex, unsigned uColFrom,\r
- unsigned uColTo, Seq &s)\r
- {\r
- s.Clear();\r
- s.SetName(msa.GetSeqName(uSeqIndex));\r
- s.SetId(msa.GetSeqId(uSeqIndex));\r
- for (unsigned uColIndex = uColFrom; uColIndex <= uColTo; ++uColIndex)\r
- {\r
- char c = msa.GetChar(uSeqIndex, uColIndex);\r
- if (!IsGapChar(c))\r
- s.AppendChar(c);\r
- }\r
- }\r
-\r
-static void SeqVectFromMSACols(const MSA &msa, unsigned uColFrom, unsigned uColTo,\r
- SeqVect &v)\r
- {\r
- v.Clear();\r
- const unsigned uSeqCount = msa.GetSeqCount();\r
- for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)\r
- {\r
- Seq s;\r
- SeqFromMSACols(msa, uSeqIndex, uColFrom, uColTo, s);\r
- v.AppendSeq(s);\r
- }\r
- }\r
-\r
-void RefineW(const MSA &msaIn, MSA &msaOut)\r
- {\r
- const unsigned uSeqCount = msaIn.GetSeqCount();\r
- const unsigned uColCount = msaIn.GetColCount();\r
-\r
-// Reserve same nr seqs, 20% more cols\r
- const unsigned uReserveColCount = (uColCount*120)/100;\r
- msaOut.SetSize(uSeqCount, uReserveColCount);\r
-\r
- for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)\r
- {\r
- msaOut.SetSeqName(uSeqIndex, msaIn.GetSeqName(uSeqIndex));\r
- msaOut.SetSeqId(uSeqIndex, msaIn.GetSeqId(uSeqIndex));\r
- }\r
-\r
- const unsigned uWindowCount = (uColCount + g_uRefineWindow - 1)/g_uRefineWindow;\r
- if (0 == g_uWindowTo)\r
- g_uWindowTo = uWindowCount - 1;\r
-\r
-#if MEMDEBUG\r
- _CrtSetBreakAlloc(1560);\r
-#endif\r
-\r
- if (g_uWindowOffset > 0)\r
- {\r
- MSA msaTmp;\r
- MSAFromColRange(msaIn, 0, g_uWindowOffset, msaOut);\r
- }\r
-\r
- fprintf(stderr, "\n");\r
- for (unsigned uWindowIndex = g_uWindowFrom; uWindowIndex <= g_uWindowTo; ++uWindowIndex)\r
- {\r
- fprintf(stderr, "Window %d of %d \r", uWindowIndex, uWindowCount);\r
- const unsigned uColFrom = g_uWindowOffset + uWindowIndex*g_uRefineWindow;\r
- unsigned uColTo = uColFrom + g_uRefineWindow - 1;\r
- if (uColTo >= uColCount)\r
- uColTo = uColCount - 1;\r
- assert(uColTo >= uColFrom);\r
-\r
- SeqVect v;\r
- SeqVectFromMSACols(msaIn, uColFrom, uColTo, v);\r
-\r
-#if MEMDEBUG\r
- _CrtMemState s1;\r
- _CrtMemCheckpoint(&s1);\r
-#endif\r
-\r
- MSA msaTmp;\r
- MUSCLE(v, msaTmp);\r
- AppendMSA(msaOut, msaTmp);\r
- if (uWindowIndex == g_uSaveWindow)\r
- {\r
- MSA msaInTmp;\r
- unsigned uOutCols = msaOut.GetColCount();\r
- unsigned un = uColTo - uColFrom + 1;\r
- MSAFromColRange(msaIn, uColFrom, un, msaInTmp);\r
-\r
- char fn[256];\r
- sprintf(fn, "win%d_inaln.tmp", uWindowIndex);\r
- TextFile fIn(fn, true);\r
- msaInTmp.ToFile(fIn);\r
-\r
- sprintf(fn, "win%d_inseqs.tmp", uWindowIndex);\r
- TextFile fv(fn, true);\r
- v.ToFile(fv);\r
-\r
- sprintf(fn, "win%d_outaln.tmp", uWindowIndex);\r
- TextFile fOut(fn, true);\r
- msaTmp.ToFile(fOut);\r
- }\r
-\r
-#if MEMDEBUG\r
- void FreeDPMemSPN();\r
- FreeDPMemSPN();\r
-\r
- _CrtMemState s2;\r
- _CrtMemCheckpoint(&s2);\r
-\r
- _CrtMemState s;\r
- _CrtMemDifference(&s, &s1, &s2);\r
-\r
- _CrtMemDumpStatistics(&s);\r
- _CrtMemDumpAllObjectsSince(&s1);\r
- exit(1);\r
-#endif\r
-//#if DEBUG\r
-// AssertMSAEqIgnoreCaseAndGaps(msaInTmp, msaTmp);\r
-//#endif\r
- }\r
- fprintf(stderr, "\n");\r
-\r
-// AssertMSAEqIgnoreCaseAndGaps(msaIn, msaOut);//@@uncomment!\r
- }\r
-\r
-void DoRefineW()\r
- {\r
- SetOutputFileName(g_pstrOutFileName);\r
- SetInputFileName(g_pstrInFileName);\r
- SetStartTime();\r
-\r
- SetMaxIters(g_uMaxIters);\r
- SetSeqWeightMethod(g_SeqWeight1);\r
-\r
- TextFile fileIn(g_pstrInFileName);\r
- MSA msa;\r
- msa.FromFile(fileIn);\r
-\r
- const unsigned uSeqCount = msa.GetSeqCount();\r
- if (0 == uSeqCount)\r
- Quit("No sequences in input file");\r
-\r
- MSA::SetIdCount(uSeqCount);\r
-\r
-// Initialize sequence ids.\r
-// From this point on, ids must somehow propogate from here.\r
- for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)\r
- msa.SetSeqId(uSeqIndex, uSeqIndex);\r
- SetMuscleInputMSA(msa);\r
-\r
- ALPHA Alpha = ALPHA_Undefined;\r
- switch (g_SeqType)\r
- {\r
- case SEQTYPE_Auto:\r
- Alpha = msa.GuessAlpha();\r
- break;\r
-\r
- case SEQTYPE_Protein:\r
- Alpha = ALPHA_Amino;\r
- break;\r
-\r
- case SEQTYPE_DNA:\r
- Alpha = ALPHA_DNA;\r
- break;\r
-\r
- case SEQTYPE_RNA:\r
- Alpha = ALPHA_RNA;\r
- break;\r
-\r
- default:\r
- Quit("Invalid SeqType");\r
- }\r
- SetAlpha(Alpha);\r
- msa.FixAlpha();\r
-\r
- if (ALPHA_DNA == Alpha || ALPHA_RNA == Alpha)\r
- SetPPScore(PPSCORE_SPN);\r
-\r
- MSA msaOut;\r
- RefineW(msa, msaOut);\r
-\r
-// ValidateMuscleIds(msa);\r
-\r
-// TextFile fileOut(g_pstrOutFileName, true);\r
-// msaOut.ToFile(fileOut);\r
- MuscleOutput(msaOut);\r
- }\r