Mac binaries
[jabaws.git] / website / archive / binaries / mac / src / muscle / refinew.cpp
diff --git a/website/archive/binaries/mac/src/muscle/refinew.cpp b/website/archive/binaries/mac/src/muscle/refinew.cpp
new file mode 100644 (file)
index 0000000..671e591
--- /dev/null
@@ -0,0 +1,227 @@
+#include "muscle.h"\r
+#include "msa.h"\r
+#include "seqvect.h"\r
+#include "textfile.h"\r
+\r
+#define MEMDEBUG       0\r
+\r
+#if    MEMDEBUG\r
+#include <crtdbg.h>\r
+#endif\r
+\r
+void MUSCLE(SeqVect &v, MSA &msaOut);\r
+\r
+// Append msa2 at the end of msa1\r
+void AppendMSA(MSA &msa1, const MSA &msa2)\r
+       {\r
+       const unsigned uSeqCount = msa1.GetSeqCount();\r
+\r
+       const unsigned uColCount1 = msa1.GetColCount();\r
+       const unsigned uColCount2 = msa2.GetColCount();\r
+\r
+       const unsigned uColCountCat = uColCount1 + uColCount2;\r
+\r
+       for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)\r
+               {\r
+               unsigned uId = msa1.GetSeqId(uSeqIndex);\r
+               unsigned uSeqIndex2;\r
+               bool bFound = msa2.GetSeqIndex(uId, &uSeqIndex2);\r
+               if (bFound)\r
+                       {\r
+                       for (unsigned uColIndex = 0; uColIndex < uColCount2; ++uColIndex)\r
+                               {\r
+                               const char c = msa2.GetChar(uSeqIndex2, uColIndex);\r
+                               msa1.SetChar(uSeqIndex, uColCount1 + uColIndex, c);\r
+                               }\r
+                       }\r
+               else\r
+                       {\r
+                       for (unsigned uColIndex = 0; uColIndex < uColCount2; ++uColIndex)\r
+                               msa1.SetChar(uSeqIndex, uColCount1 + uColIndex, '-');\r
+                       }\r
+               }\r
+       }\r
+\r
+static void SeqFromMSACols(const MSA &msa, unsigned uSeqIndex, unsigned uColFrom,\r
+  unsigned uColTo, Seq &s)\r
+       {\r
+       s.Clear();\r
+       s.SetName(msa.GetSeqName(uSeqIndex));\r
+       s.SetId(msa.GetSeqId(uSeqIndex));\r
+       for (unsigned uColIndex = uColFrom; uColIndex <= uColTo; ++uColIndex)\r
+               {\r
+               char c = msa.GetChar(uSeqIndex, uColIndex);\r
+               if (!IsGapChar(c))\r
+                       s.AppendChar(c);\r
+               }\r
+       }\r
+\r
+static void SeqVectFromMSACols(const MSA &msa, unsigned uColFrom, unsigned uColTo,\r
+  SeqVect &v)\r
+       {\r
+       v.Clear();\r
+       const unsigned uSeqCount = msa.GetSeqCount();\r
+       for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)\r
+               {\r
+               Seq s;\r
+               SeqFromMSACols(msa, uSeqIndex, uColFrom, uColTo, s);\r
+               v.AppendSeq(s);\r
+               }\r
+       }\r
+\r
+void RefineW(const MSA &msaIn, MSA &msaOut)\r
+       {\r
+       const unsigned uSeqCount = msaIn.GetSeqCount();\r
+       const unsigned uColCount = msaIn.GetColCount();\r
+\r
+// Reserve same nr seqs, 20% more cols\r
+       const unsigned uReserveColCount = (uColCount*120)/100;\r
+       msaOut.SetSize(uSeqCount, uReserveColCount);\r
+\r
+       for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)\r
+               {\r
+               msaOut.SetSeqName(uSeqIndex, msaIn.GetSeqName(uSeqIndex));\r
+               msaOut.SetSeqId(uSeqIndex, msaIn.GetSeqId(uSeqIndex));\r
+               }\r
+\r
+       const unsigned uWindowCount = (uColCount + g_uRefineWindow - 1)/g_uRefineWindow;\r
+       if (0 == g_uWindowTo)\r
+               g_uWindowTo = uWindowCount - 1;\r
+\r
+#if    MEMDEBUG\r
+       _CrtSetBreakAlloc(1560);\r
+#endif\r
+\r
+       if (g_uWindowOffset > 0)\r
+               {\r
+               MSA msaTmp;\r
+               MSAFromColRange(msaIn, 0, g_uWindowOffset, msaOut);\r
+               }\r
+\r
+       fprintf(stderr, "\n");\r
+       for (unsigned uWindowIndex = g_uWindowFrom; uWindowIndex <= g_uWindowTo; ++uWindowIndex)\r
+               {\r
+               fprintf(stderr, "Window %d of %d    \r", uWindowIndex, uWindowCount);\r
+               const unsigned uColFrom = g_uWindowOffset + uWindowIndex*g_uRefineWindow;\r
+               unsigned uColTo = uColFrom + g_uRefineWindow - 1;\r
+               if (uColTo >= uColCount)\r
+                       uColTo = uColCount - 1;\r
+               assert(uColTo >= uColFrom);\r
+\r
+               SeqVect v;\r
+               SeqVectFromMSACols(msaIn, uColFrom, uColTo, v);\r
+\r
+#if    MEMDEBUG\r
+               _CrtMemState s1;\r
+               _CrtMemCheckpoint(&s1);\r
+#endif\r
+\r
+               MSA msaTmp;\r
+               MUSCLE(v, msaTmp);\r
+               AppendMSA(msaOut, msaTmp);\r
+               if (uWindowIndex == g_uSaveWindow)\r
+                       {\r
+                       MSA msaInTmp;\r
+                       unsigned uOutCols = msaOut.GetColCount();\r
+                       unsigned un = uColTo - uColFrom + 1;\r
+                       MSAFromColRange(msaIn, uColFrom, un, msaInTmp);\r
+\r
+                       char fn[256];\r
+                       sprintf(fn, "win%d_inaln.tmp", uWindowIndex);\r
+                       TextFile fIn(fn, true);\r
+                       msaInTmp.ToFile(fIn);\r
+\r
+                       sprintf(fn, "win%d_inseqs.tmp", uWindowIndex);\r
+                       TextFile fv(fn, true);\r
+                       v.ToFile(fv);\r
+\r
+                       sprintf(fn, "win%d_outaln.tmp", uWindowIndex);\r
+                       TextFile fOut(fn, true);\r
+                       msaTmp.ToFile(fOut);\r
+                       }\r
+\r
+#if    MEMDEBUG\r
+               void FreeDPMemSPN();\r
+               FreeDPMemSPN();\r
+\r
+               _CrtMemState s2;\r
+               _CrtMemCheckpoint(&s2);\r
+\r
+               _CrtMemState s;\r
+               _CrtMemDifference(&s, &s1, &s2);\r
+\r
+               _CrtMemDumpStatistics(&s);\r
+               _CrtMemDumpAllObjectsSince(&s1);\r
+               exit(1);\r
+#endif\r
+//#if  DEBUG\r
+//             AssertMSAEqIgnoreCaseAndGaps(msaInTmp, msaTmp);\r
+//#endif\r
+               }\r
+       fprintf(stderr, "\n");\r
+\r
+//     AssertMSAEqIgnoreCaseAndGaps(msaIn, msaOut);//@@uncomment!\r
+       }\r
+\r
+void DoRefineW()\r
+       {\r
+       SetOutputFileName(g_pstrOutFileName);\r
+       SetInputFileName(g_pstrInFileName);\r
+       SetStartTime();\r
+\r
+       SetMaxIters(g_uMaxIters);\r
+       SetSeqWeightMethod(g_SeqWeight1);\r
+\r
+       TextFile fileIn(g_pstrInFileName);\r
+       MSA msa;\r
+       msa.FromFile(fileIn);\r
+\r
+       const unsigned uSeqCount = msa.GetSeqCount();\r
+       if (0 == uSeqCount)\r
+               Quit("No sequences in input file");\r
+\r
+       MSA::SetIdCount(uSeqCount);\r
+\r
+// Initialize sequence ids.\r
+// From this point on, ids must somehow propogate from here.\r
+       for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)\r
+               msa.SetSeqId(uSeqIndex, uSeqIndex);\r
+       SetMuscleInputMSA(msa);\r
+\r
+       ALPHA Alpha = ALPHA_Undefined;\r
+       switch (g_SeqType)\r
+               {\r
+       case SEQTYPE_Auto:\r
+               Alpha = msa.GuessAlpha();\r
+               break;\r
+\r
+       case SEQTYPE_Protein:\r
+               Alpha = ALPHA_Amino;\r
+               break;\r
+\r
+       case SEQTYPE_DNA:\r
+               Alpha = ALPHA_DNA;\r
+               break;\r
+\r
+       case SEQTYPE_RNA:\r
+               Alpha = ALPHA_RNA;\r
+               break;\r
+\r
+       default:\r
+               Quit("Invalid SeqType");\r
+               }\r
+       SetAlpha(Alpha);\r
+       msa.FixAlpha();\r
+\r
+       if (ALPHA_DNA == Alpha || ALPHA_RNA == Alpha)\r
+               SetPPScore(PPSCORE_SPN);\r
+\r
+       MSA msaOut;\r
+       RefineW(msa, msaOut);\r
+\r
+//     ValidateMuscleIds(msa);\r
+\r
+//     TextFile fileOut(g_pstrOutFileName, true);\r
+//     msaOut.ToFile(fileOut);\r
+       MuscleOutput(msaOut);\r
+       }\r