+++ /dev/null
-#include "muscle.h"\r
-#include "textfile.h"\r
-#include "seqvect.h"\r
-#include "distfunc.h"\r
-#include "msa.h"\r
-#include "tree.h"\r
-#include "profile.h"\r
-#include "timing.h"\r
-\r
-static char g_strUseTreeWarning[] =\r
-"\n******** WARNING ****************\n"\r
-"\nYou specified the -usetree option.\n"\r
-"Note that a good evolutionary tree may NOT be a good\n"\r
-"guide tree for multiple alignment. For more details,\n"\r
-"please refer to the user guide. To disable this\n"\r
-"warning, use -usetree_nowarn <treefilename>.\n\n";\r
-\r
-void DoMuscle()\r
- {\r
- SetOutputFileName(g_pstrOutFileName);\r
- SetInputFileName(g_pstrInFileName);\r
-\r
- SetMaxIters(g_uMaxIters);\r
- SetSeqWeightMethod(g_SeqWeight1);\r
-\r
- TextFile fileIn(g_pstrInFileName);\r
- SeqVect v;\r
- v.FromFASTAFile(fileIn);\r
- const unsigned uSeqCount = v.Length();\r
-\r
- if (0 == uSeqCount)\r
- Quit("No sequences in input file");\r
-\r
- ALPHA Alpha = ALPHA_Undefined;\r
- switch (g_SeqType)\r
- {\r
- case SEQTYPE_Auto:\r
- Alpha = v.GuessAlpha();\r
- break;\r
-\r
- case SEQTYPE_Protein:\r
- Alpha = ALPHA_Amino;\r
- break;\r
-\r
- case SEQTYPE_DNA:\r
- Alpha = ALPHA_DNA;\r
- break;\r
-\r
- case SEQTYPE_RNA:\r
- Alpha = ALPHA_RNA;\r
- break;\r
-\r
- default:\r
- Quit("Invalid seq type");\r
- }\r
- SetAlpha(Alpha);\r
- v.FixAlpha();\r
-\r
- PTR_SCOREMATRIX UserMatrix = 0;\r
- if (0 != g_pstrMatrixFileName)\r
- {\r
- const char *FileName = g_pstrMatrixFileName;\r
- const char *Path = getenv("MUSCLE_MXPATH");\r
- if (Path != 0)\r
- {\r
- size_t n = strlen(Path) + 1 + strlen(FileName) + 1;\r
- char *NewFileName = new char[n];\r
- sprintf(NewFileName, "%s/%s", Path, FileName);\r
- FileName = NewFileName;\r
- }\r
- TextFile File(FileName);\r
- UserMatrix = ReadMx(File);\r
- g_Alpha = ALPHA_Amino;\r
- g_PPScore = PPSCORE_SP;\r
- }\r
-\r
- SetPPScore();\r
-\r
- if (0 != UserMatrix)\r
- g_ptrScoreMatrix = UserMatrix;\r
-\r
- unsigned uMaxL = 0;\r
- unsigned uTotL = 0;\r
- for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)\r
- {\r
- unsigned L = v.GetSeq(uSeqIndex).Length();\r
- uTotL += L;\r
- if (L > uMaxL)\r
- uMaxL = L;\r
- }\r
-\r
- SetIter(1);\r
- g_bDiags = g_bDiags1;\r
- SetSeqStats(uSeqCount, uMaxL, uTotL/uSeqCount);\r
-\r
- SetMuscleSeqVect(v);\r
-\r
- MSA::SetIdCount(uSeqCount);\r
-\r
-// Initialize sequence ids.\r
-// From this point on, ids must somehow propogate from here.\r
- for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)\r
- v.SetSeqId(uSeqIndex, uSeqIndex);\r
-\r
- if (0 == uSeqCount)\r
- Quit("Input file '%s' has no sequences", g_pstrInFileName);\r
- if (1 == uSeqCount)\r
- {\r
- TextFile fileOut(g_pstrOutFileName, true);\r
- v.ToFile(fileOut);\r
- return;\r
- }\r
-\r
- if (uSeqCount > 1)\r
- MHackStart(v);\r
-\r
-// First iteration\r
- Tree GuideTree;\r
- if (0 != g_pstrUseTreeFileName)\r
- {\r
- // Discourage users...\r
- if (!g_bUseTreeNoWarn)\r
- fprintf(stderr, "%s", g_strUseTreeWarning);\r
-\r
- // Read tree from file\r
- TextFile TreeFile(g_pstrUseTreeFileName);\r
- GuideTree.FromFile(TreeFile);\r
-\r
- // Make sure tree is rooted\r
- if (!GuideTree.IsRooted())\r
- Quit("User tree must be rooted");\r
-\r
- if (GuideTree.GetLeafCount() != uSeqCount)\r
- Quit("User tree does not match input sequences");\r
-\r
- const unsigned uNodeCount = GuideTree.GetNodeCount();\r
- for (unsigned uNodeIndex = 0; uNodeIndex < uNodeCount; ++uNodeIndex)\r
- {\r
- if (!GuideTree.IsLeaf(uNodeIndex))\r
- continue;\r
- const char *LeafName = GuideTree.GetLeafName(uNodeIndex);\r
- unsigned uSeqIndex;\r
- bool SeqFound = v.FindName(LeafName, &uSeqIndex);\r
- if (!SeqFound)\r
- Quit("Label %s in tree does not match sequences", LeafName);\r
- unsigned uId = v.GetSeqIdFromName(LeafName);\r
- GuideTree.SetLeafId(uNodeIndex, uId);\r
- }\r
- }\r
- else\r
- TreeFromSeqVect(v, GuideTree, g_Cluster1, g_Distance1, g_Root1,\r
- g_pstrDistMxFileName1);\r
-\r
- const char *Tree1 = ValueOpt("Tree1");\r
- if (0 != Tree1)\r
- {\r
- TextFile f(Tree1, true);\r
- GuideTree.ToFile(f);\r
- if (g_bClusterOnly)\r
- return;\r
- }\r
-\r
- SetMuscleTree(GuideTree);\r
- ValidateMuscleIds(GuideTree);\r
-\r
- MSA msa;\r
- ProgNode *ProgNodes = 0;\r
- if (g_bLow)\r
- ProgNodes = ProgressiveAlignE(v, GuideTree, msa);\r
- else\r
- ProgressiveAlign(v, GuideTree, msa);\r
- SetCurrentAlignment(msa);\r
-\r
- if (0 != g_pstrComputeWeightsFileName)\r
- {\r
- extern void OutWeights(const char *FileName, const MSA &msa);\r
- SetMSAWeightsMuscle(msa);\r
- OutWeights(g_pstrComputeWeightsFileName, msa);\r
- return;\r
- }\r
-\r
- ValidateMuscleIds(msa);\r
-\r
- if (1 == g_uMaxIters || 2 == uSeqCount)\r
- {\r
- //TextFile fileOut(g_pstrOutFileName, true);\r
- //MHackEnd(msa);\r
- //msa.ToFile(fileOut);\r
- MuscleOutput(msa);\r
- return;\r
- }\r
-\r
- if (0 == g_pstrUseTreeFileName)\r
- {\r
- g_bDiags = g_bDiags2;\r
- SetIter(2);\r
-\r
- if (g_bLow)\r
- {\r
- if (0 != g_uMaxTreeRefineIters)\r
- RefineTreeE(msa, v, GuideTree, ProgNodes);\r
- }\r
- else\r
- RefineTree(msa, GuideTree);\r
-\r
- const char *Tree2 = ValueOpt("Tree2");\r
- if (0 != Tree2)\r
- {\r
- TextFile f(Tree2, true);\r
- GuideTree.ToFile(f);\r
- }\r
- }\r
-\r
- SetSeqWeightMethod(g_SeqWeight2);\r
- SetMuscleTree(GuideTree);\r
-\r
- if (g_bAnchors)\r
- RefineVert(msa, GuideTree, g_uMaxIters - 2);\r
- else\r
- RefineHoriz(msa, GuideTree, g_uMaxIters - 2, false, false);\r
-\r
-#if 0\r
-// Refining by subfamilies is disabled as it didn't give better\r
-// results. I tried doing this before and after RefineHoriz.\r
-// Should get back to this as it seems like this should work.\r
- RefineSubfams(msa, GuideTree, g_uMaxIters - 2);\r
-#endif\r
-\r
- ValidateMuscleIds(msa);\r
- ValidateMuscleIds(GuideTree);\r
-\r
- //TextFile fileOut(g_pstrOutFileName, true);\r
- //MHackEnd(msa);\r
- //msa.ToFile(fileOut);\r
- MuscleOutput(msa);\r
- }\r
-\r
-void Run()\r
- {\r
- SetStartTime();\r
- Log("Started %s\n", GetTimeAsStr());\r
- for (int i = 0; i < g_argc; ++i)\r
- Log("%s ", g_argv[i]);\r
- Log("\n");\r
-\r
-#if TIMING\r
- TICKS t1 = GetClockTicks();\r
-#endif\r
- if (g_bRefine)\r
- Refine();\r
- else if (g_bRefineW)\r
- {\r
- extern void DoRefineW();\r
- DoRefineW();\r
- }\r
- else if (g_bProfDB)\r
- ProfDB();\r
- else if (g_bSW)\r
- Local();\r
- else if (0 != g_pstrSPFileName)\r
- DoSP();\r
- else if (g_bProfile)\r
- Profile();\r
- else if (g_bPPScore)\r
- PPScore();\r
- else if (g_bPAS)\r
- ProgAlignSubFams();\r
- else if (g_bMakeTree)\r
- {\r
- extern void DoMakeTree();\r
- DoMakeTree();\r
- }\r
- else\r
- DoMuscle();\r
-\r
-#if TIMING\r
- extern TICKS g_ticksDP;\r
- extern TICKS g_ticksObjScore;\r
- TICKS t2 = GetClockTicks();\r
- TICKS TotalTicks = t2 - t1;\r
- TICKS ticksOther = TotalTicks - g_ticksDP - g_ticksObjScore;\r
- double dSecs = TicksToSecs(TotalTicks);\r
- double PctDP = (double) g_ticksDP*100.0/(double) TotalTicks;\r
- double PctOS = (double) g_ticksObjScore*100.0/(double) TotalTicks;\r
- double PctOther = (double) ticksOther*100.0/(double) TotalTicks;\r
- Log(" Ticks Secs Pct\n");\r
- Log(" ============ ======= =====\n");\r
- Log("DP %12ld %7.2f %5.1f%%\n",\r
- (long) g_ticksDP, TicksToSecs(g_ticksDP), PctDP);\r
- Log("OS %12ld %7.2f %5.1f%%\n",\r
- (long) g_ticksObjScore, TicksToSecs(g_ticksObjScore), PctOS);\r
- Log("Other %12ld %7.2f %5.1f%%\n",\r
- (long) ticksOther, TicksToSecs(ticksOther), PctOther);\r
- Log("Total %12ld %7.2f 100.0%%\n", (long) TotalTicks, dSecs);\r
-#endif\r
-\r
- ListDiagSavings();\r
- Log("Finished %s\n", GetTimeAsStr());\r
- }\r