2 #include "textfile.h"
\r
4 #include "distfunc.h"
\r
10 static char g_strUseTreeWarning[] =
\r
11 "\n******** WARNING ****************\n"
\r
12 "\nYou specified the -usetree option.\n"
\r
13 "Note that a good evolutionary tree may NOT be a good\n"
\r
14 "guide tree for multiple alignment. For more details,\n"
\r
15 "please refer to the user guide. To disable this\n"
\r
16 "warning, use -usetree_nowarn <treefilename>.\n\n";
\r
20 SetOutputFileName(g_pstrOutFileName);
\r
21 SetInputFileName(g_pstrInFileName);
\r
23 SetMaxIters(g_uMaxIters);
\r
24 SetSeqWeightMethod(g_SeqWeight1);
\r
26 TextFile fileIn(g_pstrInFileName);
\r
28 v.FromFASTAFile(fileIn);
\r
29 const unsigned uSeqCount = v.Length();
\r
32 Quit("No sequences in input file");
\r
34 ALPHA Alpha = ALPHA_Undefined;
\r
38 Alpha = v.GuessAlpha();
\r
41 case SEQTYPE_Protein:
\r
42 Alpha = ALPHA_Amino;
\r
54 Quit("Invalid seq type");
\r
59 PTR_SCOREMATRIX UserMatrix = 0;
\r
60 if (0 != g_pstrMatrixFileName)
\r
62 const char *FileName = g_pstrMatrixFileName;
\r
63 const char *Path = getenv("MUSCLE_MXPATH");
\r
66 size_t n = strlen(Path) + 1 + strlen(FileName) + 1;
\r
67 char *NewFileName = new char[n];
\r
68 sprintf(NewFileName, "%s/%s", Path, FileName);
\r
69 FileName = NewFileName;
\r
71 TextFile File(FileName);
\r
72 UserMatrix = ReadMx(File);
\r
73 g_Alpha = ALPHA_Amino;
\r
74 g_PPScore = PPSCORE_SP;
\r
79 if (0 != UserMatrix)
\r
80 g_ptrScoreMatrix = UserMatrix;
\r
84 for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
\r
86 unsigned L = v.GetSeq(uSeqIndex).Length();
\r
93 g_bDiags = g_bDiags1;
\r
94 SetSeqStats(uSeqCount, uMaxL, uTotL/uSeqCount);
\r
96 SetMuscleSeqVect(v);
\r
98 MSA::SetIdCount(uSeqCount);
\r
100 // Initialize sequence ids.
\r
101 // From this point on, ids must somehow propogate from here.
\r
102 for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
\r
103 v.SetSeqId(uSeqIndex, uSeqIndex);
\r
105 if (0 == uSeqCount)
\r
106 Quit("Input file '%s' has no sequences", g_pstrInFileName);
\r
107 if (1 == uSeqCount)
\r
109 TextFile fileOut(g_pstrOutFileName, true);
\r
119 if (0 != g_pstrUseTreeFileName)
\r
121 // Discourage users...
\r
122 if (!g_bUseTreeNoWarn)
\r
123 fprintf(stderr, "%s", g_strUseTreeWarning);
\r
125 // Read tree from file
\r
126 TextFile TreeFile(g_pstrUseTreeFileName);
\r
127 GuideTree.FromFile(TreeFile);
\r
129 // Make sure tree is rooted
\r
130 if (!GuideTree.IsRooted())
\r
131 Quit("User tree must be rooted");
\r
133 if (GuideTree.GetLeafCount() != uSeqCount)
\r
134 Quit("User tree does not match input sequences");
\r
136 const unsigned uNodeCount = GuideTree.GetNodeCount();
\r
137 for (unsigned uNodeIndex = 0; uNodeIndex < uNodeCount; ++uNodeIndex)
\r
139 if (!GuideTree.IsLeaf(uNodeIndex))
\r
141 const char *LeafName = GuideTree.GetLeafName(uNodeIndex);
\r
142 unsigned uSeqIndex;
\r
143 bool SeqFound = v.FindName(LeafName, &uSeqIndex);
\r
145 Quit("Label %s in tree does not match sequences", LeafName);
\r
146 unsigned uId = v.GetSeqIdFromName(LeafName);
\r
147 GuideTree.SetLeafId(uNodeIndex, uId);
\r
151 TreeFromSeqVect(v, GuideTree, g_Cluster1, g_Distance1, g_Root1,
\r
152 g_pstrDistMxFileName1);
\r
154 const char *Tree1 = ValueOpt("Tree1");
\r
157 TextFile f(Tree1, true);
\r
158 GuideTree.ToFile(f);
\r
159 if (g_bClusterOnly)
\r
163 SetMuscleTree(GuideTree);
\r
164 ValidateMuscleIds(GuideTree);
\r
167 ProgNode *ProgNodes = 0;
\r
169 ProgNodes = ProgressiveAlignE(v, GuideTree, msa);
\r
171 ProgressiveAlign(v, GuideTree, msa);
\r
172 SetCurrentAlignment(msa);
\r
174 if (0 != g_pstrComputeWeightsFileName)
\r
176 extern void OutWeights(const char *FileName, const MSA &msa);
\r
177 SetMSAWeightsMuscle(msa);
\r
178 OutWeights(g_pstrComputeWeightsFileName, msa);
\r
182 ValidateMuscleIds(msa);
\r
184 if (1 == g_uMaxIters || 2 == uSeqCount)
\r
186 //TextFile fileOut(g_pstrOutFileName, true);
\r
188 //msa.ToFile(fileOut);
\r
193 if (0 == g_pstrUseTreeFileName)
\r
195 g_bDiags = g_bDiags2;
\r
200 if (0 != g_uMaxTreeRefineIters)
\r
201 RefineTreeE(msa, v, GuideTree, ProgNodes);
\r
204 RefineTree(msa, GuideTree);
\r
206 const char *Tree2 = ValueOpt("Tree2");
\r
209 TextFile f(Tree2, true);
\r
210 GuideTree.ToFile(f);
\r
214 SetSeqWeightMethod(g_SeqWeight2);
\r
215 SetMuscleTree(GuideTree);
\r
218 RefineVert(msa, GuideTree, g_uMaxIters - 2);
\r
220 RefineHoriz(msa, GuideTree, g_uMaxIters - 2, false, false);
\r
223 // Refining by subfamilies is disabled as it didn't give better
\r
224 // results. I tried doing this before and after RefineHoriz.
\r
225 // Should get back to this as it seems like this should work.
\r
226 RefineSubfams(msa, GuideTree, g_uMaxIters - 2);
\r
229 ValidateMuscleIds(msa);
\r
230 ValidateMuscleIds(GuideTree);
\r
232 //TextFile fileOut(g_pstrOutFileName, true);
\r
234 //msa.ToFile(fileOut);
\r
241 Log("Started %s\n", GetTimeAsStr());
\r
242 for (int i = 0; i < g_argc; ++i)
\r
243 Log("%s ", g_argv[i]);
\r
247 TICKS t1 = GetClockTicks();
\r
251 else if (g_bRefineW)
\r
253 extern void DoRefineW();
\r
256 else if (g_bProfDB)
\r
260 else if (0 != g_pstrSPFileName)
\r
262 else if (g_bProfile)
\r
264 else if (g_bPPScore)
\r
267 ProgAlignSubFams();
\r
268 else if (g_bMakeTree)
\r
270 extern void DoMakeTree();
\r
277 extern TICKS g_ticksDP;
\r
278 extern TICKS g_ticksObjScore;
\r
279 TICKS t2 = GetClockTicks();
\r
280 TICKS TotalTicks = t2 - t1;
\r
281 TICKS ticksOther = TotalTicks - g_ticksDP - g_ticksObjScore;
\r
282 double dSecs = TicksToSecs(TotalTicks);
\r
283 double PctDP = (double) g_ticksDP*100.0/(double) TotalTicks;
\r
284 double PctOS = (double) g_ticksObjScore*100.0/(double) TotalTicks;
\r
285 double PctOther = (double) ticksOther*100.0/(double) TotalTicks;
\r
286 Log(" Ticks Secs Pct\n");
\r
287 Log(" ============ ======= =====\n");
\r
288 Log("DP %12ld %7.2f %5.1f%%\n",
\r
289 (long) g_ticksDP, TicksToSecs(g_ticksDP), PctDP);
\r
290 Log("OS %12ld %7.2f %5.1f%%\n",
\r
291 (long) g_ticksObjScore, TicksToSecs(g_ticksObjScore), PctOS);
\r
292 Log("Other %12ld %7.2f %5.1f%%\n",
\r
293 (long) ticksOther, TicksToSecs(ticksOther), PctOther);
\r
294 Log("Total %12ld %7.2f 100.0%%\n", (long) TotalTicks, dSecs);
\r
298 Log("Finished %s\n", GetTimeAsStr());
\r