15 #ifdef _MSC_VER // Microsoft compiler
\r
16 #pragma warning(disable : 4800) // int-bool conversion
\r
17 #pragma warning(disable : 4996) // deprecated names like strdup, isatty.
\r
20 extern const char *MUSCLE_LONG_VERSION;
\r
21 #define SHORT_VERSION "3.8"
\r
29 #define DOUBLE_AFFINE 0
\r
30 #define SINGLE_AFFINE 1
\r
34 #include "intmath.h"
\r
39 #define stricmp strcasecmp
\r
40 #define strnicmp strncasecmp
\r
41 #define _snprintf snprintf
\r
// Maps _fsopen onto plain fopen: name and mode are forwarded, the share
// argument is accepted but discarded.
42 #define _fsopen(name, mode, share) fopen((name), (mode))
\r
// Redirects assert to Call_MY_ASSERT, passing the source file, the line
// number, the value of the condition and the condition's source text (#b).
// NOTE(review): a second, no-op definition of assert follows shortly after;
// presumably the two sit in opposite branches of a conditional that is not
// visible in this excerpt -- confirm.
47 #define assert(b) Call_MY_ASSERT(__FILE__, __LINE__, b, #b)
\r
48 void Call_MY_ASSERT(const char *file, int line, bool b, const char *msg);
\r
// No-op form of assert: expands to ((void)0), so the expression is never
// evaluated.
50 #define assert(exp) ((void)0)
\r
54 extern char **g_argv;
\r
// Rotates three SCORE* variables in place: a receives b, b receives c, and
// c receives the original value of a.
// Expands to a brace-enclosed block that declares a local named tmp. The
// arguments are not parenthesized and each is used more than once, so pass
// plain pointer variables (and none named tmp).
56 #define Rotate(a, b, c) { SCORE *tmp = a; a = b; b = c; c = tmp; }
\r
58 const double VERY_LARGE_DOUBLE = 1e20;
\r
60 extern unsigned g_uTreeSplitNode1;
\r
61 extern unsigned g_uTreeSplitNode2;
\r
63 // Number of elements in array a[]
\r
// Only meaningful when a is a real array: for a pointer (including an array
// function parameter) the result is sizeof(pointer)/sizeof(element).
// The argument is not parenthesized in a[0], so pass a plain identifier.
64 #define countof(a) (sizeof(a)/sizeof(a[0]))
\r
66 // Maximum of two of any type
\r
// Function-like macro: both arguments are evaluated for the comparison and
// the selected one is evaluated a second time, so avoid side effects.
67 #define Max2(a, b) ((a) > (b) ? (a) : (b))
\r
69 // Maximum of three of any type
\r
// Built from nested Max2 expansions, so a and b may be evaluated several
// times; avoid arguments with side effects.
70 #define Max3(a, b, c) Max2(Max2(a, b), c)
\r
72 // Minimum of two of any type
\r
// Function-like macro: both arguments are evaluated for the comparison and
// the selected one is evaluated a second time, so avoid side effects.
73 #define Min2(a, b) ((a) < (b) ? (a) : (b))
\r
75 // Maximum of four of any type
\r
// Takes the larger of Max2(a, b) and Max2(c, d); because Max2 is a macro,
// the arguments may be evaluated several times. Avoid side effects.
76 #define Max4(a, b, c, d) Max2(Max2(a, b), Max2(c, d))
\r
78 const double VERY_NEGATIVE_DOUBLE = -9e29;
\r
79 const float VERY_NEGATIVE_FLOAT = (float) -9e29;
\r
81 const double BLOSUM_DIST = 0.62; // todo settable
\r
83 // Sentinel ("insane") values used to mark uninitialized variables
\r
84 const unsigned uInsane = 8888888;
\r
85 const int iInsane = 8888888;
\r
86 const SCORE scoreInsane = 8888888;
\r
// NOTE(review): on x86 the one-byte int 3 opcode is 0xcc; 0xcd is the value
// the MSVC debug heap uses to fill uninitialized memory -- confirm which
// meaning the trailing comment intends.
87 const char cInsane = (char) 0xcd; // int 3 instruction, used e.g. for uninit. memory
\r
88 const double dInsane = VERY_NEGATIVE_DOUBLE;
\r
89 const float fInsane = VERY_NEGATIVE_FLOAT;
\r
90 const char INVALID_STATE = '*';
\r
// dInsane (which is VERY_NEGATIVE_DOUBLE, -9e29) converted to BASETYPE.
// NOTE(review): BASETYPE is declared elsewhere; confirm it can represent a
// value of this magnitude.
91 const BASETYPE BTInsane = (BASETYPE) dInsane;
\r
92 const WEIGHT wInsane = BTInsane;
\r
94 extern double g_dNAN;
\r
96 extern unsigned long g_tStart;
\r
98 void Quit(const char szFormat[], ...);
\r
99 void Warning(const char szFormat[], ...);
\r
100 void TrimBlanks(char szStr[]);
\r
101 void TrimLeadingBlanks(char szStr[]);
\r
102 void TrimTrailingBlanks(char szStr[]);
\r
103 void Log(const char szFormat[], ...);
\r
105 const char *ScoreToStr(SCORE Score);
\r
106 const char *ScoreToStrL(SCORE Score);
\r
107 SCORE StrToScore(const char *pszStr);
\r
110 double VecSum(const double v[], unsigned n);
\r
111 bool IsValidInteger(const char *Str);
\r
112 bool IsValidSignedInteger(const char *Str);
\r
113 bool IsValidIdentifier(const char *Str);
\r
114 bool IsValidFloatChar(char c);
\r
115 bool isident(char c);
\r
116 bool isidentf(char c);
\r
118 void TreeFromSeqVect(const SeqVect &c, Tree &tree, CLUSTER Cluster,
\r
119 DISTANCE Distance, ROOT Root, const char *SaveFileName = 0);
\r
120 void TreeFromMSA(const MSA &msa, Tree &tree, CLUSTER Cluster,
\r
121 DISTANCE Distance, ROOT Root, const char *SaveFileName = 0);
\r
123 void StripGaps(char szStr[]);
\r
124 void StripWhitespace(char szStr[]);
\r
125 const char *GetTimeAsStr();
\r
126 unsigned CalcBLOSUMWeights(MSA &Aln, ClusterTree &BlosumCluster);
\r
127 void CalcGSCWeights(MSA &Aln, const ClusterTree &BlosumCluster);
\r
128 void AssertNormalized(const PROB p[]);
\r
129 void AssertNormalizedOrZero(const PROB p[]);
\r
130 void AssertNormalized(const double p[]);
\r
131 bool VectorIsZero(const double dValues[], unsigned n);
\r
132 void VectorSet(double dValues[], unsigned n, double d);
\r
133 bool VectorIsZero(const float dValues[], unsigned n);
\r
134 void VectorSet(float dValues[], unsigned n, float d);
\r
136 // @@TODO should be "not linux"
\r
138 double log2(double x); // Defined in <math.h> on Linux
\r
141 double pow2(double x);
\r
142 double lnTolog2(double ln);
\r
144 double lp2(double x);
\r
145 SCORE SumLog(SCORE x, SCORE y);
\r
146 SCORE SumLog(SCORE x, SCORE y, SCORE z);
\r
147 SCORE SumLog(SCORE w, SCORE x, SCORE y, SCORE z);
\r
149 double lp2Fast(double x);
\r
150 double SumLogFast(double x, double y);
\r
151 double SumLogFast(double x, double y, double z);
\r
152 double SumLogFast(double w, double x, double y, double z);
\r
154 void chkmem(const char szMsg[] = "");
\r
156 void Normalize(PROB p[], unsigned n);
\r
157 void Normalize(PROB p[], unsigned n, double dRequiredTotal);
\r
158 void NormalizeUnlessZero(PROB p[], unsigned n);
\r
160 void DebugPrintf(const char szFormat[], ...);
\r
161 void SetListFileName(const char *ptrListFileName, bool bAppend);
\r
162 void ModelFromAlign(const char *strInputFileName, const char *strModelFileName,
\r
164 double GetMemUseMB();
\r
165 double GetRAMSizeMB();
\r
166 double GetPeakMemUseMB();
\r
167 void CheckMemUse();
\r
168 const char *ElapsedTimeAsString();
\r
169 char *SecsToHHMMSS(long lSecs, char szStr[]);
\r
170 double GetCPUGHz();
\r
171 SCORE GetBlosum62(unsigned uLetterA, unsigned uLetterB);
\r
172 SCORE GetBlosum62d(unsigned uLetterA, unsigned uLetterB);
\r
173 SCORE GetBlosum50(unsigned uLetterA, unsigned uLetterB);
\r
174 void AssertNormalizedDist(const PROB p[], unsigned N);
\r
175 void CmdLineError(const char *Format, ...);
\r
176 void Fatal(const char *Format, ...);
\r
178 void ExecCommandLine(int argc, char *argv[]);
\r
181 void NameFromPath(const char szPath[], char szName[], unsigned uBytes);
\r
182 char *strsave(const char *s);
\r
183 void DistKmer20_3(const SeqVect &v, DistFunc &DF);
\r
184 void DistKbit20_3(const SeqVect &v, DistFunc &DF);
\r
185 void DistKmer6_6(const SeqVect &v, DistFunc &DF);
\r
186 void DistKmer4_6(const SeqVect &v, DistFunc &DF);
\r
187 void DistPWKimura(const SeqVect &v, DistFunc &DF);
\r
188 void FastDistKmer(const SeqVect &v, DistFunc &DF);
\r
189 void DistUnaligned(const SeqVect &v, DISTANCE DistMethod, DistFunc &DF);
\r
190 double PctIdToMAFFTDist(double dPctId);
\r
191 double KimuraDist(double dPctId);
\r
192 void SetFastParams();
\r
193 void AssertProfsEq(const ProfPos *PA, unsigned uLengthA, const ProfPos *PB,
\r
194 unsigned uLengthB);
\r
195 void ValidateMuscleIds(const MSA &msa);
\r
196 void ValidateMuscleIds(const Tree &tree);
\r
197 void TraceBackToPath(int **TraceBack, unsigned uLengthA,
\r
198 unsigned uLengthB, PWPath &Path);
\r
199 void BitTraceBack(char **TraceBack, unsigned uLengthA, unsigned uLengthB,
\r
200 char LastEdge, PWPath &Path);
\r
201 SCORE AlignTwoMSAs(const MSA &msa1, const MSA &msa2, MSA &msaOut, PWPath &Path,
\r
202 bool bLockLeft = false, bool bLockRight = false);
\r
203 SCORE AlignTwoProfs(
\r
204 const ProfPos *PA, unsigned uLengthA, WEIGHT wA,
\r
205 const ProfPos *PB, unsigned uLengthB, WEIGHT wB,
\r
206 PWPath &Path, ProfPos **ptrPout, unsigned *ptruLengthOut);
\r
207 void AlignTwoProfsGivenPath(const PWPath &Path,
\r
208 const ProfPos *PA, unsigned uLengthA, WEIGHT wA,
\r
209 const ProfPos *PB, unsigned uLengthB, WEIGHT wB,
\r
210 ProfPos **ptrPOut, unsigned *ptruLengthOut);
\r
211 void AlignTwoMSAsGivenPathSW(const PWPath &Path, const MSA &msaA, const MSA &msaB,
\r
213 void AlignTwoMSAsGivenPath(const PWPath &Path, const MSA &msaA, const MSA &msaB,
\r
215 SCORE FastScorePath2(const ProfPos *PA, unsigned uLengthA,
\r
216 const ProfPos *PB, unsigned uLengthB, const PWPath &Path);
\r
217 SCORE GlobalAlignDiags(const ProfPos *PA, unsigned uLengthA, const ProfPos *PB,
\r
218 unsigned uLengthB, PWPath &Path);
\r
219 SCORE GlobalAlignSimple(const ProfPos *PA, unsigned uLengthA, const ProfPos *PB,
\r
220 unsigned uLengthB, PWPath &Path);
\r
221 SCORE GlobalAlignSP(const ProfPos *PA, unsigned uLengthA, const ProfPos *PB,
\r
222 unsigned uLengthB, PWPath &Path);
\r
223 SCORE GlobalAlignSPN(const ProfPos *PA, unsigned uLengthA, const ProfPos *PB,
\r
224 unsigned uLengthB, PWPath &Path);
\r
225 SCORE GlobalAlignLE(const ProfPos *PA, unsigned uLengthA, const ProfPos *PB,
\r
226 unsigned uLengthB, PWPath &Path);
\r
227 void CalcThreeWayWeights(const Tree &tree, unsigned uNode1, unsigned uNode2,
\r
229 SCORE GlobalAlignSS(const Seq &seqA, const Seq &seqB, PWPath &Path);
\r
230 bool RefineHoriz(MSA &msaIn, const Tree &tree, unsigned uIters, bool bLockLeft, bool bLockRight);
\r
231 bool RefineVert(MSA &msaIn, const Tree &tree, unsigned uIters);
\r
232 SCORE GlobalAlignNoDiags(const ProfPos *PA, unsigned uLengthA, const ProfPos *PB,
\r
233 unsigned uLengthB, PWPath &Path);
\r
235 void SetInputFileName(const char *pstrFileName);
\r
236 void SetIter(unsigned uIter);
\r
238 void SetMaxIters(unsigned uMaxIters);
\r
239 void Progress(unsigned uStep, unsigned uTotalSteps);
\r
240 void Progress(const char *szFormat, ...);
\r
241 void SetStartTime();
\r
242 void ProgressStepsDone();
\r
243 void SetProgressDesc(const char szDesc[]);
\r
244 void SetSeqStats(unsigned uSeqCount, unsigned uMaxL, unsigned uAvgL);
\r
246 void SetNewHandler();
\r
247 void SaveCurrentAlignment();
\r
248 void SetCurrentAlignment(MSA &msa);
\r
249 void SetOutputFileName(const char *out);
\r
252 void SetMuscleSeqVect(SeqVect &v);
\r
253 void SetMuscleInputMSA(MSA &msa);
\r
254 void ValidateMuscleIds(const MSA &msa);
\r
255 void ValidateMuscleIds(const Tree &tree);
\r
// Expands to nothing, so any SetMuscleSeqVect(x) call compiles away and x
// is never evaluated.
// NOTE(review): a prototype with the same name appears a few lines earlier;
// presumably the prototype and this macro belong to opposite branches of a
// conditional that is not visible in this excerpt -- confirm.
257 #define SetMuscleSeqVect(x) /* empty */
\r
258 #define SetMuscleInputMSA(x) /* empty */
\r
259 #define ValidateMuscleIds(x) /* empty */
\r
262 void ProcessArgVect(int argc, char *argv[]);
\r
263 void ProcessArgStr(const char *Str);
\r
267 void SortCounts(const FCOUNT fcCounts[], unsigned SortOrder[]);
\r
268 unsigned ResidueGroupFromFCounts(const FCOUNT fcCounts[]);
\r
269 FCOUNT SumCounts(const FCOUNT Counts[]);
\r
271 bool FlagOpt(const char *Name);
\r
272 const char *ValueOpt(const char *Name);
\r
276 void ProgAlignSubFams();
\r
279 void OnException();
\r
280 void SetSeqWeightMethod(SEQWEIGHT Method);
\r
281 SEQWEIGHT GetSeqWeightMethod();
\r
282 WEIGHT GetMuscleSeqWeightById(unsigned uId);
\r
283 void ListDiagSavings();
\r
284 void CheckMaxTime();
\r
285 const char *MaxSecsToStr();
\r
286 unsigned long GetStartTime();
\r
288 void ProgressiveAlign(const SeqVect &v, const Tree &GuideTree, MSA &a);
\r
289 ProgNode *ProgressiveAlignE(const SeqVect &v, const Tree &GuideTree, MSA &a);
\r
291 void CalcDistRangeKmer6_6(const MSA &msa, unsigned uRow, float Dist[]);
\r
292 void CalcDistRangeKmer20_3(const MSA &msa, unsigned uRow, float Dist[]);
\r
293 void CalcDistRangeKmer20_4(const MSA &msa, unsigned uRow, float Dist[]);
\r
294 void CalcDistRangePctIdKimura(const MSA &msa, unsigned uRow, float Dist[]);
\r
295 void CalcDistRangePctIdLog(const MSA &msa, unsigned uRow, float Dist[]);
\r
297 void MakeRootMSA(const SeqVect &v, const Tree &GuideTree, ProgNode Nodes[], MSA &a);
\r
298 void MakeRootMSABrenner(SeqVect &v, const Tree &GuideTree, ProgNode Nodes[], MSA &a);
\r
304 void UPGMA2(const DistCalc &DC, Tree &tree, LINKAGE Linkage);
\r
306 char *GetFastaSeq(FILE *f, unsigned *ptrSeqLength, char **ptrLabel,
\r
307 bool DeleteGaps = true);
\r
308 SCORE SW(const ProfPos *PA, unsigned uLengthA, const ProfPos *PB,
\r
309 unsigned uLengthB, PWPath &Path);
\r
310 void TraceBackSW(const ProfPos *PA, unsigned uLengthA, const ProfPos *PB,
\r
311 unsigned uLengthB, const SCORE *DPM_, const SCORE *DPD_, const SCORE *DPI_,
\r
312 unsigned uPrefixLengthAMax, unsigned uPrefixLengthBMax, PWPath &Path);
\r
313 void DiffPaths(const PWPath &p1, const PWPath &p2, unsigned Edges1[],
\r
314 unsigned *ptruDiffCount1, unsigned Edges2[], unsigned *ptruDiffCount2);
\r
315 void SetPPScore(bool bRespectFlagOpts = true);
\r
316 void SetPPScore(PPSCORE p);
\r
317 SCORE GlobalAlignDimer(const ProfPos *PA, unsigned uLengthA, const ProfPos *PB,
\r
318 unsigned uLengthB, PWPath &Path);
\r
319 bool MissingCommand();
\r
321 void ProfileProfile(MSA &msa1, MSA &msa2, MSA &msaOut);
\r
322 void MHackStart(SeqVect &v);
\r
323 void MHackEnd(MSA &msa);
\r
324 void WriteScoreFile(const MSA &msa);
\r
325 char ConsensusChar(const ProfPos &PP);
\r
326 void Stabilize(const MSA &msa, MSA &msaStable);
\r
327 void MuscleOutput(MSA &msa);
\r
328 PTR_SCOREMATRIX ReadMx(TextFile &File);
\r
329 void MemPlus(size_t Bytes, char *Where);
\r
330 void MemMinus(size_t Bytes, char *Where);
\r