+++ /dev/null
-#include <limits.h>\r
-#include <math.h>\r
-#include "muscle.h"\r
-#include "msa.h"\r
-#include "distfunc.h"\r
-#include "msa.h"\r
-#include "seqvect.h"\r
-#include "pwpath.h"\r
-\r
-// ScoreDist\r
-// E. Sonnhammer & V. Hollich, Scoredist: A simple and robust protein sequence\r
-// distance estimator, BMC Bioinformatics 2005, 6:108.\r
-\r
-extern int BLOSUM62[20][20];\r
-extern double BLOSUM62_Expected;\r
-\r
-static const double Dayhoff_CalibrationFactor = 1.3370;\r
-static const double JTT_CalibrationFactor = 1.2873;\r
-static const double MV_CalibrationFactor = 1.1775;\r
-static const double LARGE_D = 3.0;\r
-\r
-static double CalibrationFactor = JTT_CalibrationFactor;\r
-\r
-\r
-// Similarity score\r
-static double Sigma(const MSA &msa, unsigned SeqIndex1, unsigned SeqIndex2,\r
- unsigned *ptrLength)\r
- {\r
- unsigned Length = 0;\r
- double Score = 0;\r
- const unsigned ColCount = msa.GetColCount();\r
- for (unsigned ColIndex = 0; ColIndex < ColCount; ++ColIndex)\r
- {\r
- unsigned Letter1 = msa.GetLetterEx(SeqIndex1, ColIndex);\r
- unsigned Letter2 = msa.GetLetterEx(SeqIndex2, ColIndex);\r
- if (Letter1 >= 20 || Letter2 >= 20)\r
- continue;\r
- ++Length;\r
- Score += BLOSUM62[Letter1][Letter2];\r
- }\r
-\r
- *ptrLength = Length;\r
- return Score;\r
- }\r
-\r
-// Normalized score\r
-static double Sigma_N(const MSA &msa, unsigned SeqIndex1, unsigned SeqIndex2)\r
- {\r
- unsigned Length = UINT_MAX;\r
- double Score = Sigma(msa, SeqIndex1, SeqIndex2, &Length);\r
- double RandomScore = Length*BLOSUM62_Expected;\r
- return Score - RandomScore;\r
- }\r
-\r
-// Upper limit\r
-static double Sigma_U(const MSA &msa, unsigned SeqIndex1, unsigned SeqIndex2,\r
- unsigned *ptrLength)\r
- {\r
- double Score11 = Sigma(msa, SeqIndex1, SeqIndex1, ptrLength);\r
- double Score22 = Sigma(msa, SeqIndex2, SeqIndex2, ptrLength);\r
- return (Score11 + Score22)/2;\r
- }\r
-\r
-// Normalized upper limit\r
-static double Sigma_UN(const MSA &msa, unsigned SeqIndex1, unsigned SeqIndex2)\r
- {\r
- unsigned Length = UINT_MAX;\r
- double Score = Sigma_U(msa, SeqIndex1, SeqIndex2, &Length);\r
- double RandomScore = Length*BLOSUM62_Expected;\r
- return Score - RandomScore;\r
- }\r
-\r
-double GetScoreDist(const MSA &msa, unsigned SeqIndex1, unsigned SeqIndex2)\r
- {\r
- if (g_Alpha != ALPHA_Amino)\r
- Quit("Scoredist is only for amino acid sequences");\r
-\r
- double s_N = Sigma_N(msa, SeqIndex1, SeqIndex2);\r
- double s_UN = Sigma_UN(msa, SeqIndex1, SeqIndex2);\r
- double d = 0.0;\r
- if (s_UN != 0)\r
- {\r
- double Ratio = s_N/s_UN;\r
- if (Ratio < 0.001)\r
- d = LARGE_D;\r
- else\r
- d = -log(Ratio);\r
- }\r
- return d*CalibrationFactor;\r
- }\r
-\r
-void DistPWScoreDist(const SeqVect &v, DistFunc &DF)\r
- {\r
- SEQWEIGHT SeqWeightSave = GetSeqWeightMethod();\r
- SetSeqWeightMethod(SEQWEIGHT_Henikoff);\r
-\r
- const unsigned uSeqCount = v.Length();\r
- DF.SetCount(uSeqCount);\r
-\r
- const unsigned uPairCount = (uSeqCount*(uSeqCount + 1))/2;\r
- unsigned uCount = 0;\r
- SetProgressDesc("PW ScoreDist");\r
- for (unsigned uSeqIndex1 = 0; uSeqIndex1 < uSeqCount; ++uSeqIndex1)\r
- {\r
- const Seq &s1 = v.GetSeq(uSeqIndex1);\r
- MSA msa1;\r
- msa1.FromSeq(s1);\r
- for (unsigned uSeqIndex2 = 0; uSeqIndex2 < uSeqIndex1; ++uSeqIndex2)\r
- {\r
- if (0 == uCount%20)\r
- Progress(uCount, uPairCount);\r
- ++uCount;\r
- const Seq &s2 = v.GetSeq(uSeqIndex2);\r
- MSA msa2;\r
- msa2.FromSeq(s2);\r
- \r
- PWPath Path;\r
- MSA msaOut;\r
- AlignTwoMSAs(msa1, msa2, msaOut, Path, false, false);\r
-\r
- float d = (float) GetScoreDist(msaOut, 0, 1);\r
- DF.SetDist(uSeqIndex1, uSeqIndex2, d);\r
- }\r
- }\r
- ProgressStepsDone();\r
-\r
- SetSeqWeightMethod(SeqWeightSave);\r
- }\r