5 #include "distfunc.h"
\r
11 // E. Sonnhammer & V. Hollich, Scoredist: A simple and robust protein sequence
\r
12 // distance estimator, BMC Bioinformatics 2005, 6:108.
\r
14 extern int BLOSUM62[20][20];
\r
15 extern double BLOSUM62_Expected;
\r
17 static const double Dayhoff_CalibrationFactor = 1.3370;
\r
18 static const double JTT_CalibrationFactor = 1.2873;
\r
19 static const double MV_CalibrationFactor = 1.1775;
\r
20 static const double LARGE_D = 3.0;
\r
22 static double CalibrationFactor = JTT_CalibrationFactor;
\r
26 static double Sigma(const MSA &msa, unsigned SeqIndex1, unsigned SeqIndex2,
\r
27 unsigned *ptrLength)
\r
29 unsigned Length = 0;
\r
31 const unsigned ColCount = msa.GetColCount();
\r
32 for (unsigned ColIndex = 0; ColIndex < ColCount; ++ColIndex)
\r
34 unsigned Letter1 = msa.GetLetterEx(SeqIndex1, ColIndex);
\r
35 unsigned Letter2 = msa.GetLetterEx(SeqIndex2, ColIndex);
\r
36 if (Letter1 >= 20 || Letter2 >= 20)
\r
39 Score += BLOSUM62[Letter1][Letter2];
\r
42 *ptrLength = Length;
\r
47 static double Sigma_N(const MSA &msa, unsigned SeqIndex1, unsigned SeqIndex2)
\r
49 unsigned Length = UINT_MAX;
\r
50 double Score = Sigma(msa, SeqIndex1, SeqIndex2, &Length);
\r
51 double RandomScore = Length*BLOSUM62_Expected;
\r
52 return Score - RandomScore;
\r
56 static double Sigma_U(const MSA &msa, unsigned SeqIndex1, unsigned SeqIndex2,
\r
57 unsigned *ptrLength)
\r
59 double Score11 = Sigma(msa, SeqIndex1, SeqIndex1, ptrLength);
\r
60 double Score22 = Sigma(msa, SeqIndex2, SeqIndex2, ptrLength);
\r
61 return (Score11 + Score22)/2;
\r
64 // Normalized upper limit
\r
65 static double Sigma_UN(const MSA &msa, unsigned SeqIndex1, unsigned SeqIndex2)
\r
67 unsigned Length = UINT_MAX;
\r
68 double Score = Sigma_U(msa, SeqIndex1, SeqIndex2, &Length);
\r
69 double RandomScore = Length*BLOSUM62_Expected;
\r
70 return Score - RandomScore;
\r
73 double GetScoreDist(const MSA &msa, unsigned SeqIndex1, unsigned SeqIndex2)
\r
75 if (g_Alpha != ALPHA_Amino)
\r
76 Quit("Scoredist is only for amino acid sequences");
\r
78 double s_N = Sigma_N(msa, SeqIndex1, SeqIndex2);
\r
79 double s_UN = Sigma_UN(msa, SeqIndex1, SeqIndex2);
\r
83 double Ratio = s_N/s_UN;
\r
89 return d*CalibrationFactor;
\r
92 void DistPWScoreDist(const SeqVect &v, DistFunc &DF)
\r
94 SEQWEIGHT SeqWeightSave = GetSeqWeightMethod();
\r
95 SetSeqWeightMethod(SEQWEIGHT_Henikoff);
\r
97 const unsigned uSeqCount = v.Length();
\r
98 DF.SetCount(uSeqCount);
\r
100 const unsigned uPairCount = (uSeqCount*(uSeqCount + 1))/2;
\r
101 unsigned uCount = 0;
\r
102 SetProgressDesc("PW ScoreDist");
\r
103 for (unsigned uSeqIndex1 = 0; uSeqIndex1 < uSeqCount; ++uSeqIndex1)
\r
105 const Seq &s1 = v.GetSeq(uSeqIndex1);
\r
108 for (unsigned uSeqIndex2 = 0; uSeqIndex2 < uSeqIndex1; ++uSeqIndex2)
\r
110 if (0 == uCount%20)
\r
111 Progress(uCount, uPairCount);
\r
113 const Seq &s2 = v.GetSeq(uSeqIndex2);
\r
119 AlignTwoMSAs(msa1, msa2, msaOut, Path, false, false);
\r
121 float d = (float) GetScoreDist(msaOut, 0, 1);
\r
122 DF.SetDist(uSeqIndex1, uSeqIndex2, d);
\r
125 ProgressStepsDone();
\r
127 SetSeqWeightMethod(SeqWeightSave);
\r