Delete unneeded directory
[jabaws.git] / website / archive / binaries / mac / src / muscle / fastdistnuc.cpp
diff --git a/website/archive/binaries/mac/src/muscle/fastdistnuc.cpp b/website/archive/binaries/mac/src/muscle/fastdistnuc.cpp
deleted file mode 100644 (file)
index a7a7e7f..0000000
+++ /dev/null
@@ -1,265 +0,0 @@
-#include "muscle.h"\r
-#include "distfunc.h"\r
-#include "seqvect.h"\r
-#include <math.h>\r
-\r
-#define TRACE 0\r
-\r
-#define MIN(x, y)      (((x) < (y)) ? (x) : (y))\r
-#define MAX(x, y)      (((x) > (y)) ? (x) : (y))\r
-\r
-const unsigned TUPLE_COUNT = 6*6*6*6*6*6;\r
-static unsigned char Count1[TUPLE_COUNT];\r
-static unsigned char Count2[TUPLE_COUNT];\r
-\r
-// Nucleotide groups according to MAFFT (sextet5)\r
-// 0 =  A\r
-// 1 =  C\r
-// 2 =  G\r
-// 3 =  T\r
-// 4 =  other\r
-\r
-static unsigned ResidueGroup[] =\r
-       {\r
-       0,              // NX_A,\r
-       1,              // NX_C,\r
-       2,              // NX_G,\r
-       3,              // NX_T/U\r
-       4,              // NX_N,\r
-       4,              // NX_R,\r
-       4,              // NX_Y,\r
-       4,              // NX_GAP\r
-       };\r
-static unsigned uResidueGroupCount = sizeof(ResidueGroup)/sizeof(ResidueGroup[0]);\r
-\r
-static char *TupleToStr(int t)\r
-       {\r
-       static char s[7];\r
-       int t1, t2, t3, t4, t5, t6;\r
-\r
-       t1 = t%6;\r
-       t2 = (t/6)%6;\r
-       t3 = (t/(6*6))%6;\r
-       t4 = (t/(6*6*6))%6;\r
-       t5 = (t/(6*6*6*6))%6;\r
-       t6 = (t/(6*6*6*6*6))%6;\r
-\r
-       s[5] = '0' + t1;\r
-       s[4] = '0' + t2;\r
-       s[3] = '0' + t3;\r
-       s[2] = '0' + t4;\r
-       s[1] = '0' + t5;\r
-       s[0] = '0' + t6;\r
-       return s;\r
-       }\r
-\r
-static unsigned GetTuple(const unsigned uLetters[], unsigned n)\r
-       {\r
-       assert(uLetters[n] < uResidueGroupCount);\r
-       assert(uLetters[n+1] < uResidueGroupCount);\r
-       assert(uLetters[n+2] < uResidueGroupCount);\r
-       assert(uLetters[n+3] < uResidueGroupCount);\r
-       assert(uLetters[n+4] < uResidueGroupCount);\r
-       assert(uLetters[n+5] < uResidueGroupCount);\r
-\r
-       unsigned u1 = ResidueGroup[uLetters[n]];\r
-       unsigned u2 = ResidueGroup[uLetters[n+1]];\r
-       unsigned u3 = ResidueGroup[uLetters[n+2]];\r
-       unsigned u4 = ResidueGroup[uLetters[n+3]];\r
-       unsigned u5 = ResidueGroup[uLetters[n+4]];\r
-       unsigned u6 = ResidueGroup[uLetters[n+5]];\r
-\r
-       return u6 + u5*6 + u4*6*6 + u3*6*6*6 + u2*6*6*6*6 + u1*6*6*6*6*6;\r
-       }\r
-\r
-static void CountTuples(const unsigned L[], unsigned uTupleCount, unsigned char Count[])\r
-       {\r
-       memset(Count, 0, TUPLE_COUNT*sizeof(unsigned char));\r
-       for (unsigned n = 0; n < uTupleCount; ++n)\r
-               {\r
-               const unsigned uTuple = GetTuple(L, n);\r
-               ++(Count[uTuple]);\r
-               }\r
-       }\r
-\r
-static void ListCount(const unsigned char Count[])\r
-       {\r
-       for (unsigned n = 0; n < TUPLE_COUNT; ++n)\r
-               {\r
-               if (0 == Count[n])\r
-                       continue;\r
-               Log("%s  %u\n", TupleToStr(n), Count[n]);\r
-               }\r
-       }\r
-\r
-void DistKmer4_6(const SeqVect &v, DistFunc &DF)\r
-       {\r
-       if (ALPHA_DNA != g_Alpha && ALPHA_RNA != g_Alpha)\r
-               Quit("DistKmer4_6 requires nucleo alphabet");\r
-\r
-       const unsigned uSeqCount = v.Length();\r
-\r
-       DF.SetCount(uSeqCount);\r
-       if (0 == uSeqCount)\r
-               return;\r
-\r
-// Initialize distance matrix to zero\r
-       for (unsigned uSeq1 = 0; uSeq1 < uSeqCount; ++uSeq1)\r
-               {\r
-               DF.SetDist(uSeq1, uSeq1, 0);\r
-               for (unsigned uSeq2 = 0; uSeq2 < uSeq1; ++uSeq2)\r
-                       DF.SetDist(uSeq1, uSeq2, 0);\r
-               }\r
-\r
-// Convert to letters\r
-       unsigned **Letters = new unsigned *[uSeqCount];\r
-       for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)\r
-               {\r
-               Seq &s = *(v[uSeqIndex]);\r
-               const unsigned uSeqLength = s.Length();\r
-               unsigned *L = new unsigned[uSeqLength];\r
-               Letters[uSeqIndex] = L;\r
-               for (unsigned n = 0; n < uSeqLength; ++n)\r
-                       {\r
-                       char c = s[n];\r
-                       L[n] = CharToLetterEx(c);\r
-                       if (L[n] >= 4)\r
-                               L[n] = 4;\r
-                       }\r
-               }\r
-\r
-       unsigned **uCommonTupleCount = new unsigned *[uSeqCount];\r
-       for (unsigned n = 0; n < uSeqCount; ++n)\r
-               {\r
-               uCommonTupleCount[n] = new unsigned[uSeqCount];\r
-               memset(uCommonTupleCount[n], 0, uSeqCount*sizeof(unsigned));\r
-               }\r
-\r
-       const unsigned uPairCount = (uSeqCount*(uSeqCount + 1))/2;\r
-       unsigned uCount = 0;\r
-       for (unsigned uSeq1 = 0; uSeq1 < uSeqCount; ++uSeq1)\r
-               {\r
-               Seq &seq1 = *(v[uSeq1]);\r
-               const unsigned uSeqLength1 = seq1.Length();\r
-               if (uSeqLength1 < 5)\r
-                       continue;\r
-\r
-               const unsigned uTupleCount = uSeqLength1 - 5;\r
-               const unsigned *L = Letters[uSeq1];\r
-               CountTuples(L, uTupleCount, Count1);\r
-#if    TRACE\r
-               {\r
-               Log("Seq1=%d\n", uSeq1);\r
-               Log("Groups:\n");\r
-               for (unsigned n = 0; n < uSeqLength1; ++n)\r
-                       Log("%u", ResidueGroup[L[n]]);\r
-               Log("\n");\r
-\r
-               Log("Tuples:\n");\r
-               ListCount(Count1);\r
-               }\r
-#endif\r
-\r
-               SetProgressDesc("K-mer dist pass 1");\r
-               for (unsigned uSeq2 = 0; uSeq2 <= uSeq1; ++uSeq2)\r
-                       {\r
-                       if (0 == uCount%500)\r
-                               Progress(uCount, uPairCount);\r
-                       ++uCount;\r
-                       Seq &seq2 = *(v[uSeq2]);\r
-                       const unsigned uSeqLength2 = seq2.Length();\r
-                       if (uSeqLength2 < 5)\r
-                               {\r
-                               if (uSeq1 == uSeq2)\r
-                                       DF.SetDist(uSeq1, uSeq2, 0);\r
-                               else\r
-                                       DF.SetDist(uSeq1, uSeq2, 1);\r
-                               continue;\r
-                               }\r
-\r
-               // First pass through seq 2 to count tuples\r
-                       const unsigned uTupleCount = uSeqLength2 - 5;\r
-                       const unsigned *L = Letters[uSeq2];\r
-                       CountTuples(L, uTupleCount, Count2);\r
-#if    TRACE\r
-                       Log("Seq2=%d Counts=\n", uSeq2);\r
-                       ListCount(Count2);\r
-#endif\r
-\r
-               // Second pass to accumulate sum of shared tuples\r
-               // MAFFT defines this as the sum over unique tuples\r
-               // in seq2 of the minimum of the number of tuples found\r
-               // in the two sequences.\r
-                       unsigned uSum = 0;\r
-                       for (unsigned n = 0; n < uTupleCount; ++n)\r
-                               {\r
-                               const unsigned uTuple = GetTuple(L, n);\r
-                               uSum += MIN(Count1[uTuple], Count2[uTuple]);\r
-\r
-                       // This is a hack to make sure each unique tuple counted only once.\r
-                               Count2[uTuple] = 0;\r
-                               }\r
-#if    TRACE\r
-                       {\r
-                       Seq &s1 = *(v[uSeq1]);\r
-                       Seq &s2 = *(v[uSeq2]);\r
-                       const char *pName1 = s1.GetName();\r
-                       const char *pName2 = s2.GetName();\r
-                       Log("Common count %s(%d) - %s(%d) =%u\n",\r
-                         pName1, uSeq1, pName2, uSeq2, uSum);\r
-                       }\r
-#endif\r
-                       uCommonTupleCount[uSeq1][uSeq2] = uSum;\r
-                       uCommonTupleCount[uSeq2][uSeq1] = uSum;\r
-                       }\r
-               }\r
-       ProgressStepsDone();\r
-\r
-       uCount = 0;\r
-       SetProgressDesc("K-mer dist pass 2");\r
-       for (unsigned uSeq1 = 0; uSeq1 < uSeqCount; ++uSeq1)\r
-               {\r
-               Seq &s1 = *(v[uSeq1]);\r
-               const char *pName1 = s1.GetName();\r
-\r
-               double dCommonTupleCount11 = uCommonTupleCount[uSeq1][uSeq1];\r
-               if (0 == dCommonTupleCount11)\r
-                       dCommonTupleCount11 = 1;\r
-\r
-               DF.SetDist(uSeq1, uSeq1, 0);\r
-               for (unsigned uSeq2 = 0; uSeq2 < uSeq1; ++uSeq2)\r
-                       {\r
-                       if (0 == uCount%500)\r
-                               Progress(uCount, uPairCount);\r
-                       ++uCount;\r
-\r
-                       double dCommonTupleCount22 = uCommonTupleCount[uSeq2][uSeq2];\r
-                       if (0 == dCommonTupleCount22)\r
-                               dCommonTupleCount22 = 1;\r
-\r
-                       const double dDist1 = 3.0*(dCommonTupleCount11 - uCommonTupleCount[uSeq1][uSeq2])\r
-                         /dCommonTupleCount11;\r
-                       const double dDist2 = 3.0*(dCommonTupleCount22 - uCommonTupleCount[uSeq1][uSeq2])\r
-                         /dCommonTupleCount22;\r
-\r
-               // dMinDist is the value used for tree-building in MAFFT\r
-                       const double dMinDist = MIN(dDist1, dDist2);\r
-                       DF.SetDist(uSeq1, uSeq2, (float) dMinDist);\r
-\r
-                       //const double dEstimatedPctId = TupleDistToEstimatedPctId(dMinDist);\r
-                       //g_dfPwId.SetDist(uSeq1, uSeq2, dEstimatedPctId);\r
-               // **** TODO **** why does this make score slightly worse??\r
-                       //const double dKimuraDist = KimuraDist(dEstimatedPctId);\r
-                       //DF.SetDist(uSeq1, uSeq2, dKimuraDist);\r
-                       }\r
-               }\r
-       ProgressStepsDone();\r
-\r
-       for (unsigned n = 0; n < uSeqCount; ++n)\r
-               {\r
-               delete[] uCommonTupleCount[n];\r
-               delete[] Letters[n];\r
-               }\r
-       delete[] uCommonTupleCount;\r
-       delete[] Letters;\r
-       }\r