Delete unneeded directory
[jabaws.git] / website / archive / binaries / mac / src / muscle / fastdistjones.cpp
diff --git a/website/archive/binaries/mac/src/muscle/fastdistjones.cpp b/website/archive/binaries/mac/src/muscle/fastdistjones.cpp
deleted file mode 100644 (file)
index bacc485..0000000
+++ /dev/null
@@ -1,206 +0,0 @@
-#include "muscle.h"\r
-#include "distfunc.h"\r
-#include "seqvect.h"\r
-#include <math.h>\r
-\r
-const unsigned TRIPLE_COUNT = 20*20*20;\r
-\r
-struct TripleCount\r
-       {\r
-       unsigned m_uSeqCount;                   // How many sequences have this triple?\r
-       unsigned short *m_Counts;               // m_Counts[s] = nr of times triple found in seq s\r
-       };\r
-static TripleCount *TripleCounts;\r
-\r
-// WARNING: Sequences MUST be stripped of gaps and upper case!\r
-void DistKmer20_3(const SeqVect &v, DistFunc &DF)\r
-       {\r
-       const unsigned uSeqCount = v.Length();\r
-\r
-       DF.SetCount(uSeqCount);\r
-       if (0 == uSeqCount)\r
-               return;\r
-       for (unsigned uSeq1 = 0; uSeq1 < uSeqCount; ++uSeq1)\r
-               {\r
-               DF.SetDist(uSeq1, uSeq1, 0);\r
-               for (unsigned uSeq2 = 0; uSeq2 < uSeq1; ++uSeq2)\r
-                       DF.SetDist(uSeq1, uSeq2, 0);\r
-               }\r
-\r
-       const unsigned uTripleArrayBytes = TRIPLE_COUNT*sizeof(TripleCount);\r
-       TripleCounts = (TripleCount *) malloc(uTripleArrayBytes);\r
-       if (0 == TripleCounts)\r
-               Quit("Not enough memory (TripleCounts)");\r
-       memset(TripleCounts, 0, uTripleArrayBytes);\r
-\r
-       for (unsigned uWord = 0; uWord < TRIPLE_COUNT; ++uWord)\r
-               {\r
-               TripleCount &tc = *(TripleCounts + uWord);\r
-               const unsigned uBytes = uSeqCount*sizeof(short);\r
-               tc.m_Counts = (unsigned short *) malloc(uBytes);\r
-               memset(tc.m_Counts, 0, uBytes);\r
-               }\r
-\r
-       for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)\r
-               {\r
-               Seq &s = *(v[uSeqIndex]);\r
-               const unsigned uSeqLength = s.Length();\r
-               for (unsigned uPos = 0; uPos < uSeqLength - 2; ++uPos)\r
-                       {\r
-                       const unsigned uLetter1 = CharToLetterEx(s[uPos]);\r
-                       if (uLetter1 >= 20)\r
-                               continue;\r
-                       const unsigned uLetter2 = CharToLetterEx(s[uPos+1]);\r
-                       if (uLetter2 >= 20)\r
-                               continue;\r
-                       const unsigned uLetter3 = CharToLetterEx(s[uPos+2]);\r
-                       if (uLetter3 >= 20)\r
-                               continue;\r
-\r
-                       const unsigned uWord = uLetter1 + uLetter2*20 + uLetter3*20*20;\r
-                       assert(uWord < TRIPLE_COUNT);\r
-\r
-                       TripleCount &tc = *(TripleCounts + uWord);\r
-                       const unsigned uOldCount = tc.m_Counts[uSeqIndex];\r
-                       if (0 == uOldCount)\r
-                               ++(tc.m_uSeqCount);\r
-\r
-                       ++(tc.m_Counts[uSeqIndex]);\r
-                       }\r
-               }\r
-\r
-#if TRACE\r
-       {\r
-       Log("TripleCounts\n");\r
-       unsigned uGrandTotal = 0;\r
-       for (unsigned uWord = 0; uWord < TRIPLE_COUNT; ++uWord)\r
-               {\r
-               const TripleCount &tc = *(TripleCounts + uWord);\r
-               if (0 == tc.m_uSeqCount)\r
-                       continue;\r
-\r
-               const unsigned uLetter3 = uWord/(20*20);\r
-               const unsigned uLetter2 = (uWord - uLetter3*20*20)/20;\r
-               const unsigned uLetter1 = uWord%20;\r
-               Log("Word %6u %c%c%c   %6u",\r
-                 uWord,\r
-                 LetterToCharAmino(uLetter1),\r
-                 LetterToCharAmino(uLetter2),\r
-                 LetterToCharAmino(uLetter3),\r
-                 tc.m_uSeqCount);\r
-\r
-               unsigned uSeqCountWithThisWord = 0;\r
-               for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)\r
-                       {\r
-                       const unsigned uCount = tc.m_Counts[uSeqIndex];\r
-                       if (uCount > 0)\r
-                               {\r
-                               ++uSeqCountWithThisWord;\r
-                               Log(" %u=%u", uSeqIndex, uCount);\r
-                               uGrandTotal += uCount;\r
-                               }\r
-                       }\r
-               if (uSeqCountWithThisWord != tc.m_uSeqCount)\r
-                       Log(" *** SQ ERROR *** %u %u", tc.m_uSeqCount, uSeqCountWithThisWord);\r
-               Log("\n");\r
-               }\r
-       \r
-       unsigned uTotalBySeqLength = 0;\r
-       for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)\r
-               {\r
-               Seq &s = *(v[uSeqIndex]);\r
-               const unsigned uSeqLength = s.Length();\r
-               uTotalBySeqLength += uSeqLength - 2;\r
-               }\r
-       if (uGrandTotal != uTotalBySeqLength)\r
-               Log("*** TOTALS DISAGREE *** %u %u\n", uGrandTotal, uTotalBySeqLength);\r
-       }\r
-#endif\r
-\r
-       const unsigned uSeqListBytes = uSeqCount*sizeof(unsigned);\r
-       unsigned short *SeqList = (unsigned short *) malloc(uSeqListBytes);\r
-\r
-       for (unsigned uWord = 0; uWord < TRIPLE_COUNT; ++uWord)\r
-               {\r
-               const TripleCount &tc = *(TripleCounts + uWord);\r
-               if (0 == tc.m_uSeqCount)\r
-                       continue;\r
-\r
-               unsigned uSeqCountFound = 0;\r
-               memset(SeqList, 0, uSeqListBytes);\r
-\r
-               for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)\r
-                       {\r
-                       if (tc.m_Counts[uSeqIndex] > 0)\r
-                               {\r
-                               SeqList[uSeqCountFound] = uSeqIndex;\r
-                               ++uSeqCountFound;\r
-                               if (uSeqCountFound == tc.m_uSeqCount)\r
-                                       break;\r
-                               }\r
-                       }\r
-               assert(uSeqCountFound == tc.m_uSeqCount);\r
-\r
-               for (unsigned uSeq1 = 0; uSeq1 < uSeqCountFound; ++uSeq1)\r
-                       {\r
-                       const unsigned uSeqIndex1 = SeqList[uSeq1];\r
-                       const unsigned uCount1 = tc.m_Counts[uSeqIndex1];\r
-                       for (unsigned uSeq2 = 0; uSeq2 < uSeq1; ++uSeq2)\r
-                               {\r
-                               const unsigned uSeqIndex2 = SeqList[uSeq2];\r
-                               const unsigned uCount2 = tc.m_Counts[uSeqIndex2];\r
-                               const unsigned uMinCount = uCount1 < uCount2 ? uCount1 : uCount2;\r
-                               const double d = DF.GetDist(uSeqIndex1, uSeqIndex2);\r
-                               DF.SetDist(uSeqIndex1, uSeqIndex2, (float) (d + uMinCount));\r
-                               }\r
-                       }\r
-               }\r
-       delete[] SeqList;\r
-       free(TripleCounts);\r
-\r
-       unsigned uDone = 0;\r
-       const unsigned uTotal = (uSeqCount*(uSeqCount - 1))/2;\r
-       for (unsigned uSeq1 = 0; uSeq1 < uSeqCount; ++uSeq1)\r
-               {\r
-               DF.SetDist(uSeq1, uSeq1, 0.0);\r
-\r
-               const Seq &s1 = *(v[uSeq1]);\r
-               const unsigned uLength1 = s1.Length();\r
-\r
-               for (unsigned uSeq2 = 0; uSeq2 < uSeq1; ++uSeq2)\r
-                       {\r
-                       const Seq &s2 = *(v[uSeq2]);\r
-                       const unsigned uLength2 = s2.Length();\r
-                       unsigned uMinLength = uLength1 < uLength2 ? uLength1 : uLength2;\r
-                       if (uMinLength < 3)\r
-                               {\r
-                               DF.SetDist(uSeq1, uSeq2, 1.0);\r
-                               continue;\r
-                               }\r
-\r
-                       const double dTripleCount = DF.GetDist(uSeq1, uSeq2);\r
-                       if (dTripleCount == 0)\r
-                               {\r
-                               DF.SetDist(uSeq1, uSeq2, 1.0);\r
-                               continue;\r
-                               }\r
-                       double dNormalizedTripletScore = dTripleCount/(uMinLength - 2);\r
-                       //double dEstimatedPairwiseIdentity = exp(0.3912*log(dNormalizedTripletScore));\r
-                       //if (dEstimatedPairwiseIdentity > 1)\r
-                       //      dEstimatedPairwiseIdentity = 1;\r
-//                     DF.SetDist(uSeq1, uSeq2, (float) (1.0 - dEstimatedPairwiseIdentity));\r
-                       DF.SetDist(uSeq1, uSeq2, (float) dNormalizedTripletScore);\r
-\r
-#if    TRACE\r
-                       {\r
-                       Log("%s - %s  Triplet count = %g  Lengths %u, %u Estimated pwid = %g\n",\r
-                         s1.GetName(), s2.GetName(), dTripleCount, uLength1, uLength2,\r
-                         dEstimatedPairwiseIdentity);\r
-                       }\r
-#endif\r
-                       if (uDone%1000 == 0)\r
-                               Progress(uDone, uTotal);\r
-                       }\r
-               }\r
-       ProgressStepsDone();\r
-       }\r