Delete unneeded directory
[jabaws.git] / website / archive / binaries / mac / src / muscle / objscore2.cpp
diff --git a/website/archive/binaries/mac/src/muscle/objscore2.cpp b/website/archive/binaries/mac/src/muscle/objscore2.cpp
deleted file mode 100644 (file)
index bac5b5a..0000000
+++ /dev/null
@@ -1,522 +0,0 @@
-#include "muscle.h"\r
-#include "msa.h"\r
-#include "profile.h"\r
-#include "objscore.h"\r
-\r
-#define TRACE                  0\r
-#define TRACE_SEQPAIR  0\r
-#define TEST_SPFAST            0\r
-\r
-extern SCOREMATRIX VTML_LA;\r
-extern SCOREMATRIX PAM200;\r
-extern SCOREMATRIX PAM200NoCenter;\r
-extern SCOREMATRIX VTML_SP;\r
-extern SCOREMATRIX VTML_SPNoCenter;\r
-extern SCOREMATRIX NUC_SP;\r
-\r
-SCORE g_SPScoreLetters;\r
-SCORE g_SPScoreGaps;\r
-\r
-static SCORE TermGapScore(bool Gap)\r
-       {\r
-       switch (g_TermGaps)\r
-               {\r
-       case TERMGAPS_Full:\r
-               return 0;\r
-\r
-       case TERMGAPS_Half:\r
-               if (Gap)\r
-                       return g_scoreGapOpen/2;\r
-               return 0;\r
-\r
-       case TERMGAPS_Ext:\r
-               if (Gap)\r
-                       return g_scoreGapExtend;\r
-               return 0;\r
-               }\r
-       Quit("TermGapScore?!");\r
-       return 0;\r
-       }\r
-\r
-SCORE ScoreSeqPairLetters(const MSA &msa1, unsigned uSeqIndex1,\r
-  const MSA &msa2, unsigned uSeqIndex2)\r
-       {\r
-       const unsigned uColCount = msa1.GetColCount();\r
-       const unsigned uColCount2 = msa2.GetColCount();\r
-       if (uColCount != uColCount2)\r
-               Quit("ScoreSeqPairLetters, different lengths");\r
-\r
-#if    TRACE_SEQPAIR\r
-       {\r
-       Log("\n");\r
-       Log("ScoreSeqPairLetters\n");\r
-       MSA msaTmp;\r
-       msaTmp.SetSize(2, uColCount);\r
-       msaTmp.CopySeq(0, msa1, uSeqIndex1);\r
-       msaTmp.CopySeq(1, msa2, uSeqIndex2);\r
-       msaTmp.LogMe();\r
-       }\r
-#endif\r
-\r
-       SCORE scoreLetters = 0;\r
-       SCORE scoreGaps = 0;\r
-       bool bGapping1 = false;\r
-       bool bGapping2 = false;\r
-\r
-       unsigned uColStart = 0;\r
-       bool bLeftTermGap = false;\r
-       for (unsigned uColIndex = 0; uColIndex < uColCount; ++uColIndex)\r
-               {\r
-               bool bGap1 = msa1.IsGap(uSeqIndex1, uColIndex);\r
-               bool bGap2 = msa2.IsGap(uSeqIndex2, uColIndex);\r
-               if (!bGap1 || !bGap2)\r
-                       {\r
-                       if (bGap1 || bGap2)\r
-                               bLeftTermGap = true;\r
-                       uColStart = uColIndex;\r
-                       break;\r
-                       }\r
-               }\r
-\r
-       unsigned uColEnd = uColCount - 1;\r
-       bool bRightTermGap = false;\r
-       for (int iColIndex = (int) uColCount - 1; iColIndex >= 0; --iColIndex)\r
-               {\r
-               bool bGap1 = msa1.IsGap(uSeqIndex1, iColIndex);\r
-               bool bGap2 = msa2.IsGap(uSeqIndex2, iColIndex);\r
-               if (!bGap1 || !bGap2)\r
-                       {\r
-                       if (bGap1 || bGap2)\r
-                               bRightTermGap = true;\r
-                       uColEnd = (unsigned) iColIndex;\r
-                       break;\r
-                       }\r
-               }\r
-\r
-#if    TRACE_SEQPAIR\r
-       Log("LeftTermGap=%d RightTermGap=%d\n", bLeftTermGap, bRightTermGap);\r
-#endif\r
-\r
-       for (unsigned uColIndex = uColStart; uColIndex <= uColEnd; ++uColIndex)\r
-               {\r
-               unsigned uLetter1 = msa1.GetLetterEx(uSeqIndex1, uColIndex);\r
-               if (uLetter1 >= g_AlphaSize)\r
-                       continue;\r
-               unsigned uLetter2 = msa2.GetLetterEx(uSeqIndex2, uColIndex);\r
-               if (uLetter2 >= g_AlphaSize)\r
-                       continue;\r
-\r
-               SCORE scoreMatch = (*g_ptrScoreMatrix)[uLetter1][uLetter2];\r
-               scoreLetters += scoreMatch;\r
-               }\r
-       return scoreLetters;\r
-       }\r
-\r
-SCORE ScoreSeqPairGaps(const MSA &msa1, unsigned uSeqIndex1,\r
-  const MSA &msa2, unsigned uSeqIndex2)\r
-       {\r
-       const unsigned uColCount = msa1.GetColCount();\r
-       const unsigned uColCount2 = msa2.GetColCount();\r
-       if (uColCount != uColCount2)\r
-               Quit("ScoreSeqPairGaps, different lengths");\r
-\r
-#if    TRACE_SEQPAIR\r
-       {\r
-       Log("\n");\r
-       Log("ScoreSeqPairGaps\n");\r
-       MSA msaTmp;\r
-       msaTmp.SetSize(2, uColCount);\r
-       msaTmp.CopySeq(0, msa1, uSeqIndex1);\r
-       msaTmp.CopySeq(1, msa2, uSeqIndex2);\r
-       msaTmp.LogMe();\r
-       }\r
-#endif\r
-\r
-       SCORE scoreGaps = 0;\r
-       bool bGapping1 = false;\r
-       bool bGapping2 = false;\r
-\r
-       unsigned uColStart = 0;\r
-       bool bLeftTermGap = false;\r
-       for (unsigned uColIndex = 0; uColIndex < uColCount; ++uColIndex)\r
-               {\r
-               bool bGap1 = msa1.IsGap(uSeqIndex1, uColIndex);\r
-               bool bGap2 = msa2.IsGap(uSeqIndex2, uColIndex);\r
-               if (!bGap1 || !bGap2)\r
-                       {\r
-                       if (bGap1 || bGap2)\r
-                               bLeftTermGap = true;\r
-                       uColStart = uColIndex;\r
-                       break;\r
-                       }\r
-               }\r
-\r
-       unsigned uColEnd = uColCount - 1;\r
-       bool bRightTermGap = false;\r
-       for (int iColIndex = (int) uColCount - 1; iColIndex >= 0; --iColIndex)\r
-               {\r
-               bool bGap1 = msa1.IsGap(uSeqIndex1, iColIndex);\r
-               bool bGap2 = msa2.IsGap(uSeqIndex2, iColIndex);\r
-               if (!bGap1 || !bGap2)\r
-                       {\r
-                       if (bGap1 || bGap2)\r
-                               bRightTermGap = true;\r
-                       uColEnd = (unsigned) iColIndex;\r
-                       break;\r
-                       }\r
-               }\r
-\r
-#if    TRACE_SEQPAIR\r
-       Log("LeftTermGap=%d RightTermGap=%d\n", bLeftTermGap, bRightTermGap);\r
-#endif\r
-\r
-       for (unsigned uColIndex = uColStart; uColIndex <= uColEnd; ++uColIndex)\r
-               {\r
-               bool bGap1 = msa1.IsGap(uSeqIndex1, uColIndex);\r
-               bool bGap2 = msa2.IsGap(uSeqIndex2, uColIndex);\r
-\r
-               if (bGap1 && bGap2)\r
-                       continue;\r
-\r
-               if (bGap1)\r
-                       {\r
-                       if (!bGapping1)\r
-                               {\r
-#if    TRACE_SEQPAIR\r
-                               Log("Gap open seq 1 col %d\n", uColIndex);\r
-#endif\r
-                               if (uColIndex == uColStart)\r
-                                       scoreGaps += TermGapScore(true);\r
-                               else\r
-                                       scoreGaps += g_scoreGapOpen;\r
-                               bGapping1 = true;\r
-                               }\r
-                       else\r
-                               scoreGaps += g_scoreGapExtend;\r
-                       continue;\r
-                       }\r
-\r
-               else if (bGap2)\r
-                       {\r
-                       if (!bGapping2)\r
-                               {\r
-#if    TRACE_SEQPAIR\r
-                               Log("Gap open seq 2 col %d\n", uColIndex);\r
-#endif\r
-                               if (uColIndex == uColStart)\r
-                                       scoreGaps += TermGapScore(true);\r
-                               else\r
-                                       scoreGaps += g_scoreGapOpen;\r
-                               bGapping2 = true;\r
-                               }\r
-                       else\r
-                               scoreGaps += g_scoreGapExtend;\r
-                       continue;\r
-                       }\r
-\r
-               bGapping1 = false;\r
-               bGapping2 = false;\r
-               }\r
-\r
-       if (bGapping1 || bGapping2)\r
-               {\r
-               scoreGaps -= g_scoreGapOpen;\r
-               scoreGaps += TermGapScore(true);\r
-               }\r
-       return scoreGaps;\r
-       }\r
-\r
-// The usual sum-of-pairs objective score: sum the score\r
-// of the alignment of each pair of sequences.\r
-SCORE ObjScoreSP(const MSA &msa, SCORE MatchScore[])\r
-       {\r
-#if    TRACE\r
-       Log("==================ObjScoreSP==============\n");\r
-       Log("msa=\n");\r
-       msa.LogMe();\r
-#endif\r
-       g_SPScoreLetters = 0;\r
-       g_SPScoreGaps = 0;\r
-\r
-       if (0 != MatchScore)\r
-               {\r
-               const unsigned uColCount = msa.GetColCount();\r
-               for (unsigned uColIndex = 0; uColIndex < uColCount; ++uColIndex)\r
-                       MatchScore[uColIndex] = 0;\r
-               }\r
-\r
-       const unsigned uSeqCount = msa.GetSeqCount();\r
-       SCORE scoreTotal = 0;\r
-       unsigned uPairCount = 0;\r
-#if    TRACE\r
-       Log("Seq1  Seq2     wt1     wt2    Letters         Gaps  Unwt.Score    Wt.Score       Total\n");\r
-       Log("----  ----  ------  ------  ----------  ----------  ----------  ----------  ----------\n");\r
-#endif\r
-       for (unsigned uSeqIndex1 = 0; uSeqIndex1 < uSeqCount; ++uSeqIndex1)\r
-               {\r
-               const WEIGHT w1 = msa.GetSeqWeight(uSeqIndex1);\r
-               for (unsigned uSeqIndex2 = uSeqIndex1 + 1; uSeqIndex2 < uSeqCount; ++uSeqIndex2)\r
-                       {\r
-                       const WEIGHT w2 = msa.GetSeqWeight(uSeqIndex2);\r
-                       const WEIGHT w = w1*w2;\r
-\r
-                       SCORE scoreLetters = ScoreSeqPairLetters(msa, uSeqIndex1, msa, uSeqIndex2);\r
-                       SCORE scoreGaps = ScoreSeqPairGaps(msa, uSeqIndex1, msa, uSeqIndex2);\r
-                       SCORE scorePair = scoreLetters + scoreGaps;\r
-                       ++uPairCount;\r
-\r
-                       scoreTotal += w*scorePair;\r
-\r
-                       g_SPScoreLetters += w*scoreLetters;\r
-                       g_SPScoreGaps += w*scoreGaps;\r
-#if    TRACE\r
-                       Log("%4d  %4d  %6.3f  %6.3f  %10.2f  %10.2f  %10.2f  %10.2f  %10.2f >%s >%s\n",\r
-                         uSeqIndex1,\r
-                         uSeqIndex2,\r
-                         w1,\r
-                         w2,\r
-                         scoreLetters,\r
-                         scoreGaps,\r
-                         scorePair,\r
-                         scorePair*w1*w2,\r
-                         scoreTotal,\r
-                         msa.GetSeqName(uSeqIndex1),\r
-                         msa.GetSeqName(uSeqIndex2));\r
-#endif\r
-                       }\r
-               }\r
-#if    TEST_SPFAST\r
-       {\r
-       SCORE f = ObjScoreSPFast(msa);\r
-       Log("Fast  = %.6g\n", f);\r
-       Log("Brute = %.6g\n", scoreTotal);\r
-       if (BTEq(f, scoreTotal))\r
-               Log("Agree\n");\r
-       else\r
-               Log("** DISAGREE **\n");\r
-       }\r
-#endif\r
-//     return scoreTotal / uPairCount;\r
-       return scoreTotal;\r
-       }\r
-\r
-// Objective score defined as the dynamic programming score.\r
-// Input is two alignments, which must be of the same length.\r
-// Result is the same profile-profile score that is optimized\r
-// by dynamic programming.\r
-SCORE ObjScoreDP(const MSA &msa1, const MSA &msa2, SCORE MatchScore[])\r
-       {\r
-       const unsigned uColCount = msa1.GetColCount();\r
-       if (msa2.GetColCount() != uColCount)\r
-               Quit("ObjScoreDP, must be same length");\r
-\r
-       const unsigned uColCount1 = msa1.GetColCount();\r
-       const unsigned uColCount2 = msa2.GetColCount();\r
-\r
-       const ProfPos *PA = ProfileFromMSA(msa1);\r
-       const ProfPos *PB = ProfileFromMSA(msa2);\r
-\r
-       return ObjScoreDP_Profs(PA, PB, uColCount1, MatchScore);\r
-       }\r
-\r
-SCORE ObjScoreDP_Profs(const ProfPos *PA, const ProfPos *PB, unsigned uColCount,\r
-  SCORE MatchScore[])\r
-       {\r
-//#if  TRACE\r
-//     Log("Profile 1:\n");\r
-//     ListProfile(PA, uColCount, &msa1);\r
-//\r
-//     Log("Profile 2:\n");\r
-//     ListProfile(PB, uColCount, &msa2);\r
-//#endif\r
-\r
-       SCORE scoreTotal = 0;\r
-\r
-       for (unsigned uColIndex = 0; uColIndex < uColCount; ++uColIndex)\r
-               {\r
-               const ProfPos &PPA = PA[uColIndex];\r
-               const ProfPos &PPB = PB[uColIndex];\r
-\r
-               SCORE scoreGap = 0;\r
-               SCORE scoreMatch = 0;\r
-       // If gapped column...\r
-               if (PPA.m_bAllGaps && PPB.m_bAllGaps)\r
-                       scoreGap = 0;\r
-               else if (PPA.m_bAllGaps)\r
-                       {\r
-                       if (uColCount - 1 == uColIndex || !PA[uColIndex+1].m_bAllGaps)\r
-                               scoreGap = PPB.m_scoreGapClose;\r
-                       if (0 == uColIndex || !PA[uColIndex-1].m_bAllGaps)\r
-                               scoreGap += PPB.m_scoreGapOpen;\r
-                       //if (0 == scoreGap)\r
-                       //      scoreGap = PPB.m_scoreGapExtend;\r
-                       }\r
-               else if (PPB.m_bAllGaps)\r
-                       {\r
-                       if (uColCount - 1 == uColIndex || !PB[uColIndex+1].m_bAllGaps)\r
-                               scoreGap = PPA.m_scoreGapClose;\r
-                       if (0 == uColIndex || !PB[uColIndex-1].m_bAllGaps)\r
-                               scoreGap += PPA.m_scoreGapOpen;\r
-                       //if (0 == scoreGap)\r
-                       //      scoreGap = PPA.m_scoreGapExtend;\r
-                       }\r
-               else\r
-                       scoreMatch = ScoreProfPos2(PPA, PPB);\r
-\r
-               if (0 != MatchScore)\r
-                       MatchScore[uColIndex] = scoreMatch;\r
-\r
-               scoreTotal += scoreMatch + scoreGap;\r
-\r
-               extern bool g_bTracePPScore;\r
-               extern MSA *g_ptrPPScoreMSA1;\r
-               extern MSA *g_ptrPPScoreMSA2;\r
-               if (g_bTracePPScore)\r
-                       {\r
-                       const MSA &msa1 = *g_ptrPPScoreMSA1;\r
-                       const MSA &msa2 = *g_ptrPPScoreMSA2;\r
-                       const unsigned uSeqCount1 = msa1.GetSeqCount();\r
-                       const unsigned uSeqCount2 = msa2.GetSeqCount();\r
-\r
-                       for (unsigned n = 0; n < uSeqCount1; ++n)\r
-                               Log("%c", msa1.GetChar(n, uColIndex));\r
-                       Log("  ");\r
-                       for (unsigned n = 0; n < uSeqCount2; ++n)\r
-                               Log("%c", msa2.GetChar(n, uColIndex));\r
-                       Log("  %10.3f", scoreMatch);\r
-                       if (scoreGap != 0)\r
-                               Log("  %10.3f", scoreGap);\r
-                       Log("\n");\r
-                       }\r
-               }\r
-\r
-       delete[] PA;\r
-       delete[] PB;\r
-\r
-       return scoreTotal;\r
-       }\r
-\r
-// Objective score defined as the sum of profile-sequence\r
-// scores for each sequence in the alignment. The profile\r
-// is computed from the entire alignment, so this includes\r
-// the score of each sequence against itself. This is to\r
-// avoid recomputing the profile each time, so we reduce\r
-// complexity but introduce a questionable approximation.\r
-// The goal is to see if we can exploit the apparent\r
-// improvement in performance of log-expectation score\r
-// over the usual sum-of-pairs by optimizing this\r
-// objective score in the iterative refinement stage.\r
-SCORE ObjScorePS(const MSA &msa, SCORE MatchScore[])\r
-       {\r
-       if (g_PPScore != PPSCORE_LE)\r
-               Quit("FastScoreMSA_LASimple: LA");\r
-\r
-       const unsigned uSeqCount = msa.GetSeqCount();\r
-       const unsigned uColCount = msa.GetColCount();\r
-\r
-       const ProfPos *Prof = ProfileFromMSA(msa);\r
-\r
-       if (0 != MatchScore)\r
-               for (unsigned uColIndex = 0; uColIndex < uColCount; ++uColIndex)\r
-                       MatchScore[uColIndex] = 0;\r
-\r
-       SCORE scoreTotal = 0;\r
-       for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)\r
-               {\r
-               const WEIGHT weightSeq = msa.GetSeqWeight(uSeqIndex);\r
-               SCORE scoreSeq = 0;\r
-               for (unsigned uColIndex = 0; uColIndex < uColCount; ++uColIndex)\r
-                       {\r
-                       const ProfPos &PP = Prof[uColIndex];\r
-                       if (msa.IsGap(uSeqIndex, uColIndex))\r
-                               {\r
-                               bool bOpen = (0 == uColIndex ||\r
-                                 !msa.IsGap(uSeqIndex, uColIndex - 1));\r
-                               bool bClose = (uColCount - 1 == uColIndex ||\r
-                                 !msa.IsGap(uSeqIndex, uColIndex + 1));\r
-\r
-                               if (bOpen)\r
-                                       scoreSeq += PP.m_scoreGapOpen;\r
-                               if (bClose)\r
-                                       scoreSeq += PP.m_scoreGapClose;\r
-                               //if (!bOpen && !bClose)\r
-                               //      scoreSeq += PP.m_scoreGapExtend;\r
-                               }\r
-                       else if (msa.IsWildcard(uSeqIndex, uColIndex))\r
-                               continue;\r
-                       else\r
-                               {\r
-                               unsigned uLetter = msa.GetLetter(uSeqIndex, uColIndex);\r
-                               const SCORE scoreMatch = PP.m_AAScores[uLetter];\r
-                               if (0 != MatchScore)\r
-                                       MatchScore[uColIndex] += weightSeq*scoreMatch;\r
-                               scoreSeq += scoreMatch;\r
-                               }\r
-                       }\r
-               scoreTotal += weightSeq*scoreSeq;\r
-               }\r
-\r
-       delete[] Prof;\r
-       return scoreTotal;\r
-       }\r
-\r
-// The XP score is the sum of the score of each pair of\r
-// sequences between two profiles which are aligned to\r
-// each other. Notice that for two given profiles aligned\r
-// in different ways, the difference in XP score must be\r
-// the same as the difference in SP score because the\r
-// score of a pair of sequences in one profile doesn't\r
-// depend on the alignment.\r
-SCORE ObjScoreXP(const MSA &msa1, const MSA &msa2)\r
-       {\r
-       const unsigned uColCount1 = msa1.GetColCount();\r
-       const unsigned uColCount2 = msa2.GetColCount();\r
-       if (uColCount1 != uColCount2)\r
-               Quit("ObjScoreXP, alignment lengths differ %u %u", uColCount1, uColCount2);\r
-\r
-       const unsigned uSeqCount1 = msa1.GetSeqCount();\r
-       const unsigned uSeqCount2 = msa2.GetSeqCount();\r
-\r
-#if    TRACE\r
-       Log("     Score  Weight  Weight       Total\n");\r
-       Log("----------  ------  ------  ----------\n");\r
-#endif\r
-\r
-       SCORE scoreTotal = 0;\r
-       unsigned uPairCount = 0;\r
-       for (unsigned uSeqIndex1 = 0; uSeqIndex1 < uSeqCount1; ++uSeqIndex1)\r
-               {\r
-               const WEIGHT w1 = msa1.GetSeqWeight(uSeqIndex1);\r
-               for (unsigned uSeqIndex2 = 0; uSeqIndex2 < uSeqCount2; ++uSeqIndex2)\r
-                       {\r
-                       const WEIGHT w2 = msa2.GetSeqWeight(uSeqIndex2);\r
-                       const WEIGHT w = w1*w2;\r
-                       SCORE scoreLetters = ScoreSeqPairLetters(msa1, uSeqIndex1, msa2, uSeqIndex2);\r
-                       SCORE scoreGaps = ScoreSeqPairGaps(msa1, uSeqIndex1, msa2, uSeqIndex2);\r
-                       SCORE scorePair = scoreLetters + scoreGaps;\r
-                       scoreTotal += w1*w2*scorePair;\r
-                       ++uPairCount;\r
-#if    TRACE\r
-                       Log("%10.2f  %6.3f  %6.3f  %10.2f  >%s >%s\n",\r
-                         scorePair,\r
-                         w1,\r
-                         w2,\r
-                         scorePair*w1*w2,\r
-                         msa1.GetSeqName(uSeqIndex1),\r
-                         msa2.GetSeqName(uSeqIndex2));\r
-#endif\r
-                       }\r
-               }\r
-       if (0 == uPairCount)\r
-               Quit("0 == uPairCount");\r
-\r
-#if    TRACE\r
-       Log("msa1=\n");\r
-       msa1.LogMe();\r
-       Log("msa2=\n");\r
-       msa2.LogMe();\r
-       Log("XP=%g\n", scoreTotal);\r
-#endif\r
-//     return scoreTotal / uPairCount;\r
-       return scoreTotal;\r
-       }\r