Delete unneeded directory
[jabaws.git] / website / archive / binaries / mac / src / muscle / subfam.cpp
diff --git a/website/archive/binaries/mac/src/muscle/subfam.cpp b/website/archive/binaries/mac/src/muscle/subfam.cpp
deleted file mode 100644 (file)
index e720cfa..0000000
+++ /dev/null
@@ -1,409 +0,0 @@
-#include "muscle.h"\r
-#include "tree.h"\r
-#include "textfile.h"  // for test only\r
-#include "msa.h"\r
-#include "seqvect.h"\r
-#include "profile.h"\r
-#ifndef _MSC_VER\r
-#include <unistd.h>    //      for unlink\r
-#endif\r
-\r
-#define TRACE  0\r
-\r
-/***\r
-Find subfamilies from tree by following criteria:\r
-\r
-(a) number of leaves <= max,\r
-(b) is monophyletic, i.e. most recent common ancestor is parent\r
-of no more than one subfamily.\r
-***/\r
-\r
-static unsigned SubFamRecurse(const Tree &tree, unsigned uNodeIndex, unsigned uMaxLeafCount,\r
-  unsigned SubFams[], unsigned &uSubFamCount)\r
-       {\r
-       if (tree.IsLeaf(uNodeIndex))\r
-               return 1;\r
-\r
-       unsigned uLeft = tree.GetLeft(uNodeIndex);\r
-       unsigned uRight = tree.GetRight(uNodeIndex);\r
-       unsigned uLeftCount = SubFamRecurse(tree, uLeft, uMaxLeafCount, SubFams, uSubFamCount);\r
-       unsigned uRightCount = SubFamRecurse(tree, uRight, uMaxLeafCount, SubFams, uSubFamCount);\r
-\r
-       unsigned uLeafCount = uLeftCount + uRightCount;\r
-       if (uLeftCount + uRightCount > uMaxLeafCount)\r
-               {\r
-               if (uLeftCount <= uMaxLeafCount)\r
-                       SubFams[uSubFamCount++] = uLeft;\r
-               if (uRightCount <= uMaxLeafCount)\r
-                       SubFams[uSubFamCount++] = uRight;\r
-               }\r
-       else if (tree.IsRoot(uNodeIndex))\r
-               {\r
-               if (uSubFamCount != 0)\r
-                       Quit("Error in SubFamRecurse");\r
-               SubFams[uSubFamCount++] = uNodeIndex;\r
-               }\r
-\r
-       return uLeafCount;\r
-       }\r
-\r
-void SubFam(const Tree &tree, unsigned uMaxLeafCount, unsigned SubFams[], unsigned *ptruSubFamCount)\r
-       {\r
-       *ptruSubFamCount = 0;\r
-       SubFamRecurse(tree, tree.GetRootNodeIndex(), uMaxLeafCount, SubFams, *ptruSubFamCount);\r
-\r
-#if    TRACE\r
-       {\r
-       Log("\n");\r
-       Log("Tree:\n");\r
-       tree.LogMe();\r
-       //void DrawTree(const Tree &tree);\r
-       //DrawTree(tree);\r
-       Log("\n");\r
-       Log("%d subfams:\n", *ptruSubFamCount);\r
-       for (unsigned i = 0; i < *ptruSubFamCount; ++i)\r
-               Log("  %d=%d", i, SubFams[i]);\r
-       Log("\n");\r
-       }\r
-#endif\r
-       }\r
-\r
-//unsigned SubFams[9999];\r
-//unsigned uSubFamCount;\r
-//\r
-//static unsigned DistFromRoot(const Tree &tree, unsigned uNodeIndex)\r
-//     {\r
-//     const unsigned uRoot = tree.GetRootNodeIndex();\r
-//     unsigned uDist = 0;\r
-//     while (uNodeIndex != uRoot)\r
-//             {\r
-//             ++uDist;\r
-//             uNodeIndex = tree.GetParent(uNodeIndex);\r
-//             }\r
-//     return uDist;\r
-//     }\r
-//\r
-//static void DrawNode(const Tree &tree, unsigned uNodeIndex)\r
-//     {\r
-//     if (!tree.IsLeaf(uNodeIndex))\r
-//             DrawNode(tree, tree.GetLeft(uNodeIndex));\r
-//\r
-//     unsigned uDist = DistFromRoot(tree, uNodeIndex);\r
-//     for (unsigned i = 0; i < 5*uDist; ++i)\r
-//             Log(" ");\r
-//     Log("%d", uNodeIndex);\r
-//     for (unsigned i = 0; i < uSubFamCount; ++i)\r
-//             if (uNodeIndex == SubFams[i])\r
-//                     {\r
-//                     Log("*");\r
-//                     break;\r
-//                     }\r
-//     Log("\n");\r
-//\r
-//     if (!tree.IsLeaf(uNodeIndex))\r
-//             DrawNode(tree, tree.GetRight(uNodeIndex));\r
-//     }\r
-//\r
-//static void DrawTree(const Tree &tree)\r
-//     {\r
-//     unsigned uRoot = tree.GetRootNodeIndex();\r
-//     DrawNode(tree, uRoot);\r
-//     }\r
-//\r
-//void TestSubFams(const char *FileName)\r
-//     {\r
-//     Tree tree;\r
-//     TextFile f(FileName);\r
-//     tree.FromFile(f);\r
-//     SubFam(tree, 5, SubFams, &uSubFamCount);\r
-//     DrawTree(tree);\r
-//     }\r
-\r
-static void SetInFam(const Tree &tree, unsigned uNodeIndex, bool NodeInSubFam[])\r
-       {\r
-       if (tree.IsLeaf(uNodeIndex))\r
-               return;\r
-       unsigned uLeft = tree.GetLeft(uNodeIndex);\r
-       unsigned uRight = tree.GetRight(uNodeIndex);\r
-       NodeInSubFam[uLeft] = true;\r
-       NodeInSubFam[uRight] = true;\r
-\r
-       SetInFam(tree, uLeft, NodeInSubFam);\r
-       SetInFam(tree, uRight, NodeInSubFam);\r
-       }\r
-\r
-void AlignSubFam(SeqVect &vAll, const Tree &GuideTree, unsigned uNodeIndex,\r
-  MSA &msaOut)\r
-       {\r
-       const unsigned uSeqCount = vAll.GetSeqCount();\r
-\r
-       const char *InTmp = "asf_in.tmp";\r
-       const char *OutTmp = "asf_out.tmp";\r
-\r
-       unsigned *Leaves = new unsigned[uSeqCount];\r
-       unsigned uLeafCount;\r
-       GetLeaves(GuideTree, uNodeIndex, Leaves, &uLeafCount);\r
-\r
-       SeqVect v;\r
-       for (unsigned i = 0; i < uLeafCount; ++i)\r
-               {\r
-               unsigned uLeafNodeIndex = Leaves[i];\r
-               unsigned uId = GuideTree.GetLeafId(uLeafNodeIndex);\r
-               Seq &s = vAll.GetSeqById(uId);\r
-               v.AppendSeq(s);\r
-               }\r
-\r
-#if    TRACE\r
-       {\r
-       Log("Align subfam[node=%d, size=%d] ", uNodeIndex, uLeafCount);\r
-       for (unsigned i = 0; i < uLeafCount; ++i)\r
-               Log(" %s", v.GetSeqName(i));\r
-       Log("\n");\r
-       }\r
-#endif\r
-\r
-       TextFile fIn(InTmp, true);\r
-\r
-       v.ToFASTAFile(fIn);\r
-       fIn.Close();\r
-\r
-       char CmdLine[4096];\r
-       sprintf(CmdLine, "probcons %s > %s 2> /dev/null", InTmp, OutTmp);\r
-//     sprintf(CmdLine, "muscle -in %s -out %s -maxiters 1", InTmp, OutTmp);\r
-       int NotUsed = system(CmdLine);\r
-\r
-       TextFile fOut(OutTmp);\r
-       msaOut.FromFile(fOut);\r
-\r
-       for (unsigned uSeqIndex = 0; uSeqIndex < uLeafCount; ++uSeqIndex)\r
-               {\r
-               const char *Name = msaOut.GetSeqName(uSeqIndex);\r
-               unsigned uId = vAll.GetSeqIdFromName(Name);\r
-               msaOut.SetSeqId(uSeqIndex, uId);\r
-               }\r
-\r
-       unlink(InTmp);\r
-       unlink(OutTmp);\r
-\r
-       delete[] Leaves;\r
-       }\r
-\r
-void ProgAlignSubFams()\r
-       {\r
-       MSA msaOut;\r
-\r
-       SetOutputFileName(g_pstrOutFileName);\r
-       SetInputFileName(g_pstrInFileName);\r
-\r
-       SetMaxIters(g_uMaxIters);\r
-       SetSeqWeightMethod(g_SeqWeight1);\r
-\r
-       TextFile fileIn(g_pstrInFileName);\r
-       SeqVect v;\r
-       v.FromFASTAFile(fileIn);\r
-       const unsigned uSeqCount = v.Length();\r
-\r
-       if (0 == uSeqCount)\r
-               Quit("No sequences in input file");\r
-\r
-       ALPHA Alpha = ALPHA_Undefined;\r
-       switch (g_SeqType)\r
-               {\r
-       case SEQTYPE_Auto:\r
-               Alpha = v.GuessAlpha();\r
-               break;\r
-\r
-       case SEQTYPE_Protein:\r
-               Alpha = ALPHA_Amino;\r
-               break;\r
-\r
-       case SEQTYPE_DNA:\r
-               Alpha = ALPHA_DNA;\r
-               break;\r
-\r
-       case SEQTYPE_RNA:\r
-               Alpha = ALPHA_RNA;\r
-               break;\r
-\r
-       default:\r
-               Quit("Invalid seq type");\r
-               }\r
-       SetAlpha(Alpha);\r
-       v.FixAlpha();\r
-\r
-       PTR_SCOREMATRIX UserMatrix = 0;\r
-       if (0 != g_pstrMatrixFileName)\r
-               {\r
-               const char *FileName = g_pstrMatrixFileName;\r
-               const char *Path = getenv("MUSCLE_MXPATH");\r
-               if (Path != 0)\r
-                       {\r
-                       size_t n = strlen(Path) + 1 + strlen(FileName) + 1;\r
-                       char *NewFileName = new char[n];\r
-                       sprintf(NewFileName, "%s/%s", Path, FileName);\r
-                       FileName = NewFileName;\r
-                       }\r
-               TextFile File(FileName);\r
-               UserMatrix = ReadMx(File);\r
-               g_Alpha = ALPHA_Amino;\r
-               g_PPScore = PPSCORE_SP;\r
-               }\r
-\r
-       SetPPScore();\r
-\r
-       if (0 != UserMatrix)\r
-               g_ptrScoreMatrix = UserMatrix;\r
-\r
-       if (ALPHA_DNA == Alpha || ALPHA_RNA == Alpha)\r
-               {\r
-               SetPPScore(PPSCORE_SPN);\r
-               g_Distance1 = DISTANCE_Kmer4_6;\r
-               }\r
-\r
-       unsigned uMaxL = 0;\r
-       unsigned uTotL = 0;\r
-       for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)\r
-               {\r
-               unsigned L = v.GetSeq(uSeqIndex).Length();\r
-               uTotL += L;\r
-               if (L > uMaxL)\r
-                       uMaxL = L;\r
-               }\r
-\r
-       SetIter(1);\r
-       g_bDiags = g_bDiags1;\r
-       SetSeqStats(uSeqCount, uMaxL, uTotL/uSeqCount);\r
-\r
-       SetMuscleSeqVect(v);\r
-\r
-       MSA::SetIdCount(uSeqCount);\r
-\r
-// Initialize sequence ids.\r
-// From this point on, ids must somehow propogate from here.\r
-       for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)\r
-               v.SetSeqId(uSeqIndex, uSeqIndex);\r
-\r
-       if (uSeqCount > 1)\r
-               MHackStart(v);\r
-\r
-       if (0 == uSeqCount)\r
-               {\r
-               msaOut.Clear();\r
-               return;\r
-               }\r
-\r
-       if (1 == uSeqCount && ALPHA_Amino == Alpha)\r
-               {\r
-               const Seq &s = v.GetSeq(0);\r
-               msaOut.FromSeq(s);\r
-               return;\r
-               }\r
-\r
-       Tree GuideTree;\r
-       TreeFromSeqVect(v, GuideTree, g_Cluster1, g_Distance1, g_Root1);\r
-       SetMuscleTree(GuideTree);\r
-\r
-       MSA msa;\r
-       if (g_bLow)\r
-               {\r
-               ProgNode *ProgNodes = 0;\r
-               ProgNodes = ProgressiveAlignE(v, GuideTree, msa);\r
-               delete[] ProgNodes;\r
-               }\r
-       else\r
-               ProgressiveAlign(v, GuideTree, msa);\r
-       SetCurrentAlignment(msa);\r
-       TreeFromMSA(msa, GuideTree, g_Cluster2, g_Distance2, g_Root2);\r
-       SetMuscleTree(GuideTree);\r
-\r
-       unsigned *SubFams = new unsigned[uSeqCount];\r
-       unsigned uSubFamCount;\r
-       SubFam(GuideTree, g_uMaxSubFamCount, SubFams, &uSubFamCount);\r
-\r
-       SetProgressDesc("Align node");\r
-       const unsigned uNodeCount = 2*uSeqCount - 1;\r
-\r
-       ProgNode *ProgNodes = new ProgNode[uNodeCount];\r
-       bool *NodeIsSubFam = new bool[uNodeCount];\r
-       bool *NodeInSubFam = new bool[uNodeCount];\r
-\r
-       for (unsigned i = 0; i < uNodeCount; ++i)\r
-               {\r
-               NodeIsSubFam[i] = false;\r
-               NodeInSubFam[i] = false;\r
-               }\r
-\r
-       for (unsigned i = 0; i < uSubFamCount; ++i)\r
-               {\r
-               unsigned uNodeIndex = SubFams[i];\r
-               assert(uNodeIndex < uNodeCount);\r
-               NodeIsSubFam[uNodeIndex] = true;\r
-               SetInFam(GuideTree, uNodeIndex, NodeInSubFam);\r
-               }\r
-\r
-       unsigned uJoin = 0;\r
-       unsigned uTreeNodeIndex = GuideTree.FirstDepthFirstNode();\r
-       do\r
-               {\r
-               if (NodeIsSubFam[uTreeNodeIndex])\r
-                       {\r
-#if    TRACE\r
-                       Log("Node %d: align subfam\n", uTreeNodeIndex);\r
-#endif\r
-                       ProgNode &Node = ProgNodes[uTreeNodeIndex];\r
-                       AlignSubFam(v, GuideTree, uTreeNodeIndex, Node.m_MSA);\r
-                       Node.m_uLength = Node.m_MSA.GetColCount();\r
-                       }\r
-               else if (!NodeInSubFam[uTreeNodeIndex])\r
-                       {\r
-#if    TRACE\r
-                       Log("Node %d: align two subfams\n", uTreeNodeIndex);\r
-#endif\r
-                       Progress(uJoin, uSubFamCount - 1);\r
-                       ++uJoin;\r
-\r
-                       const unsigned uMergeNodeIndex = uTreeNodeIndex;\r
-                       ProgNode &Parent = ProgNodes[uMergeNodeIndex];\r
-\r
-                       const unsigned uLeft = GuideTree.GetLeft(uTreeNodeIndex);\r
-                       const unsigned uRight = GuideTree.GetRight(uTreeNodeIndex);\r
-\r
-                       ProgNode &Node1 = ProgNodes[uLeft];\r
-                       ProgNode &Node2 = ProgNodes[uRight];\r
-\r
-                       PWPath Path;\r
-                       AlignTwoMSAs(Node1.m_MSA, Node2.m_MSA, Parent.m_MSA, Path);\r
-                       Parent.m_uLength = Parent.m_MSA.GetColCount();\r
-\r
-                       Node1.m_MSA.Clear();\r
-                       Node2.m_MSA.Clear();\r
-                       }\r
-               else\r
-                       {\r
-#if    TRACE\r
-                       Log("Node %d: in subfam\n", uTreeNodeIndex);\r
-#endif\r
-                       ;\r
-                       }\r
-               uTreeNodeIndex = GuideTree.NextDepthFirstNode(uTreeNodeIndex);\r
-               }\r
-       while (NULL_NEIGHBOR != uTreeNodeIndex);\r
-       ProgressStepsDone();\r
-\r
-       unsigned uRootNodeIndex = GuideTree.GetRootNodeIndex();\r
-       ProgNode &RootProgNode = ProgNodes[uRootNodeIndex];\r
-\r
-       TextFile fOut(g_pstrOutFileName, true);\r
-       MHackEnd(RootProgNode.m_MSA);\r
-       RootProgNode.m_MSA.ToFile(fOut);\r
-\r
-       delete[] NodeInSubFam;\r
-       delete[] NodeIsSubFam;\r
-       delete[] ProgNodes;\r
-       delete[] SubFams;\r
-\r
-       ProgNodes = 0;\r
-       NodeInSubFam = 0;\r
-       NodeIsSubFam = 0;\r
-       SubFams = 0;\r
-       }\r