Next version of JABA
[jabaws.git] / binaries / src / muscle / refinevert.cpp
1 #include "muscle.h"\r
2 #include "profile.h"\r
3 #include "msa.h"\r
4 #include "pwpath.h"\r
5 #include "seqvect.h"\r
6 #include "clust.h"\r
7 #include "tree.h"\r
8 \r
9 #define TRACE 0\r
10 \r
11 struct Range\r
12         {\r
13         unsigned m_uBestColLeft;\r
14         unsigned m_uBestColRight;\r
15         };\r
16 \r
17 static void ListVertSavings(unsigned uColCount, unsigned uAnchorColCount,\r
18   const Range *Ranges, unsigned uRangeCount)\r
19         {\r
20         if (!g_bVerbose || !g_bAnchors)\r
21                 return;\r
22         double dTotalArea = uColCount*uColCount;\r
23         double dArea = 0.0;\r
24         for (unsigned i = 0; i < uRangeCount; ++i)\r
25                 {\r
26                 unsigned uLength = Ranges[i].m_uBestColRight - Ranges[i].m_uBestColLeft;\r
27                 dArea += uLength*uLength;\r
28                 }\r
29         double dPct = (dTotalArea - dArea)*100.0/dTotalArea;\r
30         Log("Anchor columns found       %u\n", uAnchorColCount);\r
31         Log("DP area saved by anchors   %-4.1f%%\n", dPct);\r
32         }\r
33 \r
34 static void ColsToRanges(const unsigned BestCols[], unsigned uBestColCount,\r
35   unsigned uColCount, Range Ranges[])\r
36         {\r
37 // N best columns produces N+1 vertical blocks.\r
38         const unsigned uRangeCount = uBestColCount + 1;\r
39         for (unsigned uIndex = 0; uIndex < uRangeCount ; ++uIndex)\r
40                 {\r
41                 unsigned uBestColLeft = 0;\r
42                 if (uIndex > 0)\r
43                         uBestColLeft = BestCols[uIndex-1];\r
44                 \r
45                 unsigned uBestColRight = uColCount;\r
46                 if (uIndex < uBestColCount)\r
47                         uBestColRight = BestCols[uIndex];\r
48 \r
49                 Ranges[uIndex].m_uBestColLeft = uBestColLeft;\r
50                 Ranges[uIndex].m_uBestColRight = uBestColRight;\r
51                 }\r
52         }\r
53 \r
54 // Return true if any changes made\r
55 bool RefineVert(MSA &msaIn, const Tree &tree, unsigned uIters)\r
56         {\r
57         bool bAnyChanges = false;\r
58 \r
59         const unsigned uColCountIn = msaIn.GetColCount();\r
60         const unsigned uSeqCountIn = msaIn.GetSeqCount();\r
61 \r
62         if (uColCountIn < 3 || uSeqCountIn < 3)\r
63                 return false;\r
64 \r
65         unsigned *AnchorCols = new unsigned[uColCountIn];\r
66         unsigned uAnchorColCount;\r
67         SetMSAWeightsMuscle(msaIn);\r
68         FindAnchorCols(msaIn, AnchorCols, &uAnchorColCount);\r
69 \r
70         const unsigned uRangeCount = uAnchorColCount + 1;\r
71         Range *Ranges = new Range[uRangeCount];\r
72 \r
73 #if     TRACE\r
74         Log("%u ranges\n", uRangeCount);\r
75 #endif\r
76 \r
77         ColsToRanges(AnchorCols, uAnchorColCount, uColCountIn, Ranges);\r
78         ListVertSavings(uColCountIn, uAnchorColCount, Ranges, uRangeCount);\r
79 \r
80 #if     TRACE\r
81         {\r
82         Log("Anchor cols: ");\r
83         for (unsigned i = 0; i < uAnchorColCount; ++i)\r
84                 Log(" %u", AnchorCols[i]);\r
85         Log("\n");\r
86 \r
87         Log("Ranges:\n");\r
88         for (unsigned i = 0; i < uRangeCount; ++i)\r
89                 Log("%4u - %4u\n", Ranges[i].m_uBestColLeft, Ranges[i].m_uBestColRight);\r
90         }\r
91 #endif\r
92 \r
93         delete[] AnchorCols;\r
94 \r
95         MSA msaOut;\r
96         msaOut.SetSize(uSeqCountIn, 0);\r
97 \r
98         for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCountIn; ++uSeqIndex)\r
99                 {\r
100                 const char *ptrName = msaIn.GetSeqName(uSeqIndex);\r
101                 unsigned uId = msaIn.GetSeqId(uSeqIndex);\r
102                 msaOut.SetSeqName(uSeqIndex, ptrName);\r
103                 msaOut.SetSeqId(uSeqIndex, uId);\r
104                 }\r
105 \r
106         for (unsigned uRangeIndex = 0; uRangeIndex < uRangeCount; ++uRangeIndex)\r
107                 {\r
108                 MSA msaRange;\r
109 \r
110                 const Range &r = Ranges[uRangeIndex];\r
111 \r
112                 const unsigned uFromColIndex = r.m_uBestColLeft;\r
113                 const unsigned uRangeColCount = r.m_uBestColRight - uFromColIndex;\r
114 \r
115                 if (0 == uRangeColCount)\r
116                         continue;\r
117                 else if (1 == uRangeColCount)\r
118                         {\r
119                         MSAFromColRange(msaIn, uFromColIndex, 1, msaRange);\r
120                         MSAAppend(msaOut, msaRange);\r
121                         continue;\r
122                         }\r
123                 MSAFromColRange(msaIn, uFromColIndex, uRangeColCount, msaRange);\r
124 \r
125 #if     TRACE\r
126                 Log("\n-------------\n");\r
127                 Log("Range %u - %u count=%u\n", r.m_uBestColLeft, r.m_uBestColRight, uRangeColCount);\r
128                 Log("Before:\n");\r
129                 msaRange.LogMe();\r
130 #endif\r
131 \r
132                 bool bLockLeft = (0 != uRangeIndex);\r
133                 bool bLockRight = (uRangeCount - 1 != uRangeIndex);\r
134                 bool bAnyChangesThisBlock = RefineHoriz(msaRange, tree, uIters, bLockLeft, bLockRight);\r
135                 bAnyChanges = (bAnyChanges || bAnyChangesThisBlock);\r
136 \r
137 #if     TRACE\r
138                 Log("After:\n");\r
139                 msaRange.LogMe();\r
140 #endif\r
141 \r
142                 MSAAppend(msaOut, msaRange);\r
143 \r
144 #if     TRACE\r
145                 Log("msaOut after Cat:\n");\r
146                 msaOut.LogMe();\r
147 #endif\r
148                 }\r
149 \r
150 #if     DEBUG\r
151 // Sanity check\r
152         AssertMSAEqIgnoreCaseAndGaps(msaIn, msaOut);\r
153 #endif\r
154 \r
155         delete[] Ranges;\r
156         if (bAnyChanges)\r
157                 msaIn.Copy(msaOut);\r
158         return bAnyChanges;\r
159         }\r