+++ /dev/null
-#include "muscle.h"\r
-#include <stdio.h>\r
-#include <ctype.h>\r
-#include "msa.h"\r
-#include "textfile.h"\r
-\r
-const unsigned uCharsPerLine = 60;\r
-const int MIN_NAME = 10;\r
-const int MAX_NAME = 32;\r
-\r
-static char GetAlnConsensusChar(const MSA &a, unsigned uColIndex);\r
-\r
-void MSA::ToAlnFile(TextFile &File) const\r
- {\r
- if (g_bClwStrict)\r
- File.PutString("CLUSTAL W (1.81) multiple sequence alignment\n");\r
- else\r
- {\r
- File.PutString("MUSCLE ("\r
- SHORT_VERSION ")"\r
- " multiple sequence alignment\n");\r
- File.PutString("\n");\r
- }\r
-\r
- int iLongestNameLength = 0;\r
- for (unsigned uSeqIndex = 0; uSeqIndex < GetSeqCount(); ++uSeqIndex)\r
- {\r
- const char *ptrName = GetSeqName(uSeqIndex);\r
- const char *ptrBlank = strchr(ptrName, ' ');\r
- int iLength;\r
- if (0 != ptrBlank)\r
- iLength = (int) (ptrBlank - ptrName);\r
- else\r
- iLength = (int) strlen(ptrName);\r
- if (iLength > iLongestNameLength)\r
- iLongestNameLength = iLength;\r
- }\r
- if (iLongestNameLength > MAX_NAME)\r
- iLongestNameLength = MAX_NAME;\r
- if (iLongestNameLength < MIN_NAME)\r
- iLongestNameLength = MIN_NAME;\r
-\r
- unsigned uLineCount = (GetColCount() - 1)/uCharsPerLine + 1;\r
- for (unsigned uLineIndex = 0; uLineIndex < uLineCount; ++uLineIndex)\r
- {\r
- File.PutString("\n");\r
- unsigned uStartColIndex = uLineIndex*uCharsPerLine;\r
- unsigned uEndColIndex = uStartColIndex + uCharsPerLine - 1;\r
- if (uEndColIndex >= GetColCount())\r
- uEndColIndex = GetColCount() - 1;\r
- char Name[MAX_NAME+1];\r
- for (unsigned uSeqIndex = 0; uSeqIndex < GetSeqCount(); ++uSeqIndex)\r
- {\r
- const char *ptrName = GetSeqName(uSeqIndex);\r
- const char *ptrBlank = strchr(ptrName, ' ');\r
- int iLength;\r
- if (0 != ptrBlank)\r
- iLength = (int) (ptrBlank - ptrName);\r
- else\r
- iLength = (int) strlen(ptrName);\r
- if (iLength > MAX_NAME)\r
- iLength = MAX_NAME;\r
- memset(Name, ' ', MAX_NAME);\r
- memcpy(Name, ptrName, iLength);\r
- Name[iLongestNameLength] = 0;\r
-\r
- File.PutFormat("%s ", Name);\r
- for (unsigned uColIndex = uStartColIndex; uColIndex <= uEndColIndex;\r
- ++uColIndex)\r
- {\r
- const char c = GetChar(uSeqIndex, uColIndex);\r
- File.PutFormat("%c", toupper(c));\r
- }\r
- File.PutString("\n");\r
- }\r
-\r
- memset(Name, ' ', MAX_NAME);\r
- Name[iLongestNameLength] = 0;\r
- File.PutFormat("%s ", Name);\r
- for (unsigned uColIndex = uStartColIndex; uColIndex <= uEndColIndex;\r
- ++uColIndex)\r
- {\r
- const char c = GetAlnConsensusChar(*this, uColIndex);\r
- File.PutChar(c);\r
- }\r
- File.PutString("\n");\r
- }\r
- }\r
-\r
-static char GetAlnConsensusChar(const MSA &a, unsigned uColIndex)\r
- {\r
- const unsigned uSeqCount = a.GetSeqCount();\r
- unsigned BitMap = 0;\r
- unsigned Count = 0;\r
- for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)\r
- {\r
- unsigned uLetter = a.GetLetterEx(uSeqIndex, uColIndex);\r
- assert(uLetter < 32);\r
- unsigned Bit = (1 << uLetter);\r
- if (!(BitMap & Bit))\r
- ++Count;\r
- BitMap |= Bit;\r
- }\r
-\r
-// '*' indicates positions which have a single, fully conserved residue\r
- if (1 == Count)\r
- return '*';\r
-\r
- if (ALPHA_Amino != g_Alpha)\r
- return ' ';\r
-\r
-#define B(a) (1 << AX_##a)\r
-#define S2(a, b) S(B(a) | B(b))\r
-#define S3(a, b, c) S(B(a) | B(b) | B(c))\r
-#define S4(a, b, c, d) S(B(a) | B(b) | B(c) | B(d))\r
-#define S(w) if (0 == (BitMap & ~(w)) && (BitMap & (w)) != 0) return ':';\r
-\r
-#define W3(a, b, c) W(B(a) | B(b) | B(c))\r
-#define W4(a, b, c, d) W(B(a) | B(b) | B(c) | B(d))\r
-#define W5(a, b, c, d, e) W(B(a) | B(b) | B(c) | B(d) | B(e))\r
-#define W6(a, b, c, d, e, f) W(B(a) | B(b) | B(c) | B(d) | B(e) | B(f))\r
-#define W(w) if (0 == (BitMap & ~(w)) && (BitMap & (w)) != 0) return '.';\r
-\r
-// ':' indicates that one of the following 'strong'\r
-// groups is fully conserved\r
-// STA\r
-// NEQK\r
-// NHQK\r
-// NDEQ\r
-// QHRK\r
-// MILV\r
-// MILF\r
-// HY\r
-// FYW\r
-//\r
- S3(S, T, A)\r
- S4(N, E, Q, K)\r
- S4(N, H, Q, K)\r
- S4(N, D, E, Q)\r
- S4(M, I, L, V)\r
- S4(M, I, L, F)\r
- S2(H, Y)\r
- S3(F, Y, W)\r
-\r
-// '.' indicates that one of the following 'weaker' \r
-// groups is fully conserved\r
-// CSA\r
-// ATV\r
-// SAG\r
-// STNK\r
-// STPA\r
-// SGND\r
-// SNDEQK\r
-// NDEQHK\r
-// NEQHRK\r
-// FVLIM\r
-// HFY\r
- W3(C, S, A)\r
- W3(A, T, V)\r
- W3(S, A, G)\r
- W4(S, T, N, K)\r
- W4(S, T, P, A)\r
- W4(S, G, N, D)\r
- W6(S, N, D, E, Q, K)\r
- W6(N, W, Q, H, R, K)\r
- W5(F, V, L, I, M)\r
- W3(H, F, Y)\r
-\r
- return ' ';\r
- }\r