5 #include "textfile.h"
\r
// Number of alignment columns written per output line; ToAlnFile splits the
// alignment into blocks of this many columns.
7 const unsigned uCharsPerLine = 60;
\r
// Lower bound applied to the width of the sequence-name field in ToAlnFile.
8 const int MIN_NAME = 10;
\r
// Upper bound applied to the width of the sequence-name field in ToAlnFile;
// also sizes the local name buffer there (MAX_NAME+1 chars).
9 const int MAX_NAME = 32;
\r
// Forward declaration: returns the character written on the consensus line
// for one column of the alignment. Defined later in this file.
11 static char GetAlnConsensusChar(const MSA &a, unsigned uColIndex);
\r
// Writes this alignment to File in CLUSTAL-style ("ALN") text form:
// a banner line, then the columns in blocks of uCharsPerLine, each block
// holding one row per sequence (padded name + residues) followed by a
// consensus row produced by GetAlnConsensusChar.
//
// File: destination text file; written via PutString / PutFormat only.
//
// NOTE(review): this listing omits some original lines (braces, if/else
// headers, a few declarations), so comments below describe only the
// statements that are visible.
13 void MSA::ToAlnFile(TextFile &File) const
\r
// Banner: either the fixed "CLUSTAL W (1.81)" header or a MUSCLE-versioned
// header followed by an extra blank line. The condition selecting between
// the two is not visible here.
16 File.PutString("CLUSTAL W (1.81) multiple sequence alignment\n");
\r
19 File.PutString("MUSCLE ("
\r
20 MUSCLE_MAJOR_VERSION "." MUSCLE_MINOR_VERSION ")"
\r
21 " multiple sequence alignment\n");
\r
22 File.PutString("\n");
\r
// Pass 1: find the longest sequence name so every row can be padded to the
// same width.
25 int iLongestNameLength = 0;
\r
26 for (unsigned uSeqIndex = 0; uSeqIndex < GetSeqCount(); ++uSeqIndex)
\r
28 const char *ptrName = GetSeqName(uSeqIndex);
\r
29 const char *ptrBlank = strchr(ptrName, ' ');
\r
// Name length is either the distance to the first blank or the full string
// length; presumably the first form is used when a blank was found (the
// selecting if/else is not visible) -- TODO confirm.
32 iLength = (int) (ptrBlank - ptrName);
\r
34 iLength = (int) strlen(ptrName);
\r
35 if (iLength > iLongestNameLength)
\r
36 iLongestNameLength = iLength;
\r
// Clamp the name-field width to [MIN_NAME, MAX_NAME].
38 if (iLongestNameLength > MAX_NAME)
\r
39 iLongestNameLength = MAX_NAME;
\r
40 if (iLongestNameLength < MIN_NAME)
\r
41 iLongestNameLength = MIN_NAME;
\r
// Number of output blocks = ceil(column count / uCharsPerLine).
// NOTE(review): if GetColCount() returns an unsigned 0, "GetColCount() - 1"
// wraps around -- confirm callers never write an empty alignment.
43 unsigned uLineCount = (GetColCount() - 1)/uCharsPerLine + 1;
\r
44 for (unsigned uLineIndex = 0; uLineIndex < uLineCount; ++uLineIndex)
\r
// Blank separator line before each block.
46 File.PutString("\n");
\r
// Inclusive column range [uStartColIndex, uEndColIndex] covered by this
// block; the last block is shortened to the final column.
47 unsigned uStartColIndex = uLineIndex*uCharsPerLine;
\r
48 unsigned uEndColIndex = uStartColIndex + uCharsPerLine - 1;
\r
49 if (uEndColIndex >= GetColCount())
\r
50 uEndColIndex = GetColCount() - 1;
\r
// Scratch buffer for the blank-padded name field.
51 char Name[MAX_NAME+1];
\r
52 for (unsigned uSeqIndex = 0; uSeqIndex < GetSeqCount(); ++uSeqIndex)
\r
54 const char *ptrName = GetSeqName(uSeqIndex);
\r
55 const char *ptrBlank = strchr(ptrName, ' ');
\r
// Same name-length computation as in pass 1 above.
58 iLength = (int) (ptrBlank - ptrName);
\r
60 iLength = (int) strlen(ptrName);
\r
// The statement guarded by this check is not visible in this listing;
// presumably it limits iLength to MAX_NAME so the memcpy below stays
// inside Name -- TODO confirm.
61 if (iLength > MAX_NAME)
\r
// Fill with blanks, copy the name over the front, then terminate at the
// common field width: yields a left-justified, blank-padded name.
63 memset(Name, ' ', MAX_NAME);
\r
64 memcpy(Name, ptrName, iLength);
\r
65 Name[iLongestNameLength] = 0;
\r
// Name field followed by a single separating blank.
67 File.PutFormat("%s ", Name);
\r
68 for (unsigned uColIndex = uStartColIndex; uColIndex <= uEndColIndex;
\r
// Residues are written upper-cased.
// NOTE(review): toupper on a plain char is undefined for negative values;
// presumably alignment characters are always ASCII -- confirm.
71 const char c = GetChar(uSeqIndex, uColIndex);
\r
72 File.PutFormat("%c", toupper(c));
\r
74 File.PutString("\n");
\r
// Consensus row: an all-blank name field of the same width, then one
// consensus character per column of this block.
77 memset(Name, ' ', MAX_NAME);
\r
78 Name[iLongestNameLength] = 0;
\r
79 File.PutFormat("%s ", Name);
\r
80 for (unsigned uColIndex = uStartColIndex; uColIndex <= uEndColIndex;
\r
83 const char c = GetAlnConsensusChar(*this, uColIndex);
\r
86 File.PutString("\n");
\r
90 static char GetAlnConsensusChar(const MSA &a, unsigned uColIndex)
\r
92 const unsigned uSeqCount = a.GetSeqCount();
\r
93 unsigned BitMap = 0;
\r
95 for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
\r
97 unsigned uLetter = a.GetLetterEx(uSeqIndex, uColIndex);
\r
98 assert(uLetter < 32);
\r
99 unsigned Bit = (1 << uLetter);
\r
100 if (!(BitMap & Bit))
\r
105 // '*' indicates positions which have a single, fully conserved residue
\r
109 if (ALPHA_Amino != g_Alpha)
\r
112 #define B(a) (1 << AX_##a)
\r
113 #define S2(a, b) S(B(a) | B(b))
\r
114 #define S3(a, b, c) S(B(a) | B(b) | B(c))
\r
115 #define S4(a, b, c, d) S(B(a) | B(b) | B(c) | B(d))
\r
116 #define S(w) if (0 == (BitMap & ~(w)) && (BitMap & (w)) != 0) return ':';
\r
118 #define W3(a, b, c) W(B(a) | B(b) | B(c))
\r
119 #define W4(a, b, c, d) W(B(a) | B(b) | B(c) | B(d))
\r
120 #define W5(a, b, c, d, e) W(B(a) | B(b) | B(c) | B(d) | B(e))
\r
121 #define W6(a, b, c, d, e, f) W(B(a) | B(b) | B(c) | B(d) | B(e) | B(f))
\r
122 #define W(w) if (0 == (BitMap & ~(w)) && (BitMap & (w)) != 0) return '.';
\r
124 // ':' indicates that one of the following 'strong'
\r
125 // groups is fully conserved
\r
145 // '.' indicates that one of the following 'weaker'
\r
146 // groups is fully conserved
\r
164 W6(S, N, D, E, Q, K)
\r
165 W6(N, W, Q, H, R, K)
\r