5 #include "textfile.h"
\r
// Maximum number of characters of a sequence name that GetPaddedName keeps.
const int MAX_NAME = 63;

// Number of alignment columns ToMSFFile writes per output line.
const unsigned uCharsPerLine = 50;

// ToMSFFile writes a blank before every column whose index is a multiple
// of this value.
const unsigned uCharsPerBlock = 10;

// Returns Name truncated at its first blank or tab, or at MAX_NAME
// characters, whichever comes first, then padded with blanks so that the
// result is exactly PadLength characters long.
//
// Name      NUL-terminated sequence name; must not be null.
// PadLength Desired length of the result. Values outside [0, MAX_NAME] are
//           clamped so the terminator always lands inside the buffer.
//
// The result points into a function-local static buffer: it is overwritten
// by the next call, so copy it if it has to outlive that call.
static const char *GetPaddedName(const char *Name, int PadLength)
{
    static char PaddedName[MAX_NAME+1];

    // Keep the terminating NUL inside PaddedName.
    if (PadLength < 0)
        PadLength = 0;
    if (PadLength > MAX_NAME)
        PadLength = MAX_NAME;

    memset(PaddedName, ' ', MAX_NAME);

    // Length of the name up to (not including) the first blank or tab.
    size_t n = strcspn(Name, " \t");

    // Without this clamp a name longer than MAX_NAME overruns the buffer.
    if (n > (size_t) MAX_NAME)
        n = (size_t) MAX_NAME;

    memcpy(PaddedName, Name, n);
    PaddedName[PadLength] = 0;
    return PaddedName;
}
\r
// Helper that examines string s with respect to the character set t.
// The one visible statement computes n, the length of the leading part of s
// that contains no character from t.
// NOTE(review): the remainder of the body (original lines 27-30) is absent
// from this listing, so what is returned is not documented here -- confirm
// against the complete source.
24 static const char *strfind(const char *s, const char *t)
\r
26 size_t n = strcspn(s, t);
\r
32 // GCG checksum code kindly provided by Eric Martel.
\r
// Computes a checksum over the characters of the sequence at index
// uSeqIndex. Every column contributes its character value multiplied by a
// weight derived from the column index.
// NOTE(review): original lines 41-44 (anything after the accumulation,
// including the return statement) are absent from this listing -- confirm
// the final reduction and return value against the complete source.
33 unsigned MSA::GetGCGCheckSum(unsigned uSeqIndex) const
\r
35 unsigned CheckSum = 0;
\r
36 const unsigned uColCount = GetColCount();
\r
37 for (unsigned uColIndex = 0; uColIndex < uColCount; ++uColIndex)
\r
39 unsigned c = (unsigned) GetChar(uSeqIndex, uColIndex);
\r
// The weight cycles through 1..57 as the column index advances.
40 CheckSum += c*(uColIndex%57 + 1);
\r
46 static void MSFFixGaps(MSA &a)
\r
48 const int SeqCount = a.GetSeqCount();
\r
49 const int ColCount = a.GetColCount();
\r
50 for (int SeqIndex = 0; SeqIndex < SeqCount; ++SeqIndex)
\r
52 for (int ColIndex = 0; ColIndex < ColCount; ++ColIndex)
\r
53 if (a.IsGap(SeqIndex, ColIndex))
\r
54 a.SetChar(SeqIndex, ColIndex, '.');
\r
// Writes this alignment to File as text: a "PileUp" header, an optional
// "Comment:" line, an " MSF:" summary line, one " Name:" line per sequence,
// a "//" separator, and then the alignment characters in groups of
// uCharsPerLine columns.
//
// File       Destination; written through PutString / PutFormat.
// ptrComment Optional comment text; nothing is written for it when null.
//
// Although declared const, the function passes *this (with const cast away)
// to SetMSAWeightsMuscle and MSFFixGaps before writing anything.
// NOTE(review): brace-only lines and some statements (original lines 95,
// 111-112 and everything after 118) are absent from this listing.
58 void MSA::ToMSFFile(TextFile &File, const char *ptrComment) const
\r
60 // Cast away const, yuck
\r
61 SetMSAWeightsMuscle((MSA &) *this);
\r
62 MSFFixGaps((MSA &) *this);
\r
64 File.PutString("PileUp\n");
\r
66 if (0 != ptrComment)
\r
67 File.PutFormat("Comment: %s\n", ptrComment);
\r
69 File.PutString("\n");
\r
// Type letter is 'N' for the DNA and RNA alphabets and 'A' otherwise.
71 char seqtype = (g_Alpha == ALPHA_DNA || g_Alpha == ALPHA_RNA) ? 'N' : 'A';
\r
// The overall "Check:" field is written as the literal 0000.
72 File.PutFormat(" MSF: %u Type: %c Check: 0000 ..\n\n",
\r
73 GetColCount(), seqtype);
\r
// First pass over the sequences: find the longest name, measured up to the
// first blank or tab of the MAX_NAME-padded name.
75 int iLongestNameLength = 0;
\r
76 for (unsigned uSeqIndex = 0; uSeqIndex < GetSeqCount(); ++uSeqIndex)
\r
78 const char *Name = GetSeqName(uSeqIndex);
\r
79 const char *PaddedName = GetPaddedName(Name, MAX_NAME);
\r
80 int iLength = (int) strcspn(PaddedName, " \t");
\r
81 if (iLength > iLongestNameLength)
\r
82 iLongestNameLength = iLength;
\r
// Second pass: one " Name:" line per sequence, with every name padded to
// the longest length found above.
85 for (unsigned uSeqIndex = 0; uSeqIndex < GetSeqCount(); ++uSeqIndex)
\r
87 const char *Name = GetSeqName(uSeqIndex);
\r
88 const char *PaddedName = GetPaddedName(Name, iLongestNameLength);
\r
89 File.PutFormat(" Name: %s", PaddedName);
\r
90 File.PutFormat(" Len: %u Check: %5u Weight: %g\n",
\r
91 GetColCount(), GetGCGCheckSum(uSeqIndex), GetSeqWeight(uSeqIndex));
\r
93 File.PutString("\n//\n");
\r
// NOTE(review): the statement guarded by this test (original line 95) is not
// in this listing; presumably an early exit for an empty alignment, since
// the line-count computation below would wrap around for zero columns --
// confirm against the complete source.
94 if (0 == GetColCount())
\r
// Number of output lines needed, rounding up to whole lines.
97 unsigned uLineCount = (GetColCount() - 1)/uCharsPerLine + 1;
\r
98 for (unsigned uLineIndex = 0; uLineIndex < uLineCount; ++uLineIndex)
\r
100 File.PutString("\n");
\r
// Inclusive column range covered by this output line; the end is clipped to
// the last column.
101 unsigned uStartColIndex = uLineIndex*uCharsPerLine;
\r
102 unsigned uEndColIndex = uStartColIndex + uCharsPerLine - 1;
\r
103 if (uEndColIndex >= GetColCount())
\r
104 uEndColIndex = GetColCount() - 1;
\r
105 for (unsigned uSeqIndex = 0; uSeqIndex < GetSeqCount(); ++uSeqIndex)
\r
107 const char *Name = GetSeqName(uSeqIndex);
\r
108 const char *PaddedName = GetPaddedName(Name, iLongestNameLength);
\r
109 File.PutFormat("%s ", PaddedName);
\r
// NOTE(review): the rest of this loop header (original lines 111-112) is not
// in this listing.
110 for (unsigned uColIndex = uStartColIndex; uColIndex <= uEndColIndex;
\r
// A blank is written before each column whose index is a multiple of
// uCharsPerBlock.
113 if (0 == uColIndex%uCharsPerBlock)
\r
114 File.PutString(" ");
\r
115 char c = GetChar(uSeqIndex, uColIndex);
\r
116 File.PutFormat("%c", c);
\r
118 File.PutString("\n");
\r