--- /dev/null
+#include "muscle.h"\r
+#include <stdio.h>\r
+#include <ctype.h>\r
+#include "msa.h"\r
+#include "textfile.h"\r
+\r
// Longest sequence name, in characters, that GetPaddedName() can return.
const int MAX_NAME = 63;

// Alignment body layout: columns written per output line, and columns per
// blank-separated block within a line.
const unsigned uCharsPerLine = 50;
const unsigned uCharsPerBlock = 10;

// Returns Name cut at its first blank or tab, or at MAX_NAME characters,
// whichever comes first, then blank-padded (or truncated) to exactly
// PadLength characters.
//
// Name       NUL-terminated sequence label; must not be null.
// PadLength  Requested length of the result; values outside
//            [0, MAX_NAME] are clamped to that range.
//
// The result points at a static buffer, so it is overwritten by the next
// call and must be consumed before calling again.
static const char *GetPaddedName(const char *Name, int PadLength)
    {
    static char PaddedName[MAX_NAME+1];

    // PadLength indexes the buffer below, so keep it inside the array.
    if (PadLength < 0)
        PadLength = 0;
    else if (PadLength > MAX_NAME)
        PadLength = MAX_NAME;

    // Start from all blanks so anything after the name is padding; this
    // also erases the terminator left behind by the previous call.
    memset(PaddedName, ' ', MAX_NAME);

    // strcspn() has no upper bound: a long name without white space must
    // be clamped here or memcpy() would run past the end of the buffer.
    size_t n = strcspn(Name, " \t");
    if (n > (size_t) MAX_NAME)
        n = (size_t) MAX_NAME;
    memcpy(PaddedName, Name, n);

    PaddedName[PadLength] = 0;
    return PaddedName;
    }
+\r
// Returns a pointer to the first character of s that also occurs in the
// character set t, or 0 when s contains none of the characters of t.
// Both arguments must be NUL-terminated strings.
static const char *strfind(const char *s, const char *t)
    {
    // strcspn() gives the length of the leading run of s that holds no
    // character of t, so s[n] is either the first match or, when nothing
    // matched, the terminating NUL. A match at index 0 is a valid hit.
    size_t n = strcspn(s, t);
    if (0 == s[n])
        return 0;
    return s + n;
    }
+\r
+// GCG checksum code kindly provided by Eric Martel.\r
+unsigned MSA::GetGCGCheckSum(unsigned uSeqIndex) const\r
+ {\r
+ unsigned CheckSum = 0;\r
+ const unsigned uColCount = GetColCount();\r
+ for (unsigned uColIndex = 0; uColIndex < uColCount; ++uColIndex)\r
+ {\r
+ unsigned c = (unsigned) GetChar(uSeqIndex, uColIndex);\r
+ CheckSum += c*(uColIndex%57 + 1);\r
+ CheckSum %= 10000; \r
+ }\r
+ return CheckSum;\r
+ }\r
+\r
+static void MSFFixGaps(MSA &a)\r
+ {\r
+ const int SeqCount = a.GetSeqCount();\r
+ const int ColCount = a.GetColCount();\r
+ for (int SeqIndex = 0; SeqIndex < SeqCount; ++SeqIndex)\r
+ {\r
+ for (int ColIndex = 0; ColIndex < ColCount; ++ColIndex)\r
+ if (a.IsGap(SeqIndex, ColIndex))\r
+ a.SetChar(SeqIndex, ColIndex, '.');\r
+ }\r
+ }\r
+\r
+void MSA::ToMSFFile(TextFile &File, const char *ptrComment) const\r
+ {\r
+// Cast away const, yuck\r
+ SetMSAWeightsMuscle((MSA &) *this);\r
+ MSFFixGaps((MSA &) *this);\r
+\r
+ File.PutString("PileUp\n");\r
+ \r
+ if (0 != ptrComment)\r
+ File.PutFormat("Comment: %s\n", ptrComment);\r
+ else\r
+ File.PutString("\n");\r
+\r
+ char seqtype = (g_Alpha == ALPHA_DNA || g_Alpha == ALPHA_RNA) ? 'N' : 'A';\r
+ File.PutFormat(" MSF: %u Type: %c Check: 0000 ..\n\n",\r
+ GetColCount(), seqtype);\r
+\r
+ int iLongestNameLength = 0;\r
+ for (unsigned uSeqIndex = 0; uSeqIndex < GetSeqCount(); ++uSeqIndex)\r
+ {\r
+ const char *Name = GetSeqName(uSeqIndex);\r
+ const char *PaddedName = GetPaddedName(Name, MAX_NAME);\r
+ int iLength = (int) strcspn(PaddedName, " \t");\r
+ if (iLength > iLongestNameLength)\r
+ iLongestNameLength = iLength;\r
+ }\r
+ \r
+ for (unsigned uSeqIndex = 0; uSeqIndex < GetSeqCount(); ++uSeqIndex)\r
+ {\r
+ const char *Name = GetSeqName(uSeqIndex);\r
+ const char *PaddedName = GetPaddedName(Name, iLongestNameLength);\r
+ File.PutFormat(" Name: %s", PaddedName);\r
+ File.PutFormat(" Len: %u Check: %5u Weight: %g\n",\r
+ GetColCount(), GetGCGCheckSum(uSeqIndex), GetSeqWeight(uSeqIndex));\r
+ }\r
+ File.PutString("\n//\n");\r
+ if (0 == GetColCount())\r
+ return;\r
+\r
+ unsigned uLineCount = (GetColCount() - 1)/uCharsPerLine + 1;\r
+ for (unsigned uLineIndex = 0; uLineIndex < uLineCount; ++uLineIndex)\r
+ {\r
+ File.PutString("\n");\r
+ unsigned uStartColIndex = uLineIndex*uCharsPerLine;\r
+ unsigned uEndColIndex = uStartColIndex + uCharsPerLine - 1;\r
+ if (uEndColIndex >= GetColCount())\r
+ uEndColIndex = GetColCount() - 1;\r
+ for (unsigned uSeqIndex = 0; uSeqIndex < GetSeqCount(); ++uSeqIndex)\r
+ {\r
+ const char *Name = GetSeqName(uSeqIndex);\r
+ const char *PaddedName = GetPaddedName(Name, iLongestNameLength);\r
+ File.PutFormat("%s ", PaddedName);\r
+ for (unsigned uColIndex = uStartColIndex; uColIndex <= uEndColIndex;\r
+ ++uColIndex)\r
+ {\r
+ if (0 == uColIndex%uCharsPerBlock)\r
+ File.PutString(" ");\r
+ char c = GetChar(uSeqIndex, uColIndex);\r
+ File.PutFormat("%c", c);\r
+ }\r
+ File.PutString("\n");\r
+ }\r
+ }\r
+ }\r