+++ /dev/null
-#include "muscle.h"\r
-#include <stdio.h>\r
-#include <ctype.h>\r
-#include "msa.h"\r
-#include "textfile.h"\r
-\r
const int MAX_NAME = 63;

const unsigned uCharsPerLine = 50;
const unsigned uCharsPerBlock = 10;

// Return Name truncated at its first blank or tab, or at MAX_NAME
// characters, whichever comes first, then padded with blanks so that the
// result is exactly PadLength characters long. PadLength is clamped to
// the range [0, MAX_NAME].
//
// The result points into a static buffer that is overwritten by the next
// call, so it must be used (or copied) before calling again.
static const char *GetPaddedName(const char *Name, int PadLength)
	{
	static char PaddedName[MAX_NAME+1];

	// Keep the terminator inside the buffer whatever width was requested.
	if (PadLength < 0)
		PadLength = 0;
	else if (PadLength > MAX_NAME)
		PadLength = MAX_NAME;

	memset(PaddedName, ' ', MAX_NAME);

	// Copy no more than MAX_NAME characters: a longer name would otherwise
	// run past the end of the static buffer.
	size_t n = strcspn(Name, " \t");
	if (n > (size_t) MAX_NAME)
		n = (size_t) MAX_NAME;
	memcpy(PaddedName, Name, n);

	PaddedName[PadLength] = 0;
	return PaddedName;
	}
-\r
// Return a pointer to the first character of s that also occurs in the
// character set t, or a null pointer if s contains none of them
// (including when s is empty).
static const char *strfind(const char *s, const char *t)
	{
	const size_t n = strcspn(s, t);
	// strcspn stops either on a character from t or on the terminating
	// NUL; only the former is a match. A match at index 0 is valid.
	if (0 == s[n])
		return 0;
	return s + n;
	}
-\r
-// GCG checksum code kindly provided by Eric Martel.\r
-unsigned MSA::GetGCGCheckSum(unsigned uSeqIndex) const\r
- {\r
- unsigned CheckSum = 0;\r
- const unsigned uColCount = GetColCount();\r
- for (unsigned uColIndex = 0; uColIndex < uColCount; ++uColIndex)\r
- {\r
- unsigned c = (unsigned) GetChar(uSeqIndex, uColIndex);\r
- CheckSum += c*(uColIndex%57 + 1);\r
- CheckSum %= 10000; \r
- }\r
- return CheckSum;\r
- }\r
-\r
-static void MSFFixGaps(MSA &a)\r
- {\r
- const int SeqCount = a.GetSeqCount();\r
- const int ColCount = a.GetColCount();\r
- for (int SeqIndex = 0; SeqIndex < SeqCount; ++SeqIndex)\r
- {\r
- for (int ColIndex = 0; ColIndex < ColCount; ++ColIndex)\r
- if (a.IsGap(SeqIndex, ColIndex))\r
- a.SetChar(SeqIndex, ColIndex, '.');\r
- }\r
- }\r
-\r
-void MSA::ToMSFFile(TextFile &File, const char *ptrComment) const\r
- {\r
-// Cast away const, yuck\r
- SetMSAWeightsMuscle((MSA &) *this);\r
- MSFFixGaps((MSA &) *this);\r
-\r
- File.PutString("PileUp\n");\r
- \r
- if (0 != ptrComment)\r
- File.PutFormat("Comment: %s\n", ptrComment);\r
- else\r
- File.PutString("\n");\r
-\r
- char seqtype = (g_Alpha == ALPHA_DNA || g_Alpha == ALPHA_RNA) ? 'N' : 'A';\r
- File.PutFormat(" MSF: %u Type: %c Check: 0000 ..\n\n",\r
- GetColCount(), seqtype);\r
-\r
- int iLongestNameLength = 0;\r
- for (unsigned uSeqIndex = 0; uSeqIndex < GetSeqCount(); ++uSeqIndex)\r
- {\r
- const char *Name = GetSeqName(uSeqIndex);\r
- const char *PaddedName = GetPaddedName(Name, MAX_NAME);\r
- int iLength = (int) strcspn(PaddedName, " \t");\r
- if (iLength > iLongestNameLength)\r
- iLongestNameLength = iLength;\r
- }\r
- \r
- for (unsigned uSeqIndex = 0; uSeqIndex < GetSeqCount(); ++uSeqIndex)\r
- {\r
- const char *Name = GetSeqName(uSeqIndex);\r
- const char *PaddedName = GetPaddedName(Name, iLongestNameLength);\r
- File.PutFormat(" Name: %s", PaddedName);\r
- File.PutFormat(" Len: %u Check: %5u Weight: %g\n",\r
- GetColCount(), GetGCGCheckSum(uSeqIndex), GetSeqWeight(uSeqIndex));\r
- }\r
- File.PutString("\n//\n");\r
- if (0 == GetColCount())\r
- return;\r
-\r
- unsigned uLineCount = (GetColCount() - 1)/uCharsPerLine + 1;\r
- for (unsigned uLineIndex = 0; uLineIndex < uLineCount; ++uLineIndex)\r
- {\r
- File.PutString("\n");\r
- unsigned uStartColIndex = uLineIndex*uCharsPerLine;\r
- unsigned uEndColIndex = uStartColIndex + uCharsPerLine - 1;\r
- if (uEndColIndex >= GetColCount())\r
- uEndColIndex = GetColCount() - 1;\r
- for (unsigned uSeqIndex = 0; uSeqIndex < GetSeqCount(); ++uSeqIndex)\r
- {\r
- const char *Name = GetSeqName(uSeqIndex);\r
- const char *PaddedName = GetPaddedName(Name, iLongestNameLength);\r
- File.PutFormat("%s ", PaddedName);\r
- for (unsigned uColIndex = uStartColIndex; uColIndex <= uEndColIndex;\r
- ++uColIndex)\r
- {\r
- if (0 == uColIndex%uCharsPerBlock)\r
- File.PutString(" ");\r
- char c = GetChar(uSeqIndex, uColIndex);\r
- File.PutFormat("%c", c);\r
- }\r
- File.PutString("\n");\r
- }\r
- }\r
- }\r