--- /dev/null
+#include "muscle.h"\r
+#include "seq.h"\r
+#include "textfile.h"\r
+#include "msa.h"\r
+//#include <ctype.h>\r
+\r
+// Size in bytes of the stack buffer that Seq::FromFASTAFile uses to\r
+// hold one line of a FASTA file; it is passed to TextFile::GetLine\r
+// as the buffer size.\r
+const size_t MAX_FASTA_LINE = 16000;\r
+\r
+// Replace the sequence name with a privately owned copy of ptrName.\r
+//\r
+// ptrName must be a NUL-terminated string. The copy is made before\r
+// the previous name buffer is released, so it is safe to pass a\r
+// pointer that refers to the current name, and the old name is left\r
+// intact if the allocation throws.\r
+void Seq::SetName(const char *ptrName)\r
+	{\r
+	const size_t n = strlen(ptrName) + 1;\r
+	char *ptrNewName = new char[n];\r
+	strcpy(ptrNewName, ptrName);\r
+\r
+	// Only now is it safe to drop the old buffer: ptrName may have\r
+	// been pointing into it.\r
+	delete[] m_ptrName;\r
+	m_ptrName = ptrNewName;\r
+	}\r
+\r
+// Write this sequence to File as one FASTA record: a '>' line with\r
+// the name, followed by the characters broken into lines of 60.\r
+void Seq::ToFASTAFile(TextFile &File) const\r
+	{\r
+	File.PutFormat(">%s\n", m_ptrName);\r
+	const unsigned uLength = Length();\r
+	unsigned uPos = 0;\r
+	while (uPos < uLength)\r
+		{\r
+		// Break the line before every 60th character, except at\r
+		// the very start of the sequence.\r
+		const bool bStartNewLine = (0 != uPos && 0 == uPos%60);\r
+		if (bStartNewLine)\r
+			File.PutString("\n");\r
+		File.PutChar(at(uPos));\r
+		++uPos;\r
+		}\r
+	File.PutString("\n");\r
+	}\r
+\r
+// Read the next FASTA record from File into this sequence.\r
+//\r
+// Returns true if end-of-file is hit before any header line is read,\r
+// false once a record has been read. The text after '>' on the header\r
+// line becomes the name. On the following lines whitespace and gap\r
+// characters are skipped, printable non-residue characters are\r
+// replaced by the wildcard character (with a Warning), and every\r
+// stored character is upper-cased.\r
+//\r
+// Quit is called for a missing '>', an empty annotation, an empty\r
+// sequence or a non-printable byte.\r
+// NOTE(review): Quit is presumably fatal and does not return; the\r
+// "return true" after it in the end-of-file branch is kept as-is.\r
+bool Seq::FromFASTAFile(TextFile &File)\r
+	{\r
+	Clear();\r
+\r
+	char szLine[MAX_FASTA_LINE];\r
+	bool bEof = File.GetLine(szLine, sizeof(szLine));\r
+	if (bEof)\r
+		return true;\r
+	if ('>' != szLine[0])\r
+		Quit("Expecting '>' in FASTA file %s line %u",\r
+		  File.GetFileName(), File.GetLineNr());\r
+\r
+	size_t n = strlen(szLine);\r
+	if (1 == n)\r
+		Quit("Missing annotation following '>' in FASTA file %s line %u",\r
+		  File.GetFileName(), File.GetLineNr());\r
+\r
+	// The name is the header without its leading '>', so n bytes are\r
+	// exactly enough for the n-1 remaining characters plus the NUL.\r
+	m_ptrName = new char[n];\r
+	strcpy(m_ptrName, szLine + 1);\r
+\r
+	// Pos always holds the file position of the start of the line\r
+	// about to be read, so the next record's header can be un-read.\r
+	TEXTFILEPOS Pos = File.GetPos();\r
+	for (;;)\r
+		{\r
+		bEof = File.GetLine(szLine, sizeof(szLine));\r
+		if (bEof)\r
+			{\r
+			if (0 == size())\r
+				{\r
+				Quit("Empty sequence in FASTA file %s line %u",\r
+				  File.GetFileName(), File.GetLineNr());\r
+				return true;\r
+				}\r
+			return false;\r
+			}\r
+		if ('>' == szLine[0])\r
+			{\r
+			if (0 == size())\r
+				Quit("Empty sequence in FASTA file %s line %u",\r
+				  File.GetFileName(), File.GetLineNr());\r
+			// Rewind to beginning of this line, it's the start of the\r
+			// next sequence.\r
+			File.SetPos(Pos);\r
+			return false;\r
+			}\r
+		const char *ptrChar = szLine;\r
+		while (char c = *ptrChar++)\r
+			{\r
+			// The <ctype.h> classifiers are only defined for values\r
+			// representable as unsigned char (or EOF); a plain char\r
+			// holding a byte >= 0x80 may be negative, so convert it\r
+			// before every such call.\r
+			if (isspace((unsigned char) c))\r
+				continue;\r
+			if (IsGapChar(c))\r
+				continue;\r
+			if (!IsResidueChar(c))\r
+				{\r
+				if (isprint((unsigned char) c))\r
+					{\r
+					char w = GetWildcardChar();\r
+					Warning("Invalid residue '%c' in FASTA file %s line %d, replaced by '%c'",\r
+					  c, File.GetFileName(), File.GetLineNr(), w);\r
+					c = w;\r
+					}\r
+				else\r
+					Quit("Invalid byte hex %02x in FASTA file %s line %d",\r
+					  (unsigned char) c, File.GetFileName(), File.GetLineNr());\r
+				}\r
+			c = (char) toupper((unsigned char) c);\r
+			push_back(c);\r
+			}\r
+		Pos = File.GetPos();\r
+		}\r
+	}\r
+\r
+// Rebuild msa as a one-row alignment holding the non-gap characters\r
+// of this sequence, in order, and give that row this sequence's name.\r
+void Seq::ExtractUngapped(MSA &msa) const\r
+	{\r
+	msa.Clear();\r
+	const unsigned uLength = Length();\r
+	msa.SetSize(1, 1);\r
+\r
+	unsigned uOutCol = 0;\r
+	unsigned uPos = 0;\r
+	while (uPos < uLength)\r
+		{\r
+		char c = at(uPos);\r
+		++uPos;\r
+		if (IsGapChar(c))\r
+			continue;\r
+		msa.SetChar(0, uOutCol, c);\r
+		++uOutCol;\r
+		}\r
+	msa.SetSeqName(0, m_ptrName);\r
+	}\r
+\r
+// Make this sequence a copy of rhs: same characters, same name\r
+// (duplicated into a freshly allocated buffer) and same id.\r
+//\r
+// The previous name buffer is released; it used to be overwritten\r
+// without delete[], which leaked it on every call.\r
+void Seq::Copy(const Seq &rhs)\r
+	{\r
+	clear();\r
+	const unsigned uLength = rhs.Length();\r
+	for (unsigned uColIndex = 0; uColIndex < uLength; ++uColIndex)\r
+		push_back(rhs.at(uColIndex));\r
+\r
+	// Duplicate the name before freeing the old one, in case the\r
+	// pointer returned by rhs.GetName() refers to our own buffer.\r
+	const char *ptrName = rhs.GetName();\r
+	const size_t n = strlen(ptrName) + 1;\r
+	char *ptrNewName = new char[n];\r
+	strcpy(ptrNewName, ptrName);\r
+	delete[] m_ptrName;\r
+	m_ptrName = ptrNewName;\r
+\r
+	SetId(rhs.GetId());\r
+	}\r
+\r
+// Make this sequence hold the characters of rhs in reverse order,\r
+// with a duplicate of rhs's name. The id is not copied.\r
+//\r
+// The previous name buffer is released; it used to be overwritten\r
+// without delete[], which leaked it on every call.\r
+void Seq::CopyReversed(const Seq &rhs)\r
+	{\r
+	clear();\r
+	const unsigned uLength = rhs.Length();\r
+	// uBase wraps around when rhs is empty, but it is only used\r
+	// inside the loop, which does not run in that case.\r
+	const unsigned uBase = rhs.Length() - 1;\r
+	for (unsigned uColIndex = 0; uColIndex < uLength; ++uColIndex)\r
+		push_back(rhs.at(uBase - uColIndex));\r
+\r
+	// Duplicate the name before freeing the old one, in case the\r
+	// pointer returned by rhs.GetName() refers to our own buffer.\r
+	const char *ptrName = rhs.GetName();\r
+	const size_t n = strlen(ptrName) + 1;\r
+	char *ptrNewName = new char[n];\r
+	strcpy(ptrNewName, ptrName);\r
+	delete[] m_ptrName;\r
+	m_ptrName = ptrNewName;\r
+	}\r
+\r
+// Remove every gap character from the sequence, keeping the other\r
+// characters in their original order.\r
+void Seq::StripGaps()\r
+	{\r
+	// Slide the kept characters towards the front and trim the tail\r
+	// once at the end. Erasing inside the loop and then re-using the\r
+	// same iterator relies on an invalidated iterator, and shifts the\r
+	// whole tail for every gap.\r
+	CharVect::iterator ptrOut = begin();\r
+	for (CharVect::iterator p = begin(); p != end(); ++p)\r
+		{\r
+		const char c = *p;\r
+		if (!IsGapChar(c))\r
+			*ptrOut++ = c;\r
+		}\r
+	erase(ptrOut, end());\r
+	}\r
+\r
+// Remove every gap character and every whitespace character from\r
+// the sequence, keeping the other characters in their original order.\r
+void Seq::StripGapsAndWhitespace()\r
+	{\r
+	// Slide the kept characters towards the front and trim the tail\r
+	// once at the end. Erasing inside the loop and then re-using the\r
+	// same iterator relies on an invalidated iterator, and shifts the\r
+	// whole tail for every removed character.\r
+	CharVect::iterator ptrOut = begin();\r
+	for (CharVect::iterator p = begin(); p != end(); ++p)\r
+		{\r
+		const char c = *p;\r
+		// isspace is only defined for unsigned char values (or EOF),\r
+		// so a possibly negative char is converted first.\r
+		if (isspace((unsigned char) c) || IsGapChar(c))\r
+			continue;\r
+		*ptrOut++ = c;\r
+		}\r
+	erase(ptrOut, end());\r
+	}\r
+\r
+// Convert every lower-case character of the sequence to upper case\r
+// in place; all other characters are left untouched.\r
+void Seq::ToUpper()\r
+	{\r
+	for (CharVect::iterator p = begin(); p != end(); ++p)\r
+		{\r
+		// islower/toupper are only defined for unsigned char values\r
+		// (or EOF), so a possibly negative char is converted first.\r
+		const unsigned char uc = (unsigned char) *p;\r
+		if (islower(uc))\r
+			*p = (char) toupper(uc);\r
+		}\r
+	}\r
+\r
+// Return CharToLetter applied to the character stored at uIndex.\r
+// uIndex must be less than Length(); this is only checked by assert.\r
+unsigned Seq::GetLetter(unsigned uIndex) const\r
+	{\r
+	assert(uIndex < Length());\r
+	char cResidue = (*this)[uIndex];\r
+	return CharToLetter(cResidue);\r
+	}\r
+\r
+// Return true if this sequence and s have the same length and agree\r
+// position by position: a gap character matches any gap character,\r
+// and any other character matches when the two are equal after\r
+// upper-casing.\r
+bool Seq::EqIgnoreCase(const Seq &s) const\r
+	{\r
+	const unsigned n = Length();\r
+	if (n != s.Length())\r
+		return false;\r
+	for (unsigned i = 0; i < n; ++i)\r
+		{\r
+		const char c1 = at(i);\r
+		const char c2 = s.at(i);\r
+		if (IsGapChar(c1))\r
+			{\r
+			if (!IsGapChar(c2))\r
+				return false;\r
+			}\r
+		else\r
+			{\r
+			// toupper is only defined for unsigned char values (or\r
+			// EOF), so possibly negative chars are converted first.\r
+			if (toupper((unsigned char) c1) != toupper((unsigned char) c2))\r
+				return false;\r
+			}\r
+		}\r
+	return true;\r
+	}\r
+\r
+// Return true if this sequence and s have the same length and hold\r
+// exactly the same character at every position (case-sensitive).\r
+bool Seq::Eq(const Seq &s) const\r
+	{\r
+	const unsigned uLength = Length();\r
+	if (s.Length() != uLength)\r
+		return false;\r
+\r
+	unsigned uPos = 0;\r
+	while (uPos < uLength)\r
+		{\r
+		if (at(uPos) != s.at(uPos))\r
+			return false;\r
+		++uPos;\r
+		}\r
+	return true;\r
+	}\r
+\r
+// Return true if this sequence and s are equal once all gap\r
+// characters are dropped from both and case is ignored.\r
+//\r
+// Both sequences are walked in parallel. On each round the next\r
+// non-gap character of each is fetched, upper-cased, with -1 standing\r
+// for "no more characters", and the two values are compared.\r
+bool Seq::EqIgnoreCaseAndGaps(const Seq &s) const\r
+	{\r
+	const unsigned uThisLength = Length();\r
+	const unsigned uOtherLength = s.Length();\r
+\r
+	unsigned uThisPos = 0;\r
+	unsigned uOtherPos = 0;\r
+\r
+	for (;;)\r
+		{\r
+		if (uThisPos == uThisLength && uOtherPos == uOtherLength)\r
+			break;\r
+\r
+		// Set cThis to next non-gap character in this string\r
+		// or -1 if end-of-string. The character goes through\r
+		// unsigned char so it is never negative: a sign-extended\r
+		// byte 0xFF would otherwise equal the -1 sentinel, and\r
+		// toupper is undefined for negative values other than EOF.\r
+		int cThis = -1;\r
+		while (uThisPos < uThisLength)\r
+			{\r
+			const char c = at(uThisPos);\r
+			++uThisPos;\r
+			if (!IsGapChar(c))\r
+				{\r
+				cThis = toupper((unsigned char) c);\r
+				break;\r
+				}\r
+			}\r
+\r
+		// Set cOther to next non-gap character in s\r
+		// or -1 if end-of-string, in the same way.\r
+		int cOther = -1;\r
+		while (uOtherPos < uOtherLength)\r
+			{\r
+			const char c = s.at(uOtherPos);\r
+			++uOtherPos;\r
+			if (!IsGapChar(c))\r
+				{\r
+				cOther = toupper((unsigned char) c);\r
+				break;\r
+				}\r
+			}\r
+\r
+		// Compare characters at corresponding ungapped position\r
+		if (cThis != cOther)\r
+			return false;\r
+		}\r
+	return true;\r
+	}\r
+\r
+// Count the characters in the sequence that are not gap characters.\r
+unsigned Seq::GetUngappedLength() const\r
+	{\r
+	unsigned uCount = 0;\r
+	CharVect::const_iterator it = begin();\r
+	while (it != end())\r
+		{\r
+		char c = *it;\r
+		++it;\r
+		if (IsGapChar(c))\r
+			continue;\r
+		++uCount;\r
+		}\r
+	return uCount;\r
+	}\r
+\r
+// Write the sequence to the log: a '>' line with the name, then all\r
+// characters on one line, one Log call per character.\r
+void Seq::LogMe() const\r
+	{\r
+	Log(">%s\n", m_ptrName);\r
+	const unsigned uLength = Length();\r
+	unsigned uPos = 0;\r
+	while (uPos < uLength)\r
+		{\r
+		Log("%c", at(uPos));\r
+		++uPos;\r
+		}\r
+	Log("\n");\r
+	}\r
+\r
+// Replace the contents of this sequence with the characters of the\r
+// NUL-terminated string pstrSeq (copied as-is, with no filtering or\r
+// case change) and set the name to a copy of pstrName.\r
+//\r
+// The previous name buffer is released; it used to be overwritten\r
+// without delete[], which leaked it on every call.\r
+void Seq::FromString(const char *pstrSeq, const char *pstrName)\r
+	{\r
+	clear();\r
+	const unsigned uLength = (unsigned) strlen(pstrSeq);\r
+	for (unsigned uColIndex = 0; uColIndex < uLength; ++uColIndex)\r
+		push_back(pstrSeq[uColIndex]);\r
+\r
+	// Duplicate the name before freeing the old one, in case\r
+	// pstrName points into the current name buffer.\r
+	const size_t n = strlen(pstrName) + 1;\r
+	char *ptrNewName = new char[n];\r
+	strcpy(ptrNewName, pstrName);\r
+	delete[] m_ptrName;\r
+	m_ptrName = ptrNewName;\r
+	}\r
+\r
+// Return true if at least one character of the sequence is a gap\r
+// character, false otherwise (including for an empty sequence).\r
+bool Seq::HasGap() const\r
+	{\r
+	CharVect::const_iterator it = begin();\r
+	while (it != end())\r
+		{\r
+		char c = *it;\r
+		if (IsGapChar(c))\r
+			return true;\r
+		++it;\r
+		}\r
+	return false;\r
+	}\r
+\r
+// Replace, in place, every character that IsResidueChar rejects with\r
+// the wildcard character, reporting each replacement through\r
+// InvalidLetterWarning.\r
+void Seq::FixAlpha()\r
+	{\r
+	CharVect::iterator it = begin();\r
+	while (it != end())\r
+		{\r
+		char cOld = *it;\r
+		if (IsResidueChar(cOld))\r
+			{\r
+			++it;\r
+			continue;\r
+			}\r
+		char cWild = GetWildcardChar();\r
+		InvalidLetterWarning(cOld, cWild);\r
+		*it = cWild;\r
+		++it;\r
+		}\r
+	}\r