+++ /dev/null
-#include "muscle.h"\r
-#include "seq.h"\r
-#include "textfile.h"\r
-#include "msa.h"\r
-//#include <ctype.h>\r
-\r
-const size_t MAX_FASTA_LINE = 16000; /* size in bytes of the line buffer that Seq::FromFASTAFile reads into */\r
-\r
-// Set the sequence name to a privately owned copy of ptrName.\r
-// ptrName must be a NUL-terminated string. It may point at the name this\r
-// object currently holds: the copy is made before the old buffer is freed.\r
-void Seq::SetName(const char *ptrName)\r
-    {\r
-    const size_t uBytes = strlen(ptrName) + 1;\r
-    char *ptrCopy = new char[uBytes];\r
-    strcpy(ptrCopy, ptrName);\r
-\r
-    // Release the previous name only after it can no longer be needed as\r
-    // the source of the copy.\r
-    delete[] m_ptrName;\r
-    m_ptrName = ptrCopy;\r
-    }\r
-\r
-// Write this sequence to File as one FASTA record: a ">name" header line\r
-// followed by the characters of the sequence, broken into lines of 60.\r
-// The record always ends with a newline, even when the sequence is empty.\r
-void Seq::ToFASTAFile(TextFile &File) const\r
-    {\r
-    File.PutFormat(">%s\n", m_ptrName);\r
-\r
-    const unsigned uLength = Length();\r
-    unsigned uCharsOnLine = 0;\r
-    for (unsigned uPos = 0; uPos < uLength; ++uPos)\r
-        {\r
-        // Start a new output line once the current one holds 60 characters.\r
-        if (60 == uCharsOnLine)\r
-            {\r
-            File.PutString("\n");\r
-            uCharsOnLine = 0;\r
-            }\r
-        File.PutChar(at(uPos));\r
-        ++uCharsOnLine;\r
-        }\r
-    File.PutString("\n");\r
-    }\r
-\r
-// Read the next FASTA record from File into this sequence.\r
-// A record is a header line starting with '>' (the text after the '>'\r
-// becomes the name) followed by data lines up to the next '>' line or\r
-// end-of-file. White space and gap characters in the data are dropped,\r
-// every stored character is upper-cased, and a printable character that is\r
-// not a residue is replaced by the wildcard character with a warning.\r
-// Returns true if end-of-file was reached before a header line could be\r
-// read, false once a record has been read. Malformed input (no '>', empty\r
-// annotation, empty sequence, non-printable byte) is reported through Quit().\r
-bool Seq::FromFASTAFile(TextFile &File)\r
-    {\r
-    Clear();\r
-\r
-    char szLine[MAX_FASTA_LINE];\r
-    if (File.GetLine(szLine, sizeof(szLine)))\r
-        return true;\r
-    if ('>' != szLine[0])\r
-        Quit("Expecting '>' in FASTA file %s line %u",\r
-          File.GetFileName(), File.GetLineNr());\r
-\r
-    const size_t uHeaderLength = strlen(szLine);\r
-    if (1 == uHeaderLength)\r
-        Quit("Missing annotation following '>' in FASTA file %s line %u",\r
-          File.GetFileName(), File.GetLineNr());\r
-\r
-    // The name is the header without its leading '>': uHeaderLength - 1\r
-    // characters plus the terminator fit exactly in uHeaderLength bytes.\r
-    // NOTE(review): assumes Clear() has already released any previous name.\r
-    m_ptrName = new char[uHeaderLength];\r
-    strcpy(m_ptrName, szLine + 1);\r
-\r
-    // Pos is the file position before the line about to be read, so that\r
-    // the header of the following record can be handed back to the file.\r
-    TEXTFILEPOS Pos = File.GetPos();\r
-    for (;;)\r
-        {\r
-        if (File.GetLine(szLine, sizeof(szLine)))\r
-            {\r
-            // End-of-file closes the record being read.\r
-            if (0 == size())\r
-                {\r
-                Quit("Empty sequence in FASTA file %s line %u",\r
-                  File.GetFileName(), File.GetLineNr());\r
-                return true;\r
-                }\r
-            return false;\r
-            }\r
-        if ('>' == szLine[0])\r
-            {\r
-            if (0 == size())\r
-                Quit("Empty sequence in FASTA file %s line %u",\r
-                  File.GetFileName(), File.GetLineNr());\r
-            // Rewind to the beginning of this line, it is the start of\r
-            // the next sequence.\r
-            File.SetPos(Pos);\r
-            return false;\r
-            }\r
-        for (const char *ptrChar = szLine; *ptrChar != 0; ++ptrChar)\r
-            {\r
-            char c = *ptrChar;\r
-            // The <ctype.h> classifiers are only defined for EOF and values\r
-            // representable as unsigned char, so bytes >= 0x80 must not be\r
-            // passed as a (possibly negative) plain char.\r
-            const unsigned char uByte = static_cast<unsigned char>(c);\r
-            if (isspace(uByte))\r
-                continue;\r
-            if (IsGapChar(c))\r
-                continue;\r
-            if (!IsResidueChar(c))\r
-                {\r
-                if (isprint(uByte))\r
-                    {\r
-                    const char w = GetWildcardChar();\r
-                    Warning("Invalid residue '%c' in FASTA file %s line %d, replaced by '%c'",\r
-                      c, File.GetFileName(), File.GetLineNr(), w);\r
-                    c = w;\r
-                    }\r
-                else\r
-                    Quit("Invalid byte hex %02x in FASTA file %s line %d",\r
-                      uByte, File.GetFileName(), File.GetLineNr());\r
-                }\r
-            c = static_cast<char>(toupper(static_cast<unsigned char>(c)));\r
-            push_back(c);\r
-            }\r
-        Pos = File.GetPos();\r
-        }\r
-    }\r
-\r
-// Rebuild msa as an alignment that holds only this sequence with its gap\r
-// characters left out. msa is cleared first, the non-gap characters are\r
-// stored in row 0 at consecutive columns, and row 0 is given this\r
-// sequence's name.\r
-void Seq::ExtractUngapped(MSA &msa) const\r
-    {\r
-    msa.Clear();\r
-    const unsigned uLength = Length();\r
-    msa.SetSize(1, 1);\r
-\r
-    unsigned uOutCol = 0;\r
-    unsigned uInCol = 0;\r
-    while (uInCol < uLength)\r
-        {\r
-        const char cResidue = at(uInCol);\r
-        ++uInCol;\r
-        if (IsGapChar(cResidue))\r
-            continue;\r
-        msa.SetChar(0, uOutCol, cResidue);\r
-        ++uOutCol;\r
-        }\r
-\r
-    msa.SetSeqName(0, m_ptrName);\r
-    }\r
-\r
-// Make this sequence a copy of rhs: the same characters, a private copy of\r
-// the name, and the same id. Copying a sequence onto itself is a no-op.\r
-void Seq::Copy(const Seq &rhs)\r
-    {\r
-    // clear() below would otherwise destroy the data about to be copied.\r
-    if (this == &rhs)\r
-        return;\r
-\r
-    clear();\r
-    const unsigned uLength = rhs.Length();\r
-    for (unsigned uColIndex = 0; uColIndex < uLength; ++uColIndex)\r
-        push_back(rhs.at(uColIndex));\r
-\r
-    // Duplicate the name, then free the buffer previously owned by this\r
-    // object so that it is not leaked (same ownership rule as SetName).\r
-    const char *ptrName = rhs.GetName();\r
-    char *ptrCopy = new char[strlen(ptrName) + 1];\r
-    strcpy(ptrCopy, ptrName);\r
-    delete[] m_ptrName;\r
-    m_ptrName = ptrCopy;\r
-\r
-    SetId(rhs.GetId());\r
-    }\r
-\r
-// Make this sequence hold the characters of rhs in reverse order, together\r
-// with a private copy of rhs's name. The id is not copied.\r
-// rhs must be a different object: the contents of this sequence are\r
-// cleared before rhs is read.\r
-void Seq::CopyReversed(const Seq &rhs)\r
-    {\r
-    clear();\r
-\r
-    // Count down from the length instead of computing Length() - 1, which\r
-    // would wrap around for an empty source sequence.\r
-    for (unsigned uRemaining = rhs.Length(); uRemaining > 0; --uRemaining)\r
-        push_back(rhs.at(uRemaining - 1));\r
-\r
-    // Duplicate the name, then free the buffer previously owned by this\r
-    // object so that it is not leaked (same ownership rule as SetName).\r
-    const char *ptrName = rhs.GetName();\r
-    char *ptrCopy = new char[strlen(ptrName) + 1];\r
-    strcpy(ptrCopy, ptrName);\r
-    delete[] m_ptrName;\r
-    m_ptrName = ptrCopy;\r
-    }\r
-\r
-// Remove every gap character from the sequence, keeping the remaining\r
-// characters in their original order.\r
-void Seq::StripGaps()\r
-    {\r
-    // Compact the kept characters towards the front in one pass and trim\r
-    // the tail with a single erase. Erasing one element at a time would\r
-    // invalidate the loop iterator and shift the tail on every removal.\r
-    CharVect::iterator itWrite = begin();\r
-    for (CharVect::iterator itRead = begin(); itRead != end(); ++itRead)\r
-        {\r
-        const char c = *itRead;\r
-        if (IsGapChar(c))\r
-            continue;\r
-        *itWrite = c;\r
-        ++itWrite;\r
-        }\r
-    erase(itWrite, end());\r
-    }\r
-\r
-// Remove every gap character and every white-space character from the\r
-// sequence, keeping the remaining characters in their original order.\r
-void Seq::StripGapsAndWhitespace()\r
-    {\r
-    // Compact the kept characters towards the front in one pass and trim\r
-    // the tail with a single erase. Erasing one element at a time would\r
-    // invalidate the loop iterator and shift the tail on every removal.\r
-    CharVect::iterator itWrite = begin();\r
-    for (CharVect::iterator itRead = begin(); itRead != end(); ++itRead)\r
-        {\r
-        const char c = *itRead;\r
-        // isspace() is only defined for EOF and unsigned char values.\r
-        if (isspace(static_cast<unsigned char>(c)) || IsGapChar(c))\r
-            continue;\r
-        *itWrite = c;\r
-        ++itWrite;\r
-        }\r
-    erase(itWrite, end());\r
-    }\r
-\r
-// Convert every lower-case character of the sequence to upper case in\r
-// place; all other characters are left untouched.\r
-void Seq::ToUpper()\r
-    {\r
-    for (CharVect::iterator p = begin(); p != end(); ++p)\r
-        {\r
-        // islower()/toupper() are only defined for EOF and unsigned char\r
-        // values, so a possibly negative plain char is converted first.\r
-        const unsigned char uByte = static_cast<unsigned char>(*p);\r
-        if (islower(uByte))\r
-            *p = static_cast<char>(toupper(uByte));\r
-        }\r
-    }\r
-\r
-// Return CharToLetter() applied to the character stored at position\r
-// uIndex. uIndex must be less than Length(); this is checked by assert only.\r
-unsigned Seq::GetLetter(unsigned uIndex) const\r
-    {\r
-    assert(uIndex < Length());\r
-    const char cResidue = (*this)[uIndex];\r
-    return CharToLetter(cResidue);\r
-    }\r
-\r
-// Return true if this sequence and s have the same length and agree\r
-// position by position, where any two gap characters match each other and\r
-// other characters are compared without regard to case.\r
-bool Seq::EqIgnoreCase(const Seq &s) const\r
-    {\r
-    const unsigned n = Length();\r
-    if (n != s.Length())\r
-        return false;\r
-    for (unsigned i = 0; i < n; ++i)\r
-        {\r
-        const char c1 = at(i);\r
-        const char c2 = s.at(i);\r
-        if (IsGapChar(c1))\r
-            {\r
-            if (!IsGapChar(c2))\r
-                return false;\r
-            continue;\r
-            }\r
-        // toupper() is only defined for EOF and unsigned char values, so\r
-        // possibly negative plain chars are converted first.\r
-        const int iUpper1 = toupper(static_cast<unsigned char>(c1));\r
-        const int iUpper2 = toupper(static_cast<unsigned char>(c2));\r
-        if (iUpper1 != iUpper2)\r
-            return false;\r
-        }\r
-    return true;\r
-    }\r
-\r
-// Return true if this sequence and s have the same length and exactly the\r
-// same character at every position (case and gap characters included).\r
-bool Seq::Eq(const Seq &s) const\r
-    {\r
-    const unsigned uLength = Length();\r
-    if (uLength != s.Length())\r
-        return false;\r
-\r
-    // Advance to the first mismatch; reaching the end means equality.\r
-    unsigned uPos = 0;\r
-    while (uPos < uLength && at(uPos) == s.at(uPos))\r
-        ++uPos;\r
-    return uPos == uLength;\r
-    }\r
-\r
-// Return true if this sequence and s contain the same characters in the\r
-// same order once gap characters are removed from both, comparing without\r
-// regard to case. The two sequences may differ in length.\r
-bool Seq::EqIgnoreCaseAndGaps(const Seq &s) const\r
-    {\r
-    const unsigned uThisLength = Length();\r
-    const unsigned uOtherLength = s.Length();\r
-\r
-    unsigned uThisPos = 0;\r
-    unsigned uOtherPos = 0;\r
-\r
-    while (uThisPos != uThisLength || uOtherPos != uOtherLength)\r
-        {\r
-        // Next non-gap character of this sequence, upper-cased, or -1 if\r
-        // the end is reached first. Converting through unsigned char keeps\r
-        // toupper() well defined and its result in 0..UCHAR_MAX, so it can\r
-        // never be mistaken for the -1 sentinel.\r
-        int cThis = -1;\r
-        while (uThisPos != uThisLength)\r
-            {\r
-            const char c = at(uThisPos);\r
-            ++uThisPos;\r
-            if (!IsGapChar(c))\r
-                {\r
-                cThis = toupper(static_cast<unsigned char>(c));\r
-                break;\r
-                }\r
-            }\r
-\r
-        // Same for s.\r
-        int cOther = -1;\r
-        while (uOtherPos != uOtherLength)\r
-            {\r
-            const char c = s.at(uOtherPos);\r
-            ++uOtherPos;\r
-            if (!IsGapChar(c))\r
-                {\r
-                cOther = toupper(static_cast<unsigned char>(c));\r
-                break;\r
-                }\r
-            }\r
-\r
-        // Compare the characters at corresponding ungapped positions; one\r
-        // side ending before the other shows up as -1 against a character.\r
-        if (cThis != cOther)\r
-            return false;\r
-        }\r
-    return true;\r
-    }\r
-\r
-// Return the number of characters in the sequence that are not gap\r
-// characters.\r
-unsigned Seq::GetUngappedLength() const\r
-    {\r
-    unsigned uCount = 0;\r
-    CharVect::const_iterator it = begin();\r
-    while (it != end())\r
-        {\r
-        const char cResidue = *it;\r
-        ++it;\r
-        if (IsGapChar(cResidue))\r
-            continue;\r
-        ++uCount;\r
-        }\r
-    return uCount;\r
-    }\r
-\r
-// Write the sequence through Log() in FASTA-like form: a ">name" line\r
-// followed by all characters on a single line.\r
-void Seq::LogMe() const\r
-    {\r
-    Log(">%s\n", m_ptrName);\r
-\r
-    const unsigned uLength = Length();\r
-    unsigned uPos = 0;\r
-    while (uPos < uLength)\r
-        {\r
-        Log("%c", at(uPos));\r
-        ++uPos;\r
-        }\r
-\r
-    Log("\n");\r
-    }\r
-\r
-// Replace the contents of this sequence with the characters of the\r
-// NUL-terminated string pstrSeq (copied verbatim, no filtering or case\r
-// change) and set the name to a private copy of pstrName.\r
-void Seq::FromString(const char *pstrSeq, const char *pstrName)\r
-    {\r
-    clear();\r
-    const unsigned uLength = (unsigned) strlen(pstrSeq);\r
-    for (unsigned uColIndex = 0; uColIndex < uLength; ++uColIndex)\r
-        push_back(pstrSeq[uColIndex]);\r
-\r
-    // Duplicate the name first, then free the buffer previously owned by\r
-    // this object: nothing is leaked, and pstrName may even point at the\r
-    // current name (same ownership rule as SetName).\r
-    char *ptrCopy = new char[strlen(pstrName) + 1];\r
-    strcpy(ptrCopy, pstrName);\r
-    delete[] m_ptrName;\r
-    m_ptrName = ptrCopy;\r
-    }\r
-\r
-// Return true if the sequence contains at least one gap character.\r
-bool Seq::HasGap() const\r
-    {\r
-    CharVect::const_iterator it = begin();\r
-    while (it != end())\r
-        {\r
-        const char cResidue = *it;\r
-        if (IsGapChar(cResidue))\r
-            return true;\r
-        ++it;\r
-        }\r
-    return false;\r
-    }\r
-\r
-// Replace every character that IsResidueChar() rejects with the wildcard\r
-// character, reporting each replacement through InvalidLetterWarning().\r
-void Seq::FixAlpha()\r
-    {\r
-    CharVect::iterator it = begin();\r
-    while (it != end())\r
-        {\r
-        const char cOriginal = *it;\r
-        if (!IsResidueChar(cOriginal))\r
-            {\r
-            const char cWildcard = GetWildcardChar();\r
-            InvalidLetterWarning(cOriginal, cWildcard);\r
-            *it = cWildcard;\r
-            }\r
-        ++it;\r
-        }\r
-    }\r