Mac binaries
[jabaws.git] / website / archive / binaries / mac / src / muscle / seq.cpp
diff --git a/website/archive/binaries/mac/src/muscle/seq.cpp b/website/archive/binaries/mac/src/muscle/seq.cpp
new file mode 100644 (file)
index 0000000..e682758
--- /dev/null
@@ -0,0 +1,342 @@
+#include "muscle.h"\r
+#include "seq.h"\r
+#include "textfile.h"\r
+#include "msa.h"\r
+//#include <ctype.h>\r
+\r
// Capacity, in bytes, of the local buffer that Seq::FromFASTAFile reads each
// line of a FASTA file into.
const size_t MAX_FASTA_LINE = 16000;
+\r
+void Seq::SetName(const char *ptrName)\r
+       {\r
+       delete[] m_ptrName;\r
+       size_t n = strlen(ptrName) + 1;\r
+       m_ptrName = new char[n];\r
+       strcpy(m_ptrName, ptrName);\r
+       }\r
+\r
+void Seq::ToFASTAFile(TextFile &File) const\r
+       {\r
+       File.PutFormat(">%s\n", m_ptrName);\r
+       unsigned uColCount = Length();\r
+       for (unsigned n = 0; n < uColCount; ++n)\r
+               {\r
+               if (n > 0 && n%60 == 0)\r
+                       File.PutString("\n");\r
+               File.PutChar(at(n));\r
+               }\r
+       File.PutString("\n");\r
+       }\r
+\r
+// Return true on end-of-file\r
+bool Seq::FromFASTAFile(TextFile &File)\r
+       {\r
+       Clear();\r
+\r
+       char szLine[MAX_FASTA_LINE];\r
+       bool bEof = File.GetLine(szLine, sizeof(szLine));\r
+       if (bEof)\r
+               return true;\r
+       if ('>' != szLine[0])\r
+               Quit("Expecting '>' in FASTA file %s line %u",\r
+                 File.GetFileName(), File.GetLineNr());\r
+\r
+       size_t n = strlen(szLine);\r
+       if (1 == n)\r
+               Quit("Missing annotation following '>' in FASTA file %s line %u",\r
+                 File.GetFileName(), File.GetLineNr());\r
+\r
+       m_ptrName = new char[n];\r
+       strcpy(m_ptrName, szLine + 1);\r
+\r
+       TEXTFILEPOS Pos = File.GetPos();\r
+       for (;;)\r
+               {\r
+               bEof = File.GetLine(szLine, sizeof(szLine));\r
+               if (bEof)\r
+                       {\r
+                       if (0 == size())\r
+                               {\r
+                               Quit("Empty sequence in FASTA file %s line %u",\r
+                                 File.GetFileName(), File.GetLineNr());\r
+                               return true;\r
+                               }\r
+                       return false;\r
+                       }\r
+               if ('>' == szLine[0])\r
+                       {\r
+                       if (0 == size())\r
+                               Quit("Empty sequence in FASTA file %s line %u",\r
+                                 File.GetFileName(), File.GetLineNr());\r
+               // Rewind to beginning of this line, it's the start of the\r
+               // next sequence.\r
+                       File.SetPos(Pos);\r
+                       return false;\r
+                       }\r
+               const char *ptrChar = szLine;\r
+               while (char c = *ptrChar++)\r
+                       {\r
+                       if (isspace(c))\r
+                               continue;\r
+                       if (IsGapChar(c))\r
+                               continue;\r
+                       if (!IsResidueChar(c))\r
+                               {\r
+                               if (isprint(c))\r
+                                       {\r
+                                       char w = GetWildcardChar();\r
+                                       Warning("Invalid residue '%c' in FASTA file %s line %d, replaced by '%c'",\r
+                                         c, File.GetFileName(), File.GetLineNr(), w);\r
+                                       c = w;\r
+                                       }\r
+                               else\r
+                                       Quit("Invalid byte hex %02x in FASTA file %s line %d",\r
+                                         (unsigned char) c, File.GetFileName(), File.GetLineNr());\r
+                               }\r
+                       c = toupper(c);\r
+                       push_back(c);\r
+                       }\r
+               Pos = File.GetPos();\r
+               }\r
+       }\r
+\r
+void Seq::ExtractUngapped(MSA &msa) const\r
+       {\r
+       msa.Clear();\r
+       unsigned uColCount = Length();\r
+       msa.SetSize(1, 1);\r
+       unsigned uUngappedPos = 0;\r
+       for (unsigned n = 0; n < uColCount; ++n)\r
+               {\r
+               char c = at(n);\r
+               if (!IsGapChar(c))\r
+                       msa.SetChar(0, uUngappedPos++, c);\r
+               }\r
+       msa.SetSeqName(0, m_ptrName);\r
+       }\r
+\r
+void Seq::Copy(const Seq &rhs)\r
+       {\r
+       clear();\r
+       const unsigned uLength = rhs.Length();\r
+       for (unsigned uColIndex = 0; uColIndex < uLength; ++uColIndex)\r
+               push_back(rhs.at(uColIndex));\r
+       const char *ptrName = rhs.GetName();\r
+       size_t n = strlen(ptrName) + 1;\r
+       m_ptrName = new char[n];\r
+       strcpy(m_ptrName, ptrName);\r
+       SetId(rhs.GetId());\r
+       }\r
+\r
+void Seq::CopyReversed(const Seq &rhs)\r
+       {\r
+       clear();\r
+       const unsigned uLength = rhs.Length();\r
+       const unsigned uBase = rhs.Length() - 1;\r
+       for (unsigned uColIndex = 0; uColIndex < uLength; ++uColIndex)\r
+               push_back(rhs.at(uBase - uColIndex));\r
+       const char *ptrName = rhs.GetName();\r
+       size_t n = strlen(ptrName) + 1;\r
+       m_ptrName = new char[n];\r
+       strcpy(m_ptrName, ptrName);\r
+       }\r
+\r
+void Seq::StripGaps()\r
+       {\r
+       for (CharVect::iterator p = begin(); p != end(); )\r
+               {\r
+               char c = *p;\r
+               if (IsGapChar(c))\r
+                       erase(p);\r
+               else\r
+                       ++p;\r
+               }\r
+       }\r
+\r
+void Seq::StripGapsAndWhitespace()\r
+       {\r
+       for (CharVect::iterator p = begin(); p != end(); )\r
+               {\r
+               char c = *p;\r
+               if (isspace(c) || IsGapChar(c))\r
+                       erase(p);\r
+               else\r
+                       ++p;\r
+               }\r
+       }\r
+\r
+void Seq::ToUpper()\r
+       {\r
+       for (CharVect::iterator p = begin(); p != end(); ++p)\r
+               {\r
+               char c = *p;\r
+               if (islower(c))\r
+                       *p = toupper(c);\r
+               }\r
+       }\r
+\r
+unsigned Seq::GetLetter(unsigned uIndex) const\r
+       {\r
+       assert(uIndex < Length());\r
+       char c = operator[](uIndex);\r
+       return CharToLetter(c);\r
+       }\r
+\r
+bool Seq::EqIgnoreCase(const Seq &s) const\r
+       {\r
+       const unsigned n = Length();\r
+       if (n != s.Length())\r
+               return false;\r
+       for (unsigned i = 0; i < n; ++i)\r
+               {\r
+               const char c1 = at(i);\r
+               const char c2 = s.at(i);\r
+               if (IsGapChar(c1))\r
+                       {\r
+                       if (!IsGapChar(c2))\r
+                               return false;\r
+                       }\r
+               else\r
+                       {\r
+                       if (toupper(c1) != toupper(c2))\r
+                               return false;\r
+                       }\r
+               }\r
+       return true;\r
+       }\r
+\r
+bool Seq::Eq(const Seq &s) const\r
+       {\r
+       const unsigned n = Length();\r
+       if (n != s.Length())\r
+               return false;\r
+       for (unsigned i = 0; i < n; ++i)\r
+               {\r
+               const char c1 = at(i);\r
+               const char c2 = s.at(i);\r
+               if (c1 != c2)\r
+                       return false;\r
+               }\r
+       return true;\r
+       }\r
+\r
+bool Seq::EqIgnoreCaseAndGaps(const Seq &s) const\r
+       {\r
+       const unsigned uThisLength = Length();\r
+       const unsigned uOtherLength = s.Length();\r
+       \r
+       unsigned uThisPos = 0;\r
+       unsigned uOtherPos = 0;\r
+\r
+       int cThis;\r
+       int cOther;\r
+       for (;;)\r
+               {\r
+               if (uThisPos == uThisLength && uOtherPos == uOtherLength)\r
+                       break;\r
+\r
+       // Set cThis to next non-gap character in this string\r
+       // or -1 if end-of-string.\r
+               for (;;)\r
+                       {\r
+                       if (uThisPos == uThisLength)\r
+                               {\r
+                               cThis = -1;\r
+                               break;\r
+                               }\r
+                       else\r
+                               {\r
+                               cThis = at(uThisPos);\r
+                               ++uThisPos;\r
+                               if (!IsGapChar(cThis))\r
+                                       {\r
+                                       cThis = toupper(cThis);\r
+                                       break;\r
+                                       }\r
+                               }\r
+                       }\r
+\r
+       // Set cOther to next non-gap character in s\r
+       // or -1 if end-of-string.\r
+               for (;;)\r
+                       {\r
+                       if (uOtherPos == uOtherLength)\r
+                               {\r
+                               cOther = -1;\r
+                               break;\r
+                               }\r
+                       else\r
+                               {\r
+                               cOther = s.at(uOtherPos);\r
+                               ++uOtherPos;\r
+                               if (!IsGapChar(cOther))\r
+                                       {\r
+                                       cOther = toupper(cOther);\r
+                                       break;\r
+                                       }\r
+                               }\r
+                       }\r
+\r
+       // Compare characters are corresponding ungapped position\r
+               if (cThis != cOther)\r
+                       return false;\r
+               }\r
+       return true;\r
+       }\r
+\r
+unsigned Seq::GetUngappedLength() const\r
+       {\r
+       unsigned uUngappedLength = 0;\r
+       for (CharVect::const_iterator p = begin(); p != end(); ++p)\r
+               {\r
+               char c = *p;\r
+               if (!IsGapChar(c))\r
+                       ++uUngappedLength;\r
+               }\r
+       return uUngappedLength;\r
+       }\r
+\r
+void Seq::LogMe() const\r
+       {\r
+       Log(">%s\n", m_ptrName);\r
+       const unsigned n = Length();\r
+       for (unsigned i = 0; i < n; ++i)\r
+               Log("%c", at(i));\r
+       Log("\n");\r
+       }\r
+\r
+void Seq::FromString(const char *pstrSeq, const char *pstrName)\r
+       {\r
+       clear();\r
+       const unsigned uLength = (unsigned) strlen(pstrSeq);\r
+       for (unsigned uColIndex = 0; uColIndex < uLength; ++uColIndex)\r
+               push_back(pstrSeq[uColIndex]);\r
+       size_t n = strlen(pstrName) + 1;\r
+       m_ptrName = new char[n];\r
+       strcpy(m_ptrName, pstrName);\r
+       }\r
+\r
+bool Seq::HasGap() const\r
+       {\r
+       for (CharVect::const_iterator p = begin(); p != end(); ++p)\r
+               {\r
+               char c = *p;\r
+               if (IsGapChar(c))\r
+                       return true;\r
+               }\r
+       return false;\r
+       }\r
+\r
+void Seq::FixAlpha()\r
+       {\r
+       for (CharVect::iterator p = begin(); p != end(); ++p)\r
+               {\r
+               char c = *p;\r
+               if (!IsResidueChar(c))\r
+                       {\r
+                       char w = GetWildcardChar();\r
+                       // Warning("Invalid residue '%c', replaced by '%c'", c, w);\r
+                       InvalidLetterWarning(c, w);\r
+                       *p = w;\r
+                       }\r
+               }\r
+       }\r