+++ /dev/null
-#include "muscle.h"\r
-#include "seqvect.h"\r
-#include "textfile.h"\r
-#include "msa.h"\r
-\r
-const size_t MAX_FASTA_LINE = 16000;\r
-\r
-SeqVect::~SeqVect()\r
- {\r
- Clear();\r
- }\r
-\r
-void SeqVect::Clear()\r
- {\r
- for (size_t n = 0; n < size(); ++n)\r
- delete (*this)[n];\r
- }\r
-\r
-void SeqVect::ToFASTAFile(TextFile &File) const\r
- {\r
- unsigned uSeqCount = Length();\r
- for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)\r
- {\r
- Seq *ptrSeq = at(uSeqIndex);\r
- ptrSeq->ToFASTAFile(File);\r
- }\r
- }\r
-\r
-void SeqVect::FromFASTAFile(TextFile &File)\r
- {\r
- Clear();\r
-\r
- FILE *f = File.GetStdioFile();\r
- for (;;)\r
- {\r
- char *Label;\r
- unsigned uLength;\r
- char *SeqData = GetFastaSeq(f, &uLength, &Label);\r
- if (0 == SeqData)\r
- return;\r
- Seq *ptrSeq = new Seq;\r
-\r
- for (unsigned i = 0; i < uLength; ++i)\r
- {\r
- char c = SeqData[i];\r
- ptrSeq->push_back(c);\r
- }\r
-\r
- ptrSeq->SetName(Label);\r
- push_back(ptrSeq);\r
-\r
- delete[] SeqData;\r
- delete[] Label;\r
- }\r
- }\r
-\r
-void SeqVect::PadToMSA(MSA &msa)\r
- {\r
- unsigned uSeqCount = Length();\r
- if (0 == uSeqCount)\r
- {\r
- msa.Clear();\r
- return;\r
- }\r
-\r
- unsigned uLongestSeqLength = 0;\r
- for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)\r
- {\r
- Seq *ptrSeq = at(uSeqIndex);\r
- unsigned uColCount = ptrSeq->Length();\r
- if (uColCount > uLongestSeqLength)\r
- uLongestSeqLength = uColCount;\r
- }\r
- msa.SetSize(uSeqCount, uLongestSeqLength);\r
- for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)\r
- {\r
- Seq *ptrSeq = at(uSeqIndex);\r
- msa.SetSeqName(uSeqIndex, ptrSeq->GetName());\r
- unsigned uColCount = ptrSeq->Length();\r
- unsigned uColIndex;\r
- for (uColIndex = 0; uColIndex < uColCount; ++uColIndex)\r
- {\r
- char c = ptrSeq->at(uColIndex);\r
- msa.SetChar(uSeqIndex, uColIndex, c);\r
- }\r
- while (uColIndex < uLongestSeqLength)\r
- msa.SetChar(uSeqIndex, uColIndex++, '.');\r
- }\r
- }\r
-\r
-void SeqVect::Copy(const SeqVect &rhs)\r
- {\r
- clear();\r
- unsigned uSeqCount = rhs.Length();\r
- for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)\r
- {\r
- Seq *ptrSeq = rhs.at(uSeqIndex);\r
- Seq *ptrSeqCopy = new Seq;\r
- ptrSeqCopy->Copy(*ptrSeq);\r
- push_back(ptrSeqCopy);\r
- }\r
- }\r
-\r
-void SeqVect::StripGaps()\r
- {\r
- unsigned uSeqCount = Length();\r
- for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)\r
- {\r
- Seq *ptrSeq = at(uSeqIndex);\r
- ptrSeq->StripGaps();\r
- }\r
- }\r
-\r
-void SeqVect::StripGapsAndWhitespace()\r
- {\r
- unsigned uSeqCount = Length();\r
- for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)\r
- {\r
- Seq *ptrSeq = at(uSeqIndex);\r
- ptrSeq->StripGapsAndWhitespace();\r
- }\r
- }\r
-\r
-void SeqVect::ToUpper()\r
- {\r
- unsigned uSeqCount = Length();\r
- for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)\r
- {\r
- Seq *ptrSeq = at(uSeqIndex);\r
- ptrSeq->ToUpper();\r
- }\r
- }\r
-\r
-bool SeqVect::FindName(const char *ptrName, unsigned *ptruIndex) const\r
- {\r
- unsigned uSeqCount = Length();\r
- for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)\r
- {\r
- const Seq *ptrSeq = at(uSeqIndex);\r
- if (0 == stricmp(ptrSeq->GetName(), ptrName))\r
- {\r
- *ptruIndex = uSeqIndex;\r
- return true;\r
- }\r
- }\r
- return false;\r
- }\r
-\r
-void SeqVect::AppendSeq(const Seq &s)\r
- {\r
- Seq *ptrSeqCopy = new Seq;\r
- ptrSeqCopy->Copy(s);\r
- push_back(ptrSeqCopy);\r
- }\r
-\r
-void SeqVect::LogMe() const\r
- {\r
- unsigned uSeqCount = Length();\r
- for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)\r
- {\r
- const Seq *ptrSeq = at(uSeqIndex);\r
- ptrSeq->LogMe();\r
- }\r
- }\r
-\r
-const char *SeqVect::GetSeqName(unsigned uSeqIndex) const\r
- {\r
- assert(uSeqIndex < size());\r
- const Seq *ptrSeq = at(uSeqIndex);\r
- return ptrSeq->GetName();\r
- }\r
-\r
-unsigned SeqVect::GetSeqId(unsigned uSeqIndex) const\r
- {\r
- assert(uSeqIndex < size());\r
- const Seq *ptrSeq = at(uSeqIndex);\r
- return ptrSeq->GetId();\r
- }\r
-\r
-unsigned SeqVect::GetSeqIdFromName(const char *Name) const\r
- {\r
- const unsigned uSeqCount = GetSeqCount();\r
- for (unsigned i = 0; i < uSeqCount; ++i)\r
- {\r
- if (!strcmp(Name, GetSeqName(i)))\r
- return GetSeqId(i);\r
- }\r
- Quit("SeqVect::GetSeqIdFromName(%s): not found", Name);\r
- return 0;\r
- }\r
-\r
-Seq &SeqVect::GetSeqById(unsigned uId)\r
- {\r
- const unsigned uSeqCount = GetSeqCount();\r
- for (unsigned i = 0; i < uSeqCount; ++i)\r
- {\r
- if (GetSeqId(i) == uId)\r
- return GetSeq(i);\r
- }\r
- Quit("SeqVect::GetSeqIdByUd(%d): not found", uId);\r
- return (Seq &) *((Seq *) 0);\r
- }\r
-\r
-unsigned SeqVect::GetSeqLength(unsigned uSeqIndex) const\r
- {\r
- assert(uSeqIndex < size());\r
- const Seq *ptrSeq = at(uSeqIndex);\r
- return ptrSeq->Length();\r
- }\r
-\r
-Seq &SeqVect::GetSeq(unsigned uSeqIndex)\r
- {\r
- assert(uSeqIndex < size());\r
- return *at(uSeqIndex);\r
- }\r
-\r
-const Seq &SeqVect::GetSeq(unsigned uSeqIndex) const\r
- {\r
- assert(uSeqIndex < size());\r
- return *at(uSeqIndex);\r
- }\r
-\r
-void SeqVect::SetSeqId(unsigned uSeqIndex, unsigned uId)\r
- {\r
- assert(uSeqIndex < size());\r
- Seq *ptrSeq = at(uSeqIndex);\r
- return ptrSeq->SetId(uId);\r
- }\r
-\r
-ALPHA SeqVect::GuessAlpha() const\r
- {\r
-// If at least MIN_NUCLEO_PCT of the first CHAR_COUNT non-gap\r
-// letters belong to the nucleotide alphabet, guess nucleo.\r
-// Otherwise amino.\r
- const unsigned CHAR_COUNT = 100;\r
- const unsigned MIN_NUCLEO_PCT = 95;\r
-\r
- const unsigned uSeqCount = GetSeqCount();\r
- if (0 == uSeqCount)\r
- return ALPHA_Amino;\r
-\r
- unsigned uSeqIndex = 0;\r
- unsigned uPos = 0;\r
- unsigned uSeqLength = GetSeqLength(0);\r
- unsigned uDNACount = 0;\r
- unsigned uRNACount = 0;\r
- unsigned uTotal = 0;\r
- const Seq *ptrSeq = &GetSeq(0);\r
- for (;;)\r
- {\r
- while (uPos >= uSeqLength)\r
- {\r
- ++uSeqIndex;\r
- if (uSeqIndex >= uSeqCount)\r
- break;\r
- ptrSeq = &GetSeq(uSeqIndex);\r
- uSeqLength = ptrSeq->Length();\r
- uPos = 0;\r
- }\r
- if (uSeqIndex >= uSeqCount)\r
- break;\r
- char c = ptrSeq->at(uPos++);\r
- if (IsGapChar(c))\r
- continue;\r
- if (IsDNA(c))\r
- ++uDNACount;\r
- if (IsRNA(c))\r
- ++uRNACount;\r
- ++uTotal;\r
- if (uTotal >= CHAR_COUNT)\r
- break;\r
- }\r
- if (uTotal != 0 && ((uDNACount*100)/uTotal) >= MIN_NUCLEO_PCT)\r
- return ALPHA_DNA;\r
- if (uTotal != 0 && ((uRNACount*100)/uTotal) >= MIN_NUCLEO_PCT)\r
- return ALPHA_RNA;\r
- return ALPHA_Amino;\r
- }\r
-\r
-void SeqVect::FixAlpha()\r
- {\r
- ClearInvalidLetterWarning();\r
- unsigned uSeqCount = Length();\r
- for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)\r
- {\r
- Seq *ptrSeq = at(uSeqIndex);\r
- ptrSeq->FixAlpha();\r
- }\r
- ReportInvalidLetters();\r
- }\r