3 #include "textfile.h"
\r
// Size, in chars, of the stack buffer that Seq::FromFASTAFile declares for each
// line it reads from a FASTA file.
7 const size_t MAX_FASTA_LINE = 16000;
\r
// Store a private heap copy of ptrName as this sequence's name.
// ptrName must be a NUL-terminated string (it is measured with strlen).
// NOTE(review): the lines between the signature and the allocation are not
// visible in this extract; confirm that the previous m_ptrName buffer is
// released before it is overwritten below.
9 void Seq::SetName(const char *ptrName)
\r
// +1 leaves room for the terminating NUL written by strcpy.
12 size_t n = strlen(ptrName) + 1;
\r
13 m_ptrName = new char[n];
\r
14 strcpy(m_ptrName, ptrName);
\r
17 void Seq::ToFASTAFile(TextFile &File) const
\r
19 File.PutFormat(">%s\n", m_ptrName);
\r
20 unsigned uColCount = Length();
\r
21 for (unsigned n = 0; n < uColCount; ++n)
\r
23 if (n > 0 && n%60 == 0)
\r
24 File.PutString("\n");
\r
25 File.PutChar(at(n));
\r
27 File.PutString("\n");
\r
// Read one FASTA record from File into this sequence.
// Visible steps: read the header line into a MAX_FASTA_LINE-char stack buffer,
// require it to start with '>', store the text after '>' as the name, then
// read further lines and check each character with IsResidueChar. Malformed
// input is reported through Quit; an invalid residue produces a Warning that
// names the wildcard obtained from GetWildcardChar.
// NOTE(review): several lines of this function (braces, conditions, returns)
// are not visible in this extract, so the control flow connecting the
// statements below is unconfirmed.
30 // Return true on end-of-file
\r
31 bool Seq::FromFASTAFile(TextFile &File)
\r
35 char szLine[MAX_FASTA_LINE];
\r
36 bool bEof = File.GetLine(szLine, sizeof(szLine));
\r
// The record header must begin with '>'.
39 if ('>' != szLine[0])
\r
40 Quit("Expecting '>' in FASTA file %s line %u",
\r
41 File.GetFileName(), File.GetLineNr());
\r
43 size_t n = strlen(szLine);
\r
// NOTE(review): the condition guarding this Quit is not visible here.
45 Quit("Missing annotation following '>' in FASTA file %s line %u",
\r
46 File.GetFileName(), File.GetLineNr());
\r
// szLine + 1 skips the '>' and holds n - 1 characters, so n chars are
// exactly enough for the name plus its terminating NUL.
48 m_ptrName = new char[n];
\r
49 strcpy(m_ptrName, szLine + 1);
\r
// Remember the current file position (presumably for the rewind mentioned
// further down -- the rewinding call itself is not visible here).
51 TEXTFILEPOS Pos = File.GetPos();
\r
54 bEof = File.GetLine(szLine, sizeof(szLine));
\r
// NOTE(review): the condition guarding this Quit is not visible here.
59 Quit("Empty sequence in FASTA file %s line %u",
\r
60 File.GetFileName(), File.GetLineNr());
\r
// A line starting with '>' is another record header.
65 if ('>' == szLine[0])
\r
68 Quit("Empty sequence in FASTA file %s line %u",
\r
69 File.GetFileName(), File.GetLineNr());
\r
70 // Rewind to beginning of this line, it's the start of the
\r
// Walk the line one character at a time; the loop stops at the NUL.
75 const char *ptrChar = szLine;
\r
76 while (char c = *ptrChar++)
\r
82 if (!IsResidueChar(c))
\r
86 char w = GetWildcardChar();
\r
// NOTE(review): this message and the one below format GetLineNr() with %d,
// while the other messages in this function use %u.
87 Warning("Invalid residue '%c' in FASTA file %s line %d, replaced by '%c'",
\r
88 c, File.GetFileName(), File.GetLineNr(), w);
\r
// The cast to unsigned char keeps %02x from printing a sign-extended value.
92 Quit("Invalid byte hex %02x in FASTA file %s line %d",
\r
93 (unsigned char) c, File.GetFileName(), File.GetLineNr());
\r
// Update the remembered position after the line has been processed.
98 Pos = File.GetPos();
\r
// Copy characters of this sequence into row 0 of msa, writing them at
// consecutive column positions (uUngappedPos), then give row 0 this
// sequence's name.
// NOTE(review): the statements that define c and decide which characters are
// written are not visible in this extract; going by the function name they
// presumably skip gap characters -- confirm.
102 void Seq::ExtractUngapped(MSA &msa) const
\r
105 unsigned uColCount = Length();
\r
// Next free column in the destination row.
107 unsigned uUngappedPos = 0;
\r
108 for (unsigned n = 0; n < uColCount; ++n)
\r
112 msa.SetChar(0, uUngappedPos++, c);
\r
114 msa.SetSeqName(0, m_ptrName);
\r
117 void Seq::Copy(const Seq &rhs)
\r
120 const unsigned uLength = rhs.Length();
\r
121 for (unsigned uColIndex = 0; uColIndex < uLength; ++uColIndex)
\r
122 push_back(rhs.at(uColIndex));
\r
123 const char *ptrName = rhs.GetName();
\r
124 size_t n = strlen(ptrName) + 1;
\r
125 m_ptrName = new char[n];
\r
126 strcpy(m_ptrName, ptrName);
\r
127 SetId(rhs.GetId());
\r
130 void Seq::CopyReversed(const Seq &rhs)
\r
133 const unsigned uLength = rhs.Length();
\r
134 const unsigned uBase = rhs.Length() - 1;
\r
135 for (unsigned uColIndex = 0; uColIndex < uLength; ++uColIndex)
\r
136 push_back(rhs.at(uBase - uColIndex));
\r
137 const char *ptrName = rhs.GetName();
\r
138 size_t n = strlen(ptrName) + 1;
\r
139 m_ptrName = new char[n];
\r
140 strcpy(m_ptrName, ptrName);
\r
// Iterate over the characters of this sequence with a mutable iterator.
// The for-statement has no increment expression, so the iterator has to be
// advanced or reassigned inside the loop body.
// NOTE(review): the loop body is not visible in this extract; going by the
// function name it presumably removes gap characters -- confirm.
143 void Seq::StripGaps()
\r
145 for (CharVect::iterator p = begin(); p != end(); )
\r
// Iterate over the characters of this sequence and single out those that are
// whitespace (isspace) or gap characters (IsGapChar).
// The for-statement has no increment expression, so the iterator has to be
// advanced or reassigned inside the loop body.
// NOTE(review): the definition of c and both branches of the test are not
// visible in this extract; going by the function name the matching
// characters are presumably erased -- confirm.
155 void Seq::StripGapsAndWhitespace()
\r
157 for (CharVect::iterator p = begin(); p != end(); )
\r
160 if (isspace(c) || IsGapChar(c))
\r
// Visit every character of this sequence through a mutable iterator.
// NOTE(review): the loop body is not visible in this extract; going by the
// function name it presumably upper-cases each character in place -- confirm.
167 void Seq::ToUpper()
\r
169 for (CharVect::iterator p = begin(); p != end(); ++p)
\r
177 unsigned Seq::GetLetter(unsigned uIndex) const
\r
179 assert(uIndex < Length());
\r
180 char c = operator[](uIndex);
\r
181 return CharToLetter(c);
\r
// Compare this sequence with s position by position, treating letters that
// differ only in case as equal (toupper is applied to both sides).
// The lengths are compared first; gap characters in s get separate handling.
// NOTE(review): the bodies of the branches and the return statements are not
// visible in this extract, so the exact results for a length mismatch and for
// gap positions are unconfirmed.
184 bool Seq::EqIgnoreCase(const Seq &s) const
\r
186 const unsigned n = Length();
\r
187 if (n != s.Length())
\r
189 for (unsigned i = 0; i < n; ++i)
\r
191 const char c1 = at(i);
\r
192 const char c2 = s.at(i);
\r
195 if (!IsGapChar(c2))
\r
200 if (toupper(c1) != toupper(c2))
\r
// Compare this sequence with s: the lengths are compared first, then the
// characters at each index are fetched from both sequences.
// NOTE(review): the character comparison and the return statements are not
// visible in this extract; presumably this is an exact, case-sensitive
// equality test -- confirm.
207 bool Seq::Eq(const Seq &s) const
\r
209 const unsigned n = Length();
\r
210 if (n != s.Length())
\r
212 for (unsigned i = 0; i < n; ++i)
\r
214 const char c1 = at(i);
\r
215 const char c2 = s.at(i);
\r
// Compare this sequence with s while skipping gap characters on both sides
// and ignoring letter case. Each sequence has its own cursor (uThisPos /
// uOtherPos); the next non-gap character is taken from each side,
// upper-cased, and the two are compared.
// NOTE(review): the enclosing loops, the declarations of cThis / cOther and
// the return statements are not visible in this extract, so the exact
// results are unconfirmed.
222 bool Seq::EqIgnoreCaseAndGaps(const Seq &s) const
\r
224 const unsigned uThisLength = Length();
\r
225 const unsigned uOtherLength = s.Length();
\r
227 unsigned uThisPos = 0;
\r
228 unsigned uOtherPos = 0;
\r
// Both cursors have reached the end of their sequences.
234 if (uThisPos == uThisLength && uOtherPos == uOtherLength)
\r
237 // Set cThis to next non-gap character in this string
\r
238 // or -1 if end-of-string.
\r
241 if (uThisPos == uThisLength)
\r
248 cThis = at(uThisPos);
\r
250 if (!IsGapChar(cThis))
\r
252 cThis = toupper(cThis);
\r
258 // Set cOther to next non-gap character in s
\r
259 // or -1 if end-of-string.
\r
262 if (uOtherPos == uOtherLength)
\r
269 cOther = s.at(uOtherPos);
\r
271 if (!IsGapChar(cOther))
\r
273 cOther = toupper(cOther);
\r
279 // Compare characters at corresponding ungapped position
\r
280 if (cThis != cOther)
\r
// Walk over every character of this sequence and return the counter
// uUngappedLength, which starts at zero.
// NOTE(review): the loop body is not visible in this extract; going by the
// names the counter is presumably incremented for each non-gap character --
// confirm.
286 unsigned Seq::GetUngappedLength() const
\r
288 unsigned uUngappedLength = 0;
\r
289 for (CharVect::const_iterator p = begin(); p != end(); ++p)
\r
295 return uUngappedLength;
\r
// Write this sequence to the log: first a ">name" header line, then a loop
// over all Length() positions.
// NOTE(review): the loop body is not visible in this extract; it presumably
// logs each character -- confirm.
298 void Seq::LogMe() const
\r
300 Log(">%s\n", m_ptrName);
\r
301 const unsigned n = Length();
\r
302 for (unsigned i = 0; i < n; ++i)
\r
307 void Seq::FromString(const char *pstrSeq, const char *pstrName)
\r
310 const unsigned uLength = (unsigned) strlen(pstrSeq);
\r
311 for (unsigned uColIndex = 0; uColIndex < uLength; ++uColIndex)
\r
312 push_back(pstrSeq[uColIndex]);
\r
313 size_t n = strlen(pstrName) + 1;
\r
314 m_ptrName = new char[n];
\r
315 strcpy(m_ptrName, pstrName);
\r
// Scan the characters of this sequence with a const iterator.
// NOTE(review): the loop body and the return statements are not visible in
// this extract; going by the function name it presumably returns true as
// soon as a gap character is found and false otherwise -- confirm.
318 bool Seq::HasGap() const
\r
320 for (CharVect::const_iterator p = begin(); p != end(); ++p)
\r
// Check every character of this sequence with IsResidueChar. For a character
// that fails the check, fetch the wildcard from GetWildcardChar and report
// the pair through InvalidLetterWarning.
// NOTE(review): the definition of c and the statement that stores the
// wildcard back into the sequence are not visible in this extract; the
// replacement itself is presumed from the function name -- confirm.
329 void Seq::FixAlpha()
\r
331 for (CharVect::iterator p = begin(); p != end(); ++p)
\r
334 if (!IsResidueChar(c))
\r
336 char w = GetWildcardChar();
\r
// The direct Warning call is kept commented out; InvalidLetterWarning is
// called instead.
337 // Warning("Invalid residue '%c', replaced by '%c'", c, w);
\r
338 InvalidLetterWarning(c, w);
\r