3 #include "textfile.h"
\r
7 const unsigned DEFAULT_SEQ_LENGTH = 500;
\r
9 unsigned MSA::m_uIdCount = 0;
\r
23 m_uCacheSeqCount = 0;
\r
24 m_uCacheSeqLength = 0;
\r
34 for (unsigned n = 0; n < m_uSeqCount; ++n)
\r
36 delete[] m_szSeqs[n];
\r
37 delete[] m_szNames[n];
\r
43 delete[] m_IdToSeqIndex;
\r
44 delete[] m_SeqIndexToId;
\r
57 void MSA::SetSize(unsigned uSeqCount, unsigned uColCount)
\r
61 m_uSeqCount = uSeqCount;
\r
62 m_uCacheSeqLength = uColCount;
\r
65 if (0 == uSeqCount && 0 == uColCount)
\r
68 m_szSeqs = new char *[uSeqCount];
\r
69 m_szNames = new char *[uSeqCount];
\r
70 m_Weights = new WEIGHT[uSeqCount];
\r
72 for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
\r
74 m_szSeqs[uSeqIndex] = new char[uColCount+1];
\r
75 m_szNames[uSeqIndex] = 0;
\r
77 m_Weights[uSeqIndex] = BTInsane;
\r
78 memset(m_szSeqs[uSeqIndex], '?', uColCount);
\r
80 m_szSeqs[uSeqIndex][uColCount] = 0;
\r
85 m_IdToSeqIndex = new unsigned[m_uIdCount];
\r
86 m_SeqIndexToId = new unsigned[m_uSeqCount];
\r
88 memset(m_IdToSeqIndex, 0xff, m_uIdCount*sizeof(unsigned));
\r
89 memset(m_SeqIndexToId, 0xff, m_uSeqCount*sizeof(unsigned));
\r
94 void MSA::LogMe() const
\r
96 if (0 == GetColCount())
\r
102 const unsigned uColsPerLine = 50;
\r
103 unsigned uLinesPerSeq = (GetColCount() - 1)/uColsPerLine + 1;
\r
104 for (unsigned n = 0; n < uLinesPerSeq; ++n)
\r
107 unsigned iStart = n*uColsPerLine;
\r
108 unsigned iEnd = GetColCount();
\r
109 if (iEnd - iStart + 1 > uColsPerLine)
\r
110 iEnd = iStart + uColsPerLine;
\r
112 for (i = iStart; i < iEnd; ++i)
\r
116 for (i = iStart; i + 9 < iEnd; i += 10)
\r
118 if (n == uLinesPerSeq - 1)
\r
119 Log(" %-10u", GetColCount());
\r
121 for (unsigned uSeqIndex = 0; uSeqIndex < m_uSeqCount; ++uSeqIndex)
\r
123 Log("%12.12s", m_szNames[uSeqIndex]);
\r
124 if (m_Weights[uSeqIndex] != BTInsane)
\r
125 Log(" (%5.3f)", m_Weights[uSeqIndex]);
\r
129 for (i = iStart; i < iEnd; ++i)
\r
130 Log("%c", GetChar(uSeqIndex, i));
\r
131 if (0 != m_SeqIndexToId)
\r
132 Log(" [%5u]", m_SeqIndexToId[uSeqIndex]);
\r
139 char MSA::GetChar(unsigned uSeqIndex, unsigned uIndex) const
\r
141 // TODO: Performance cost?
\r
142 if (uSeqIndex >= m_uSeqCount || uIndex >= m_uColCount)
\r
143 Quit("MSA::GetChar(%u/%u,%u/%u)",
\r
144 uSeqIndex, m_uSeqCount, uIndex, m_uColCount);
\r
146 char c = m_szSeqs[uSeqIndex][uIndex];
\r
147 // assert(IsLegalChar(c));
\r
151 unsigned MSA::GetLetter(unsigned uSeqIndex, unsigned uIndex) const
\r
153 // TODO: Performance cost?
\r
154 char c = GetChar(uSeqIndex, uIndex);
\r
155 unsigned uLetter = CharToLetter(c);
\r
159 if (uSeqIndex < m_uSeqCount && uIndex < m_uColCount)
\r
160 c = m_szSeqs[uSeqIndex][uIndex];
\r
161 Quit("MSA::GetLetter(%u/%u, %u/%u)='%c'/%u",
\r
162 uSeqIndex, m_uSeqCount, uIndex, m_uColCount, c, uLetter);
\r
167 unsigned MSA::GetLetterEx(unsigned uSeqIndex, unsigned uIndex) const
\r
169 // TODO: Performance cost?
\r
170 char c = GetChar(uSeqIndex, uIndex);
\r
171 unsigned uLetter = CharToLetterEx(c);
\r
175 void MSA::SetSeqName(unsigned uSeqIndex, const char szName[])
\r
177 if (uSeqIndex >= m_uSeqCount)
\r
178 Quit("MSA::SetSeqName(%u, %s), count=%u", uSeqIndex, m_uSeqCount);
\r
179 delete[] m_szNames[uSeqIndex];
\r
180 int n = (int) strlen(szName) + 1;
\r
181 m_szNames[uSeqIndex] = new char[n];
\r
182 memcpy(m_szNames[uSeqIndex], szName, n);
\r
185 const char *MSA::GetSeqName(unsigned uSeqIndex) const
\r
187 if (uSeqIndex >= m_uSeqCount)
\r
188 Quit("MSA::GetSeqName(%u), count=%u", uSeqIndex, m_uSeqCount);
\r
189 return m_szNames[uSeqIndex];
\r
192 bool MSA::IsGap(unsigned uSeqIndex, unsigned uIndex) const
\r
194 char c = GetChar(uSeqIndex, uIndex);
\r
195 return IsGapChar(c);
\r
198 bool MSA::IsWildcard(unsigned uSeqIndex, unsigned uIndex) const
\r
200 char c = GetChar(uSeqIndex, uIndex);
\r
201 return IsWildcardChar(c);
\r
204 void MSA::SetChar(unsigned uSeqIndex, unsigned uIndex, char c)
\r
206 if (uSeqIndex >= m_uSeqCount || uIndex > m_uCacheSeqLength)
\r
207 Quit("MSA::SetChar(%u,%u)", uSeqIndex, uIndex);
\r
209 if (uIndex == m_uCacheSeqLength)
\r
211 const unsigned uNewCacheSeqLength = m_uCacheSeqLength + DEFAULT_SEQ_LENGTH;
\r
212 for (unsigned n = 0; n < m_uSeqCount; ++n)
\r
214 char *ptrNewSeq = new char[uNewCacheSeqLength+1];
\r
215 memcpy(ptrNewSeq, m_szSeqs[n], m_uCacheSeqLength);
\r
216 memset(ptrNewSeq + m_uCacheSeqLength, '?', DEFAULT_SEQ_LENGTH);
\r
217 ptrNewSeq[uNewCacheSeqLength] = 0;
\r
218 delete[] m_szSeqs[n];
\r
219 m_szSeqs[n] = ptrNewSeq;
\r
222 m_uColCount = uIndex;
\r
223 m_uCacheSeqLength = uNewCacheSeqLength;
\r
226 if (uIndex >= m_uColCount)
\r
227 m_uColCount = uIndex + 1;
\r
228 m_szSeqs[uSeqIndex][uIndex] = c;
\r
231 void MSA::GetSeq(unsigned uSeqIndex, Seq &seq) const
\r
233 assert(uSeqIndex < m_uSeqCount);
\r
237 for (unsigned n = 0; n < m_uColCount; ++n)
\r
238 if (!IsGap(uSeqIndex, n))
\r
240 char c = GetChar(uSeqIndex, n);
\r
242 Quit("Invalid character '%c' in sequence", c);
\r
246 const char *ptrName = GetSeqName(uSeqIndex);
\r
247 seq.SetName(ptrName);
\r
250 bool MSA::HasGap() const
\r
252 for (unsigned uSeqIndex = 0; uSeqIndex < GetSeqCount(); ++uSeqIndex)
\r
253 for (unsigned n = 0; n < GetColCount(); ++n)
\r
254 if (IsGap(uSeqIndex, n))
\r
259 bool MSA::IsLegalLetter(unsigned uLetter) const
\r
261 return uLetter < 20;
\r
264 void MSA::SetSeqCount(unsigned uSeqCount)
\r
267 SetSize(uSeqCount, DEFAULT_SEQ_LENGTH);
\r
270 void MSA::CopyCol(unsigned uFromCol, unsigned uToCol)
\r
272 assert(uFromCol < GetColCount());
\r
273 assert(uToCol < GetColCount());
\r
274 if (uFromCol == uToCol)
\r
277 for (unsigned uSeqIndex = 0; uSeqIndex < GetSeqCount(); ++uSeqIndex)
\r
279 const char c = GetChar(uSeqIndex, uFromCol);
\r
280 SetChar(uSeqIndex, uToCol, c);
\r
284 void MSA::Copy(const MSA &msa)
\r
287 const unsigned uSeqCount = msa.GetSeqCount();
\r
288 const unsigned uColCount = msa.GetColCount();
\r
289 SetSize(uSeqCount, uColCount);
\r
291 for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
\r
293 SetSeqName(uSeqIndex, msa.GetSeqName(uSeqIndex));
\r
294 const unsigned uId = msa.GetSeqId(uSeqIndex);
\r
295 SetSeqId(uSeqIndex, uId);
\r
296 for (unsigned uColIndex = 0; uColIndex < uColCount; ++uColIndex)
\r
298 const char c = msa.GetChar(uSeqIndex, uColIndex);
\r
299 SetChar(uSeqIndex, uColIndex, c);
\r
304 bool MSA::IsGapColumn(unsigned uColIndex) const
\r
306 assert(GetSeqCount() > 0);
\r
307 for (unsigned uSeqIndex = 0; uSeqIndex < GetSeqCount(); ++uSeqIndex)
\r
308 if (!IsGap(uSeqIndex, uColIndex))
\r
313 bool MSA::GetSeqIndex(const char *ptrSeqName, unsigned *ptruSeqIndex) const
\r
315 for (unsigned uSeqIndex = 0; uSeqIndex < GetSeqCount(); ++uSeqIndex)
\r
316 if (0 == stricmp(ptrSeqName, GetSeqName(uSeqIndex)))
\r
318 *ptruSeqIndex = uSeqIndex;
\r
324 void MSA::DeleteCol(unsigned uColIndex)
\r
326 assert(uColIndex < m_uColCount);
\r
327 size_t n = m_uColCount - uColIndex;
\r
330 for (unsigned uSeqIndex = 0; uSeqIndex < GetSeqCount(); ++uSeqIndex)
\r
332 char *ptrSeq = m_szSeqs[uSeqIndex];
\r
333 memmove(ptrSeq + uColIndex, ptrSeq + uColIndex + 1, n);
\r
339 void MSA::DeleteColumns(unsigned uColIndex, unsigned uColCount)
\r
341 for (unsigned n = 0; n < uColCount; ++n)
\r
342 DeleteCol(uColIndex);
\r
345 void MSA::FromFile(TextFile &File)
\r
347 FromFASTAFile(File);
\r
350 // Weights sum to 1, WCounts sum to NIC
\r
351 WEIGHT MSA::GetSeqWeight(unsigned uSeqIndex) const
\r
353 assert(uSeqIndex < m_uSeqCount);
\r
354 WEIGHT w = m_Weights[uSeqIndex];
\r
356 Quit("Seq weight not set");
\r
360 void MSA::SetSeqWeight(unsigned uSeqIndex, WEIGHT w) const
\r
362 assert(uSeqIndex < m_uSeqCount);
\r
363 m_Weights[uSeqIndex] = w;
\r
366 void MSA::NormalizeWeights(WEIGHT wDesiredTotal) const
\r
369 for (unsigned uSeqIndex = 0; uSeqIndex < m_uSeqCount; ++uSeqIndex)
\r
370 wTotal += m_Weights[uSeqIndex];
\r
375 const WEIGHT f = wDesiredTotal/wTotal;
\r
376 for (unsigned uSeqIndex = 0; uSeqIndex < m_uSeqCount; ++uSeqIndex)
\r
377 m_Weights[uSeqIndex] *= f;
\r
380 void MSA::CalcWeights() const
\r
382 Quit("Calc weights not implemented");
\r
385 static void FmtChar(char c, unsigned uWidth)
\r
388 for (unsigned n = 0; n < uWidth - 1; ++n)
\r
392 static void FmtInt(unsigned u, unsigned uWidth)
\r
394 static char szStr[1024];
\r
395 assert(uWidth < sizeof(szStr));
\r
397 sprintf(szStr, "%u", u);
\r
399 strcpy(szStr, ".");
\r
401 unsigned n = (unsigned) strlen(szStr);
\r
403 for (unsigned i = 0; i < uWidth - n; ++i)
\r
407 static void FmtInt0(unsigned u, unsigned uWidth)
\r
409 static char szStr[1024];
\r
410 assert(uWidth < sizeof(szStr));
\r
411 sprintf(szStr, "%u", u);
\r
413 unsigned n = (unsigned) strlen(szStr);
\r
415 for (unsigned i = 0; i < uWidth - n; ++i)
\r
419 static void FmtPad(unsigned n)
\r
421 for (unsigned i = 0; i < n; ++i)
\r
425 void MSA::FromSeq(const Seq &s)
\r
427 unsigned uSeqLength = s.Length();
\r
428 SetSize(1, uSeqLength);
\r
429 SetSeqName(0, s.GetName());
\r
430 if (0 != m_SeqIndexToId)
\r
431 SetSeqId(0, s.GetId());
\r
432 for (unsigned n = 0; n < uSeqLength; ++n)
\r
433 SetChar(0, n, s[n]);
\r
436 unsigned MSA::GetCharCount(unsigned uSeqIndex, unsigned uColIndex) const
\r
438 assert(uSeqIndex < GetSeqCount());
\r
439 assert(uColIndex < GetColCount());
\r
442 for (unsigned n = 0; n <= uColIndex; ++n)
\r
443 if (!IsGap(uSeqIndex, n))
\r
448 void MSA::CopySeq(unsigned uToSeqIndex, const MSA &msaFrom, unsigned uFromSeqIndex)
\r
450 assert(uToSeqIndex < m_uSeqCount);
\r
451 const unsigned uColCount = msaFrom.GetColCount();
\r
452 assert(m_uColCount == uColCount ||
\r
453 (0 == m_uColCount && uColCount <= m_uCacheSeqLength));
\r
455 memcpy(m_szSeqs[uToSeqIndex], msaFrom.GetSeqBuffer(uFromSeqIndex), uColCount);
\r
456 SetSeqName(uToSeqIndex, msaFrom.GetSeqName(uFromSeqIndex));
\r
457 if (0 == m_uColCount)
\r
458 m_uColCount = uColCount;
\r
461 const char *MSA::GetSeqBuffer(unsigned uSeqIndex) const
\r
463 assert(uSeqIndex < m_uSeqCount);
\r
464 return m_szSeqs[uSeqIndex];
\r
467 void MSA::DeleteSeq(unsigned uSeqIndex)
\r
469 assert(uSeqIndex < m_uSeqCount);
\r
471 delete m_szSeqs[uSeqIndex];
\r
472 delete m_szNames[uSeqIndex];
\r
474 const unsigned uBytesToMove = (m_uSeqCount - uSeqIndex)*sizeof(char *);
\r
475 if (uBytesToMove > 0)
\r
477 memmove(m_szSeqs + uSeqIndex, m_szSeqs + uSeqIndex + 1, uBytesToMove);
\r
478 memmove(m_szNames + uSeqIndex, m_szNames + uSeqIndex + 1, uBytesToMove);
\r
483 delete[] m_Weights;
\r
487 bool MSA::IsEmptyCol(unsigned uColIndex) const
\r
489 const unsigned uSeqCount = GetSeqCount();
\r
490 for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
\r
491 if (!IsGap(uSeqIndex, uColIndex))
\r
496 //void MSA::DeleteEmptyCols(bool bProgress)
\r
498 // unsigned uColCount = GetColCount();
\r
499 // for (unsigned uColIndex = 0; uColIndex < uColCount; ++uColIndex)
\r
501 // if (IsEmptyCol(uColIndex))
\r
505 // Log("Deleting col %u of %u\n", uColIndex, uColCount);
\r
506 // printf("Deleting col %u of %u\n", uColIndex, uColCount);
\r
508 // DeleteCol(uColIndex);
\r
514 unsigned MSA::AlignedColIndexToColIndex(unsigned uAlignedColIndex) const
\r
516 Quit("MSA::AlignedColIndexToColIndex not implemented");
\r
520 WEIGHT MSA::GetTotalSeqWeight() const
\r
523 const unsigned uSeqCount = GetSeqCount();
\r
524 for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
\r
525 wTotal += m_Weights[uSeqIndex];
\r
529 bool MSA::SeqsEq(const MSA &a1, unsigned uSeqIndex1, const MSA &a2,
\r
530 unsigned uSeqIndex2)
\r
535 a1.GetSeq(uSeqIndex1, s1);
\r
536 a2.GetSeq(uSeqIndex2, s2);
\r
541 return s1.EqIgnoreCase(s2);
\r
544 unsigned MSA::GetSeqLength(unsigned uSeqIndex) const
\r
546 assert(uSeqIndex < GetSeqCount());
\r
548 const unsigned uColCount = GetColCount();
\r
549 unsigned uLength = 0;
\r
550 for (unsigned uColIndex = 0; uColIndex < uColCount; ++uColIndex)
\r
551 if (!IsGap(uSeqIndex, uColIndex))
\r
556 void MSA::GetPWID(unsigned uSeqIndex1, unsigned uSeqIndex2, double *ptrPWID,
\r
557 unsigned *ptruPosCount) const
\r
559 assert(uSeqIndex1 < GetSeqCount());
\r
560 assert(uSeqIndex2 < GetSeqCount());
\r
562 unsigned uSameCount = 0;
\r
563 unsigned uPosCount = 0;
\r
564 const unsigned uColCount = GetColCount();
\r
565 for (unsigned uColIndex = 0; uColIndex < uColCount; ++uColIndex)
\r
567 char c1 = GetChar(uSeqIndex1, uColIndex);
\r
570 char c2 = GetChar(uSeqIndex2, uColIndex);
\r
577 *ptruPosCount = uPosCount;
\r
579 *ptrPWID = 100.0 * (double) uSameCount / (double) uPosCount;
\r
584 void MSA::UnWeight()
\r
586 for (unsigned uSeqIndex = 0; uSeqIndex < GetSeqCount(); ++uSeqIndex)
\r
587 m_Weights[uSeqIndex] = BTInsane;
\r
590 unsigned MSA::UniqueResidueTypes(unsigned uColIndex) const
\r
592 assert(uColIndex < GetColCount());
\r
594 unsigned Counts[MAX_ALPHA];
\r
595 memset(Counts, 0, sizeof(Counts));
\r
596 const unsigned uSeqCount = GetSeqCount();
\r
597 for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
\r
599 if (IsGap(uSeqIndex, uColIndex) || IsWildcard(uSeqIndex, uColIndex))
\r
601 const unsigned uLetter = GetLetter(uSeqIndex, uColIndex);
\r
602 ++(Counts[uLetter]);
\r
604 unsigned uUniqueCount = 0;
\r
605 for (unsigned uLetter = 0; uLetter < g_AlphaSize; ++uLetter)
\r
606 if (Counts[uLetter] > 0)
\r
608 return uUniqueCount;
\r
611 double MSA::GetOcc(unsigned uColIndex) const
\r
613 unsigned uGapCount = 0;
\r
614 for (unsigned uSeqIndex = 0; uSeqIndex < GetSeqCount(); ++uSeqIndex)
\r
615 if (IsGap(uSeqIndex, uColIndex))
\r
617 unsigned uSeqCount = GetSeqCount();
\r
618 return (double) (uSeqCount - uGapCount) / (double) uSeqCount;
\r
621 void MSA::ToFile(TextFile &File) const
\r
630 ToPhySequentialFile(File);
\r
632 ToPhyInterleavedFile(File);
\r
635 if (0 != g_pstrScoreFileName)
\r
636 WriteScoreFile(*this);
\r
639 bool MSA::ColumnHasGap(unsigned uColIndex) const
\r
641 const unsigned uSeqCount = GetSeqCount();
\r
642 for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
\r
643 if (IsGap(uSeqIndex, uColIndex))
\r
648 void MSA::SetIdCount(unsigned uIdCount)
\r
650 //if (m_uIdCount != 0)
\r
651 // Quit("MSA::SetIdCount: may only be called once");
\r
653 if (m_uIdCount > 0)
\r
655 if (uIdCount > m_uIdCount)
\r
656 Quit("MSA::SetIdCount: cannot increase count");
\r
659 m_uIdCount = uIdCount;
\r
662 void MSA::SetSeqId(unsigned uSeqIndex, unsigned uId)
\r
664 assert(uSeqIndex < m_uSeqCount);
\r
665 assert(uId < m_uIdCount);
\r
666 if (0 == m_SeqIndexToId)
\r
668 if (0 == m_uIdCount)
\r
669 Quit("MSA::SetSeqId, SetIdCount has not been called");
\r
670 m_IdToSeqIndex = new unsigned[m_uIdCount];
\r
671 m_SeqIndexToId = new unsigned[m_uSeqCount];
\r
673 memset(m_IdToSeqIndex, 0xff, m_uIdCount*sizeof(unsigned));
\r
674 memset(m_SeqIndexToId, 0xff, m_uSeqCount*sizeof(unsigned));
\r
676 m_SeqIndexToId[uSeqIndex] = uId;
\r
677 m_IdToSeqIndex[uId] = uSeqIndex;
\r
680 unsigned MSA::GetSeqIndex(unsigned uId) const
\r
682 assert(uId < m_uIdCount);
\r
683 assert(0 != m_IdToSeqIndex);
\r
684 unsigned uSeqIndex = m_IdToSeqIndex[uId];
\r
685 assert(uSeqIndex < m_uSeqCount);
\r
689 bool MSA::GetSeqIndex(unsigned uId, unsigned *ptruIndex) const
\r
691 for (unsigned uSeqIndex = 0; uSeqIndex < m_uSeqCount; ++uSeqIndex)
\r
693 if (uId == m_SeqIndexToId[uSeqIndex])
\r
695 *ptruIndex = uSeqIndex;
\r
702 unsigned MSA::GetSeqId(unsigned uSeqIndex) const
\r
704 assert(uSeqIndex < m_uSeqCount);
\r
705 unsigned uId = m_SeqIndexToId[uSeqIndex];
\r
706 assert(uId < m_uIdCount);
\r
710 bool MSA::WeightsSet() const
\r
712 return BTInsane != m_Weights[0];
\r
715 void MSASubsetByIds(const MSA &msaIn, const unsigned Ids[], unsigned uIdCount,
\r
718 const unsigned uColCount = msaIn.GetColCount();
\r
719 msaOut.SetSize(uIdCount, uColCount);
\r
720 for (unsigned uSeqIndexOut = 0; uSeqIndexOut < uIdCount; ++uSeqIndexOut)
\r
722 const unsigned uId = Ids[uSeqIndexOut];
\r
724 const unsigned uSeqIndexIn = msaIn.GetSeqIndex(uId);
\r
725 const char *ptrName = msaIn.GetSeqName(uSeqIndexIn);
\r
727 msaOut.SetSeqId(uSeqIndexOut, uId);
\r
728 msaOut.SetSeqName(uSeqIndexOut, ptrName);
\r
730 for (unsigned uColIndex = 0; uColIndex < uColCount; ++uColIndex)
\r
732 const char c = msaIn.GetChar(uSeqIndexIn, uColIndex);
\r
733 msaOut.SetChar(uSeqIndexOut, uColIndex, c);
\r
738 // Caller must allocate ptrSeq and ptrLabel as new char[n].
\r
739 void MSA::AppendSeq(char *ptrSeq, unsigned uSeqLength, char *ptrLabel)
\r
741 if (m_uSeqCount > m_uCacheSeqCount)
\r
742 Quit("Internal error MSA::AppendSeq");
\r
743 if (m_uSeqCount == m_uCacheSeqCount)
\r
744 ExpandCache(m_uSeqCount + 4, uSeqLength);
\r
745 m_szSeqs[m_uSeqCount] = ptrSeq;
\r
746 m_szNames[m_uSeqCount] = ptrLabel;
\r
750 void MSA::ExpandCache(unsigned uSeqCount, unsigned uColCount)
\r
752 if (m_IdToSeqIndex != 0 || m_SeqIndexToId != 0 || uSeqCount < m_uSeqCount)
\r
753 Quit("Internal error MSA::ExpandCache");
\r
755 if (m_uSeqCount > 0 && uColCount != m_uColCount)
\r
756 Quit("Internal error MSA::ExpandCache, ColCount changed");
\r
758 char **NewSeqs = new char *[uSeqCount];
\r
759 char **NewNames = new char *[uSeqCount];
\r
760 WEIGHT *NewWeights = new WEIGHT[uSeqCount];
\r
762 for (unsigned uSeqIndex = 0; uSeqIndex < m_uSeqCount; ++uSeqIndex)
\r
764 NewSeqs[uSeqIndex] = m_szSeqs[uSeqIndex];
\r
765 NewNames[uSeqIndex] = m_szNames[uSeqIndex];
\r
766 NewWeights[uSeqIndex] = m_Weights[uSeqIndex];
\r
769 for (unsigned uSeqIndex = m_uSeqCount; uSeqIndex < uSeqCount; ++uSeqIndex)
\r
771 char *Seq = new char[uColCount];
\r
772 NewSeqs[uSeqIndex] = Seq;
\r
774 memset(Seq, '?', uColCount);
\r
779 delete[] m_szNames;
\r
780 delete[] m_Weights;
\r
782 m_szSeqs = NewSeqs;
\r
783 m_szNames = NewNames;
\r
784 m_Weights = NewWeights;
\r
786 m_uCacheSeqCount = uSeqCount;
\r
787 m_uCacheSeqLength = uColCount;
\r
788 m_uColCount = uColCount;
\r
791 void MSA::FixAlpha()
\r
793 ClearInvalidLetterWarning();
\r
794 for (unsigned uSeqIndex = 0; uSeqIndex < m_uSeqCount; ++uSeqIndex)
\r
796 for (unsigned uColIndex = 0; uColIndex < m_uColCount; ++uColIndex)
\r
798 char c = GetChar(uSeqIndex, uColIndex);
\r
799 if (!IsResidueChar(c) && !IsGapChar(c))
\r
801 char w = GetWildcardChar();
\r
802 // Warning("Invalid letter '%c', replaced by '%c'", c, w);
\r
803 InvalidLetterWarning(c, w);
\r
804 SetChar(uSeqIndex, uColIndex, w);
\r
808 ReportInvalidLetters();
\r
811 ALPHA MSA::GuessAlpha() const
\r
813 // If at least MIN_NUCLEO_PCT of the first CHAR_COUNT non-gap
\r
814 // letters belong to the nucleotide alphabet, guess nucleo.
\r
815 // Otherwise amino.
\r
816 const unsigned CHAR_COUNT = 100;
\r
817 const unsigned MIN_NUCLEO_PCT = 95;
\r
819 const unsigned uSeqCount = GetSeqCount();
\r
820 const unsigned uColCount = GetColCount();
\r
821 if (0 == uSeqCount)
\r
822 return ALPHA_Amino;
\r
824 unsigned uDNACount = 0;
\r
825 unsigned uRNACount = 0;
\r
826 unsigned uTotal = 0;
\r
830 unsigned uSeqIndex = i/uColCount;
\r
831 if (uSeqIndex >= uSeqCount)
\r
833 unsigned uColIndex = i%uColCount;
\r
835 char c = GetChar(uSeqIndex, uColIndex);
\r
843 if (uTotal >= CHAR_COUNT)
\r
846 if (uTotal != 0 && ((uRNACount*100)/uTotal) >= MIN_NUCLEO_PCT)
\r
848 if (uTotal != 0 && ((uDNACount*100)/uTotal) >= MIN_NUCLEO_PCT)
\r
850 return ALPHA_Amino;
\r