// Integer constant 63; presumably the maximum length of a sequence name.
// NOTE(review): whether this counts a terminating NUL is not visible in this
// chunk - confirm at the use sites.
4 const int MAX_SEQ_NAME = 63;
\r
// Loaders. FromFile and FromFASTAFile take the input TextFile by non-const
// reference; FromSeq takes a single Seq by const reference.
// NOTE(review): how FromFile differs from FromFASTAFile is not visible from
// the declarations - confirm in the implementation.
19 // Ways to create an MSA
\r
20 void FromFile(TextFile &File);
\r
21 void FromFASTAFile(TextFile &File);
\r
22 void FromSeq(const Seq &s);
\r
// Writers, one per output format named in the identifier (FASTA, MSF, Aln,
// HTML, and two "Phy" layouts - presumably PHYLIP sequential/interleaved).
// All are const-qualified and take the destination TextFile by non-const
// reference. ToMSFFile also accepts an optional comment string whose default
// argument is 0 (a null pointer).
24 void ToFile(TextFile &File) const;
\r
25 void ToFASTAFile(TextFile &File) const;
\r
26 void ToMSFFile(TextFile &File, const char *ptrComment = 0) const;
\r
27 void ToAlnFile(TextFile &File) const;
\r
28 void ToHTMLFile(TextFile &File) const;
\r
29 void ToPhySequentialFile(TextFile &File) const;
\r
30 void ToPhyInterleavedFile(TextFile &File) const;
\r
// Sizing calls (non-const). SetSize takes both a sequence count and a column
// count; SetSeqCount takes only a sequence count.
// NOTE(review): whether these allocate, preserve existing contents, or allow
// shrinking cannot be determined from the declarations - confirm.
32 void SetSize(unsigned uSeqCount, unsigned uColCount);
\r
33 void SetSeqCount(unsigned uSeqCount);
\r
// Const-qualified queries. Cell-level calls are addressed by a sequence index
// plus a position/column index; column-level calls by a column index alone.
//
// GetChar returns a char, while GetLetter and GetLetterEx return unsigned -
// presumably an alphabet code; what the "Ex" variant adds is not visible here.
//
// GetSeqIndex is overloaded on uId: one form returns the index directly, the
// other returns bool and has an unsigned* parameter (ptruIndex), presumably
// used to report the index - confirm failure behavior of each form.
//
// GetFractionalWeightedCounts returns void; its array and non-const pointer
// parameters are presumably outputs. Note the parameter declarations span
// four listing lines.
34 char GetChar(unsigned uSeqIndex, unsigned uIndex) const;
\r
35 unsigned GetLetter(unsigned uSeqIndex, unsigned uIndex) const;
\r
36 unsigned GetLetterEx(unsigned uSeqIndex, unsigned uIndex) const;
\r
37 const char *GetSeqName(unsigned uSeqIndex) const;
\r
38 unsigned GetSeqId(unsigned uSeqIndex) const;
\r
39 unsigned GetSeqIndex(unsigned uId) const;
\r
40 bool GetSeqIndex(unsigned uId, unsigned *ptruIndex) const;
\r
41 double GetOcc(unsigned uColIndex) const;
\r
42 void GetFractionalWeightedCounts(unsigned uColIndex, bool bNormalize,
\r
43 FCOUNT fcCounts[], FCOUNT *ptrfcGapStart, FCOUNT *ptrfcGapEnd,
\r
44 FCOUNT *fcGapExtend, FCOUNT *ptrfOcc,
\r
45 FCOUNT *fcLL, FCOUNT *fcLG, FCOUNT *fcGL, FCOUNT *fcGG) const;
\r
46 bool IsGap(unsigned uSeqIndex, unsigned uColIndex) const;
\r
47 bool IsWildcard(unsigned uSeqIndex, unsigned uColIndex) const;
\r
48 bool IsGapColumn(unsigned uColIndex) const;
\r
49 bool ColumnHasGap(unsigned uColIndex) const;
\r
50 bool IsGapSeq(unsigned uSeqIndex) const;
\r
// Mutators, whole-alignment statistics and structural edits.
//
// Non-const members in this run (may modify the object): SetChar, SetSeqName,
// SetSeqId, Copy, DeleteCol, DeleteColumns, CopySeq, DeleteSeq.
// Const-qualified members in this run (queries): HasGap, IsLegalLetter,
// GetSeq, GetCons, GetAvgCons, GetPctIdentityPair, GetSeqIndex, IsEmptyCol.
//
// GetSeq has a non-const Seq& parameter, presumably the output.
// GetSeqIndex(const char *, unsigned *) is a further overload of GetSeqIndex,
// keyed by sequence name instead of id.
// NOTE(review): DeleteEmptyCols is commented out below; either restore it or
// remove the dead declaration once its status is confirmed.
52 void SetChar(unsigned uSeqIndex, unsigned uColIndex, char c);
\r
53 void SetSeqName(unsigned uSeqIndex, const char szName[]);
\r
54 void SetSeqId(unsigned uSeqIndex, unsigned uId);
\r
55 bool HasGap() const;
\r
56 bool IsLegalLetter(unsigned uLetter) const;
\r
57 void GetSeq(unsigned uSeqIndex, Seq &seq) const;
\r
58 void Copy(const MSA &msa);
\r
59 double GetCons(unsigned uColIndex) const;
\r
60 double GetAvgCons() const;
\r
61 double GetPctIdentityPair(unsigned uSeqIndex1, unsigned uSeqIndex2) const;
\r
62 bool GetSeqIndex(const char *ptrSeqName, unsigned *ptruSeqIndex) const;
\r
63 void DeleteCol(unsigned uColIndex);
\r
64 void DeleteColumns(unsigned uColIndex, unsigned uColCount);
\r
65 void CopySeq(unsigned uToSeqIndex, const MSA &msaFrom, unsigned uFromSeqIndex);
\r
66 void DeleteSeq(unsigned uSeqIndex);
\r
67 // void DeleteEmptyCols(bool bProgress = false);
\r
68 bool IsEmptyCol(unsigned uColIndex) const;
\r
// Per-sequence weight interface; values use the project type WEIGHT.
// NOTE(review): SetSeqWeight and NormalizeWeights are const-qualified even
// though their names indicate mutation. Presumably the weight storage is
// declared mutable or lives behind a pointer - confirm, since callers holding
// a const reference can still change weights through these.
70 WEIGHT GetSeqWeight(unsigned uSeqIndex) const;
\r
71 WEIGHT GetTotalSeqWeight() const;
\r
72 void SetSeqWeight(unsigned uSeqIndex, WEIGHT w) const;
\r
73 void NormalizeWeights(WEIGHT wTotal) const;
\r
74 bool WeightsSet() const;
\r
// Miscellaneous const-qualified queries.
// GetNodeCounts has a non-const NodeCounts& parameter, presumably the output.
// GetPWID returns void and has two non-const pointer parameters (ptrdPWID,
// ptruPosCount), presumably outputs; its declaration spans two listing lines.
// NOTE(review): the distinction between an "aligned column index" and a
// "column index" (see AlignedColIndexToColIndex) is not defined in this
// chunk - confirm before mixing the two in calls.
76 unsigned GetGCGCheckSum(unsigned uSeqIndex) const;
\r
78 ALPHA GuessAlpha() const;
\r
81 unsigned UniqueResidueTypes(unsigned uColIndex) const;
\r
85 void GetNodeCounts(unsigned uAlignedColIndex, NodeCounts &Counts) const;
\r
86 void ValidateBreakMatrices() const;
\r
87 unsigned GetCharCount(unsigned uSeqIndex, unsigned uColIndex) const;
\r
88 const char *GetSeqBuffer(unsigned uSeqIndex) const;
\r
89 unsigned AlignedColIndexToColIndex(unsigned uAlignedColIndex) const;
\r
90 unsigned GetSeqLength(unsigned uSeqIndex) const;
\r
91 void GetPWID(unsigned uSeqIndex1, unsigned uSeqIndex2, double *ptrdPWID,
\r
92 unsigned *ptruPosCount) const;
\r
// NOTE(review): the GetPairMap declaration below ends with a trailing comma
// and its continuation is absent from this listing (the embedded numbers jump
// from 94 to 98). The remaining parameters and qualifiers are unknown here;
// recover them from the original header before relying on this signature.
94 void GetPairMap(unsigned uSeqIndex1, unsigned uSeqIndex2, int iMap1[],
\r
98 void ListWeights() const;
\r
// GapInfoToDataBuffer is const and takes a non-const DataBuffer&;
// GapInfoFromDataBuffer is non-const and takes a const DataBuffer& -
// i.e. the first presumably writes to the buffer, the second reads from it.
100 void GapInfoToDataBuffer(DataBuffer &Buffer) const;
\r
101 void GapInfoFromDataBuffer(const DataBuffer &Buffer);
\r
102 double GetPctGroupIdentityPair(unsigned uSeqIndex1, unsigned uSeqIndex2) const;
\r
// Inline const accessors: GetSeqCount returns m_uSeqCount and GetColCount
// returns m_uColCount.
// NOTE(review): the braces around each body are missing from this listing
// (embedded numbers 109, 111, 113, 115 are skipped); only the signature and
// the return statement of each function are visible.
108 unsigned GetSeqCount() const
\r
110 return m_uSeqCount;
\r
112 unsigned GetColCount() const
\r
114 return m_uColCount;
\r
// Static members (no object required to call them).
// SeqsEq takes two (MSA, sequence index) pairs and returns bool; its
// declaration spans two listing lines.
// NOTE(review): the exact equality criterion (case, gaps) is not visible here.
117 static bool SeqsEq(const MSA &a1, unsigned uSeqIndex1, const MSA &a2,
\r
118 unsigned uSeqIndex2);
\r
// Takes an id count; presumably sets the static m_uIdCount declared further
// down in this file - confirm in the implementation.
120 static void SetIdCount(unsigned uIdCount);
\r
// Two free functions are granted friend access; both take the MSA by
// non-const reference.
123 friend void SetMSAWeightsMuscle(MSA &msa);
\r
124 friend void SetThreeWayWeightsMuscle(MSA &msa);
\r
// Weight-scheme setters. All are const-qualified except SetClustalWWeights,
// which is non-const and takes a Tree by const reference.
// NOTE(review): const-qualified "Set..." members imply the weights are stored
// in mutable state or behind a pointer - confirm.
125 void SetHenikoffWeightsPB() const;
\r
126 void SetHenikoffWeights() const;
\r
127 void SetGSCWeights() const;
\r
128 void SetUniformWeights() const;
\r
129 void SetClustalWWeights(const Tree &tree);
\r
// Helper members. NOTE(review): the embedded listing numbers skip 130-131
// just before this run, so an access specifier may have been dropped by the
// extraction - the visibility of these members is not established here.
//
// Non-const: AppendSeq, ExpandCache, GetNameFromFASTAAnnotationLine, CopyCol.
// Const-qualified: the Calc*/SetBLOSUM*/SetSubtree* weight helpers.
//
// AppendSeq takes its sequence and label as non-const char pointers.
// NOTE(review): whether it copies them or takes ownership is not visible.
// GetNameFromFASTAAnnotationLine takes an input line, a destination array and
// a byte count (uBytes), presumably the capacity of szName; its declaration
// spans two listing lines.
// SetSubtreeGSCWeight takes a non-const ClusterNode*, unlike the other
// ClusterNode helpers, which take const ClusterNode*.
132 void AppendSeq(char *ptrSeq, unsigned uSeqLength, char *ptrLabel);
\r
133 void ExpandCache(unsigned uSeqCount, unsigned uColCount);
\r
134 void CalcWeights() const;
\r
135 void GetNameFromFASTAAnnotationLine(const char szLine[],
\r
136 char szName[], unsigned uBytes);
\r
137 void CopyCol(unsigned uFromCol, unsigned uToCol);
\r
138 unsigned CalcBLOSUMWeights(ClusterTree &BlosumCluster) const;
\r
139 void SetBLOSUMSubtreeWeight(const ClusterNode *ptrNode, double dWeight) const;
\r
140 unsigned SetBLOSUMNodeWeight(const ClusterNode *ptrNode, double dMinDist) const;
\r
141 void SetSubtreeWeight2(const ClusterNode *ptrNode) const;
\r
142 void SetSubtreeGSCWeight(ClusterNode *ptrNode) const;
\r
144 void CalcHenikoffWeightsColPB(unsigned uColIndex) const;
\r
145 void CalcHenikoffWeightsCol(unsigned uColIndex) const;
\r
// Data members.
// m_uSeqCount and m_uColCount are the values returned by GetSeqCount() and
// GetColCount() respectively.
// m_uCacheSeqLength / m_uCacheSeqCount: presumably the allocated capacity
// managed by ExpandCache - confirm in the implementation.
// NOTE(review): the embedded listing numbers skip 152-154, 156 and 159-162,
// so further members may exist in the original header that are not shown.
148 unsigned m_uSeqCount;
\r
149 unsigned m_uColCount;
\r
150 unsigned m_uCacheSeqLength;
\r
151 unsigned m_uCacheSeqCount;
\r
// Static: shared by all instances rather than stored per object.
155 static unsigned m_uIdCount;
\r
// Pointers to unsigned; by their names, presumably arrays mapping id -> sequence
// index and sequence index -> id. Ownership and length are not visible here.
157 unsigned *m_IdToSeqIndex;
\r
158 unsigned *m_SeqIndexToId;
\r
// Free-function declarations operating on MSA objects. Parameters taken by
// const reference are inputs; non-const MSA& / SeqVect& parameters may be
// modified by the callee.
// NOTE(review): three declarations below (MSAFromColRange, MSAFromSeqSubset,
// MSASubsetByIds) end with a trailing comma and their continuation lines are
// absent from this listing (embedded numbers 166, 170 and 174 are skipped).
// Their remaining parameters are unknown here; recover them from the original
// header before use.
163 void SeqVectFromMSA(const MSA &msa, SeqVect &v);
\r
164 void DeleteGappedCols(MSA &msa);
\r
165 void MSAFromColRange(const MSA &msaIn, unsigned uFromColIndex, unsigned uColCount,
\r
167 void MSACat(const MSA &msa1, const MSA &msa2, MSA &msaCat);
\r
168 void MSAAppend(MSA &msa1, const MSA &msa2);
\r
169 void MSAFromSeqSubset(const MSA &msaIn, const unsigned uSeqIndexes[], unsigned uSeqCount,
\r
171 void AssertMSAEq(const MSA &msa1, const MSA &msa2);
\r
172 void AssertMSAEqIgnoreCaseAndGaps(const MSA &msa1, const MSA &msa2);
\r
173 void MSASubsetByIds(const MSA &msaIn, const unsigned Ids[], unsigned uIdCount,
\r
// SetMSAWeightsMuscle and SetThreeWayWeightsMuscle are also named in friend
// declarations earlier in this file; SetClustalWWeightsMuscle is not among
// the friend declarations visible in this chunk.
175 void SetMSAWeightsMuscle(MSA &msa);
\r
176 void SetClustalWWeightsMuscle(MSA &msa);
\r
177 void SetThreeWayWeightsMuscle(MSA &msa);
\r