--- /dev/null
+#ifndef MSA_h\r
+#define MSA_h\r
+\r
+// NOTE(review): presumably the maximum length of a sequence name; confirm\r
+// against the code that sizes name buffers with it.\r
+const int MAX_SEQ_NAME = 63;\r
+\r
+// Forward declarations for the project types this header mentions only by\r
+// pointer or reference, so an incomplete type is sufficient here.\r
+// Tree, ClusterTree and SeqVect are referenced further down in this header\r
+// and are declared here as well so that those uses do not depend on another\r
+// header having been included first.\r
+// NOTE(review): FCOUNT, WEIGHT and ALPHA are used by value below and still\r
+// have to be defined before this header is included.\r
+struct PathEdge;\r
+class TextFile;\r
+class Seq;\r
+class SeqVect;\r
+class Tree;\r
+class ClusterNode;\r
+class ClusterTree;\r
+class NodeCounts;\r
+class DataBuffer;\r
+\r
+// MSA: alignment container declared in this header. Its state (see the data\r
+// members at the bottom) is a sequence count and a column count, cache\r
+// dimensions, arrays of character buffers and names, id/index lookup tables\r
+// and a weight array. Only Clear(), GetSeqCount() and GetColCount() are\r
+// defined inline; everything else is implemented elsewhere.\r
+//\r
+// Member functions are grouped by their declared names and signatures; the\r
+// grouping is a reading aid and does not describe behaviour.\r
+//\r
+// NOTE(review): the class has raw pointer members and a user-declared\r
+// destructor, but no copy constructor or copy assignment is declared here.\r
+// Confirm ownership in the implementation before copying by value; Copy()\r
+// is the explicitly declared way to duplicate an MSA.\r
+class MSA\r
+ {\r
+public:\r
+// ---- Lifetime ----\r
+ MSA();\r
+ virtual ~MSA();\r
+\r
+ // Forwards to the private Free().\r
+ void Clear() { Free(); }\r
+ void Copy(const MSA &msa);\r
+\r
+// ---- Ways to create an MSA ----\r
+ void FromFile(TextFile &File);\r
+ void FromFASTAFile(TextFile &File);\r
+ void FromSeq(const Seq &s);\r
+\r
+// ---- Writers (one per output format) ----\r
+ void ToFile(TextFile &File) const;\r
+ void ToFASTAFile(TextFile &File) const;\r
+ void ToMSFFile(TextFile &File, const char *ptrComment = 0) const;\r
+ void ToAlnFile(TextFile &File) const;\r
+ void ToHTMLFile(TextFile &File) const;\r
+ void ToPhySequentialFile(TextFile &File) const;\r
+ void ToPhyInterleavedFile(TextFile &File) const;\r
+\r
+// ---- Dimensions ----\r
+ void SetSize(unsigned uSeqCount, unsigned uColCount);\r
+ void SetSeqCount(unsigned uSeqCount);\r
+\r
+ // Returns the stored sequence count (m_uSeqCount).\r
+ unsigned GetSeqCount() const { return m_uSeqCount; }\r
+\r
+ // Returns the stored column count (m_uColCount).\r
+ unsigned GetColCount() const { return m_uColCount; }\r
+\r
+ unsigned GetSeqLength(unsigned uSeqIndex) const;\r
+\r
+// ---- Characters, letters and whole sequences ----\r
+ char GetChar(unsigned uSeqIndex, unsigned uIndex) const;\r
+ void SetChar(unsigned uSeqIndex, unsigned uColIndex, char c);\r
+ unsigned GetLetter(unsigned uSeqIndex, unsigned uIndex) const;\r
+ unsigned GetLetterEx(unsigned uSeqIndex, unsigned uIndex) const;\r
+ bool IsLegalLetter(unsigned uLetter) const;\r
+ unsigned GetCharCount(unsigned uSeqIndex, unsigned uColIndex) const;\r
+ const char *GetSeqBuffer(unsigned uSeqIndex) const;\r
+ void GetSeq(unsigned uSeqIndex, Seq &seq) const;\r
+\r
+// ---- Names, ids and index lookup ----\r
+ const char *GetSeqName(unsigned uSeqIndex) const;\r
+ void SetSeqName(unsigned uSeqIndex, const char szName[]);\r
+ unsigned GetSeqId(unsigned uSeqIndex) const;\r
+ void SetSeqId(unsigned uSeqIndex, unsigned uId);\r
+ unsigned GetSeqIndex(unsigned uId) const;\r
+ bool GetSeqIndex(unsigned uId, unsigned *ptruIndex) const;\r
+ bool GetSeqIndex(const char *ptrSeqName, unsigned *ptruSeqIndex) const;\r
+ static void SetIdCount(unsigned uIdCount);\r
+\r
+// ---- Gap / wildcard / empty predicates ----\r
+ bool IsGap(unsigned uSeqIndex, unsigned uColIndex) const;\r
+ bool IsWildcard(unsigned uSeqIndex, unsigned uColIndex) const;\r
+ bool IsGapColumn(unsigned uColIndex) const;\r
+ bool ColumnHasGap(unsigned uColIndex) const;\r
+ bool IsGapSeq(unsigned uSeqIndex) const;\r
+ bool HasGap() const;\r
+ bool IsEmptyCol(unsigned uColIndex) const;\r
+\r
+// ---- Per-column queries ----\r
+ double GetOcc(unsigned uColIndex) const;\r
+ void GetFractionalWeightedCounts(\r
+   unsigned uColIndex,\r
+   bool bNormalize,\r
+   FCOUNT fcCounts[],\r
+   FCOUNT *ptrfcGapStart,\r
+   FCOUNT *ptrfcGapEnd,\r
+   FCOUNT *fcGapExtend,\r
+   FCOUNT *ptrfOcc,\r
+   FCOUNT *fcLL,\r
+   FCOUNT *fcLG,\r
+   FCOUNT *fcGL,\r
+   FCOUNT *fcGG) const;\r
+ double GetCons(unsigned uColIndex) const;\r
+ double GetAvgCons() const;\r
+ unsigned UniqueResidueTypes(unsigned uColIndex) const;\r
+ void GetNodeCounts(unsigned uAlignedColIndex, NodeCounts &Counts) const;\r
+ unsigned AlignedColIndexToColIndex(unsigned uAlignedColIndex) const;\r
+\r
+// ---- Pairwise queries ----\r
+ double GetPctIdentityPair(unsigned uSeqIndex1, unsigned uSeqIndex2) const;\r
+ double GetPctGroupIdentityPair(unsigned uSeqIndex1, unsigned uSeqIndex2) const;\r
+ void GetPWID(\r
+   unsigned uSeqIndex1,\r
+   unsigned uSeqIndex2,\r
+   double *ptrdPWID,\r
+   unsigned *ptruPosCount) const;\r
+ void GetPairMap(\r
+   unsigned uSeqIndex1,\r
+   unsigned uSeqIndex2,\r
+   int iMap1[],\r
+   int iMap2[]) const;\r
+ static bool SeqsEq(\r
+   const MSA &a1,\r
+   unsigned uSeqIndex1,\r
+   const MSA &a2,\r
+   unsigned uSeqIndex2);\r
+\r
+// ---- Editing columns and sequences ----\r
+ void DeleteCol(unsigned uColIndex);\r
+ void DeleteColumns(unsigned uColIndex, unsigned uColCount);\r
+ void CopySeq(unsigned uToSeqIndex, const MSA &msaFrom, unsigned uFromSeqIndex);\r
+ void DeleteSeq(unsigned uSeqIndex);\r
+// void DeleteEmptyCols(bool bProgress = false);\r
+\r
+// ---- Sequence weights ----\r
+// NOTE(review): SetSeqWeight and NormalizeWeights are declared const even\r
+// though their names suggest mutation; presumably they write through the\r
+// m_Weights pointer. Confirm in the implementation.\r
+ WEIGHT GetSeqWeight(unsigned uSeqIndex) const;\r
+ WEIGHT GetTotalSeqWeight() const;\r
+ void SetSeqWeight(unsigned uSeqIndex, WEIGHT w) const;\r
+ void NormalizeWeights(WEIGHT wTotal) const;\r
+ bool WeightsSet() const;\r
+ void UnWeight();\r
+ void ListWeights() const;\r
+\r
+// ---- Alphabet ----\r
+ ALPHA GuessAlpha() const;\r
+ void FixAlpha();\r
+\r
+// ---- Checksums, validation, logging, gap-info serialisation ----\r
+ unsigned GetGCGCheckSum(unsigned uSeqIndex) const;\r
+ void ValidateBreakMatrices() const;\r
+ void LogMe() const;\r
+ void GapInfoToDataBuffer(DataBuffer &Buffer) const;\r
+ void GapInfoFromDataBuffer(const DataBuffer &Buffer);\r
+\r
+private:\r
+// Free functions granted access to private members.\r
+ friend void SetMSAWeightsMuscle(MSA &msa);\r
+ friend void SetThreeWayWeightsMuscle(MSA &msa);\r
+\r
+// Weighting-scheme helpers.\r
+ void SetHenikoffWeightsPB() const;\r
+ void SetHenikoffWeights() const;\r
+ void SetGSCWeights() const;\r
+ void SetUniformWeights() const;\r
+ void SetClustalWWeights(const Tree &tree);\r
+ void CalcWeights() const;\r
+ void CalcHenikoffWeightsColPB(unsigned uColIndex) const;\r
+ void CalcHenikoffWeightsCol(unsigned uColIndex) const;\r
+ unsigned CalcBLOSUMWeights(ClusterTree &BlosumCluster) const;\r
+ void SetBLOSUMSubtreeWeight(const ClusterNode *ptrNode, double dWeight) const;\r
+ unsigned SetBLOSUMNodeWeight(const ClusterNode *ptrNode, double dMinDist) const;\r
+ void SetSubtreeWeight2(const ClusterNode *ptrNode) const;\r
+ void SetSubtreeGSCWeight(ClusterNode *ptrNode) const;\r
+\r
+// Storage management and parsing helpers.\r
+ void Free();\r
+ void AppendSeq(char *ptrSeq, unsigned uSeqLength, char *ptrLabel);\r
+ void ExpandCache(unsigned uSeqCount, unsigned uColCount);\r
+ void CopyCol(unsigned uFromCol, unsigned uToCol);\r
+ void GetNameFromFASTAAnnotationLine(\r
+   const char szLine[],\r
+   char szName[],\r
+   unsigned uBytes);\r
+\r
+// Data members (declaration order kept as in the original).\r
+ unsigned m_uSeqCount;        // returned by GetSeqCount()\r
+ unsigned m_uColCount;        // returned by GetColCount()\r
+ unsigned m_uCacheSeqLength;\r
+ unsigned m_uCacheSeqCount;\r
+ char **m_szSeqs;\r
+ char **m_szNames;\r
+\r
+ static unsigned m_uIdCount;\r
+\r
+ unsigned *m_IdToSeqIndex;\r
+ unsigned *m_SeqIndexToId;\r
+\r
+ WEIGHT *m_Weights;\r
+ };\r
+\r
+// Free functions operating on MSA objects; all are defined elsewhere.\r
+\r
+// Produce one object from another (last parameter is the output).\r
+void SeqVectFromMSA(const MSA &msa, SeqVect &v);\r
+void MSAFromColRange(\r
+   const MSA &msaIn,\r
+   unsigned uFromColIndex,\r
+   unsigned uColCount,\r
+   MSA &msaOut);\r
+void MSAFromSeqSubset(\r
+   const MSA &msaIn,\r
+   const unsigned uSeqIndexes[],\r
+   unsigned uSeqCount,\r
+   MSA &msaOut);\r
+void MSASubsetByIds(\r
+   const MSA &msaIn,\r
+   const unsigned Ids[],\r
+   unsigned uIdCount,\r
+   MSA &msaOut);\r
+void MSACat(const MSA &msa1, const MSA &msa2, MSA &msaCat);\r
+\r
+// Take a non-const MSA reference.\r
+void MSAAppend(MSA &msa1, const MSA &msa2);\r
+void DeleteGappedCols(MSA &msa);\r
+\r
+// Comparison checks between two alignments.\r
+void AssertMSAEq(const MSA &msa1, const MSA &msa2);\r
+void AssertMSAEqIgnoreCaseAndGaps(const MSA &msa1, const MSA &msa2);\r
+\r
+// Weight setters. SetMSAWeightsMuscle and SetThreeWayWeightsMuscle are also\r
+// declared as friends of class MSA.\r
+void SetMSAWeightsMuscle(MSA &msa);\r
+void SetClustalWWeightsMuscle(MSA &msa);\r
+void SetThreeWayWeightsMuscle(MSA &msa);\r
+\r
+#endif // MSA_h\r