+++ /dev/null
-#ifndef MSA_h\r
-#define MSA_h\r
-\r
-const int MAX_SEQ_NAME = 63;\r
-struct PathEdge;\r
-class TextFile;\r
-class Seq;\r
-class ClusterNode;\r
-class NodeCounts;\r
-class DataBuffer;\r
-\r
-class MSA\r
- {\r
-public:\r
- MSA();\r
- virtual ~MSA();\r
-\r
-public:\r
-// Ways to create an MSA\r
- void FromFile(TextFile &File);\r
- void FromFASTAFile(TextFile &File);\r
- void FromSeq(const Seq &s);\r
-\r
- void ToFile(TextFile &File) const;\r
- void ToFASTAFile(TextFile &File) const;\r
- void ToMSFFile(TextFile &File, const char *ptrComment = 0) const;\r
- void ToAlnFile(TextFile &File) const;\r
- void ToHTMLFile(TextFile &File) const;\r
- void ToPhySequentialFile(TextFile &File) const;\r
- void ToPhyInterleavedFile(TextFile &File) const;\r
-\r
- void SetSize(unsigned uSeqCount, unsigned uColCount);\r
- void SetSeqCount(unsigned uSeqCount);\r
- char GetChar(unsigned uSeqIndex, unsigned uIndex) const;\r
- unsigned GetLetter(unsigned uSeqIndex, unsigned uIndex) const;\r
- unsigned GetLetterEx(unsigned uSeqIndex, unsigned uIndex) const;\r
- const char *GetSeqName(unsigned uSeqIndex) const;\r
- unsigned GetSeqId(unsigned uSeqIndex) const;\r
- unsigned GetSeqIndex(unsigned uId) const;\r
- bool GetSeqIndex(unsigned uId, unsigned *ptruIndex) const;\r
- double GetOcc(unsigned uColIndex) const;\r
- void GetFractionalWeightedCounts(unsigned uColIndex, bool bNormalize,\r
- FCOUNT fcCounts[], FCOUNT *ptrfcGapStart, FCOUNT *ptrfcGapEnd,\r
- FCOUNT *fcGapExtend, FCOUNT *ptrfOcc,\r
- FCOUNT *fcLL, FCOUNT *fcLG, FCOUNT *fcGL, FCOUNT *fcGG) const;\r
- bool IsGap(unsigned uSeqIndex, unsigned uColIndex) const;\r
- bool IsWildcard(unsigned uSeqIndex, unsigned uColIndex) const;\r
- bool IsGapColumn(unsigned uColIndex) const;\r
- bool ColumnHasGap(unsigned uColIndex) const;\r
- bool IsGapSeq(unsigned uSeqIndex) const;\r
-\r
- void SetChar(unsigned uSeqIndex, unsigned uColIndex, char c);\r
- void SetSeqName(unsigned uSeqIndex, const char szName[]);\r
- void SetSeqId(unsigned uSeqIndex, unsigned uId);\r
- bool HasGap() const;\r
- bool IsLegalLetter(unsigned uLetter) const;\r
- void GetSeq(unsigned uSeqIndex, Seq &seq) const;\r
- void Copy(const MSA &msa);\r
- double GetCons(unsigned uColIndex) const;\r
- double GetAvgCons() const;\r
- double GetPctIdentityPair(unsigned uSeqIndex1, unsigned uSeqIndex2) const;\r
- bool GetSeqIndex(const char *ptrSeqName, unsigned *ptruSeqIndex) const;\r
- void DeleteCol(unsigned uColIndex);\r
- void DeleteColumns(unsigned uColIndex, unsigned uColCount);\r
- void CopySeq(unsigned uToSeqIndex, const MSA &msaFrom, unsigned uFromSeqIndex);\r
- void DeleteSeq(unsigned uSeqIndex);\r
-// void DeleteEmptyCols(bool bProgress = false);\r
- bool IsEmptyCol(unsigned uColIndex) const;\r
-\r
- WEIGHT GetSeqWeight(unsigned uSeqIndex) const;\r
- WEIGHT GetTotalSeqWeight() const;\r
- void SetSeqWeight(unsigned uSeqIndex, WEIGHT w) const;\r
- void NormalizeWeights(WEIGHT wTotal) const;\r
- bool WeightsSet() const;\r
-\r
- unsigned GetGCGCheckSum(unsigned uSeqIndex) const;\r
-\r
- ALPHA GuessAlpha() const;\r
- void FixAlpha();\r
-\r
- unsigned UniqueResidueTypes(unsigned uColIndex) const;\r
-\r
- void UnWeight();\r
-\r
- void GetNodeCounts(unsigned uAlignedColIndex, NodeCounts &Counts) const;\r
- void ValidateBreakMatrices() const;\r
- unsigned GetCharCount(unsigned uSeqIndex, unsigned uColIndex) const;\r
- const char *GetSeqBuffer(unsigned uSeqIndex) const;\r
- unsigned AlignedColIndexToColIndex(unsigned uAlignedColIndex) const;\r
- unsigned GetSeqLength(unsigned uSeqIndex) const;\r
- void GetPWID(unsigned uSeqIndex1, unsigned uSeqIndex2, double *ptrdPWID,\r
- unsigned *ptruPosCount) const;\r
-\r
- void GetPairMap(unsigned uSeqIndex1, unsigned uSeqIndex2, int iMap1[],\r
- int iMap2[]) const;\r
-\r
- void LogMe() const;\r
- void ListWeights() const;\r
-\r
- void GapInfoToDataBuffer(DataBuffer &Buffer) const;\r
- void GapInfoFromDataBuffer(const DataBuffer &Buffer);\r
- double GetPctGroupIdentityPair(unsigned uSeqIndex1, unsigned uSeqIndex2) const;\r
-\r
- void Clear()\r
- {\r
- Free();\r
- }\r
- unsigned GetSeqCount() const\r
- {\r
- return m_uSeqCount;\r
- }\r
- unsigned GetColCount() const\r
- {\r
- return m_uColCount;\r
- }\r
-\r
- static bool SeqsEq(const MSA &a1, unsigned uSeqIndex1, const MSA &a2,\r
- unsigned uSeqIndex2);\r
-\r
- static void SetIdCount(unsigned uIdCount);\r
-\r
-private:\r
- friend void SetMSAWeightsMuscle(MSA &msa);\r
- friend void SetThreeWayWeightsMuscle(MSA &msa);\r
- void SetHenikoffWeightsPB() const;\r
- void SetHenikoffWeights() const;\r
- void SetGSCWeights() const;\r
- void SetUniformWeights() const;\r
- void SetClustalWWeights(const Tree &tree);\r
-\r
- void Free();\r
- void AppendSeq(char *ptrSeq, unsigned uSeqLength, char *ptrLabel);\r
- void ExpandCache(unsigned uSeqCount, unsigned uColCount);\r
- void CalcWeights() const;\r
- void GetNameFromFASTAAnnotationLine(const char szLine[],\r
- char szName[], unsigned uBytes);\r
- void CopyCol(unsigned uFromCol, unsigned uToCol);\r
- unsigned CalcBLOSUMWeights(ClusterTree &BlosumCluster) const;\r
- void SetBLOSUMSubtreeWeight(const ClusterNode *ptrNode, double dWeight) const;\r
- unsigned SetBLOSUMNodeWeight(const ClusterNode *ptrNode, double dMinDist) const;\r
- void SetSubtreeWeight2(const ClusterNode *ptrNode) const;\r
- void SetSubtreeGSCWeight(ClusterNode *ptrNode) const;\r
-\r
- void CalcHenikoffWeightsColPB(unsigned uColIndex) const;\r
- void CalcHenikoffWeightsCol(unsigned uColIndex) const;\r
-\r
-private:\r
- unsigned m_uSeqCount;\r
- unsigned m_uColCount;\r
- unsigned m_uCacheSeqLength;\r
- unsigned m_uCacheSeqCount;\r
- char **m_szSeqs;\r
- char **m_szNames;\r
-\r
- static unsigned m_uIdCount;\r
-\r
- unsigned *m_IdToSeqIndex;\r
- unsigned *m_SeqIndexToId;\r
-\r
- WEIGHT *m_Weights;\r
- };\r
-\r
-void SeqVectFromMSA(const MSA &msa, SeqVect &v);\r
-void DeleteGappedCols(MSA &msa);\r
-void MSAFromColRange(const MSA &msaIn, unsigned uFromColIndex, unsigned uColCount,\r
- MSA &msaOut);\r
-void MSACat(const MSA &msa1, const MSA &msa2, MSA &msaCat);\r
-void MSAAppend(MSA &msa1, const MSA &msa2);\r
-void MSAFromSeqSubset(const MSA &msaIn, const unsigned uSeqIndexes[], unsigned uSeqCount,\r
- MSA &msaOut);\r
-void AssertMSAEq(const MSA &msa1, const MSA &msa2);\r
-void AssertMSAEqIgnoreCaseAndGaps(const MSA &msa1, const MSA &msa2);\r
-void MSASubsetByIds(const MSA &msaIn, const unsigned Ids[], unsigned uIdCount,\r
- MSA &msaOut);\r
-void SetMSAWeightsMuscle(MSA &msa);\r
-void SetClustalWWeightsMuscle(MSA &msa);\r
-void SetThreeWayWeightsMuscle(MSA &msa);\r
-\r
-#endif // MSA_h\r