#include "Globaldp.hpp" namespace MXSCARNA { double Globaldp::ribosum_matrix[16][16] = { { -2.49, -7.04, -8.24, -4.32, -8.84, -14.37, -4.68, -12.64, -6.86, -5.03, -8.39, -5.84, -4.01, -11.32, -6.16, -9.05 }, { -7.04, -2.11, -8.89, -2.04, -9.37, -9.08, -5.86, -10.45, -9.73, -3.81, -11.05, -4.72, -5.33, -8.67, -6.93, -7.83 }, { -8.24, -8.89, -0.80, -5.13, -10.41, -14.53, -4.57, -10.14, -8.61, -5.77, -5.38, -6.60, -5.43, -8.87, -5.94, -11.07 }, { -4.32, -2.04, -5.13, 4.49, -5.56, -6.71, 1.67, -5.17, -5.33, 2.70, -5.61, 0.59, 1.61, -4.81, -0.51, -2.98 }, { -8.84, -9.37, -10.41, -5.56, -5.13, -10.45, -3.57, -8.49, -7.98, -5.95, -11.36, -7.93, -2.42, -7.08, -5.63, -8.39 }, { -14.37, -9.08, -14.53, -6.71, -10.45, -3.59, -5.71, -5.77, -12.43, -3.70, -12.58, -7.88, -6.88, -7.40, -8.41, -5.41 }, { -4.68, -5.86, -4.57, 1.67, -3.57, -5.71, 5.36, -4.96, -6.00, 2.11, -4.66, -0.27, 2.75, -4.91, 1.32, -3.67 }, { -12.64, -10.45, -10.14, -5.17, -8.49, -5.77, -4.96, -2.28, -7.71, -5.84, -13.69, -5.61, -4.72, -3.83, -7.36, -5.21 }, { -6.86, -9.73, -8.61, -5.33, -7.98, -12.43, -6.00, -7.71, -1.05, -4.88, -8.67, -6.10, -5.85, -6.63, -7.55, -11.54 }, { -5.03, -3.81, -5.77, 2.70, -5.95, -3.70, 2.11, -5.84, -4.88, 5.62, -4.13, 1.21, 1.60, -4.49, -0.08, -3.90 }, { -8.39, -11.05, -5.38, -5.61, -11.36, -12.58, -4.66, -13.69, -8.67, -4.13, -1.98, -5.77, -5.75, -12.01, -4.27, -10.79 }, { -5.84, -4.72, -6.60, 0.59, -7.93, -7.88, -0.27, -5.61, -6.10, 1.21, -5.77, 3.47, -0.57, -5.30, -2.09, -4.45 }, { -4.01, -5.33, -5.43, 1.61, -2.42, -6.88, 2.75, -4.72, -5.85, 1.60, -5.75, -0.57, 4.97, -2.98, 1.14, -3.39 }, { -11.32, -8.67, -8.87, -4.81, -7.08, -7.40, -4.91, -3.83, -6.63, -4.49, -12.01, -5.30, -2.98, -3.21, -4.76, -5.97 }, { -6.16, -6.93, -5.94, -0.51, -5.63, -8.41, 1.32, -7.36, -7.55, -0.08, -4.27, -2.09, 1.14, -4.76, 3.36, -4.28 }, { -9.05, -7.83, -11.07, -2.98, -8.39, -5.41, -3.67, -5.21, -11.54, -3.90, -10.79, -4.45, -3.39, -5.97, -4.28, -0.02 } }; Trimat* Globaldp:: makeProfileBPPMatrix(const MultiSequence *Sequences) { int length = Sequences->GetSequence(0)->GetLength(); float thr = THR; Trimat *consBppMat = new Trimat(length + 1); fill(consBppMat->begin(), consBppMat->end(), 0); int number = Sequences->GetNumSequences(); for(int seqNum = 0; seqNum < number; seqNum++) { SafeVector *tmpMap = Sequences->GetSequence(seqNum)->GetMappingNumber(); int label = Sequences->GetSequence(seqNum)->GetLabel(); BPPMatrix *tmpBppMatrix = BPPMatrices[label]; for(int i = 1; i <= length ; i++) { int originI = tmpMap->at(i); for(int j = i; j <= length; j++) { int originJ = tmpMap->at(j); if(originI != 0 && originJ != 0) { float tmpProb = tmpBppMatrix->GetProb(originI, originJ); if(tmpProb >= thr) { consBppMat->ref(i, j) += tmpProb; } } } } } /* compute the mean of base pairing probability */ for(int i = 1; i <= length; i++) { for(int j = i; j <= length; j++) { consBppMat->ref(i,j) = consBppMat->ref(i,j)/(float)number; } } return consBppMat; } float Globaldp:: incrementalScorePSCPair(const StemCandidate &sc1, const StemCandidate &sc2) { int wordLength = WORDLENGTH; int pos1, rvpos1; if (sc1.GetDistance() > 0) { pos1 = sc1.GetPosition(); rvpos1 = sc1.GetRvposition(); } else { pos1 = sc1.GetRvposition(); rvpos1 = sc1.GetPosition(); } int pos2, rvpos2; if (sc2.GetDistance() > 0) { pos2 = sc2.GetPosition(); rvpos2 = sc2.GetRvposition(); } else { pos2 = sc2.GetRvposition(); rvpos2 = sc2.GetPosition(); } /* cout << "1:" << i << " " << j << " " << sc1.GetDistance() << " " << sc2.GetDistance() << endl; cout << sc1.GetPosition() << " " << sc1.GetRvposition() << " " << sc2.GetPosition() << " " << sc2.GetRvposition() << endl; */ float score = posterior[sc1.GetPosition() + wordLength - 1][sc2.GetPosition() + wordLength - 1] // * sc1.GetBaseScore(wordLength - 1) * profileBPPMat1->ref(pos1 + wordLength - 1, rvpos1) * posterior[sc1.GetRvposition()][sc2.GetRvposition()] // * sc2.GetBaseScore(wordLength - 1); * profileBPPMat2->ref(pos2 + wordLength - 1, rvpos2); /* incrementalWordSimilarity(sc1, sc2, i, j) + incrementalProbabilityScore(sc1, sc2) * multiDeltaScore + incrementalStackingScore(sc1, sc2) * multiDeltaStacking; */ return score*sc1.GetNumSeq()*sc2.GetNumSeq(); } float Globaldp:: incrementalProbabilityScore(const StemCandidate &sc1, const StemCandidate &sc2) { int wordLength = WORDLENGTH; return sc1.GetBaseScore(wordLength-1) + sc2.GetBaseScore(wordLength-1); } float Globaldp:: incrementalStackingScore(const StemCandidate &sc1, const StemCandidate &sc2) { return - (sc1.GetStacking() + sc2.GetStacking()); } float Globaldp:: incrementalWordSimilarity(const StemCandidate &sc1, const StemCandidate &sc2, int i, int j) { int numSeq1 = sc1.GetNumSeq(); int numSeq2 = sc2.GetNumSeq(); float score = 0; for(int k = 0; k < 16; k++) { for(int l = 0; l < 16; l++) { score += countContConp1[k][i]*countContConp2[l][j]*ribosum_matrix[k][l]; } } return score/(numSeq1*numSeq2); } float Globaldp:: scorePSCPair(const StemCandidate &sc1, const StemCandidate &sc2) { int wordLength = WORDLENGTH; float score = 0; int pos1, rvpos1; if (sc1.GetDistance() > 0) { pos1 = sc1.GetPosition(); rvpos1 = sc1.GetRvposition(); } else { pos1 = sc1.GetRvposition(); rvpos1 = sc1.GetPosition(); } int pos2, rvpos2; if (sc2.GetDistance() > 0) { pos2 = sc2.GetPosition(); rvpos2 = sc2.GetRvposition(); } else { pos2 = sc2.GetRvposition(); rvpos2 = sc2.GetPosition(); } for (int k = 0; k < wordLength; k++) { score += posterior[sc1.GetPosition() + k][sc2.GetPosition() + k] * profileBPPMat1->ref(pos1 + k, rvpos1 + wordLength - 1 - k) // * sc1.GetBaseScore(k) * posterior[sc1.GetRvposition() + wordLength - 1 - k][sc2.GetRvposition() + wordLength - 1 - k] // * sc2.GetBaseScore(k); * profileBPPMat2->ref(pos2 + k, rvpos2 + wordLength - 1 - k); } // validation code /* if (profileBPPMat1->ref(pos1 , rvpos1 + wordLength - 1) != sc1.GetBaseScore(0)) { cout << "1 " << profileBPPMat1->ref(pos1, rvpos1 + wordLength - 1) << " " << sc1.GetBaseScore(0) << endl; } if (profileBPPMat2->ref(pos2, rvpos2 + wordLength - 1) != sc2.GetBaseScore(0)) { cout << profileBPPMat2->ref(pos2, rvpos2 + wordLength - 1) << " " << sc2.GetBaseScore(0) << endl; } if (profileBPPMat1->ref(pos1 + 1, rvpos1 + wordLength - 1 - 1) != sc1.GetBaseScore(1)) { cout << "1 " << profileBPPMat1->ref(pos1 + 1, rvpos1 + wordLength - 1 - 1) << " " << sc1.GetBaseScore(1) << endl; } if (profileBPPMat2->ref(pos2 + 1, rvpos2 + wordLength - 1 - 1) != sc2.GetBaseScore(1)) { cout << profileBPPMat2->ref(pos2 + 1, rvpos2 + wordLength - 1 - 1) << " " << sc2.GetBaseScore(1) << endl; }*/ // cout << sc1.GetPosition() << " " << sc1.GetRvposition() << " " << sc1.GetDistance() << endl; // cout << sc2.GetPosition() << " " << sc2.GetRvposition() << " " << sc2.GetDistance() << endl; /* wordSimilarity(sc1, sc2, i, j) + probabilityScore(sc1, sc2) * multiScore + stackingScore(sc1, sc2) * multiStacking */ /* if (sc1.GetDistance() < 0 && sc2.GetDistance() < 0) { cout << "2:" << i << " " << j << " " << sc1.GetDistance() << " " << sc2.GetDistance() << endl; cout << sc1.GetPosition() << " " << sc1.GetRvposition() << " " << sc2.GetPosition() << " " << sc2.GetRvposition() << endl; cout << "2:score" << score << endl; } */ return score*sc1.GetNumSeq()*sc2.GetNumSeq(); } float Globaldp:: wordSimilarity(const StemCandidate &sc1, const StemCandidate &sc2, int i, int j) { int wordLength = WORDLENGTH; int numSeq1 = sc1.GetNumSeq(); int numSeq2 = sc2.GetNumSeq(); float score = 0; for(int k = 0; k < 16; k++) { for(int l = 0; l < 16; l++) { for(int iter = 0; iter < wordLength; iter++) { score += countConp1[iter][k][i]*countConp2[iter][l][j]*ribosum_matrix[k][l]; } } } return score/(numSeq1*numSeq2); } int Globaldp:: returnBasePairType(char s, char r) { if (s == 'A') { if (r == 'A') return 0; else if (r == 'C') return 1; else if (r == 'G') return 2; else if (r == 'U') return 3; } else if (s == 'C') { if (r == 'A') return 4; else if (r == 'C') return 5; else if (r == 'G') return 6; else if (r == 'U') return 7; } else if (s == 'G') { if (r == 'A') return 8; else if (r == 'C') return 9; else if (r == 'G') return 10; else if (r == 'U') return 11; } else if (s == 'U') { if (r == 'A') return 12; else if (r == 'C') return 13; else if (r == 'G') return 14; else if (r == 'U') return 15; } return 16; } float Globaldp:: probabilityScore(const StemCandidate &sc1, const StemCandidate &sc2) { return sc1.GetScore() + sc2.GetScore(); } float Globaldp:: stackingScore(const StemCandidate &sc1, const StemCandidate &sc2) { return - (sc1.GetStemStacking() + sc2.GetStemStacking()); } float Globaldp:: distancePenalty(const StemCandidate &sc1, const StemCandidate &sc2) { return std::sqrt((float)std::abs(sc1.GetDistance() - sc2.GetDistance())); } float Globaldp:: Run() { Initialize(); DP(); float score = traceBack(); // printMatch(); //cout << "score=" << score << endl; return score; } void Globaldp:: Initialize() { int nPscs1 = pscs1->size(); int nPscs2 = pscs2->size(); for(int i = 0; i < nPscs1; i++) { for(int j = 0; j < nPscs2; j++) { VM[i][j] = 0; VG[i][j] = 0; } } VM[0][0] = 0; VG[0][0] = IMPOSSIBLE; for (int i = 1; i < nPscs1; i++) { VM[i][0] = IMPOSSIBLE; VG[i][0] = 0; } for (int j = 1; j < nPscs2; j++) { VM[0][j] = IMPOSSIBLE; VG[0][j] = 0; } for (int i = 0; i < nPscs1; i++) { for (int j = 0; j < nPscs2; j++) { traceMI[i][j] = 0; traceMJ[i][j] = 0; traceGI[i][j] = 0; traceGJ[i][j] = 0; } } int wordLength = WORDLENGTH; int size1 = pscs1->size(); int size2 = pscs2->size(); for(int i = 0; i < wordLength; i++) { for(int j = 0; j < 17; j++) { for(int k = 0; k < (int)pscs1->size(); k++) { countConp1[i][j][k] = 0; } } } for(int i = 0; i < wordLength; i++) { for(int j = 0; j < 17; j++) { for(int k = 0; k < (int)pscs2->size(); k++) { countConp2[i][j][k] = 0; } } } for(int i = 0; i < 17; i++) { for(int j = 0; j < (int)pscs1->size()+1; j++) { countContConp1[i][j] = 0; } } for(int i = 0; i < 17; i++) { for(int j = 0; j < (int)pscs2->size()+1; j++) { countContConp2[i][j] = 0; } } for(int iter = 1; iter < size1; iter++) { const StemCandidate &sc1 = pscs1->at(iter); int numSeq1 = sc1.GetNumSeq(); for (int i = 0; i < wordLength; i++) { for(int k = 0; k < numSeq1; k++) { // const Sequence *seq = seqs1->GetSequence(k); string substr = sc1.GetSubstr(k); string rvstr = sc1.GetRvstr(k); char sChar = substr[i]; char rChar = rvstr[wordLength - 1 -i]; int type = returnBasePairType(sChar, rChar); ++countConp1[i][type][iter]; } } for(int k = 0; k < numSeq1; k++) { // const Sequence *seq = seqs1->GetSequence(k); string substr = sc1.GetSubstr(k); string rvstr = sc1.GetRvstr(k); char sChar = substr[wordLength-1]; char rChar = rvstr[0]; int type = returnBasePairType(sChar, rChar); ++countContConp1[type][iter]; } } for(int iter = 1; iter < size2; iter++) { const StemCandidate &sc2 = pscs2->at(iter); int numSeq2 = sc2.GetNumSeq(); for (int i = 0; i < wordLength; i++) { for(int k = 0; k < numSeq2; k++) { // const Sequence *seq = seqs2->GetSequence(k); string substr = sc2.GetSubstr(k); string rvstr = sc2.GetRvstr(k); char sChar = substr[i]; char rChar = rvstr[wordLength - 1 -i]; int type = returnBasePairType(sChar, rChar); ++countConp2[i][type][iter]; } } for(int k = 0; k < numSeq2; k++) { // const Sequence *seq = seqs2->GetSequence(k); string substr = sc2.GetSubstr(k); string rvstr = sc2.GetRvstr(k); char sChar = substr[wordLength-1]; char rChar = rvstr[0]; int type = returnBasePairType(sChar, rChar); ++countContConp2[type][iter]; } } profileBPPMat1 = makeProfileBPPMatrix(seqs1); profileBPPMat2 = makeProfileBPPMatrix(seqs2); } void Globaldp:: DP() { int nPscs1 = pscs1->size(); int nPscs2 = pscs2->size(); for(int i = 1; i < nPscs1; i++) { const StemCandidate &sc1 = pscs1->at(i); for(int j = 1; j < nPscs2; j++) { const StemCandidate &sc2 = pscs2->at(j); float max = IMPOSSIBLE; int traceI = 0; int traceJ = 0; int alpha = sc1.GetContPos(), beta = sc2.GetContPos(); if( (alpha > 0) && (beta > 0) ) { max = VM[alpha][beta] + incrementalScorePSCPair(sc1, sc2); traceI = alpha; traceJ = beta; } float similarity = scorePSCPair(sc1, sc2); int p = sc1.GetBeforePos(), q = sc2.GetBeforePos(); float tmpScore = VM[p][q] + similarity; // cout << i << " " << j << " " << p << " " << q << " " << tmpScore << " " << VM[p][q] << " " << similarity << " " << endl; if(max <= tmpScore) { max = tmpScore; traceI = p; traceJ = q; } tmpScore = VG[p][q] + similarity; if(max <= tmpScore) { max = tmpScore; traceI = traceGI[p][q]; traceJ = traceGJ[p][q]; } VM[i][j] = max; traceMI[i][j] = traceI; traceMJ[i][j] = traceJ; max = VM[i][j-1]; traceI = i; traceJ = j-1; tmpScore = VM[i-1][j]; if(max <= tmpScore) { max = tmpScore; traceI = i-1; traceJ = j; } tmpScore = VG[i-1][j]; if(max <= tmpScore) { max = tmpScore; traceI = traceGI[i-1][j]; traceJ = traceGJ[i-1][j]; } tmpScore = VG[i][j-1]; if(max <= tmpScore) { max = tmpScore; traceI = traceGI[i][j-1]; traceJ = traceGJ[i][j-1]; } VG[i][j] = max; traceGI[i][j] = traceI; traceGJ[i][j] = traceJ; } } } float Globaldp:: traceBack() { int nPscs1 = pscs1->size(); int nPscs2 = pscs2->size(); float max = IMPOSSIBLE; int traceI, traceJ; for (int i = 1; i < nPscs1; i++) { for (int j = 1; j < nPscs2; j++) { if(max < VM[i][j]) { max = VM[i][j]; traceI = i; traceJ = j; } } } while ( (traceI != 0) && (traceJ != 0) ) { matchPSCS1->push_back(traceI); matchPSCS2->push_back(traceJ); int tmpI = traceMI[traceI][traceJ]; int tmpJ = traceMJ[traceI][traceJ]; traceI = tmpI; traceJ = tmpJ; } if(traceI != 0 && traceJ != 0) { std::cout << "error in trace back of pscs:" << traceI << " " << traceJ << endl; } return max; } void Globaldp:: printMatch() { int size = matchPSCS1->size(); for(int iter = 0; iter < size; iter++) { int i = matchPSCS1->at(iter); int j = matchPSCS2->at(iter); const StemCandidate &sc1 = pscs1->at(i); const StemCandidate &sc2 = pscs2->at(j); std::cout << iter << "\t" << sc1.GetPosition() << "\t" << sc1.GetRvposition() << "\t" << sc2.GetPosition() << "\t" << sc2.GetRvposition() << endl; } } }