1 #include "Globaldp.hpp"
5 double Globaldp::ribosum_matrix[16][16]
6 = { { -2.49, -7.04, -8.24, -4.32, -8.84, -14.37, -4.68, -12.64, -6.86, -5.03, -8.39, -5.84, -4.01, -11.32, -6.16, -9.05 },
7 { -7.04, -2.11, -8.89, -2.04, -9.37, -9.08, -5.86, -10.45, -9.73, -3.81, -11.05, -4.72, -5.33, -8.67, -6.93, -7.83 },
8 { -8.24, -8.89, -0.80, -5.13, -10.41, -14.53, -4.57, -10.14, -8.61, -5.77, -5.38, -6.60, -5.43, -8.87, -5.94, -11.07 },
9 { -4.32, -2.04, -5.13, 4.49, -5.56, -6.71, 1.67, -5.17, -5.33, 2.70, -5.61, 0.59, 1.61, -4.81, -0.51, -2.98 },
10 { -8.84, -9.37, -10.41, -5.56, -5.13, -10.45, -3.57, -8.49, -7.98, -5.95, -11.36, -7.93, -2.42, -7.08, -5.63, -8.39 },
11 { -14.37, -9.08, -14.53, -6.71, -10.45, -3.59, -5.71, -5.77, -12.43, -3.70, -12.58, -7.88, -6.88, -7.40, -8.41, -5.41 },
12 { -4.68, -5.86, -4.57, 1.67, -3.57, -5.71, 5.36, -4.96, -6.00, 2.11, -4.66, -0.27, 2.75, -4.91, 1.32, -3.67 },
13 { -12.64, -10.45, -10.14, -5.17, -8.49, -5.77, -4.96, -2.28, -7.71, -5.84, -13.69, -5.61, -4.72, -3.83, -7.36, -5.21 },
14 { -6.86, -9.73, -8.61, -5.33, -7.98, -12.43, -6.00, -7.71, -1.05, -4.88, -8.67, -6.10, -5.85, -6.63, -7.55, -11.54 },
15 { -5.03, -3.81, -5.77, 2.70, -5.95, -3.70, 2.11, -5.84, -4.88, 5.62, -4.13, 1.21, 1.60, -4.49, -0.08, -3.90 },
16 { -8.39, -11.05, -5.38, -5.61, -11.36, -12.58, -4.66, -13.69, -8.67, -4.13, -1.98, -5.77, -5.75, -12.01, -4.27, -10.79 },
17 { -5.84, -4.72, -6.60, 0.59, -7.93, -7.88, -0.27, -5.61, -6.10, 1.21, -5.77, 3.47, -0.57, -5.30, -2.09, -4.45 },
18 { -4.01, -5.33, -5.43, 1.61, -2.42, -6.88, 2.75, -4.72, -5.85, 1.60, -5.75, -0.57, 4.97, -2.98, 1.14, -3.39 },
19 { -11.32, -8.67, -8.87, -4.81, -7.08, -7.40, -4.91, -3.83, -6.63, -4.49, -12.01, -5.30, -2.98, -3.21, -4.76, -5.97 },
20 { -6.16, -6.93, -5.94, -0.51, -5.63, -8.41, 1.32, -7.36, -7.55, -0.08, -4.27, -2.09, 1.14, -4.76, 3.36, -4.28 },
21 { -9.05, -7.83, -11.07, -2.98, -8.39, -5.41, -3.67, -5.21, -11.54, -3.90, -10.79, -4.45, -3.39, -5.97, -4.28, -0.02 }
27 makeProfileBPPMatrix(const MultiSequence *Sequences)
29 int length = Sequences->GetSequence(0)->GetLength();
31 Trimat<float> *consBppMat = new Trimat<float>(length + 1);
32 fill(consBppMat->begin(), consBppMat->end(), 0);
34 int number = Sequences->GetNumSequences();
35 for(int seqNum = 0; seqNum < number; seqNum++) {
36 SafeVector<int> *tmpMap = Sequences->GetSequence(seqNum)->GetMappingNumber();
37 int label = Sequences->GetSequence(seqNum)->GetLabel();
38 BPPMatrix *tmpBppMatrix = BPPMatrices[label];
40 for(int i = 1; i <= length ; i++) {
41 int originI = tmpMap->at(i);
42 for(int j = i; j <= length; j++) {
43 int originJ = tmpMap->at(j);
44 if(originI != 0 && originJ != 0) {
45 float tmpProb = tmpBppMatrix->GetProb(originI, originJ);
48 consBppMat->ref(i, j) += tmpProb;
55 /* compute the mean of base pairing probability */
56 for(int i = 1; i <= length; i++) {
57 for(int j = i; j <= length; j++) {
58 consBppMat->ref(i,j) = consBppMat->ref(i,j)/(float)number;
67 incrementalScorePSCPair(const StemCandidate &sc1, const StemCandidate &sc2)
69 int wordLength = WORDLENGTH;
72 if (sc1.GetDistance() > 0) {
73 pos1 = sc1.GetPosition();
74 rvpos1 = sc1.GetRvposition();
77 pos1 = sc1.GetRvposition();
78 rvpos1 = sc1.GetPosition();
82 if (sc2.GetDistance() > 0) {
83 pos2 = sc2.GetPosition();
84 rvpos2 = sc2.GetRvposition();
87 pos2 = sc2.GetRvposition();
88 rvpos2 = sc2.GetPosition();
91 cout << "1:" << i << " " << j << " " << sc1.GetDistance() << " " << sc2.GetDistance() << endl;
92 cout << sc1.GetPosition() << " " << sc1.GetRvposition() << " " << sc2.GetPosition() << " " << sc2.GetRvposition() << endl;
95 posterior[sc1.GetPosition() + wordLength - 1][sc2.GetPosition() + wordLength - 1]
96 // * sc1.GetBaseScore(wordLength - 1)
97 * profileBPPMat1->ref(pos1 + wordLength - 1, rvpos1)
98 * posterior[sc1.GetRvposition()][sc2.GetRvposition()]
99 // * sc2.GetBaseScore(wordLength - 1);
100 * profileBPPMat2->ref(pos2 + wordLength - 1, rvpos2);
104 incrementalWordSimilarity(sc1, sc2, i, j)
105 + incrementalProbabilityScore(sc1, sc2) * multiDeltaScore
106 + incrementalStackingScore(sc1, sc2) * multiDeltaStacking;
109 return score*sc1.GetNumSeq()*sc2.GetNumSeq();
114 incrementalProbabilityScore(const StemCandidate &sc1, const StemCandidate &sc2)
116 int wordLength = WORDLENGTH;
117 return sc1.GetBaseScore(wordLength-1) + sc2.GetBaseScore(wordLength-1);
122 incrementalStackingScore(const StemCandidate &sc1, const StemCandidate &sc2)
124 return - (sc1.GetStacking() + sc2.GetStacking());
129 incrementalWordSimilarity(const StemCandidate &sc1, const StemCandidate &sc2, int i, int j)
131 int numSeq1 = sc1.GetNumSeq();
132 int numSeq2 = sc2.GetNumSeq();
136 for(int k = 0; k < 16; k++) {
137 for(int l = 0; l < 16; l++) {
138 score += countContConp1[k][i]*countContConp2[l][j]*ribosum_matrix[k][l];
142 return score/(numSeq1*numSeq2);
147 scorePSCPair(const StemCandidate &sc1, const StemCandidate &sc2)
151 int wordLength = WORDLENGTH;
155 if (sc1.GetDistance() > 0) {
156 pos1 = sc1.GetPosition();
157 rvpos1 = sc1.GetRvposition();
160 pos1 = sc1.GetRvposition();
161 rvpos1 = sc1.GetPosition();
165 if (sc2.GetDistance() > 0) {
166 pos2 = sc2.GetPosition();
167 rvpos2 = sc2.GetRvposition();
170 pos2 = sc2.GetRvposition();
171 rvpos2 = sc2.GetPosition();
174 for (int k = 0; k < wordLength; k++) {
176 posterior[sc1.GetPosition() + k][sc2.GetPosition() + k]
177 * profileBPPMat1->ref(pos1 + k, rvpos1 + wordLength - 1 - k)
178 // * sc1.GetBaseScore(k)
179 * posterior[sc1.GetRvposition() + wordLength - 1 - k][sc2.GetRvposition() + wordLength - 1 - k]
180 // * sc2.GetBaseScore(k);
181 * profileBPPMat2->ref(pos2 + k, rvpos2 + wordLength - 1 - k);
185 if (profileBPPMat1->ref(pos1 , rvpos1 + wordLength - 1) != sc1.GetBaseScore(0)) {
186 cout << "1 " << profileBPPMat1->ref(pos1, rvpos1 + wordLength - 1) << " " << sc1.GetBaseScore(0) << endl;
188 if (profileBPPMat2->ref(pos2, rvpos2 + wordLength - 1) != sc2.GetBaseScore(0)) {
189 cout << profileBPPMat2->ref(pos2, rvpos2 + wordLength - 1) << " " << sc2.GetBaseScore(0) << endl;
191 if (profileBPPMat1->ref(pos1 + 1, rvpos1 + wordLength - 1 - 1) != sc1.GetBaseScore(1)) {
192 cout << "1 " << profileBPPMat1->ref(pos1 + 1, rvpos1 + wordLength - 1 - 1) << " " << sc1.GetBaseScore(1) << endl;
194 if (profileBPPMat2->ref(pos2 + 1, rvpos2 + wordLength - 1 - 1) != sc2.GetBaseScore(1)) {
195 cout << profileBPPMat2->ref(pos2 + 1, rvpos2 + wordLength - 1 - 1) << " " << sc2.GetBaseScore(1) << endl;
198 // cout << sc1.GetPosition() << " " << sc1.GetRvposition() << " " << sc1.GetDistance() << endl;
199 // cout << sc2.GetPosition() << " " << sc2.GetRvposition() << " " << sc2.GetDistance() << endl;
201 wordSimilarity(sc1, sc2, i, j)
202 + probabilityScore(sc1, sc2) * multiScore
203 + stackingScore(sc1, sc2) * multiStacking
207 if (sc1.GetDistance() < 0 && sc2.GetDistance() < 0) {
208 cout << "2:" << i << " " << j << " " << sc1.GetDistance() << " " << sc2.GetDistance() << endl;
209 cout << sc1.GetPosition() << " " << sc1.GetRvposition() << " " << sc2.GetPosition() << " " << sc2.GetRvposition() << endl;
210 cout << "2:score" << score << endl;
214 return score*sc1.GetNumSeq()*sc2.GetNumSeq();
219 wordSimilarity(const StemCandidate &sc1, const StemCandidate &sc2, int i, int j)
221 int wordLength = WORDLENGTH;
223 int numSeq1 = sc1.GetNumSeq();
224 int numSeq2 = sc2.GetNumSeq();
228 for(int k = 0; k < 16; k++) {
229 for(int l = 0; l < 16; l++) {
230 for(int iter = 0; iter < wordLength; iter++) {
231 score += countConp1[iter][k][i]*countConp2[iter][l][j]*ribosum_matrix[k][l];
236 return score/(numSeq1*numSeq2);
241 returnBasePairType(char s, char r)
244 if (r == 'A') return 0;
245 else if (r == 'C') return 1;
246 else if (r == 'G') return 2;
247 else if (r == 'U') return 3;
250 if (r == 'A') return 4;
251 else if (r == 'C') return 5;
252 else if (r == 'G') return 6;
253 else if (r == 'U') return 7;
256 if (r == 'A') return 8;
257 else if (r == 'C') return 9;
258 else if (r == 'G') return 10;
259 else if (r == 'U') return 11;
262 if (r == 'A') return 12;
263 else if (r == 'C') return 13;
264 else if (r == 'G') return 14;
265 else if (r == 'U') return 15;
273 probabilityScore(const StemCandidate &sc1, const StemCandidate &sc2)
275 return sc1.GetScore() + sc2.GetScore();
280 stackingScore(const StemCandidate &sc1, const StemCandidate &sc2)
282 return - (sc1.GetStemStacking() + sc2.GetStemStacking());
287 distancePenalty(const StemCandidate &sc1, const StemCandidate &sc2)
289 return std::sqrt((float)std::abs(sc1.GetDistance() - sc2.GetDistance()));
298 float score = traceBack();
301 //cout << "score=" << score << endl;
309 int nPscs1 = pscs1->size();
310 int nPscs2 = pscs2->size();
313 for(int i = 0; i < nPscs1; i++) {
314 for(int j = 0; j < nPscs2; j++) {
321 VG[0][0] = IMPOSSIBLE;
323 for (int i = 1; i < nPscs1; i++) {
324 VM[i][0] = IMPOSSIBLE; VG[i][0] = 0;
326 for (int j = 1; j < nPscs2; j++) {
327 VM[0][j] = IMPOSSIBLE; VG[0][j] = 0;
330 for (int i = 0; i < nPscs1; i++) {
331 for (int j = 0; j < nPscs2; j++) {
332 traceMI[i][j] = 0; traceMJ[i][j] = 0;
333 traceGI[i][j] = 0; traceGJ[i][j] = 0;
337 int wordLength = WORDLENGTH;
338 int size1 = pscs1->size();
339 int size2 = pscs2->size();
341 for(int i = 0; i < wordLength; i++) {
342 for(int j = 0; j < 17; j++) {
343 for(int k = 0; k < (int)pscs1->size(); k++) {
344 countConp1[i][j][k] = 0;
348 for(int i = 0; i < wordLength; i++) {
349 for(int j = 0; j < 17; j++) {
350 for(int k = 0; k < (int)pscs2->size(); k++) {
351 countConp2[i][j][k] = 0;
355 for(int i = 0; i < 17; i++) {
356 for(int j = 0; j < (int)pscs1->size()+1; j++) {
357 countContConp1[i][j] = 0;
360 for(int i = 0; i < 17; i++) {
361 for(int j = 0; j < (int)pscs2->size()+1; j++) {
362 countContConp2[i][j] = 0;
366 for(int iter = 1; iter < size1; iter++) {
368 const StemCandidate &sc1 = pscs1->at(iter);
369 int numSeq1 = sc1.GetNumSeq();
370 for (int i = 0; i < wordLength; i++) {
372 for(int k = 0; k < numSeq1; k++) {
373 // const Sequence *seq = seqs1->GetSequence(k);
374 string substr = sc1.GetSubstr(k);
375 string rvstr = sc1.GetRvstr(k);
377 char sChar = substr[i];
378 char rChar = rvstr[wordLength - 1 -i];
380 int type = returnBasePairType(sChar, rChar);
381 ++countConp1[i][type][iter];
384 for(int k = 0; k < numSeq1; k++) {
385 // const Sequence *seq = seqs1->GetSequence(k);
386 string substr = sc1.GetSubstr(k);
387 string rvstr = sc1.GetRvstr(k);
389 char sChar = substr[wordLength-1];
390 char rChar = rvstr[0];
392 int type = returnBasePairType(sChar, rChar);
393 ++countContConp1[type][iter];
397 for(int iter = 1; iter < size2; iter++) {
398 const StemCandidate &sc2 = pscs2->at(iter);
399 int numSeq2 = sc2.GetNumSeq();
400 for (int i = 0; i < wordLength; i++) {
402 for(int k = 0; k < numSeq2; k++) {
403 // const Sequence *seq = seqs2->GetSequence(k);
404 string substr = sc2.GetSubstr(k);
405 string rvstr = sc2.GetRvstr(k);
407 char sChar = substr[i];
408 char rChar = rvstr[wordLength - 1 -i];
410 int type = returnBasePairType(sChar, rChar);
411 ++countConp2[i][type][iter];
414 for(int k = 0; k < numSeq2; k++) {
415 // const Sequence *seq = seqs2->GetSequence(k);
416 string substr = sc2.GetSubstr(k);
417 string rvstr = sc2.GetRvstr(k);
419 char sChar = substr[wordLength-1];
420 char rChar = rvstr[0];
422 int type = returnBasePairType(sChar, rChar);
423 ++countContConp2[type][iter];
427 profileBPPMat1 = makeProfileBPPMatrix(seqs1);
428 profileBPPMat2 = makeProfileBPPMatrix(seqs2);
435 int nPscs1 = pscs1->size();
436 int nPscs2 = pscs2->size();
438 for(int i = 1; i < nPscs1; i++) {
439 const StemCandidate &sc1 = pscs1->at(i);
441 for(int j = 1; j < nPscs2; j++) {
442 const StemCandidate &sc2 = pscs2->at(j);
444 float max = IMPOSSIBLE;
447 int alpha = sc1.GetContPos(), beta = sc2.GetContPos();
448 if( (alpha > 0) && (beta > 0) ) {
449 max = VM[alpha][beta] + incrementalScorePSCPair(sc1, sc2);
454 float similarity = scorePSCPair(sc1, sc2);
455 int p = sc1.GetBeforePos(), q = sc2.GetBeforePos();
456 float tmpScore = VM[p][q] + similarity;
457 // cout << i << " " << j << " " << p << " " << q << " " << tmpScore << " " << VM[p][q] << " " << similarity << " " << endl;
459 if(max <= tmpScore) {
465 tmpScore = VG[p][q] + similarity;
466 if(max <= tmpScore) {
468 traceI = traceGI[p][q];
469 traceJ = traceGJ[p][q];
473 traceMI[i][j] = traceI;
474 traceMJ[i][j] = traceJ;
480 tmpScore = VM[i-1][j];
481 if(max <= tmpScore) {
487 tmpScore = VG[i-1][j];
488 if(max <= tmpScore) {
490 traceI = traceGI[i-1][j];
491 traceJ = traceGJ[i-1][j];
494 tmpScore = VG[i][j-1];
495 if(max <= tmpScore) {
497 traceI = traceGI[i][j-1];
498 traceJ = traceGJ[i][j-1];
502 traceGI[i][j] = traceI;
503 traceGJ[i][j] = traceJ;
512 int nPscs1 = pscs1->size();
513 int nPscs2 = pscs2->size();
515 float max = IMPOSSIBLE;
517 for (int i = 1; i < nPscs1; i++) {
518 for (int j = 1; j < nPscs2; j++) {
527 while ( (traceI != 0) && (traceJ != 0) ) {
528 matchPSCS1->push_back(traceI); matchPSCS2->push_back(traceJ);
529 int tmpI = traceMI[traceI][traceJ];
530 int tmpJ = traceMJ[traceI][traceJ];
531 traceI = tmpI; traceJ = tmpJ;
534 if(traceI != 0 && traceJ != 0) {
535 std::cout << "error in trace back of pscs:" << traceI << " " << traceJ << endl;
545 int size = matchPSCS1->size();
546 for(int iter = 0; iter < size; iter++) {
547 int i = matchPSCS1->at(iter);
548 int j = matchPSCS2->at(iter);
550 const StemCandidate &sc1 = pscs1->at(i);
551 const StemCandidate &sc2 = pscs2->at(j);
553 std::cout << iter << "\t" << sc1.GetPosition() << "\t" << sc1.GetRvposition() << "\t" << sc2.GetPosition() << "\t" << sc2.GetRvposition() << endl;