--- /dev/null
+#include "Globaldp.hpp"
+
+namespace MXSCARNA {
+
+double Globaldp::ribosum_matrix[16][16]
+= { { -2.49, -7.04, -8.24, -4.32, -8.84, -14.37, -4.68, -12.64, -6.86, -5.03, -8.39, -5.84, -4.01, -11.32, -6.16, -9.05 },
+ { -7.04, -2.11, -8.89, -2.04, -9.37, -9.08, -5.86, -10.45, -9.73, -3.81, -11.05, -4.72, -5.33, -8.67, -6.93, -7.83 },
+ { -8.24, -8.89, -0.80, -5.13, -10.41, -14.53, -4.57, -10.14, -8.61, -5.77, -5.38, -6.60, -5.43, -8.87, -5.94, -11.07 },
+ { -4.32, -2.04, -5.13, 4.49, -5.56, -6.71, 1.67, -5.17, -5.33, 2.70, -5.61, 0.59, 1.61, -4.81, -0.51, -2.98 },
+ { -8.84, -9.37, -10.41, -5.56, -5.13, -10.45, -3.57, -8.49, -7.98, -5.95, -11.36, -7.93, -2.42, -7.08, -5.63, -8.39 },
+ { -14.37, -9.08, -14.53, -6.71, -10.45, -3.59, -5.71, -5.77, -12.43, -3.70, -12.58, -7.88, -6.88, -7.40, -8.41, -5.41 },
+ { -4.68, -5.86, -4.57, 1.67, -3.57, -5.71, 5.36, -4.96, -6.00, 2.11, -4.66, -0.27, 2.75, -4.91, 1.32, -3.67 },
+ { -12.64, -10.45, -10.14, -5.17, -8.49, -5.77, -4.96, -2.28, -7.71, -5.84, -13.69, -5.61, -4.72, -3.83, -7.36, -5.21 },
+ { -6.86, -9.73, -8.61, -5.33, -7.98, -12.43, -6.00, -7.71, -1.05, -4.88, -8.67, -6.10, -5.85, -6.63, -7.55, -11.54 },
+ { -5.03, -3.81, -5.77, 2.70, -5.95, -3.70, 2.11, -5.84, -4.88, 5.62, -4.13, 1.21, 1.60, -4.49, -0.08, -3.90 },
+ { -8.39, -11.05, -5.38, -5.61, -11.36, -12.58, -4.66, -13.69, -8.67, -4.13, -1.98, -5.77, -5.75, -12.01, -4.27, -10.79 },
+ { -5.84, -4.72, -6.60, 0.59, -7.93, -7.88, -0.27, -5.61, -6.10, 1.21, -5.77, 3.47, -0.57, -5.30, -2.09, -4.45 },
+ { -4.01, -5.33, -5.43, 1.61, -2.42, -6.88, 2.75, -4.72, -5.85, 1.60, -5.75, -0.57, 4.97, -2.98, 1.14, -3.39 },
+ { -11.32, -8.67, -8.87, -4.81, -7.08, -7.40, -4.91, -3.83, -6.63, -4.49, -12.01, -5.30, -2.98, -3.21, -4.76, -5.97 },
+ { -6.16, -6.93, -5.94, -0.51, -5.63, -8.41, 1.32, -7.36, -7.55, -0.08, -4.27, -2.09, 1.14, -4.76, 3.36, -4.28 },
+ { -9.05, -7.83, -11.07, -2.98, -8.39, -5.41, -3.67, -5.21, -11.54, -3.90, -10.79, -4.45, -3.39, -5.97, -4.28, -0.02 }
+};
+
+
+Trimat<float>*
+Globaldp::
+makeProfileBPPMatrix(const MultiSequence *Sequences)
+{
+ int length = Sequences->GetSequence(0)->GetLength();
+ float thr = THR;
+ Trimat<float> *consBppMat = new Trimat<float>(length + 1);
+ fill(consBppMat->begin(), consBppMat->end(), 0);
+
+ int number = Sequences->GetNumSequences();
+ for(int seqNum = 0; seqNum < number; seqNum++) {
+ SafeVector<int> *tmpMap = Sequences->GetSequence(seqNum)->GetMappingNumber();
+ int label = Sequences->GetSequence(seqNum)->GetLabel();
+ BPPMatrix *tmpBppMatrix = BPPMatrices[label];
+
+ for(int i = 1; i <= length ; i++) {
+ int originI = tmpMap->at(i);
+ for(int j = i; j <= length; j++) {
+ int originJ = tmpMap->at(j);
+ if(originI != 0 && originJ != 0) {
+ float tmpProb = tmpBppMatrix->GetProb(originI, originJ);
+
+ if(tmpProb >= thr) {
+ consBppMat->ref(i, j) += tmpProb;
+ }
+ }
+ }
+ }
+ }
+
+ /* compute the mean of base pairing probability */
+ for(int i = 1; i <= length; i++) {
+ for(int j = i; j <= length; j++) {
+ consBppMat->ref(i,j) = consBppMat->ref(i,j)/(float)number;
+ }
+ }
+
+ return consBppMat;
+}
+
+float
+Globaldp::
+incrementalScorePSCPair(const StemCandidate &sc1, const StemCandidate &sc2)
+{
+ int wordLength = WORDLENGTH;
+
+ int pos1, rvpos1;
+ if (sc1.GetDistance() > 0) {
+ pos1 = sc1.GetPosition();
+ rvpos1 = sc1.GetRvposition();
+ }
+ else {
+ pos1 = sc1.GetRvposition();
+ rvpos1 = sc1.GetPosition();
+ }
+
+ int pos2, rvpos2;
+ if (sc2.GetDistance() > 0) {
+ pos2 = sc2.GetPosition();
+ rvpos2 = sc2.GetRvposition();
+ }
+ else {
+ pos2 = sc2.GetRvposition();
+ rvpos2 = sc2.GetPosition();
+ }
+/*
+ cout << "1:" << i << " " << j << " " << sc1.GetDistance() << " " << sc2.GetDistance() << endl;
+ cout << sc1.GetPosition() << " " << sc1.GetRvposition() << " " << sc2.GetPosition() << " " << sc2.GetRvposition() << endl;
+*/
+ float score =
+ posterior[sc1.GetPosition() + wordLength - 1][sc2.GetPosition() + wordLength - 1]
+// * sc1.GetBaseScore(wordLength - 1)
+ * profileBPPMat1->ref(pos1 + wordLength - 1, rvpos1)
+ * posterior[sc1.GetRvposition()][sc2.GetRvposition()]
+// * sc2.GetBaseScore(wordLength - 1);
+ * profileBPPMat2->ref(pos2 + wordLength - 1, rvpos2);
+
+
+/*
+ incrementalWordSimilarity(sc1, sc2, i, j)
+ + incrementalProbabilityScore(sc1, sc2) * multiDeltaScore
+ + incrementalStackingScore(sc1, sc2) * multiDeltaStacking;
+*/
+
+ return score*sc1.GetNumSeq()*sc2.GetNumSeq();
+}
+
+float
+Globaldp::
+incrementalProbabilityScore(const StemCandidate &sc1, const StemCandidate &sc2)
+{
+ int wordLength = WORDLENGTH;
+ return sc1.GetBaseScore(wordLength-1) + sc2.GetBaseScore(wordLength-1);
+}
+
+float
+Globaldp::
+incrementalStackingScore(const StemCandidate &sc1, const StemCandidate &sc2)
+{
+ return - (sc1.GetStacking() + sc2.GetStacking());
+}
+
+float
+Globaldp::
+incrementalWordSimilarity(const StemCandidate &sc1, const StemCandidate &sc2, int i, int j)
+{
+ int numSeq1 = sc1.GetNumSeq();
+ int numSeq2 = sc2.GetNumSeq();
+
+ float score = 0;
+
+ for(int k = 0; k < 16; k++) {
+ for(int l = 0; l < 16; l++) {
+ score += countContConp1[k][i]*countContConp2[l][j]*ribosum_matrix[k][l];
+ }
+ }
+
+ return score/(numSeq1*numSeq2);
+}
+
+float
+Globaldp::
+scorePSCPair(const StemCandidate &sc1, const StemCandidate &sc2)
+{
+
+
+ int wordLength = WORDLENGTH;
+ float score = 0;
+
+ int pos1, rvpos1;
+ if (sc1.GetDistance() > 0) {
+ pos1 = sc1.GetPosition();
+ rvpos1 = sc1.GetRvposition();
+ }
+ else {
+ pos1 = sc1.GetRvposition();
+ rvpos1 = sc1.GetPosition();
+ }
+
+ int pos2, rvpos2;
+ if (sc2.GetDistance() > 0) {
+ pos2 = sc2.GetPosition();
+ rvpos2 = sc2.GetRvposition();
+ }
+ else {
+ pos2 = sc2.GetRvposition();
+ rvpos2 = sc2.GetPosition();
+ }
+
+ for (int k = 0; k < wordLength; k++) {
+ score +=
+ posterior[sc1.GetPosition() + k][sc2.GetPosition() + k]
+ * profileBPPMat1->ref(pos1 + k, rvpos1 + wordLength - 1 - k)
+// * sc1.GetBaseScore(k)
+ * posterior[sc1.GetRvposition() + wordLength - 1 - k][sc2.GetRvposition() + wordLength - 1 - k]
+// * sc2.GetBaseScore(k);
+ * profileBPPMat2->ref(pos2 + k, rvpos2 + wordLength - 1 - k);
+ }
+ // validation code
+ /*
+ if (profileBPPMat1->ref(pos1 , rvpos1 + wordLength - 1) != sc1.GetBaseScore(0)) {
+ cout << "1 " << profileBPPMat1->ref(pos1, rvpos1 + wordLength - 1) << " " << sc1.GetBaseScore(0) << endl;
+ }
+ if (profileBPPMat2->ref(pos2, rvpos2 + wordLength - 1) != sc2.GetBaseScore(0)) {
+ cout << profileBPPMat2->ref(pos2, rvpos2 + wordLength - 1) << " " << sc2.GetBaseScore(0) << endl;
+ }
+ if (profileBPPMat1->ref(pos1 + 1, rvpos1 + wordLength - 1 - 1) != sc1.GetBaseScore(1)) {
+ cout << "1 " << profileBPPMat1->ref(pos1 + 1, rvpos1 + wordLength - 1 - 1) << " " << sc1.GetBaseScore(1) << endl;
+ }
+ if (profileBPPMat2->ref(pos2 + 1, rvpos2 + wordLength - 1 - 1) != sc2.GetBaseScore(1)) {
+ cout << profileBPPMat2->ref(pos2 + 1, rvpos2 + wordLength - 1 - 1) << " " << sc2.GetBaseScore(1) << endl;
+ }*/
+
+// cout << sc1.GetPosition() << " " << sc1.GetRvposition() << " " << sc1.GetDistance() << endl;
+// cout << sc2.GetPosition() << " " << sc2.GetRvposition() << " " << sc2.GetDistance() << endl;
+/*
+ wordSimilarity(sc1, sc2, i, j)
+ + probabilityScore(sc1, sc2) * multiScore
+ + stackingScore(sc1, sc2) * multiStacking
+
+*/
+/*
+ if (sc1.GetDistance() < 0 && sc2.GetDistance() < 0) {
+ cout << "2:" << i << " " << j << " " << sc1.GetDistance() << " " << sc2.GetDistance() << endl;
+ cout << sc1.GetPosition() << " " << sc1.GetRvposition() << " " << sc2.GetPosition() << " " << sc2.GetRvposition() << endl;
+ cout << "2:score" << score << endl;
+
+ }
+*/
+ return score*sc1.GetNumSeq()*sc2.GetNumSeq();
+}
+
+float
+Globaldp::
+wordSimilarity(const StemCandidate &sc1, const StemCandidate &sc2, int i, int j)
+{
+ int wordLength = WORDLENGTH;
+
+ int numSeq1 = sc1.GetNumSeq();
+ int numSeq2 = sc2.GetNumSeq();
+
+ float score = 0;
+
+ for(int k = 0; k < 16; k++) {
+ for(int l = 0; l < 16; l++) {
+ for(int iter = 0; iter < wordLength; iter++) {
+ score += countConp1[iter][k][i]*countConp2[iter][l][j]*ribosum_matrix[k][l];
+ }
+ }
+ }
+
+ return score/(numSeq1*numSeq2);
+}
+
+int
+Globaldp::
+returnBasePairType(char s, char r)
+{
+ if (s == 'A') {
+ if (r == 'A') return 0;
+ else if (r == 'C') return 1;
+ else if (r == 'G') return 2;
+ else if (r == 'U') return 3;
+ }
+ else if (s == 'C') {
+ if (r == 'A') return 4;
+ else if (r == 'C') return 5;
+ else if (r == 'G') return 6;
+ else if (r == 'U') return 7;
+ }
+ else if (s == 'G') {
+ if (r == 'A') return 8;
+ else if (r == 'C') return 9;
+ else if (r == 'G') return 10;
+ else if (r == 'U') return 11;
+ }
+ else if (s == 'U') {
+ if (r == 'A') return 12;
+ else if (r == 'C') return 13;
+ else if (r == 'G') return 14;
+ else if (r == 'U') return 15;
+ }
+
+ return 16;
+}
+
+float
+Globaldp::
+probabilityScore(const StemCandidate &sc1, const StemCandidate &sc2)
+{
+ return sc1.GetScore() + sc2.GetScore();
+}
+
+float
+Globaldp::
+stackingScore(const StemCandidate &sc1, const StemCandidate &sc2)
+{
+ return - (sc1.GetStemStacking() + sc2.GetStemStacking());
+}
+
+float
+Globaldp::
+distancePenalty(const StemCandidate &sc1, const StemCandidate &sc2)
+{
+ return std::sqrt((float)std::abs(sc1.GetDistance() - sc2.GetDistance()));
+}
+
+float
+Globaldp::
+Run()
+{
+ Initialize();
+ DP();
+ float score = traceBack();
+
+ // printMatch();
+ //cout << "score=" << score << endl;
+ return score;
+}
+
+void
+Globaldp::
+Initialize()
+{
+ int nPscs1 = pscs1->size();
+ int nPscs2 = pscs2->size();
+
+
+ for(int i = 0; i < nPscs1; i++) {
+ for(int j = 0; j < nPscs2; j++) {
+ VM[i][j] = 0;
+ VG[i][j] = 0;
+ }
+ }
+
+ VM[0][0] = 0;
+ VG[0][0] = IMPOSSIBLE;
+
+ for (int i = 1; i < nPscs1; i++) {
+ VM[i][0] = IMPOSSIBLE; VG[i][0] = 0;
+ }
+ for (int j = 1; j < nPscs2; j++) {
+ VM[0][j] = IMPOSSIBLE; VG[0][j] = 0;
+ }
+
+ for (int i = 0; i < nPscs1; i++) {
+ for (int j = 0; j < nPscs2; j++) {
+ traceMI[i][j] = 0; traceMJ[i][j] = 0;
+ traceGI[i][j] = 0; traceGJ[i][j] = 0;
+ }
+ }
+
+ int wordLength = WORDLENGTH;
+ int size1 = pscs1->size();
+ int size2 = pscs2->size();
+
+ for(int i = 0; i < wordLength; i++) {
+ for(int j = 0; j < 17; j++) {
+ for(int k = 0; k < (int)pscs1->size(); k++) {
+ countConp1[i][j][k] = 0;
+ }
+ }
+ }
+ for(int i = 0; i < wordLength; i++) {
+ for(int j = 0; j < 17; j++) {
+ for(int k = 0; k < (int)pscs2->size(); k++) {
+ countConp2[i][j][k] = 0;
+ }
+ }
+ }
+ for(int i = 0; i < 17; i++) {
+ for(int j = 0; j < (int)pscs1->size()+1; j++) {
+ countContConp1[i][j] = 0;
+ }
+ }
+ for(int i = 0; i < 17; i++) {
+ for(int j = 0; j < (int)pscs2->size()+1; j++) {
+ countContConp2[i][j] = 0;
+ }
+ }
+
+ for(int iter = 1; iter < size1; iter++) {
+
+ const StemCandidate &sc1 = pscs1->at(iter);
+ int numSeq1 = sc1.GetNumSeq();
+ for (int i = 0; i < wordLength; i++) {
+
+ for(int k = 0; k < numSeq1; k++) {
+// const Sequence *seq = seqs1->GetSequence(k);
+ string substr = sc1.GetSubstr(k);
+ string rvstr = sc1.GetRvstr(k);
+
+ char sChar = substr[i];
+ char rChar = rvstr[wordLength - 1 -i];
+
+ int type = returnBasePairType(sChar, rChar);
+ ++countConp1[i][type][iter];
+ }
+ }
+ for(int k = 0; k < numSeq1; k++) {
+// const Sequence *seq = seqs1->GetSequence(k);
+ string substr = sc1.GetSubstr(k);
+ string rvstr = sc1.GetRvstr(k);
+
+ char sChar = substr[wordLength-1];
+ char rChar = rvstr[0];
+
+ int type = returnBasePairType(sChar, rChar);
+ ++countContConp1[type][iter];
+
+ }
+ }
+ for(int iter = 1; iter < size2; iter++) {
+ const StemCandidate &sc2 = pscs2->at(iter);
+ int numSeq2 = sc2.GetNumSeq();
+ for (int i = 0; i < wordLength; i++) {
+
+ for(int k = 0; k < numSeq2; k++) {
+// const Sequence *seq = seqs2->GetSequence(k);
+ string substr = sc2.GetSubstr(k);
+ string rvstr = sc2.GetRvstr(k);
+
+ char sChar = substr[i];
+ char rChar = rvstr[wordLength - 1 -i];
+
+ int type = returnBasePairType(sChar, rChar);
+ ++countConp2[i][type][iter];
+ }
+ }
+ for(int k = 0; k < numSeq2; k++) {
+// const Sequence *seq = seqs2->GetSequence(k);
+ string substr = sc2.GetSubstr(k);
+ string rvstr = sc2.GetRvstr(k);
+
+ char sChar = substr[wordLength-1];
+ char rChar = rvstr[0];
+
+ int type = returnBasePairType(sChar, rChar);
+ ++countContConp2[type][iter];
+
+ }
+ }
+ profileBPPMat1 = makeProfileBPPMatrix(seqs1);
+ profileBPPMat2 = makeProfileBPPMatrix(seqs2);
+}
+
+void
+Globaldp::
+DP()
+{
+ int nPscs1 = pscs1->size();
+ int nPscs2 = pscs2->size();
+
+ for(int i = 1; i < nPscs1; i++) {
+ const StemCandidate &sc1 = pscs1->at(i);
+
+ for(int j = 1; j < nPscs2; j++) {
+ const StemCandidate &sc2 = pscs2->at(j);
+
+ float max = IMPOSSIBLE;
+ int traceI = 0;
+ int traceJ = 0;
+ int alpha = sc1.GetContPos(), beta = sc2.GetContPos();
+ if( (alpha > 0) && (beta > 0) ) {
+ max = VM[alpha][beta] + incrementalScorePSCPair(sc1, sc2);
+ traceI = alpha;
+ traceJ = beta;
+ }
+
+ float similarity = scorePSCPair(sc1, sc2);
+ int p = sc1.GetBeforePos(), q = sc2.GetBeforePos();
+ float tmpScore = VM[p][q] + similarity;
+// cout << i << " " << j << " " << p << " " << q << " " << tmpScore << " " << VM[p][q] << " " << similarity << " " << endl;
+
+ if(max <= tmpScore) {
+ max = tmpScore;
+ traceI = p;
+ traceJ = q;
+ }
+
+ tmpScore = VG[p][q] + similarity;
+ if(max <= tmpScore) {
+ max = tmpScore;
+ traceI = traceGI[p][q];
+ traceJ = traceGJ[p][q];
+ }
+
+ VM[i][j] = max;
+ traceMI[i][j] = traceI;
+ traceMJ[i][j] = traceJ;
+
+ max = VM[i][j-1];
+ traceI = i;
+ traceJ = j-1;
+
+ tmpScore = VM[i-1][j];
+ if(max <= tmpScore) {
+ max = tmpScore;
+ traceI = i-1;
+ traceJ = j;
+ }
+
+ tmpScore = VG[i-1][j];
+ if(max <= tmpScore) {
+ max = tmpScore;
+ traceI = traceGI[i-1][j];
+ traceJ = traceGJ[i-1][j];
+ }
+
+ tmpScore = VG[i][j-1];
+ if(max <= tmpScore) {
+ max = tmpScore;
+ traceI = traceGI[i][j-1];
+ traceJ = traceGJ[i][j-1];
+ }
+
+ VG[i][j] = max;
+ traceGI[i][j] = traceI;
+ traceGJ[i][j] = traceJ;
+ }
+ }
+}
+
+float
+Globaldp::
+traceBack()
+{
+ int nPscs1 = pscs1->size();
+ int nPscs2 = pscs2->size();
+
+ float max = IMPOSSIBLE;
+ int traceI, traceJ;
+ for (int i = 1; i < nPscs1; i++) {
+ for (int j = 1; j < nPscs2; j++) {
+ if(max < VM[i][j]) {
+ max = VM[i][j];
+ traceI = i;
+ traceJ = j;
+ }
+ }
+ }
+
+ while ( (traceI != 0) && (traceJ != 0) ) {
+ matchPSCS1->push_back(traceI); matchPSCS2->push_back(traceJ);
+ int tmpI = traceMI[traceI][traceJ];
+ int tmpJ = traceMJ[traceI][traceJ];
+ traceI = tmpI; traceJ = tmpJ;
+ }
+
+ if(traceI != 0 && traceJ != 0) {
+ std::cout << "error in trace back of pscs:" << traceI << " " << traceJ << endl;
+ }
+
+ return max;
+}
+
+void
+Globaldp::
+printMatch()
+{
+ int size = matchPSCS1->size();
+ for(int iter = 0; iter < size; iter++) {
+ int i = matchPSCS1->at(iter);
+ int j = matchPSCS2->at(iter);
+
+ const StemCandidate &sc1 = pscs1->at(i);
+ const StemCandidate &sc2 = pscs2->at(j);
+
+ std::cout << iter << "\t" << sc1.GetPosition() << "\t" << sc1.GetRvposition() << "\t" << sc2.GetPosition() << "\t" << sc2.GetRvposition() << endl;
+ }
+
+}
+}