new mafft v 6.857 with extensions
[jabaws.git] / binaries / src / mafft / extensions / mxscarna_src / seq2scs.cpp
1 ///////////////////////////////////////////////////////////////
2 // seq2scs.cpp
3 //
4 // make SCS(Stem Candidate Sequence) from the profile
5 //////////////////////////////////////////////////////////////
6
7 #include "scarna.hpp"
8 #include "SafeVector.h"
9 #include "StemCandidate.hpp"
10 #include "Sequence.h"
11 #include "MultiSequence.h"
12 #include "BPPMatrix.hpp"
13 #include "nrutil.h"
14 #include <vector>
15 #include <algorithm>
16 #include <stdio.h>
17 #include <cstring>
18 #include <stdio.h>
19 #include <math.h>
20
21 using namespace std;
22 using namespace::MXSCARNA;
23
24 // for alipfold
25 /*
26 #include "utils.h"
27 #include "fold_vars.h"
28 #include "fold.h"
29 #include "part_func.h"
30 #include "inverse.h"
31 #include "RNAstruct.h"
32 #include "treedist.h"
33 #include "stringdist.h"
34 #include "profiledist.h"
35 #include "alifold.h"
36 #include "aln_util.h"
37 #include "dist_vars.h"
38 */
39 double Stacking_Energy[36] ={
40  -0.9,-2.1,-1.7,-0.5,-0.9,-1.0,
41  -1.8,-2.9,-2.0,-1.2,-1.7,-1.9,
42  -2.3,-3.4,-2.9,-1.4,-2.1,-2.1,
43  -1.1,-2.1,-1.9,-0.4,-1.0,1.5,
44  -1.1,-2.3,-1.8,-0.8,-0.9,-1.1,
45  -0.8,-1.4,-1.2,-0.2,-0.5,-0.4 };
46
47 static Trimat<float>* makeProfileBPPMatrix(MultiSequence *Sequences, SafeVector<BPPMatrix*> &BPPMatrices);
48 static int countSCS(MultiSequence *Sequences, Trimat<float>* consBppMat, int BandWidth);
49 static std::vector<StemCandidate>* makeProfileScs(std::vector<StemCandidate> *pscs, MultiSequence *Sequences, Trimat<float>* consBppMat, int BandWidth);
50 static void printScs(std::vector<StemCandidate> *pscs);
51 static std::vector<StemCandidate>* doubleScs(std::vector<StemCandidate> *pscs);
52 static std::vector<StemCandidate>* findRelations(std::vector<StemCandidate> *pscs);
53 static std::vector<StemCandidate>* findCorresponding(std::vector<StemCandidate>* pscs);
54 static std::vector<StemCandidate>* calculateStackingEnergy(std::vector<StemCandidate>* pscs);
55
56 //float alipf_fold(char **sequences, char *structure, pair_info **pi);
57
58 struct SortCmp {
59     bool operator()(const StemCandidate &sc1, const StemCandidate &sc2) const {
60         if      (sc1.GetPosition() > sc2.GetPosition()) return false;
61         else if (sc1.GetPosition() < sc2.GetPosition()) return true;
62         else if (sc1.GetDistance() > sc2.GetDistance()) return false;
63         else return true;
64     }
65 };
66
67
68 vector<StemCandidate>*
69 seq2scs(MultiSequence *Sequences, SafeVector<BPPMatrix*> &BPPMatrices, int BandWidth)
70 {
71
72     Trimat<float> *consBppMat = makeProfileBPPMatrix(Sequences, BPPMatrices);
73
74     int numberScs = countSCS(Sequences, consBppMat, BandWidth);
75
76     std::vector<StemCandidate> *pscs = new std::vector<StemCandidate>(); // Profile Stem Candidate Sequence
77 //    cout << "numberScs=" << numberScs << endl;
78     pscs->resize(numberScs+1);
79
80     pscs = makeProfileScs(pscs, Sequences, consBppMat, BandWidth);
81
82     pscs = doubleScs(pscs);
83
84     std::vector<StemCandidate>::iterator startIter = pscs->begin();
85     std::vector<StemCandidate>::iterator endIter   = pscs->end();
86     ++startIter;
87     std::sort(startIter, endIter, SortCmp());
88     
89 //    printScs(pscs);
90     pscs = findRelations(pscs);
91
92     pscs = findCorresponding(pscs);
93
94     pscs = calculateStackingEnergy(pscs);
95
96 //    findStemRelation()
97     
98 //    exit(1);
99     delete consBppMat;
100
101     return pscs;
102 }
103
104 static Trimat<float>*
105 makeProfileBPPMatrix(MultiSequence *Sequences, SafeVector<BPPMatrix*> &BPPMatrices)
106 {
107     int length = Sequences->GetSequence(0)->GetLength();
108 //    float thr  = BaseProbThreshold;
109     Trimat<float> *consBppMat = new Trimat<float>(length + 1);
110     fill(consBppMat->begin(), consBppMat->end(), 0);
111
112 // gabage
113 //    for(int i = 0; i <= length; i++) 
114 //      for(int j = i; j <= length; j++) 
115 //          cout << "i=" << i << " j=" << j << " " << consBppMat->ref(i,j) << endl;
116 //          consBppMat->ref(i,j) = 0;
117
118
119     int number = Sequences->GetNumSequences();
120 //    if( number == 1) {
121     for(int seqNum = 0; seqNum < number; seqNum++) {
122         SafeVector<int> *tmpMap = Sequences->GetSequence(seqNum)->GetMappingNumber();
123         int label = Sequences->GetSequence(seqNum)->GetLabel();
124         BPPMatrix *tmpBppMatrix = BPPMatrices[label];
125             
126         for(int i = 1; i <= length ; i++) {
127             int originI = tmpMap->at(i);
128             for(int j = i + 3; j <= length; j++) {
129                 int originJ = tmpMap->at(j);
130                 if(originI != 0 && originJ != 0) {
131                     float tmpProb = tmpBppMatrix->GetProb(originI, originJ);
132
133 //                  if(tmpProb >= thr) {
134                         consBppMat->ref(i, j) += tmpProb;
135 //                      cout << i << " " << j << " " << consBppMat->ref(i,j) << endl;
136 //                  }
137                 }
138             }
139         }
140     }
141
142         /* compute the mean of base pairing probability  */
143     for(int i = 1; i <= length; i++) {
144         for(int j = i + 3; j <= length; j++) {
145             consBppMat->ref(i,j) = consBppMat->ref(i,j)/(float)number;
146             //consBppMat->ref(i,j) = std::sqrt[number](consBppMat->ref(i,j));
147             //      cout << i << " " << j <<  " " << consBppMat->ref(i,j) << endl;
148         }
149     }
150
151
152 /*
153     else {
154         char **Seqs;
155         Seqs = (char **) malloc(sizeof(char*) * number);
156
157         for(int i = 0; i < number; i++) {
158             Seqs[i] = (char *) malloc(sizeof(char) * (length + 1));
159             for(int j = 1; j <= length; j++) {
160                 Seqs[i][j-1] = Sequences->GetSequence(i)->GetPosition(j);
161             }
162             Seqs[i][length] = '\0';
163         }
164
165         
166         char *structure = NULL;
167         pair_info *pi;
168             
169         alipf_fold(Seqs, structure, &pi, number);
170
171         for(int iter = 0; iter < length; iter++) {
172             if(pi[iter].i == 0) break;
173             consBppMat->ref(pi[iter].i, pi[iter].j) = pi[iter].p;
174         }
175
176         for(int i = 0; i < number; i++) {
177             free (Seqs[i]);
178         }
179         free (Seqs);
180         
181 //      free_alifold_arrays(void);
182     }
183 */  
184
185     return consBppMat;
186 }
187  
188 static int 
189 countSCS(MultiSequence *Sequences, Trimat<float> *consBppMat, int BandWidth)
190 {
191
192     int length = Sequences->GetSequence(0)->GetLength();
193     int Word_Length = scsLength;
194
195     int i, j, k, s;
196     int count;
197     int sum;
198
199     sum = 0;
200     for(k = 1; k <= length; k++) {
201         count = 0;
202
203         for(i = k, j = k; i >= 1 && j <= (k + BandWidth - 1) && j <= length; i--, j++) {
204             if(consBppMat->ref(i, j) >= BaseProbThreshold) {
205                 count++;
206             }
207             else if(count >= Word_Length) {
208                 for(s = 0; s <= count - Word_Length; s++) 
209                   sum++;
210
211                 count = 0;
212             }
213             else {
214                 count = 0;
215             }
216             if(consBppMat->ref(i, j) >= BaseProbThreshold && count >= Word_Length && (i == 1 || j == length || j == (k + BandWidth - 1))) {
217                 for(s = 0; s <= count - Word_Length; s++) 
218                     sum++;
219
220                 count = 0;
221             }
222         }
223     
224         count = 0;
225         for(i = k, j = k + 1; i >= 1 && j <= (k + BandWidth - 1) && j <= length; i--, j++) {
226             if(consBppMat->ref(i, j) >= BaseProbThreshold) {
227                 count++;
228             }
229             else if(count >= Word_Length) {
230                 for(s = 0; s <= count - Word_Length; s++) sum++;
231
232                 count = 0;
233             }
234             else {
235                 count = 0;
236             }
237
238             if(consBppMat->ref(i, j) >= BaseProbThreshold && count >= Word_Length && (i == 1 || j == length || j == (k + BandWidth - 1))) {
239                 for(s = 0; s <= count - Word_Length; s++) sum++;
240
241                 count = 0;
242             }
243         }
244     }
245
246     return 2 * sum;
247 }
248
249 static std::vector<StemCandidate>* 
250 makeProfileScs(std::vector<StemCandidate> *pscs, MultiSequence *Sequences, Trimat<float>* consBppMat, int BandWidth)
251 {
252
253     int length = Sequences->GetSequence(0)->GetLength();
254     int Word_Length = scsLength;
255     float Thr = BaseProbThreshold;
256     int i, j, k, s, t, m, n, l;
257     int count;
258     int sum;
259     
260     sum = 0;
261     for(k = 1; k <= length; k++) {
262         count = 0;
263         for(i = k, j = k; i >= 1 && j <= (k + BandWidth - 1) && j <= length; i--, j++) {
264             if(consBppMat->ref(i,j) >= Thr) {
265                 count++;
266             }
267             else if(count >= Word_Length) {
268                 for(s = 0; s <= count- Word_Length; s++) {
269                     sum++;
270                     pscs->at(sum).SetLength(Word_Length);
271                     pscs->at(sum).SetPosition(i+1+s);
272                     pscs->at(sum).SetRvposition(j-count+(count-Word_Length-s));
273                     pscs->at(sum).SetDistance((j-count+count-Word_Length-s) - (i+1+s+Word_Length));
274                     pscs->at(sum).SetNumSeq(Sequences->GetNumSequences());
275                     pscs->at(sum).SetNumSubstr(Sequences->GetNumSequences());
276                     pscs->at(sum).SetNumRvstr(Sequences->GetNumSequences());
277                     for(m = i + 1 + s, n = j - count + (count-Word_Length-s), l = j - 1 - s, t = 0; n < j-s; m++, n++, l--, t++) {
278                         for(int num = 0; num < Sequences->GetNumSequences(); num++) {
279                             Sequence *seq = Sequences->GetSequence(num);
280 //                          cout << num << "; " << m << ":" << seq->GetPosition(m) << " " << n << ":" << seq->GetPosition(n) << endl;
281                             pscs->at(sum).AddSubstr(num, seq->GetPosition(m));
282                             pscs->at(sum).AddRvstr(num, seq->GetPosition(n));
283                         }
284                         //          assert(pr[iindx[m]-l] > Thr);
285 //                      cout << "prob=" << consBppMat->ref(m,l) << endl;
286                         pscs->at(sum).AddScore(consBppMat->ref(m,l));
287                         pscs->at(sum).AddBaseScore(consBppMat->ref(m, l));
288                     }
289                     for(int num = 0; num < Sequences->GetNumSequences(); num++) {
290                         pscs->at(sum).AddSubstr(num, '\0'); 
291                         pscs->at(sum).AddRvstr(num, '\0');
292                     }
293                 }
294                 count = 0;
295             }
296             else {
297                 count = 0;
298             }
299             if(consBppMat->ref(i,j) >= Thr && count >= Word_Length && (i == 1 || j == length || j == (k + BandWidth - 1))) {
300                 for(s = 0; s <= count- Word_Length; s++) {
301                     sum++;
302                     pscs->at(sum).SetLength(Word_Length);
303                     pscs->at(sum).SetPosition(i+s);
304                     pscs->at(sum).SetRvposition(j-count+1+(count-Word_Length-s));
305                     pscs->at(sum).SetDistance((j-count+1+count-Word_Length-s) - (i+s+Word_Length));
306                     pscs->at(sum).SetNumSeq(Sequences->GetNumSequences());
307                     pscs->at(sum).SetNumSubstr(Sequences->GetNumSequences());
308                     pscs->at(sum).SetNumRvstr(Sequences->GetNumSequences());
309                     for(m = i + s, n = j - count + 1 + (count-Word_Length-s), l = j - s, t = 0; n <= j-s; m++, n++, l--, t++) {
310                         for(int num = 0; num < Sequences->GetNumSequences(); num++) {
311                             Sequence *seq = Sequences->GetSequence(num);
312                             pscs->at(sum).AddSubstr(num, seq->GetPosition(m));
313                             pscs->at(sum).AddRvstr(num, seq->GetPosition(n));
314                         }
315
316                         pscs->at(sum).AddScore(consBppMat->ref(m,l));
317                         pscs->at(sum).AddBaseScore(consBppMat->ref(m, l));
318                     }
319                     for(int num = 0; num < Sequences->GetNumSequences(); num++) {
320                         pscs->at(sum).AddSubstr(num, '\0'); 
321                         pscs->at(sum).AddRvstr(num, '\0');
322                     }
323                 }
324                 count = 0;
325             }
326         }
327         count = 0;
328         for(i = k, j = k + 1; i >= 1 && j <= (k + BandWidth - 1) && j <= length; i--, j++) {
329             if(consBppMat->ref(i,j) >= Thr) {
330                 count++;
331             }
332             else if(count >= Word_Length) {
333                 for(s = 0; s <= count- Word_Length; s++) {
334                     sum++;
335                     pscs->at(sum).SetLength(Word_Length);
336                     pscs->at(sum).SetPosition(i+1+s);
337                     pscs->at(sum).SetRvposition( j-count+(count-Word_Length-s));
338                     pscs->at(sum).SetDistance((j-count+count-Word_Length-s) - (i+1+s+Word_Length));
339                     pscs->at(sum).SetNumSeq(Sequences->GetNumSequences());
340                     pscs->at(sum).SetNumSubstr(Sequences->GetNumSequences());
341                     pscs->at(sum).SetNumRvstr(Sequences->GetNumSequences());
342                     for(m = i + 1 + s, n = j - count + (count-Word_Length-s), l = j - 1 - s, t = 0; n < j-s; m++, n++, l--, t++) {
343                         for(int num = 0; num < Sequences->GetNumSequences(); num++) {
344                             Sequence *seq = Sequences->GetSequence(num);
345                             pscs->at(sum).AddSubstr(num, seq->GetPosition(m));
346                             pscs->at(sum).AddRvstr(num, seq->GetPosition(n));
347                         }
348
349                         pscs->at(sum).AddScore(consBppMat->ref(m,l));
350                         pscs->at(sum).AddBaseScore(consBppMat->ref(m, l));
351                     }
352                     for(int num = 0; num < Sequences->GetNumSequences(); num++) {
353                         pscs->at(sum).AddSubstr(num, '\0'); 
354                         pscs->at(sum).AddRvstr(num, '\0');
355                     }
356                 }
357                 count = 0;
358             }
359             else {
360                 count = 0;
361             }
362             if(consBppMat->ref(i,j) >= Thr && count >= Word_Length && (i == 1 || j == length || j == (k + BandWidth - 1))) {
363                 for(s = 0; s <= count - Word_Length; s++) {
364                     sum++;
365                     pscs->at(sum).SetLength(Word_Length);
366                     pscs->at(sum).SetPosition(i+s);
367                     pscs->at(sum).SetRvposition(j-count+1+(count-Word_Length-s));
368                     pscs->at(sum).SetDistance((j-count+1+count-Word_Length-s) - (i+s+Word_Length));
369 //                  pscs->at(sum).SetDistance((j-count+count-Word_Length-s) - (i+1+s+Word_Length));
370                     pscs->at(sum).SetNumSeq(Sequences->GetNumSequences());
371                     pscs->at(sum).SetNumSubstr(Sequences->GetNumSequences());
372                     pscs->at(sum).SetNumRvstr(Sequences->GetNumSequences());
373                     for(m = i + s, n = j - count + 1 + (count-Word_Length-s), l = j - s, t=0; n <= j-s; m++, n++, l--, t++) {
374                         for(int num = 0; num < Sequences->GetNumSequences(); num++) {
375                             Sequence *seq = Sequences->GetSequence(num);
376                             pscs->at(sum).AddSubstr(num, seq->GetPosition(m));
377                             pscs->at(sum).AddRvstr(num, seq->GetPosition(n));
378                         }
379
380                         pscs->at(sum).AddScore(consBppMat->ref(m,l));
381                         pscs->at(sum).AddBaseScore(consBppMat->ref(m, l));
382                     }
383                     for(int num = 0; num < Sequences->GetNumSequences(); num++) {
384                         pscs->at(sum).AddSubstr(num, '\0'); 
385                         pscs->at(sum).AddRvstr(num, '\0');
386                     }
387                 }
388                 count = 0;
389             }
390         }
391     }
392
393     return pscs;
394 }
395
396 static std::vector<StemCandidate>* 
397 doubleScs(std::vector<StemCandidate> *pscs)
398 {
399     int num = pscs->size()/2;
400     
401     for(int i = 1; i <= num; i++) {
402         int latter = num + i;
403         //cout << i << " " << latter << endl;
404         StemCandidate &tmpScs = pscs->at(i);
405         pscs->at(latter).SetLength(tmpScs.GetLength());
406         pscs->at(latter).SetPosition(tmpScs.GetRvposition());
407         pscs->at(latter).SetRvposition(tmpScs.GetPosition());
408         pscs->at(latter).SetDistance(-tmpScs.GetDistance());
409         pscs->at(latter).SetNumSeq(tmpScs.GetNumSeq());
410         pscs->at(latter).SetNumSubstr(tmpScs.GetNumSeq());
411         pscs->at(latter).SetNumRvstr(tmpScs.GetNumSeq());
412
413         pscs->at(latter).SetScore(tmpScs.GetScore());
414         for(int num = 0; num < tmpScs.GetNumSeq(); num++) {
415             string tmpSubstr = tmpScs.GetSubstr(num);
416             string tmpRvstr  = tmpScs.GetRvstr(num);
417
418             for(int k = 0; k < tmpScs.GetLength(); k++) {
419                 pscs->at(latter).AddSubstr(num, tmpSubstr[k]);
420                 pscs->at(latter).AddRvstr(num, tmpRvstr[k]);
421             }
422         }
423         for(int k = 0; k < tmpScs.GetLength(); k++) {
424             pscs->at(latter).AddBaseScore(tmpScs.GetBaseScore(k));
425         }
426     }
427     return pscs;
428 }
429
430
431 static void 
432 printScs(std::vector<StemCandidate> *pscs)
433 {
434     int num = pscs->size();
435 //    std::cout << "size = " << num << endl;
436     for(int i = 1; i < num; i++) {
437         StemCandidate &sc = pscs->at(i);
438         
439         std::cout << i << "\t" << sc.GetLength() << "\t" << sc.GetPosition() << "\t" << 
440             sc.GetRvposition() << "\t" << sc.GetDistance() << "\t" << sc.GetNumSeq() << 
441             "\t" << sc.GetScore() << "\t" << sc.GetContPos() << "\t" << sc.GetBeforePos() << 
442             "\t" << sc.GetRvscnumber() << "\t" << sc.GetStacking() << "\t" << sc.GetStemStacking() << 
443             "\t" << sc.GetBaseScore(0) << "\t" << sc.GetBaseScore(1) << endl;
444         cout << "substr:" << endl;
445         for(int k = 0; k < sc.GetNumSeq(); k++) {
446             cout << k << "\t" << sc.GetSubstr(k) << "\t" << sc.GetRvstr(k) << "\t" << endl;
447         }
448         
449     }
450
451 }
452
453 static std::vector<StemCandidate>*
454 findRelations(std::vector<StemCandidate> *pscs)
455 {
456     int num = pscs->size();
457     
458     for(int i = 1; i < num; i++) {
459         int pt = i-1; 
460         StemCandidate &sc = pscs->at(i);
461         sc.SetContPos(-1);
462         while(sc.GetPosition() == pscs->at(pt).GetPosition()) { pt--; }
463         
464         while((sc.GetPosition() == pscs->at(pt).GetPosition() + 1) && (pt > 0)) {
465             if(sc.GetRvposition() == pscs->at(pt).GetRvposition() - 1) {
466                 sc.SetContPos(pt);
467                 break;
468             }
469             --pt;
470         }
471         while((sc.GetPosition() < pscs->at(pt).GetPosition() + pscs->at(pt).GetLength())&&(pt > 0)) { pt--; }
472         sc.SetBeforePos(pt);
473     }
474     
475     return pscs;
476 }
477
478 static std::vector<StemCandidate>*
479 findCorresponding(std::vector<StemCandidate>* pscs)
480 {
481     int num = pscs->size();
482     
483     for(int i = 1; i < num; i++) { pscs->at(i).SetRvscnumber(0); }
484     
485     for(int i = 1; i < num; i++) {
486         if(pscs->at(i).GetDistance() > 0) {
487             for(int j = i + 1; j < num; j++) {
488                 if ( (pscs->at(j).GetPosition() == pscs->at(i).GetRvposition())
489                      && (pscs->at(i).GetPosition() == pscs->at(j).GetRvposition()) ) {
490                     pscs->at(i).SetRvscnumber(j);
491                     pscs->at(j).SetRvscnumber(i);
492                     break;
493                 }
494             }
495         }
496         if(pscs->at(i).GetRvscnumber() == 0) {
497             std::cerr << "error in findCorresponding" << " i=" << i << endl;
498 //          exit(1);
499         }
500     }
501
502     return pscs;
503 }
504
505 static std::vector<StemCandidate>*
506 calculateStackingEnergy(std::vector<StemCandidate>* pscs)
507 {
508     int num = pscs->size();
509     int wordLength = scsLength;
510
511     for(int i = 1; i < num; i++) {
512         StemCandidate &scI = pscs->at(i);
513         int j = pscs->at(i).GetContPos();
514         
515         if(j > 0) {
516
517             StemCandidate &scJ = pscs->at(j);
518             float stacking = 0;
519             int profNum =  scJ.GetNumSeq();
520             for(int k = 0; k < profNum; k++) {
521                 string substr = scJ.GetSubstr(k);
522                 string rvstr  = scJ.GetRvstr(k);
523                 int index = 0;
524                 switch(substr[wordLength - 1]) {
525                         case 'A': if(     rvstr[0]=='U' ) {index += 0;}
526                                   else{ index = -1000; }
527                                   break;
528                         case 'C': if(     rvstr[0]=='G' ) {index += 6;}
529                                   else{ index = -1000; }
530                                   break;
531                         case 'G': if(     rvstr[0]=='C'){index += 12;}
532                                   else if(rvstr[0]=='U'){index += 18;}
533                                   else{ index = -1000; }
534                                   break;
535                         case 'U': if(     rvstr[0]=='A'){index += 24;}
536                                   else if(rvstr[0]=='G'){index += 30;}
537                                   else{ index = - 1000; }
538                                   break;
539                 }
540                 substr = scI.GetSubstr(k);
541                 rvstr  = scI.GetRvstr(k);
542                 switch(substr[wordLength - 1]){
543                         case 'A': if(     rvstr[0]=='U'){index += 0;}
544                                   else{ index = -1000; }
545                                   break;
546                         case 'C': if(     rvstr[0]=='G'){index += 1;}
547                                   else{ index = -1000; }
548                                   break;
549                         case 'G': if(     rvstr[0]=='C'){index += 2;}
550                                   else if(rvstr[0]=='U'){index += 3;}
551                                   else{ index = -1000; }
552                                   break;
553                         case 'U': if(     rvstr[0]=='A'){index += 4;}
554                                   else if(rvstr[0]=='G'){index += 5;}
555                                   else{ index = -1000; }
556                                   break;
557                 }
558                 if(index > 0) {
559                     stacking += Stacking_Energy[index];
560                 }
561             }
562             scI.SetStacking(stacking/(float)profNum);
563         }
564         else {
565             scI.SetStacking(1000);
566         }
567     }
568
569     for(int i = 1; i < num; i++) {
570         StemCandidate &sc = pscs->at(i);
571         float stemStacking = 0;
572         int profNum = sc.GetNumSeq();
573         for(int k = 0; k < profNum; k++) {
574             string substr = sc.GetSubstr(k);
575             string rvstr  = sc.GetRvstr(k);
576             for(int j = 0; j < wordLength-1; j++) {
577                 int index = 0;
578
579                 switch(substr[j]) {
580                     case 'A': if(   rvstr[wordLength-1-j]=='U'){index += 0;}
581                               else{ index = -1000; }
582                               break;
583                     case 'C': if(   rvstr[wordLength-1-j]=='G'){index += 6;}
584                               else{ index = -1000; }
585                               break;
586                     case 'G': if(   rvstr[wordLength-1-j]=='C'){index += 12;}
587                               else if(rvstr[wordLength-1-j]=='U'){index += 18;}
588                               else{ index = -1000; }
589                               break;
590                     case 'U': if(   rvstr[wordLength-1-j]=='A'){index += 24;}
591                               else if(rvstr[wordLength-1-j]=='G'){index += 30;}
592                               else{ index = -1000; }
593                               break;
594                 }
595                 switch(substr[j+1]){
596                     case 'A': if(   rvstr[wordLength-1-(j+1)]=='U'){index += 0;}
597                               else{ index = -1000; }
598                               break;
599                     case 'C': if(   rvstr[wordLength-1-(j+1)]=='G'){index += 1;}
600                               else{ index = -1000; }
601                               break;
602                     case 'G': if(     rvstr[wordLength-1-(j+1)]=='C'){index += 2;}
603                               else if(rvstr[wordLength-1-(j+1)]=='U'){index += 3;}
604                               else{ index = -1000; }
605                               break;
606                     case 'U': if(     rvstr[wordLength-1-(j+1)]=='A'){index += 4;}
607                               else if(rvstr[wordLength-1-(j+1)]=='G'){index += 5;}
608                               else{ index = -1000; }
609                               break;
610                 }
611                 if(index > 0) {
612                     stemStacking += Stacking_Energy[index];
613                 }
614             }
615             sc.SetStemStacking(stemStacking/(float)profNum);
616         }
617     }
618     return pscs;
619 }
620