/* NOTE(review): set to 1 here; the code that tests this macro is not visible
 * nearby -- confirm what it selects before changing the value. */
4 #define CANONICALTREEFORMAT 1
/*
 * seqlen: counts the non-gap characters of seq; val is incremented once for
 * every character that is not '-'.
 * NOTE(review): presumably the scan stops at the terminating NUL and val is
 * returned -- confirm against the full body.
 */
10 int seqlen( char *seq )
14 if( *seq++ != '-' ) val++;
/*
 * seqlen (second definition): counts the non-gap characters of seq, taking
 * the first character of newgapstr into account.
 * NOTE(review): this shares its name with the definition just before it, so
 * presumably a preprocessor conditional selects one of them -- confirm.
 */
18 int seqlen( char *seq )
// When *newgapstr is '-' itself, only '-' is excluded from the count.
21 if( *newgapstr == '-' )
24 if( *seq++ != '-' ) val++;
// Here a character counts only if it is neither '-' nor *newgapstr.
30 if( *seq != '-' && *seq != *newgapstr ) val++;
/*
 * intlen: counts the entries of num that precede the first -1.
 * The array must contain a -1; the loop has no other stopping condition.
 * NOTE(review): presumably value starts at 0 and is returned -- confirm.
 */
38 int intlen( int *num )
41 while( *num++ != -1 ) value++;
/*
 * seqcheck: scans sequence characters for symbols that amino_n maps to -1.
 * For the first such symbol it prints, through reporterr, the symbol, its
 * 1-based site (i+1) and its 1-based sequence number (seq-seqbk+1), followed
 * by a hint to rerun with --anysymbol, and returns that symbol.
 * NOTE(review): amino_n is indexed with (int)(*seq)[i]; where plain char is
 * signed, bytes >= 0x80 yield a negative index. scmx_calc casts to
 * (unsigned char) for the same table -- consider doing the same here.
 * NOTE(review): presumably an enclosing loop advances seq and 0 is returned
 * when every symbol is known; neither is visible here -- confirm.
 */
45 char seqcheck( char **seq )
52 for( i=0; i<len; i++ )
// -1 in amino_n marks a symbol outside the accepted alphabet.
54 if( amino_n[(int)(*seq)[i]] == -1 )
57 reporterr( "========================================================================= \n" );
58 reporterr( "========================================================================= \n" );
59 reporterr( "=== \n" );
60 reporterr( "=== Alphabet '%c' is unknown.\n", (*seq)[i] );
61 reporterr( "=== Please check site %d in sequence %d.\n", i+1, (int)(seq-seqbk+1) );
62 reporterr( "=== \n" );
63 reporterr( "=== To make an alignment having unusual characters (U, @, #, etc), try\n" );
64 reporterr( "=== %% mafft --anysymbol input > output\n" );
65 reporterr( "=== \n" );
66 reporterr( "========================================================================= \n" );
67 reporterr( "========================================================================= \n" );
// The offending symbol itself is the return value.
68 return( (int)(*seq)[i] );
/*
 * intcat: operates on -1-terminated int arrays; the visible step advances s1
 * to its -1 terminator.
 * NOTE(review): presumably s2 (including its -1) is then copied there, by
 * analogy with strcat -- the copy loop is not visible here; confirm.
 */
76 void intcat( int *s1, int *s2 )
78 while( *s1 != -1 ) s1++;
81 // reporterr( "copying %d\n", *s2 );
/*
 * intcpy: NOTE(review): only the signature and a disabled debug line are
 * visible here; presumably copies the -1-terminated array s2 into s1, by
 * analogy with strcpy -- confirm.
 */
87 void intcpy( int *s1, int *s2 )
91 // reporterr( "copying %d\n", *s2 );
/*
 * intncpy: copies exactly n ints from s2 to s1, front to back.
 * No terminator is examined or written. n must not be negative: the loop
 * counts n down to zero, so a negative n never stops at the intended point.
 */
97 void intncpy( int *s1, int *s2, int n )
99 while( n-- ) *s1++ = *s2++;
/*
 * fltncpy: copies exactly n doubles from s2 to s1, front to back.
 * n must not be negative, for the same reason as in intncpy.
 */
102 void fltncpy( double *s1, double *s2, int n )
104 while( n-- ) *s1++ = *s2++;
/*
 * countmem: counts the entries of s that precede the first -1.
 * NOTE(review): presumably v starts at 0 and is returned -- confirm.
 */
107 static int countmem( int *s )
110 while( *s++ != -1 ) v++;
/*
 * lastmem: NOTE(review): only the signature is visible here; presumably
 * returns the last entry before the -1 terminator of s -- confirm.
 */
114 static int lastmem( int *s )
/*
 * scmx_calc: builds a weighted per-column symbol profile.
 *   icyc    index of the last sequence used (sequences 0..icyc are read)
 *   aseq    aligned sequences; the column count is strlen( aseq[0] )
 *   effarr  weight of each sequence
 *   scmx    output, indexed [symbol id][column]
 * For each column, effarr[i] is added to scmx[id][column], where id is
 * amino_n of the character of sequence i in that column. The first column,
 * the interior columns and the last column are handled by separate loops.
 * NOTE(review): presumably the nested j / i<nalphabets loop zeroes scmx
 * first -- its body is not visible here; confirm.
 * NOTE(review): with lgth == 1 the first and last loops both add to column
 * 0, and with lgth == 0 the last loop indexes column -1, unless a guard
 * exists in lines not visible here.
 */
122 void scmx_calc( int icyc, char **aseq, double *effarr, double **scmx )
126 lgth = strlen( aseq[0] );
127 for( j=0; j<lgth; j++ )
129 for( i=0; i<nalphabets; i++ )
// First column.
134 for( i=0; i<icyc+1; i++ )
137 id = amino_n[(unsigned char)aseq[i][0]];
138 scmx[id][0] += (double)effarr[i];
// Interior columns 1 .. lgth-2.
140 for( j=1; j<lgth-1; j++ )
142 for( i=0; i<icyc+1; i++ )
145 id = amino_n[(unsigned char)aseq[i][j]];
146 scmx[id][j] += (double)effarr[i];
// Last column.
149 for( i=0; i<icyc+1; i++ )
152 id = amino_n[(unsigned char)aseq[i][lgth-1]];
153 scmx[id][lgth-1] += (double)effarr[i];
/*
 * exitall: prints arr followed by a newline through reporterr.
 * NOTE(review): presumably terminates the program afterwards (name suggests
 * so); the call is not visible here -- confirm.
 */
157 void exitall( char arr[] )
159 reporterr( "%s\n", arr );
/*
 * display: debugging aid that prints a window of each sequence via reporterr.
 * At most DISPSEQF sequences are shown. For each one, up to 120 characters
 * starting at offset DISPSITEI are copied into b and printed after a 1-based
 * row number, under a ruler line.
 * NOTE(review): strncpy does not terminate b when the source has 120 or more
 * characters; presumably b[120] is set to 0 in a line not visible here --
 * confirm, since b is printed with %s.
 * NOTE(review): seq[i]+DISPSITEI assumes each sequence has at least DISPSITEI
 * characters -- confirm.
 */
163 void display( char **seq, int nseq )
169 if( nseq > DISPSEQF ) imax = DISPSEQF;
171 reporterr( " ....,....+....,....+....,....+....,....+....,....+....,....+....,....+....,....+....,....+....,....+....,....+....,....+\n" );
172 for( i=0; i<+imax; i++ )
174 strncpy( b, seq[i]+DISPSITEI, 120 );
176 reporterr( "%3d %s\n", i+1, b );
/*
 * intergroup_score_consweight: adds to *value a weighted score between two
 * groups of aligned sequences.
 *   seq1, seq2    the two groups (presumably the sources of mseq1 / mseq2)
 *   eff1, eff2    per-sequence weights; a pair (i,j) is weighted
 *                 eff1[i] * eff2[j]
 *   clus1, clus2  number of sequences in each group
 *   len           number of columns scanned
 *   value         accumulator receiving tmpscore * efficient per pair
 * Columns where both characters are '-' are skipped; other columns add
 * amino_dis_consweight_multi[ms1][ms2]. At a gap, penalty is added once and
 * the run of '-' is stepped over with ++k.
 * NOTE(review): presumably the two penalty sections are guarded by
 * ms1 == '-' and ms2 == '-' respectively, as in intergroup_score_gapnomi;
 * the conditions are not visible here -- confirm.
 * NOTE(review): *value is only added to in the visible lines; presumably it
 * is zeroed first. len2 is presumably derived from len -- confirm both.
 */
180 void intergroup_score_consweight( char **seq1, char **seq2, double *eff1, double *eff2, int clus1, int clus2, int len, double *value )
184 unsigned char ms1, ms2;
189 // totaleff1 = 0.0; for( i=0; i<clus1; i++ ) totaleff1 += eff1[i];
190 // totaleff2 = 0.0; for( i=0; i<clus2; i++ ) totaleff2 += eff2[i];
195 for( i=0; i<clus1; i++ )
197 for( j=0; j<clus2; j++ )
199 efficient = eff1[i] * eff2[j]; /* For some reason this goes wrong unless an array is used; probably a bug. (translated from a mis-encoded Japanese comment) */
203 for( k=0; k<len; k++ )
205 ms1 = (unsigned char)mseq1[k];
206 ms2 = (unsigned char)mseq2[k];
// A column that is a gap in both sequences contributes nothing.
207 if( ms1 == '-' && ms2 == '-' ) continue;
208 tmpscore += (double)amino_dis_consweight_multi[ms1][ms2];
// Gap section for mseq1: penalty once, then skip the rest of the run.
212 tmpscore += (double)penalty;
213 tmpscore += (double)amino_dis_consweight_multi[ms1][ms2];
214 while( (ms1=(unsigned char)mseq1[++k]) == '-' )
216 // tmpscore += (double)amino_dis_consweight_multi[ms1][ms2];
// Gap section for mseq2, symmetrical to the one above.
223 tmpscore += (double)penalty;
224 tmpscore += (double)amino_dis_consweight_multi[ms1][ms2];
225 while( (ms2=(unsigned char)mseq2[++k]) == '-' )
227 // tmpscore += (double)amino_dis_consweight_multi[ms1][ms2];
// k was advanced inside the gap runs; stop once it passes len2.
229 if( k > len2 ) break;
233 *value += (double)tmpscore * (double)efficient;
234 // reporterr( "val in _gapnomi = %f\n", *value );
238 fprintf( stdout, "###score = %f\n", score );
241 reporterr( "score in intergroup_score = %f\n", score );
/*
 * intergroup_score_gapnomi: adds to *value the weighted gap-penalty part of
 * the score between two groups of aligned sequences.
 * Parameters have the same roles as in intergroup_score_consweight.
 * In the visible lines every substitution-score addition is commented out,
 * so only penalty is added: once when ms1 is '-' and once when ms2 is '-',
 * each time stepping over the rest of the gap run with ++k. Columns that are
 * '-' in both sequences are skipped.
 * NOTE(review): *value is only added to in the visible lines; presumably it
 * is zeroed first. len2 is presumably derived from len -- confirm both.
 */
245 void intergroup_score_gapnomi( char **seq1, char **seq2, double *eff1, double *eff2, int clus1, int clus2, int len, double *value )
254 // totaleff1 = 0.0; for( i=0; i<clus1; i++ ) totaleff1 += eff1[i];
255 // totaleff2 = 0.0; for( i=0; i<clus2; i++ ) totaleff2 += eff2[i];
260 for( i=0; i<clus1; i++ )
262 for( j=0; j<clus2; j++ )
264 efficient = eff1[i] * eff2[j]; /* For some reason this goes wrong unless an array is used; probably a bug. (translated from a mis-encoded Japanese comment) */
268 for( k=0; k<len; k++ )
272 if( ms1 == (int)'-' && ms2 == (int)'-' ) continue;
273 // tmpscore += (double)amino_dis[ms1][ms2];
// Gap opening in mseq1: penalty once, then skip the rest of the run.
275 if( ms1 == (int)'-' )
277 tmpscore += (double)penalty;
278 // tmpscore += (double)amino_dis[ms1][ms2];
279 while( (ms1=(int)mseq1[++k]) == (int)'-' )
281 // tmpscore += (double)amino_dis[ms1][ms2];
// Gap opening in mseq2, symmetrical to the one above.
286 if( ms2 == (int)'-' )
288 tmpscore += (double)penalty;
289 // tmpscore += (double)amino_dis[ms1][ms2];
290 while( (ms2=(int)mseq2[++k]) == (int)'-' )
292 // tmpscore += (double)amino_dis[ms1][ms2];
294 if( k > len2 ) break;
298 *value += (double)tmpscore * (double)efficient;
299 // reporterr( "val in _gapnomi = %f\n", *value );
303 fprintf( stdout, "###score = %f\n", score );
306 reporterr( "score in intergroup_score = %f\n", score );
/*
 * intergroup_score_multimtx: adds to *value a weighted score between two
 * groups of aligned sequences, using one of several scoring matrices.
 *   whichmtx         NOTE(review): presumably selects the matrix index c for
 *                    a pair; the assignment to c is not visible -- confirm
 *   scoringmatrices  matrices indexed [c][symbol id][symbol id]
 *   remaining parameters as in intergroup_score_consweight
 * Characters are converted to symbol ids through amino_n; gapnum is the id
 * of '-'. Columns where both ids equal gapnum are skipped. Gap sections add
 * penalty to both tmpscore and gaptmpscore, and the matrix term is also
 * added for each further gap column while the run is stepped over.
 * gapscore collects gaptmpscore * efficient and is reported via reporterr.
 * NOTE(review): *value is only added to in the visible lines; presumably it
 * is zeroed first. len2 is presumably derived from len -- confirm both.
 */
311 void intergroup_score_multimtx( int **whichmtx, double ***scoringmatrices, char **seq1, char **seq2, double *eff1, double *eff2, int clus1, int clus2, int len, double *value )
319 int gapnum = amino_n['-'];
322 double gapscore = 0.0;
324 // reporterr( "#### in intergroup_score\n" );
326 // totaleff1 = 0.0; for( i=0; i<clus1; i++ ) totaleff1 += eff1[i];
327 // totaleff2 = 0.0; for( i=0; i<clus2; i++ ) totaleff2 += eff2[i];
329 // reporterr( "\n intergroup_score_multimtx ..." );
331 for( i=0; i<clus1; i++ )
333 for( j=0; j<clus2; j++ )
335 efficient = eff1[i] * eff2[j]; /* For some reason this goes wrong unless an array is used; probably a bug. (translated from a mis-encoded Japanese comment) */
341 for( k=0; k<len; k++ )
343 mn1 = amino_n[(unsigned char)(mseq1[k])];
344 mn2 = amino_n[(unsigned char)(mseq2[k])];
345 if( mn1 == gapnum && mn2 == gapnum ) continue;
346 tmpscore += (double)scoringmatrices[c][mn1][mn2];
347 // tmpscore += (double)scoringmtx[mn1][mn2];
// Gap section for mseq1.
351 tmpscore += (double)penalty;
352 gaptmpscore += (double)penalty;
353 // tmpscore += (double)scoringmtx[mn1][mn2];
354 tmpscore += (double)scoringmatrices[c][mn1][mn2];
355 while( (mn1=amino_n[(unsigned char)mseq1[++k]]) == gapnum )
356 tmpscore += (double)scoringmatrices[c][mn1][mn2];
357 // tmpscore += (double)scoringmtx[mn1][mn2];
// Gap section for mseq2.
364 tmpscore += (double)penalty;
365 gaptmpscore += (double)penalty;
366 tmpscore += (double)scoringmatrices[c][mn1][mn2];
367 // tmpscore += (double)scoringmtx[mn1][mn2];
368 while( (mn2=amino_n[(unsigned char)mseq2[++k]]) == gapnum )
369 tmpscore += (double)scoringmatrices[c][mn1][mn2];
370 // tmpscore += (double)scoringmtx[mn1][mn2];
372 if( k > len2 ) break;
376 *value += (double)tmpscore * (double)efficient;
377 gapscore += (double)gaptmpscore * (double)efficient;
380 // reporterr( "done." );
382 reporterr( "###gapscore = %f\n", gapscore );
385 reporterr( "score in intergroup_score = %f\n", score );
/*
 * intergroup_score_dynmtx: adds to *value a weighted score between two
 * groups of aligned sequences, with a per-pair offset on every matrix term.
 *   offsetmtx   per-pair offsets; offsetmtx[i][j] * 600 is added to each
 *               scoringmtx[ms1][ms2] term
 *   scoringmtx  0x80 x 0x80 table indexed directly by character value
 *   remaining parameters as in intergroup_score_consweight
 * Columns that are '-' in both sequences are skipped. A gap in either
 * sequence adds penalty to tmpscore and gaptmpscore, and the matrix+offset
 * term is also added for each further gap column of the run.
 * gapscore collects gaptmpscore * efficient and is reported via reporterr.
 * NOTE(review): scoringmtx has 0x80 rows and columns, so characters outside
 * 0..0x7f would index out of bounds -- confirm callers guarantee ASCII.
 * NOTE(review): *value is only added to in the visible lines; presumably it
 * is zeroed first. len2 is presumably derived from len -- confirm both.
 */
389 void intergroup_score_dynmtx( double **offsetmtx, int scoringmtx[0x80][0x80], char **seq1, char **seq2, double *eff1, double *eff2, int clus1, int clus2, int len, double *value )
399 double gapscore = 0.0;
401 // reporterr( "#### in intergroup_score\n" );
403 // totaleff1 = 0.0; for( i=0; i<clus1; i++ ) totaleff1 += eff1[i];
404 // totaleff2 = 0.0; for( i=0; i<clus2; i++ ) totaleff2 += eff2[i];
406 reporterr( "\n intergroup_score_dynmtx ..." );
408 for( i=0; i<clus1; i++ )
410 for( j=0; j<clus2; j++ )
412 efficient = eff1[i] * eff2[j]; /* For some reason this goes wrong unless an array is used; probably a bug. (translated from a mis-encoded Japanese comment) */
417 for( k=0; k<len; k++ )
421 if( ms1 == (int)'-' && ms2 == (int)'-' ) continue;
422 tmpscore += (double)scoringmtx[ms1][ms2] + offsetmtx[i][j] * 600;
423 // tmpscore += (double)scoringmtx[ms1][ms2];
// Gap in mseq1.
425 if( ms1 == (int)'-' )
427 tmpscore += (double)penalty;
428 gaptmpscore += (double)penalty;
429 // tmpscore += (double)scoringmtx[ms1][ms2];
430 tmpscore += (double)scoringmtx[ms1][ms2] + offsetmtx[i][j] * 600;;
431 while( (ms1=(int)mseq1[++k]) == (int)'-' )
432 tmpscore += (double)scoringmtx[ms1][ms2] + offsetmtx[i][j] * 600;
433 // tmpscore += (double)scoringmtx[ms1][ms2];
// Gap in mseq2.
438 if( ms2 == (int)'-' )
440 tmpscore += (double)penalty;
441 gaptmpscore += (double)penalty;
442 tmpscore += (double)scoringmtx[ms1][ms2] + offsetmtx[i][j] * 600;
443 // tmpscore += (double)scoringmtx[ms1][ms2];
444 while( (ms2=(int)mseq2[++k]) == (int)'-' )
445 tmpscore += (double)scoringmtx[ms1][ms2] + offsetmtx[i][j] * 600;
446 // tmpscore += (double)scoringmtx[ms1][ms2];
448 if( k > len2 ) break;
452 *value += (double)tmpscore * (double)efficient;
453 gapscore += (double)gaptmpscore * (double)efficient;
456 reporterr( "done." );
458 reporterr( "###gapscore = %f\n", gapscore );
461 reporterr( "score in intergroup_score = %f\n", score );
/*
 * intergroup_score: adds to *value a weighted score between two groups of
 * aligned sequences, using amino_dis_consweight_multi.
 * Parameters have the same roles as in intergroup_score_consweight.
 * Columns that are '-' in both sequences are skipped. Gap sections add
 * penalty to tmpscore and gaptmpscore; unlike intergroup_score_consweight,
 * the matrix term is also added for each further gap column while the run
 * is stepped over. gapscore collects gaptmpscore * efficient and is reported
 * via reporterr.
 * NOTE(review): presumably the two penalty sections are guarded by
 * ms1 == '-' and ms2 == '-'; the conditions are not visible here -- confirm.
 * NOTE(review): *value is only added to in the visible lines; presumably it
 * is zeroed first. len2 is presumably derived from len -- confirm both.
 */
465 void intergroup_score( char **seq1, char **seq2, double *eff1, double *eff2, int clus1, int clus2, int len, double *value )
469 unsigned char ms1, ms2;
475 double gapscore = 0.0;
477 // reporterr( "#### in intergroup_score\n" );
479 // totaleff1 = 0.0; for( i=0; i<clus1; i++ ) totaleff1 += eff1[i];
480 // totaleff2 = 0.0; for( i=0; i<clus2; i++ ) totaleff2 += eff2[i];
483 for( i=0; i<clus1; i++ )
485 for( j=0; j<clus2; j++ )
487 efficient = eff1[i] * eff2[j]; /* For some reason this goes wrong unless an array is used; probably a bug. (translated from a mis-encoded Japanese comment) */
492 for( k=0; k<len; k++ )
494 ms1 = (unsigned char)mseq1[k];
495 ms2 = (unsigned char)mseq2[k];
496 if( ms1 == '-' && ms2 == '-' ) continue;
497 // tmpscore += (double)amino_dis[ms1][ms2];
498 tmpscore += (double)amino_dis_consweight_multi[ms1][ms2];
// Gap section for mseq1.
502 tmpscore += (double)penalty;
503 gaptmpscore += (double)penalty;
504 // tmpscore += (double)amino_dis[ms1][ms2];
505 tmpscore += (double)amino_dis_consweight_multi[ms1][ms2];
506 while( (ms1=(unsigned char)mseq1[++k]) == '-' )
507 // tmpscore += (double)amino_dis[ms1][ms2];
508 tmpscore += (double)amino_dis_consweight_multi[ms1][ms2];
// Gap section for mseq2.
515 tmpscore += (double)penalty;
516 gaptmpscore += (double)penalty;
517 // tmpscore += (double)amino_dis[ms1][ms2];
518 tmpscore += (double)amino_dis_consweight_multi[ms1][ms2];
519 while( (ms2=(unsigned char)mseq2[++k]) == '-' )
520 // tmpscore += (double)amino_dis[ms1][ms2];
521 tmpscore += (double)amino_dis_consweight_multi[ms1][ms2];
523 if( k > len2 ) break;
527 *value += (double)tmpscore * (double)efficient;
528 gapscore += (double)gaptmpscore * (double)efficient;
532 reporterr( "###gapscore = %f\n", gapscore );
535 reporterr( "score in intergroup_score = %f\n", score );
/*
 * score_calc5: weighted sum-of-pairs score of an alignment, computed in two
 * parts around the sequence with index ex.
 *   seq  aligned sequences; the column count is strlen( seq[0] )
 *   s    number of sequences
 *   eff  pair weights, read as eff[i][ex] and eff[i][j]
 *   ex   index of the sequence treated separately
 * Part 1 scores every pair (i, ex) with i != ex, weighted by eff[i][ex].
 * Part 2 scores every pair i < j where neither is ex, weighted by eff[i][j].
 * Within a pair, columns that are '-' in both sequences are skipped, other
 * columns add amino_dis, gap runs are stepped over with ++k, and the scan
 * stops once k > len-2 inside a gap section.
 * Returns the accumulated score.
 * NOTE(review): statements appear after the return; presumably they are
 * compiled out or belong to a disabled debug path -- confirm.
 */
540 double score_calc5( char **seq, int s, double **eff, int ex ) /* not method 3 */
544 int len = strlen( seq[0] );
// Part 1: pairs that involve sequence ex.
559 if( i == ex ) continue;
560 efficient = eff[i][ex];
564 for( k=0; k<len; k++ )
566 if( mseq1[k] == '-' && mseq2[k] == '-' ) continue;
567 tmpscore += amino_dis[(unsigned char)mseq1[k]][(unsigned char)mseq2[k]];
569 if( mseq1[k] == '-' )
572 while( mseq1[++k] == '-' )
573 tmpscore += amino_dis[(unsigned char)mseq1[k]][(unsigned char)mseq2[k]];
575 if( k > len-2 ) break;
578 if( mseq2[k] == '-' )
581 while( mseq2[++k] == '-' )
582 tmpscore += amino_dis[(unsigned char)mseq1[k]][(unsigned char)mseq2[k]];
584 if( k > len-2 ) break;
588 score += (double)tmpscore * efficient;
590 fprintf( stdout, "%d-%d tmpscore = %f, eff = %f, tmpscore*eff = %f\n", i, ex, tmpscore, efficient, tmpscore*efficient );
594 fprintf( stdout, "total score = %f\n", score );
// Part 2: all pairs that do not involve sequence ex.
597 for( i=0; i<s-1; i++ )
599 for( j=i+1; j<s; j++ )
601 if( i == ex || j == ex ) continue;
603 efficient = eff[i][j];
607 for( k=0; k<len; k++ )
609 if( mseq1[k] == '-' && mseq2[k] == '-' ) continue;
610 tmpscore += amino_dis[(unsigned char)mseq1[k]][(unsigned char)mseq2[k]];
612 if( mseq1[k] == '-' )
615 while( mseq1[++k] == '-' )
616 tmpscore += amino_dis[(unsigned char)mseq1[k]][(unsigned char)mseq2[k]];
618 if( k > len-2 ) break;
621 if( mseq2[k] == '-' )
624 while( mseq2[++k] == '-' )
625 tmpscore += amino_dis[(unsigned char)mseq1[k]][(unsigned char)mseq2[k]];
627 if( k > len-2 ) break;
631 score += (double)tmpscore * efficient;
635 reporterr( "score in score_calc5 = %f\n", score );
637 return( (double)score );
640 fprintf( trap_g, "score by fast = %f\n", (double)score );
642 tmpscore = score = 0.0;
645 if( i == ex ) continue;
646 tmpscore = Cscore_m_1( seq, i, eff );
647 fprintf( stdout, "%d %f\n", i, tmpscore );
651 tmpscore = Cscore_m_1( seq, ex, eff );
652 fprintf( stdout, "ex%d %f\n", i, tmpscore );
/*
 * score_calc4: weighted sum-of-pairs score over all pairs i < j.
 *   seq  aligned sequences; the column count is strlen( seq[0] )
 *   s    number of sequences
 *   eff  pair weights eff[i][j]; replaced by 1.0 when mix == 1
 *   ex   not used in the visible lines
 * Columns that are '-' in both sequences are skipped. Other columns add
 * amino_dis plus 400 when scoremtx is 0. A gap opening in either sequence
 * adds penalty - n_dis[24][0], after which the gap run is stepped over.
 * Returns the accumulated score.
 * NOTE(review): one printf formats tmpscore with %d while score_calc5 prints
 * its tmpscore with %f; the declaration here is not visible -- confirm the
 * format matches the type.
 */
661 double score_calc4( char **seq, int s, double **eff, int ex ) /* not method 3 */
665 int len = strlen( seq[0] );
674 printf( "in score_calc4\n" );
679 printf( "% 5.3f", eff[i][j] );
685 for( i=0; i<s-1; i++ )
687 for( j=i+1; j<s; j++ )
689 efficient = eff[i][j];
690 if( mix == 1 ) efficient = 1.0;
692 printf( "weight for %d v.s. %d = %f\n", i, j, efficient );
697 for( k=0; k<len; k++ )
699 if( mseq1[k] == '-' && mseq2[k] == '-' ) continue;
700 tmpscore += amino_dis[(unsigned char)mseq1[k]][(unsigned char)mseq2[k]] + 400 * !scoremtx ;
704 if( mseq1[k] == '-' )
706 tmpscore += penalty - n_dis[24][0];
707 while( mseq1[++k] == '-' )
710 if( k > len-2 ) break;
713 if( mseq2[k] == '-' )
715 tmpscore += penalty - n_dis[24][0];
716 while( mseq2[++k] == '-' )
719 if( k > len-2 ) break;
724 if( x == 65 ) printf( "i=%d j=%d tmpscore=%d l=%d\n", i, j, tmpscore, len );
726 score += (double)tmpscore * efficient;
730 return( (double)score );
/*
 * upg2: builds a tree by repeatedly merging the closest pair of clusters.
 *   nseq   number of sequences
 *   eff    distances, read and overwritten as eff[smaller][larger]
 *   topol  output: topol[k][0] and topol[k][1] receive the -1-terminated
 *          member lists of the two clusters merged at step k
 *   len    output: len[k][0] and len[k][1] receive the branch lengths
 * Each of the nseq-1 steps picks the pair (im, jm), i < j, with the smallest
 * eff value below 9999.0, records both member lists, sets each branch length
 * to minscore/2 minus the height already stored in tmplen, folds jm's
 * members into im, averages the distances from every other i to im and jm,
 * and retires jm by writing 9999.0 into its entries.
 * pair[c][i] > 0 means sequence i currently belongs to cluster c.
 * NOTE(review): eff is modified in place, so the caller's matrix is consumed.
 * NOTE(review): pair is a static buffer sized njob x njob but indexed up to
 * nseq; presumably nseq <= njob and it is allocated only once -- confirm.
 * NOTE(review): 9999.0 is both the initial minimum and the "retired" marker;
 * if no distance is below it, im and jm stay -1 and are used as indices.
 */
735 void upg2( int nseq, double **eff, int ***topol, double **len )
740 static char **pair = NULL;
744 pair = AllocateCharMtx( njob, njob );
747 for( i=0; i<nseq; i++ ) tmplen[i] = 0.0;
// Start with every sequence in its own cluster.
748 for( i=0; i<nseq; i++ ) for( j=0; j<nseq; j++ ) pair[i][j] = 0;
749 for( i=0; i<nseq; i++ ) pair[i][i] = 1;
751 for( k=0; k<nseq-1; k++ )
753 double minscore = 9999.0;
754 int im = -1, jm = -1;
// Find the closest remaining pair.
757 for( i=0; i<nseq-1; i++ ) for( j=i+1; j<nseq; j++ )
759 if( eff[i][j] < minscore )
761 minscore = eff[i][j];
// Record the members of cluster im, then of cluster jm.
765 for( i=0, count=0; i<nseq; i++ )
766 if( pair[im][i] > 0 )
768 topol[k][0][count] = i;
771 topol[k][0][count] = -1;
772 for( i=0, count=0; i<nseq; i++ )
773 if( pair[jm][i] > 0 )
775 topol[k][1][count] = i;
778 topol[k][1][count] = -1;
780 len[k][0] = minscore / 2.0 - tmplen[im];
781 len[k][1] = minscore / 2.0 - tmplen[jm];
783 tmplen[im] = minscore / 2.0;
// Merge jm into im and empty jm.
785 for( i=0; i<nseq; i++ ) pair[im][i] += ( pair[jm][i] > 0 );
786 for( i=0; i<nseq; i++ ) pair[jm][i] = 0;
// Distance from i to the merged cluster is the mean of its distances to im and jm.
788 for( i=0; i<nseq; i++ )
790 if( i != im && i != jm )
792 eff[MIN(i,im)][MAX(i,im)] =
793 ( eff[MIN(i,im)][MAX(i,im)] + eff[MIN(i,jm)][MAX(i,jm)] ) / 2.0;
794 eff[MIN(i,jm)][MAX(i,jm)] = 9999.0;
796 eff[im][jm] = 9999.0;
799 printf( "STEP-%03d:\n", k+1 );
800 printf( "len0 = %f\n", len[k][0] );
801 for( i=0; topol[k][0][i]>-1; i++ ) printf( " %03d", topol[k][0][i] );
803 printf( "len1 = %f\n", len[k][1] );
804 for( i=0; topol[k][1][i]>-1; i++ ) printf( " %03d", topol[k][1][i] );
/* LARGEBLOCKSIZE is the number of job-list entries a distance worker claims
 * per visit to the shared cursor (see generalkmerdistarrthread).
 * NOTE(review): no use of BLOCKSIZE is visible nearby -- confirm it is used. */
810 #define BLOCKSIZE 100
811 #define LARGEBLOCKSIZE 100
/*
 * generaldistarrthread_arg_t: argument bundle handed to the distance worker
 * functions generalkmerdistarrthread and generalmsadistarrthread. The
 * workers read njob, para, nlen, pointt, ttable, tselfscore, seq, skiptable,
 * joblist, posshared and result from it. The mutex member exists only when
 * enablemultithread is defined.
 * NOTE(review): the struct tag is spelled "_generaltdistarrthread_arg" (extra
 * 't') while the typedef name is "generaldistarrthread_arg_t"; harmless as
 * long as only the typedef name is used -- confirm.
 */
813 typedef struct _generaltdistarrthread_arg
828 #ifdef enablemultithread
829 pthread_mutex_t *mutex;
831 } generaldistarrthread_arg_t;
/*
 * generalkmerdistarrthread: worker that fills result[] with k-mer based
 * distances between sequence m and the sequences listed in joblist.
 *   arg  pointer to a generaldistarrthread_arg_t
 * The worker repeatedly claims a block: it reads *posshared, advances it by
 * LARGEBLOCKSIZE, then processes up to LARGEBLOCKSIZE entries of joblist
 * from the claimed position, storing
 *   result[i] = distcompact( nlen[m], nlen[i], ttable, pointt[i],
 *                            tselfscore[m], tselfscore[i] ).
 * The shared cursor is protected by targ->mutex only when para is nonzero
 * and enablemultithread is defined. When the cursor has reached njob the
 * worker calls commonsextet_p( NULL, NULL ) and stops claiming work.
 * NOTE(review): m presumably comes from targ; the assignment is not visible
 * here -- confirm.
 * NOTE(review): commonsextet_p( NULL, NULL ) presumably releases per-thread
 * working storage -- confirm against its definition.
 */
833 static void *generalkmerdistarrthread( void *arg ) // also used when enablemultithread == 0
835 generaldistarrthread_arg_t *targ = (generaldistarrthread_arg_t *)arg;
836 int njob = targ->njob;
837 int para = targ->para;
839 int *nlen = targ->nlen;
840 int **pointt = targ->pointt;
841 int *ttable = targ->ttable;
842 int *tselfscore = targ->tselfscore;
843 int *joblist = targ->joblist;
844 int *posshared = targ->posshared;
845 double *result = targ->result;
846 // double **partmtx = targ->partmtx;
847 int i, posinjoblist, n;
849 // for( acpti=ac; acpti!=NULL; acpti=acpti->next )
853 #ifdef enablemultithread
854 if( para ) pthread_mutex_lock( targ->mutex );
856 if( *posshared >= njob ) // >= because the cursor advances a whole block at a time
858 #ifdef enablemultithread
859 if( para ) pthread_mutex_unlock( targ->mutex );
861 commonsextet_p( NULL, NULL );
// Claim the next block of the job list.
864 posinjoblist = *posshared;
865 *posshared += LARGEBLOCKSIZE;
866 #ifdef enablemultithread
867 if( para ) pthread_mutex_unlock( targ->mutex );
870 for( n=0; n<LARGEBLOCKSIZE&&posinjoblist<njob; n++ )
872 i = joblist[posinjoblist++];
874 // if( i == m ) continue; // not needed
876 result[i] = distcompact( nlen[m], nlen[i], ttable, pointt[i], tselfscore[m], tselfscore[i] );
/*
 * generalmsadistarrthread: worker that fills result[] with alignment-based
 * distances between sequence m and the sequences listed in joblist.
 *   arg  pointer to a generaldistarrthread_arg_t
 * Work is claimed in blocks of LARGEBLOCKSIZE through the shared cursor
 * *posshared, exactly as in generalkmerdistarrthread; each claimed entry i
 * gets
 *   result[i] = distcompact_msa( seq[m], seq[i], skiptable[m], skiptable[i],
 *                                tselfscore[m], tselfscore[i] ).
 * The cursor is protected by targ->mutex only when para is nonzero and
 * enablemultithread is defined.
 * NOTE(review): m presumably comes from targ; the assignment is not visible
 * here -- confirm.
 */
882 static void *generalmsadistarrthread( void *arg ) // also used when enablemultithread == 0
884 generaldistarrthread_arg_t *targ = (generaldistarrthread_arg_t *)arg;
885 int njob = targ->njob;
886 int para = targ->para;
888 int *tselfscore = targ->tselfscore;
889 char **seq = targ->seq;
890 int **skiptable = targ->skiptable;
891 int *joblist = targ->joblist;
892 int *posshared = targ->posshared;
893 double *result = targ->result;
894 // double **partmtx = targ->partmtx;
895 int i, posinjoblist, n;
897 // for( acpti=ac; acpti!=NULL; acpti=acpti->next )
901 #ifdef enablemultithread
902 if( para ) pthread_mutex_lock( targ->mutex );
904 if( *posshared >= njob ) // >= because the cursor advances a whole block at a time
906 #ifdef enablemultithread
907 if( para ) pthread_mutex_unlock( targ->mutex );
// Claim the next block of the job list.
911 posinjoblist = *posshared;
912 *posshared += LARGEBLOCKSIZE;
913 #ifdef enablemultithread
914 if( para ) pthread_mutex_unlock( targ->mutex );
917 for( n=0; n<LARGEBLOCKSIZE&&posinjoblist<njob; n++ )
919 i = joblist[posinjoblist++];
921 // if( i == m ) continue; // not needed
923 result[i] = distcompact_msa( seq[m], seq[i], skiptable[m], skiptable[i], tselfscore[m], tselfscore[i] );
/*
 * kmerresetnearest (variant that can use worker threads): recomputes, for the
 * sequence at pos, its smallest distance to the sequences still on the acpt
 * chain. The minimum is stored in *mindisfrompt and the chosen index in
 * *nearestpt.
 *   distfrompt  optional cached distance rows; a NULL row means "not cached"
 *   result      scratch array of distances, indexed by sequence
 *   joblist     scratch list of sequences whose distance must be computed
 * Pass 1 walks the chain entries after pos and then those before pos. Where
 * distfrompt[pos] or distfrompt[i] exists, the distance is read from it,
 * copied into result[i] and compared with the running minimum.
 * Pass 2 runs generalkmerdistarrthread, either on nthread pthreads that share
 * one mutex and one posshared cursor, or once in the calling thread.
 * Pass 3 walks the chain again and takes the minimum over result[j].
 * NOTE(review): presumably entries without a cached row are appended to
 * joblist and counted in j during pass 1; that code is not visible here.
 * NOTE(review): targ and handle come from calloc; neither a NULL check nor a
 * free() is visible here -- confirm both exist.
 * NOTE(review): a second definition with the same name follows; presumably a
 * preprocessor conditional selects one of them -- confirm.
 */
930 static void kmerresetnearest( int nseq, Bchain *acpt, double **distfrompt, double *mindisfrompt, int *nearestpt, int pos, int *tselfscore, int **pointt, int *nlen, int *singlettable1, double *result, int *joblist )
945 // reporterr( "resetnearest..\r" );
946 // printf( "[%d], %f, dist=%d ->", pos, *distfrompt, *nearestpt );
948 // mindisfrom = 999.9;
952 // result = calloc( nseq, sizeof( double ) );
953 // joblist = calloc( nseq, sizeof( int ) );
// Pass 1a: chain entries after pos.
956 for( acptj=(acpt+pos)->next,j=0; acptj!=NULL; acptj=acptj->next ) // kept consistent with setnearest
959 // if( i == pos ) continue;
961 if( distfrompt[pos] )
963 tmpdouble = result[i] = distfrompt[pos][i];
964 if( tmpdouble < mindisfrom )
966 mindisfrom = tmpdouble;
970 else if( distfrompt[i] )
972 tmpdouble = result[i] = distfrompt[i][pos];
973 if( tmpdouble < mindisfrom )
975 mindisfrom = tmpdouble;
// Pass 1b: chain entries before pos.
983 for( acptj=acpt; (acptj&&acptj->pos!=pos); acptj=acptj->next ) // kept consistent with setnearest
986 // if( i == pos ) continue;
988 if( distfrompt[pos] )
990 tmpdouble = result[i] = distfrompt[pos][i];
991 if( tmpdouble < mindisfrom )
993 mindisfrom = tmpdouble;
997 else if( distfrompt[i] )
999 tmpdouble = result[i] = distfrompt[i][pos];
1000 if( tmpdouble < mindisfrom )
1002 mindisfrom = tmpdouble;
1013 // reporterr( "resetting in parallel!! j=%d\n", j );
// Pass 2: compute the uncached distances.
1016 generaldistarrthread_arg_t *targ;
1018 #ifdef enablemultithread
1022 pthread_mutex_t mutex;
1024 targ = calloc( nthread, sizeof( generaldistarrthread_arg_t ) );
1025 handle = calloc( nthread, sizeof( pthread_t ) );
1027 pthread_mutex_init( &mutex, NULL );
1028 for( i=0; i<nthread; i++ )
1033 targ[i].tselfscore = tselfscore;
1034 targ[i].nlen = nlen;
1035 targ[i].pointt = pointt;
1036 targ[i].ttable = singlettable1;
1037 targ[i].joblist = joblist;
1038 targ[i].result = result;
1039 targ[i].posshared = &posshared;
1040 targ[i].mutex = &mutex;
1042 pthread_create( handle+i, NULL, generalkmerdistarrthread, (void *)(targ+i) );
1045 for( j=0; j<nthread; j++ ) pthread_join( handle[j], NULL );
1046 pthread_mutex_destroy( &mutex );
// Single-threaded path: one argument block, worker called directly.
1052 targ = calloc( 1, sizeof( generaldistarrthread_arg_t ) );
1058 targ[0].tselfscore = tselfscore;
1059 targ[0].nlen = nlen;
1060 targ[0].pointt = pointt;
1061 targ[0].ttable = singlettable1;
1062 targ[0].joblist = joblist;
1063 targ[0].result = result;
1064 targ[0].posshared = &posshared;
1066 generalkmerdistarrthread( targ );
// Pass 3: take the minimum over the freshly computed distances.
1071 for( acptj=(acpt+pos)->next; acptj!=NULL; acptj=acptj->next ) // kept consistent with setnearest
1074 tmpdouble = result[j];
1075 if( tmpdouble < mindisfrom )
1077 mindisfrom = tmpdouble;
1082 for( acptj=acpt; (acptj&&acptj->pos!=pos); acptj=acptj->next ) // kept consistent with setnearest
1085 tmpdouble = result[j];
1086 if( tmpdouble < mindisfrom )
1088 mindisfrom = tmpdouble;
1095 *mindisfrompt = mindisfrom;
1096 *nearestpt = nearest;
/*
 * kmerresetnearest (serial variant): recomputes, for the sequence at pos, its
 * smallest distance to the sequences still on the acpt chain. The minimum is
 * stored in *mindisfrompt and the chosen index in *nearestpt.
 * For every chain entry j (those after pos first, then those before pos) the
 * distance is read from distfrompt[pos][j] if that row exists, else from
 * distfrompt[j][pos] if that row exists, else computed with distcompact.
 * resultnotused and joblistnotused exist only to keep the same signature as
 * the threaded variant.
 * NOTE(review): presumably the distcompact call sits in a final else branch
 * and nearest is updated next to mindisfrom; those lines are not visible
 * here -- confirm.
 */
1103 static void kmerresetnearest( int nseq, Bchain *acpt, double **distfrompt, double *mindisfrompt, int *nearestpt, int pos, int *tselfscore, int **pointt, int *nlen, int *singlettable1, double *resultnotused, int *joblistnotused )
1109 // double **effptpt;
1116 // reporterr( "resetnearest..\r" );
1117 // printf( "[%d], %f, dist=%d ->", pos, *distfrompt, *nearestpt );
1119 // mindisfrom = 999.9;
// Chain entries after pos.
1123 for( acptj=(acpt+pos)->next; acptj!=NULL; acptj=acptj->next ) // kept consistent with setnearest
1127 if( distfrompt[pos] )
1128 tmpdouble=distfrompt[pos][j];
1129 else if( distfrompt[j] )
1130 tmpdouble=distfrompt[j][pos];
1132 // tmpdouble=distcompact_msa( seq[pos], seq[j], skiptable[pos], skiptable[j], tselfscore[pos], tselfscore[j] );
1134 tmpdouble=distcompact( nlen[pos], nlen[j], singlettable1, pointt[j], tselfscore[pos], tselfscore[j] );
1137 if( tmpdouble < mindisfrom )
1139 mindisfrom = tmpdouble;
// Chain entries before pos.
1144 for( acptj=acpt; (acptj&&acptj->pos!=pos); acptj=acptj->next ) // kept consistent with setnearest
1148 if( distfrompt[pos] )
1149 tmpdouble=distfrompt[pos][j];
1150 else if( distfrompt[j] )
1151 tmpdouble=distfrompt[j][pos];
1153 // tmpdouble=distcompact_msa( seq[pos], seq[j], skiptable[pos], skiptable[j], tselfscore[pos], tselfscore[j] );
1155 tmpdouble=distcompact( nlen[pos], nlen[j], singlettable1, pointt[j], tselfscore[pos], tselfscore[j] );
1159 if( tmpdouble < mindisfrom )
1161 mindisfrom = tmpdouble;
1165 // printf( "mindisfrom = %f\n", mindisfrom );
1167 *mindisfrompt = mindisfrom;
1168 *nearestpt = nearest;
/*
 * msaresetnearest (variant that can use worker threads): same procedure as
 * the threaded kmerresetnearest, but uncached distances are computed by
 * generalmsadistarrthread from seq and skiptable.
 * The minimum distance from pos to the sequences still on the acpt chain is
 * stored in *mindisfrompt and the chosen index in *nearestpt.
 * Pass 1 reads cached rows of distfrompt into result[i]; pass 2 runs the
 * worker on nthread pthreads (shared mutex and posshared cursor) or once in
 * the calling thread; pass 3 takes the minimum over result[j].
 * NOTE(review): presumably entries without a cached row are appended to
 * joblist and counted in j during pass 1; that code is not visible here.
 * NOTE(review): targ and handle come from calloc; neither a NULL check nor a
 * free() is visible here -- confirm both exist.
 * NOTE(review): a second definition with the same name follows; presumably a
 * preprocessor conditional selects one of them -- confirm.
 */
1173 static void msaresetnearest( int nseq, Bchain *acpt, double **distfrompt, double *mindisfrompt, int *nearestpt, int pos, char **seq, int **skiptable, int *tselfscore, double *result, int *joblist )
1179 // double **effptpt;
1188 // reporterr( "resetnearest..\r" );
1189 // printf( "[%d], %f, dist=%d ->", pos, *distfrompt, *nearestpt );
1191 // mindisfrom = 999.9;
1195 // result = calloc( nseq, sizeof( double ) );
1196 // joblist = calloc( nseq, sizeof( int ) );
1198 // for( acptj=acpt,j=0; acptj!=NULL; acptj=acptj->next ) // kept consistent with setnearest
// Pass 1a: chain entries after pos.
1199 for( acptj=(acpt+pos)->next,j=0; acptj!=NULL; acptj=acptj->next ) // kept consistent with setnearest
1202 // if( i == pos ) continue;
1204 if( distfrompt[pos] )
1206 tmpdouble = result[i] = distfrompt[pos][i];
1207 if( tmpdouble < mindisfrom )
1209 mindisfrom = tmpdouble;
1213 else if( distfrompt[i] )
1215 tmpdouble = result[i] = distfrompt[i][pos];
1216 if( tmpdouble < mindisfrom )
1218 mindisfrom = tmpdouble;
// Pass 1b: chain entries before pos.
1226 for( acptj=acpt; (acptj&&acptj->pos!=pos); acptj=acptj->next ) // kept consistent with setnearest
1229 // if( i == pos ) continue;
1231 if( distfrompt[pos] )
1233 tmpdouble = result[i] = distfrompt[pos][i];
1234 if( tmpdouble < mindisfrom )
1236 mindisfrom = tmpdouble;
1240 else if( distfrompt[i] )
1242 tmpdouble = result[i] = distfrompt[i][pos];
1243 if( tmpdouble < mindisfrom )
1245 mindisfrom = tmpdouble;
1256 // reporterr( "resetting in parallel!! j=%d\r", j );
// Pass 2: compute the uncached distances.
1259 generaldistarrthread_arg_t *targ;
1262 #ifdef enablemultithread
1266 pthread_mutex_t mutex;
1267 targ = calloc( nthread, sizeof( generaldistarrthread_arg_t ) );
1268 handle = calloc( nthread, sizeof( pthread_t ) );
1269 pthread_mutex_init( &mutex, NULL );
1270 for( i=0; i<nthread; i++ )
1275 targ[i].tselfscore = tselfscore;
1277 targ[i].skiptable = skiptable;
1278 targ[i].joblist = joblist;
1279 targ[i].result = result;
1280 targ[i].posshared = &posshared;
1281 targ[i].mutex = &mutex;
1283 pthread_create( handle+i, NULL, generalmsadistarrthread, (void *)(targ+i) );
1285 for( j=0; j<nthread; j++ ) pthread_join( handle[j], NULL );
1286 pthread_mutex_destroy( &mutex );
// Single-threaded path: one argument block, worker called directly.
1292 targ = calloc( 1, sizeof( generaldistarrthread_arg_t ) );
1297 targ[0].tselfscore = tselfscore;
1299 targ[0].skiptable = skiptable;
1300 targ[0].joblist = joblist;
1301 targ[0].result = result;
1302 targ[0].posshared = &posshared;
1304 generalmsadistarrthread( targ );
// Pass 3: take the minimum over the freshly computed distances.
1309 for( acptj=(acpt+pos)->next; acptj!=NULL; acptj=acptj->next ) // kept consistent with setnearest
1312 tmpdouble = result[j];
1313 if( tmpdouble < mindisfrom )
1315 mindisfrom = tmpdouble;
1320 for( acptj=acpt; (acptj&&acptj->pos!=pos); acptj=acptj->next ) // kept consistent with setnearest
1323 tmpdouble = result[j];
1324 if( tmpdouble < mindisfrom )
1326 mindisfrom = tmpdouble;
1333 // printf( "mindisfrom = %f\n", mindisfrom );
1335 *mindisfrompt = mindisfrom;
1336 *nearestpt = nearest;
/*
 * msaresetnearest (serial variant): recomputes, for the sequence at pos, its
 * smallest distance to the sequences still on the acpt chain. The minimum is
 * stored in *mindisfrompt and the chosen index in *nearestpt.
 * For every chain entry j (those after pos first, then those before pos) the
 * distance is read from distfrompt[pos][j] if that row exists, else from
 * distfrompt[j][pos] if that row exists, else computed with distcompact_msa.
 * resultnotused and joblistnotused exist only to keep the same signature as
 * the threaded variant.
 * NOTE(review): presumably the distcompact_msa call sits in a final else
 * branch and nearest is updated next to mindisfrom; those lines are not
 * visible here -- confirm.
 */
1342 static void msaresetnearest( int nseq, Bchain *acpt, double **distfrompt, double *mindisfrompt, int *nearestpt, int pos, char **seq, int **skiptable, int *tselfscore, double *resultnotused, int *joblistnotused )
1348 // double **effptpt;
1355 // reporterr( "resetnearest..\r" );
1356 // printf( "[%d], %f, dist=%d ->", pos, *distfrompt, *nearestpt );
1358 // mindisfrom = 999.9;
// Chain entries after pos.
1362 for( acptj=(acpt+pos)->next; acptj!=NULL; acptj=acptj->next ) // kept consistent with setnearest
1366 if( distfrompt[pos] )
1367 tmpdouble=distfrompt[pos][j];
1368 else if( distfrompt[j] )
1369 tmpdouble=distfrompt[j][pos];
1371 tmpdouble=distcompact_msa( seq[pos], seq[j], skiptable[pos], skiptable[j], tselfscore[pos], tselfscore[j] );
1373 // tmpdouble=distcompact( nlen[pos], nlen[j], singlettable1, pointt[j], tselfscore[pos], tselfscore[j] );
1376 if( tmpdouble < mindisfrom )
1378 mindisfrom = tmpdouble;
// Chain entries before pos.
1383 for( acptj=acpt; (acptj&&acptj->pos!=pos); acptj=acptj->next ) // kept consistent with setnearest
1387 if( distfrompt[pos] )
1388 tmpdouble=distfrompt[pos][j];
1389 else if( distfrompt[j] )
1390 tmpdouble=distfrompt[j][pos];
1392 tmpdouble=distcompact_msa( seq[pos], seq[j], skiptable[pos], skiptable[j], tselfscore[pos], tselfscore[j] );
1394 // tmpdouble=distcompact( nlen[pos], nlen[j], singlettable1, pointt[j], tselfscore[pos], tselfscore[j] );
1398 if( tmpdouble < mindisfrom )
1400 mindisfrom = tmpdouble;
1404 // printf( "mindisfrom = %f\n", mindisfrom );
1406 *mindisfrompt = mindisfrom;
1407 *nearestpt = nearest;
/*
 * setnearest: finds, for the sequence at pos, the smallest distance to the
 * sequences still on the acpt chain, using a half (offset-indexed) matrix.
 * The minimum is stored in *mindisfrompt and the chosen index in *nearestpt.
 *   eff  distances stored so that the distance between a < b is eff[a][b-a]
 * Entries after pos are read as eff[pos][j-pos]; entries before pos are read
 * as eff[j][pos-j].
 * NOTE(review): presumably j is acptj->pos and nearest is updated next to
 * mindisfrom; those lines are not visible here -- confirm.
 */
1411 static void setnearest( int nseq, Bchain *acpt, double **eff, double *mindisfrompt, int *nearestpt, int pos )
1417 // double **effptpt;
1423 // printf( "[%d], %f, dist=%d ->", pos, *mindisfrompt, *nearestpt );
1425 // if( (acpt+pos)->next ) effpt = eff[pos]+(acpt+pos)->next->pos-pos;
1427 // for( j=pos+1; j<nseq; j++ )
// Chain entries after pos.
1428 for( acptj=(acpt+pos)->next; acptj!=NULL; acptj=acptj->next )
1431 // if( (tmpdouble=*effpt++) < *mindisfrompt )
1432 if( (tmpdouble=eff[pos][j-pos]) < mindisfrom )
1434 mindisfrom = tmpdouble;
1439 // for( j=0; j<pos; j++ )
// Chain entries before pos.
1440 for( acptj=acpt; (acptj&&acptj->pos!=pos); acptj=acptj->next )
1443 // if( (tmpdouble=(*effptpt++)[pos-j]) < *mindisfrompt )
1444 if( (tmpdouble=eff[j][pos-j]) < mindisfrom )
1446 mindisfrom = tmpdouble;
1451 *mindisfrompt = mindisfrom;
1452 *nearestpt = nearest;
1453 // printf( "%f, %d \n", pos, *mindisfrompt, *nearestpt );
/*
 * setnearest_double_fullmtx: finds, for the sequence at pos, the smallest
 * distance to the sequences still on the acpt chain, using a full matrix.
 *   eff  distances read as eff[pos][j] for entries after pos and eff[j][pos]
 *        for entries before pos
 * *mindisfrompt is reset to 999.9 and then lowered in place, so distances of
 * 999.9 or more never become the minimum.
 * NOTE(review): presumably j is acptj->pos and *nearestpt is updated next to
 * *mindisfrompt; those lines are not visible here -- confirm.
 */
1456 static void setnearest_double_fullmtx( int nseq, Bchain *acpt, double **eff, double *mindisfrompt, int *nearestpt, int pos )
1463 *mindisfrompt = 999.9;
1466 // if( (acpt+pos)->next ) effpt = eff[pos]+(acpt+pos)->next->pos-pos;
1468 // for( j=pos+1; j<nseq; j++ )
// Chain entries after pos.
1469 for( acptj=(acpt+pos)->next; acptj!=NULL; acptj=acptj->next )
1472 // if( (tmpdouble=*effpt++) < *mindisfrompt )
1473 if( (tmpdouble=eff[pos][j]) < *mindisfrompt )
1475 *mindisfrompt = tmpdouble;
1480 // for( j=0; j<pos; j++ )
// Chain entries before pos.
1481 for( acptj=acpt; (acptj&&acptj->pos!=pos); acptj=acptj->next )
1484 // if( (tmpdouble=(*effptpt++)[pos-j]) < *mindisfrompt )
1485 if( (tmpdouble=eff[j][pos]) < *mindisfrompt )
1487 *mindisfrompt = tmpdouble;
/*
 * loadtreeoneline: reads one merge step of a guide tree from fp.
 *   ar   output: ar[0] and ar[1] receive the two node numbers
 *   len  output: len[0] and len[1] receive the two branch lengths
 *   fp   open input stream
 * One line is read with fgets into a static 1000-byte buffer and parsed with
 * "%d %d %lf %lf". Three problems produce an error message through reporterr:
 * a failed read (1), a failed parse (2), and ar[0] >= ar[1], which is
 * reported as "Incorrect guide tree".
 * The static buffer makes the function non-reentrant.
 * NOTE(review): presumably each error path exits the program, and parse
 * failure means res != 4; those lines are not visible here -- confirm.
 * NOTE(review): a line longer than 998 characters is silently split across
 * two reads.
 */
1495 static void loadtreeoneline( int *ar, double *len, FILE *fp )
1497 static char gett[1000];
1501 p = fgets( gett, 999, fp );
1504 reporterr( "\n\nFormat error (1) in the tree? It has to be a bifurcated and rooted tree.\n" );
1505 reporterr( "Please use newick2mafft.rb to generate a tree file from a newick tree.\n\n" );
1510 res = sscanf( gett, "%d %d %lf %lf", ar, ar+1, len, len+1 );
1513 reporterr( "\n\nFormat error (2) in the tree? It has to be a bifurcated and rooted tree.\n" );
1514 reporterr( "Please use newick2mafft.rb to generate a tree file from a newick tree.\n\n" );
// The first node number must be strictly smaller than the second.
1521 if( ar[0] >= ar[1] )
1523 reporterr( "\n\nIncorrect guide tree\n" );
1524 reporterr( "Please use newick2mafft.rb to generate a tree file from a newick tree.\n\n" );
1529 // reporterr( "ar[0] = %d, ar[1] = %d\n", ar[0], ar[1] );
1530 // reporterr( "len[0] = %f, len[1] = %f\n", len[0], len[1] );
1533 void loadtop( int nseq, double **mtx, int ***topol, double **len, char **name, int *nlen, Treedep *dep )
1535 int i, j, k, minijm, maxijm;
1536 int *intpt, *intpt2;
1539 int im = -1, jm = -1;
1540 Bchain *acjmnext, *acjmprev;
1542 int *pt1, *pt2, *pt11, *pt22;
1547 char *nametmp, *nameptr, *tmpptr;
1555 fp = fopen( "_guidetree", "r" );
1558 reporterr( "cannot open _guidetree\n" );
1564 hist = AllocateIntVec( nseq );
1565 ac = (Bchain *)malloc( nseq * sizeof( Bchain ) );
1566 nmemar = AllocateIntVec( nseq );
1567 // treetmp = AllocateCharVec( nseq*50 );
1569 nametmp = AllocateCharVec( 1000 ); // nagasugi
1570 // tree = AllocateCharMtx( nseq, nseq*50 );
1571 tree = AllocateCharMtx( nseq, 0 );
1572 height = AllocateFloatVec( nseq );
1575 for( i=0; i<nseq; i++ )
1577 for( j=0; j<999; j++ ) nametmp[j] = 0;
1578 for( j=0; j<999; j++ )
1583 else if( isalnum( namec ) || namec == '/' || namec == '=' || namec == '-' || namec == '{' || namec == '}' )
1589 // sprintf( tree[i], "%d_l=%d_%.20s", i+1, nlen[i], nametmp+1 );
1591 nameptr = strstr( nametmp, "_numo_e" ) + 8;
1593 nameptr = nametmp + 1;
1595 if( (tmpptr=strstr( nameptr, "_oe_" )) ) nameptr = tmpptr + 4; // = -> _ no tame
1597 tree[i] = calloc( strlen( nametmp )+100, sizeof( char ) ); // suuji no bun de +100
1598 if( tree[i] == NULL )
1600 reporterr( "Cannot allocate tree!\n" );
1603 sprintf( tree[i], "\n%d_%.900s\n", i+1, nameptr );
1607 for( i=0; i<nseq; i++ )
1609 ac[i].next = ac+i+1;
1610 ac[i].prev = ac+i-1;
1613 ac[nseq-1].next = NULL;
1616 for( i=0; i<nseq; i++ )
1623 for( k=0; k<nseq-1; k++ )
1625 if( k % 10 == 0 ) reporterr( "\r% 5d / %d", k, nseq );
1628 for( acpti=ac; acpti->next!=NULL; acpti=acpti->next )
1631 // reporterr( "k=%d i=%d\n", k, i );
1632 if( mindisfrom[i] < minscore ) // muscle
1635 minscore = mindisfrom[i];
1644 len[k][0] = len[k][1] = -1.0;
1645 loadtreeoneline( node, len[k], fp );
1649 if( im > nseq-1 || jm > nseq-1 || tree[im] == NULL || tree[jm] == NULL )
1651 reporterr( "\n\nCheck the guide tree.\n" );
1652 reporterr( "im=%d, jm=%d\n", im+1, jm+1 );
1653 reporterr( "Please use newick2mafft.rb to generate a tree file from a newick tree.\n\n" );
1659 prevnode = hist[im];
1660 if( dep ) dep[k].child0 = prevnode;
1661 nmemim = nmemar[im];
1663 // reporterr( "prevnode = %d, nmemim = %d\n", prevnode, nmemim );
1665 intpt = topol[k][0] = (int *)realloc( topol[k][0], ( nmemim + 1 ) * sizeof( int ) );
1666 if( prevnode == -1 )
1673 pt1 = topol[prevnode][0];
1674 pt2 = topol[prevnode][1];
1685 for( intpt2=pt11; *intpt2!=-1; )
1686 *intpt++ = *intpt2++;
1687 for( intpt2=pt22; *intpt2!=-1; )
1688 *intpt++ = *intpt2++;
1693 nmemjm = nmemar[jm];
1694 prevnode = hist[jm];
1695 if( dep ) dep[k].child1 = prevnode;
1697 // reporterr( "prevnode = %d, nmemjm = %d\n", prevnode, nmemjm );
1699 intpt = topol[k][1] = (int *)realloc( topol[k][1], ( nmemjm + 1 ) * sizeof( int ) );
1702 reporterr( "Cannot reallocate topol\n" );
1705 if( prevnode == -1 )
1712 pt1 = topol[prevnode][0];
1713 pt2 = topol[prevnode][1];
1724 for( intpt2=pt11; *intpt2!=-1; )
1725 *intpt++ = *intpt2++;
1726 for( intpt2=pt22; *intpt2!=-1; )
1727 *intpt++ = *intpt2++;
1732 // len[k][0] = ( minscore - tmptmplen[im] );
1733 // len[k][1] = ( minscore - tmptmplen[jm] );
1739 nmemar[im] = nmemim + nmemjm;
1742 if( len[k][0] == -1 || len[k][1] == -1 )
1744 reporterr( "Re-computing the length of branch %d..\n", k );
1747 for( i=0; (mi=topol[k][0][i])>-1; i++ ) for( j=0; (mj=topol[k][1][j])>-1; j++ )
1749 minijm = MIN(mi,mj);
1750 maxijm = MAX(mi,mj);
1751 clusterdist += mtx[minijm][maxijm-minijm];
1754 clusterdist /= (double)mpair;
1755 reporterr( "clusterdist = %f\n", clusterdist );
1756 if( len[k][0] == -1 ) len[k][0] = clusterdist/2.0 - height[im];
1757 if( len[k][1] == -1 ) len[k][1] = clusterdist/2.0 - height[im];
1759 fprintf( stderr, "len0 = %f\n", len[k][0] );
1760 fprintf( stderr, "len1 = %f\n\n", len[k][1] );
1764 fprintf( stderr, "vSTEP-%03d:\n", k+1 );
1765 fprintf( stderr, "len0 = %f\n", len[k][0] );
1766 for( i=0; topol[k][0][i]>-1; i++ ) fprintf( stderr, " %03d", topol[k][0][i]+1 );
1767 fprintf( stderr, "\n" );
1768 fprintf( stderr, "len1 = %f\n", len[k][1] );
1769 for( i=0; topol[k][1][i]>-1; i++ ) fprintf( stderr, " %03d", topol[k][1][i]+1 );
1770 fprintf( stderr, "\n" );
1773 height[im] += len[k][0]; // for ig tree, 2015/Dec/25
1774 dep[k].distfromtip = height[im]; // for ig tree, 2015/Dec/25
1775 // reporterr( "##### dep[%d].distfromtip = %f\n", k, height[im] );
1779 treetmp = realloc( treetmp, strlen( tree[im] ) + strlen( tree[jm] ) + 100 ); // 22 de juubunn (:%7,:%7) %7 ha minus kamo
1782 reporterr( "Cannot allocate treetmp\n" );
1785 sprintf( treetmp, "(%s:%7.5f,%s:%7.5f)", tree[im], len[k][0], tree[jm], len[k][1] );
1788 tree[im] = calloc( strlen( treetmp )+1, sizeof( char ) );
1790 if( tree[im] == NULL )
1792 reporterr( "Cannot reallocate tree!\n" );
1795 strcpy( tree[im], treetmp );
1797 // reporterr( "im,jm=%d,%d\n", im, jm );
1798 acjmprev = ac[jm].prev;
1799 acjmnext = ac[jm].next;
1800 acjmprev->next = acjmnext;
1801 if( acjmnext != NULL )
1802 acjmnext->prev = acjmprev;
1803 // free( (void *)eff[jm] ); eff[jm] = NULL;
1805 #if 0 // muscle seems to miss this.
1806 for( acpti=ac; acpti!=NULL; acpti=acpti->next )
1809 if( nearest[i] == im )
1811 // reporterr( "calling setnearest\n" );
1812 // setnearest( nseq, ac, eff, mindisfrom+i, nearest+i, i );
1820 fp = fopen( "infile.tree", "w" );
1821 fprintf( fp, "%s\n", treetmp );
1822 fprintf( fp, "#by loadtop\n" );
1825 FreeCharMtx( tree );
1830 free( (void *)nmemar );
1836 void stringshuffle( int *ary, int size ) // Presumably shuffles the `size` ints of `ary` in place; only the random index draw is visible in this excerpt.
1841 int j = rand()%size; // pseudo-random index in [0,size-1]; NOTE(review): size == 0 would be a modulo by zero -- confirm callers always pass size > 0
1848 void topolorder( int nseq, int *order, int *posinorder, int ***topol, Treedep *dep, int pos, int nchild ) // Recursive walk of the tree starting at node `pos`: follows dep[].child0 / dep[].child1 and writes into `order` at offset *posinorder. nchild selects the side(s) handled at this node: 0 = first side, 1 = second side, 2 = both.
1852 child0 = dep[pos].child0; // sub-node recorded for the first side of node `pos`
1853 child1 = dep[pos].child1; // sub-node recorded for the second side of node `pos`
1858 if( nchild == 0 || nchild == 2 ) // first side requested
1862 str = calloc( 2, sizeof( int ) ); // NOTE(review): no NULL check is visible before str[0] is written -- confirm
1863 str[0] = topol[pos][0][0]; // If the input is always in memsave format, this can be simplified.
1867 // for( i=0; order[i]!=-1; i++ )
1869 // reporterr( "0: i=%d, *posinorder=%d\n", i, *posinorder );
1871 intcpy( order+*posinorder, str ); // place str at the current output position of `order`
1872 // intcat( order, str );
1881 topolorder( nseq, order, posinorder, topol, dep, child0, 2 ); // descend into child0 and handle both of its sides
1886 if( nchild == 1 || nchild == 2 ) // second side requested
1890 str = calloc( 2, sizeof( int ) ); // NOTE(review): no NULL check is visible before str[0] is written -- confirm
1891 str[0] = topol[pos][1][0]; // If the input is always in memsave format, this can be simplified.
1895 // for( i=0; order[i]!=-1; i++ )
1897 // reporterr( "1: i=%d, *posinorder=%d\n", i, *posinorder );
1899 intcpy( order+*posinorder, str ); // place str at the current output position of `order`
1900 // intcat( order, str );
1908 topolorder( nseq, order, posinorder, topol, dep, child1, 2 ); // descend into child1 and handle both of its sides
1911 // return( posinorder );
1914 #if CANONICALTREEFORMAT
1915 void createchain( int nseq, int ***topol, double **len, char **name, int *nlen, Treedep *dep, int treeout, int shuffle, int seed ) // Builds a chain-shaped guide tree over nseq sequences (variant following the CANONICALTREEFORMAT #if): step 0 pairs two sequences and every later step i joins one more sequence to the cluster of step i-1. Fills topol, len and dep, writes the tree text to infile.tree and one line per step to _guidetree.
1924 // char *treetmp, *tt;
1925 char *nametmp, *nameptr, *tmpptr;
1933 nametmp = AllocateCharVec( 1000 ); // probably longer than needed
1934 tree = AllocateCharMtx( nseq, 0 );
1937 for( i=0; i<nseq; i++ )
1940 for( j=0; j<999; j++ ) nametmp[j] = 0;
1941 for( j=0; j<999; j++ )
1946 else if( isalnum( namec ) || namec == '/' || namec == '=' || namec == '-' || namec == '{' || namec == '}' ) // NOTE(review): isalnum() expects an unsigned char value or EOF; the type of namec is not visible here -- confirm
1952 // sprintf( tree[i], "%d_l=%d_%.20s", i+1, nlen[i], nametmp+1 );
1954 nameptr = strstr( nametmp, "_numo_e" ) + 8; // NOTE(review): NULL+8 if the marker is absent; presumably guarded by a condition not shown here -- confirm
1956 nameptr = nametmp + 1;
1958 if( (tmpptr=strstr( nameptr, "_oe_" )) ) nameptr = tmpptr + 4; // needed because '=' has been turned into '_'
1960 tree[i] = calloc( strlen( nametmp )+100, sizeof( char ) ); // +100 leaves room for the digits
1961 if( tree[i] == NULL )
1963 reporterr( "Cannot allocate tree!\n" );
1966 sprintf( tree[i], "\n%d_%.900s\n", i+1, nameptr ); // leaf label: newline, 1-based index, '_', at most 900 chars of the name, newline
1967 treelen += strlen( tree[i] ) + 20; // running size estimate for the whole tree string
1971 instanttree = calloc( treelen, sizeof( char ) );
1973 for( i=0; i<nseq-1; i++ )
1975 instanttree[i] = '('; // one opening parenthesis per merge step
1982 order = calloc( nseq, sizeof( int ) );
1983 for( i=0; i<nseq; i++ ) order[i] = i; // identity order by default
1986 if( shuffle ) stringshuffle( order, nseq );
1988 ll = l = 2.0 / nseq;
1993 topol[0][0] = (int *)realloc( topol[0][0], ( 2 ) * sizeof( int ) ); // step 0: each side holds one member plus the -1 terminator
1994 topol[0][1] = (int *)realloc( topol[0][1], ( 2 ) * sizeof( int ) );
1997 topol[0][0][0] = im;
1998 topol[0][0][1] = -1;
1999 topol[0][1][0] = jm;
2000 topol[0][1][1] = -1;
2005 topol[0][0][0] = jm;
2006 topol[0][0][1] = -1;
2007 topol[0][1][0] = im;
2008 topol[0][1][1] = -1;
2011 len[0][0] = len[0][1] = l;
2014 dep[0].distfromtip = l;
2019 posinit += sprintf( instanttree+posinit, "%s:%7.5f,", tree[im], len[0][0] );
2020 // reporterr( "instanttree = %s\n", instanttree );
2023 for( i=1; i<nseq-1; i++ )
2031 topol[i][0] = (int *)realloc( topol[i][0], ( 2 ) * sizeof( int ) );
2032 topol[i][0][0] = mm;
2033 topol[i][0][1] = -1;
2035 topol[i][0] = (int *)realloc( topol[i][0], ( i + 2 ) * sizeof( int ) );
2036 intcpy( topol[i][0], topol[i-1][0] ); // side 0 of step i = both sides of step i-1
2037 intcat( topol[i][0], topol[i-1][1] );
2039 topol[i][1] = (int *)realloc( topol[i][1], ( 2 ) * sizeof( int ) );
2040 topol[i][1][0] = jm;
2041 topol[i][1][1] = -1;
2043 // reporterr( "step %d\n", i );
2044 // for( j=0; topol[i][0][j]!=-1; j++ ) reporterr( "%5d ", topol[i][0][j] );
2045 // reporterr( "\n", i );
2046 // for( j=0; topol[i][1][j]!=-1; j++ ) reporterr( "%5d ", topol[i][1][j] );
2047 // reporterr( "\n\n", i );
2054 dep[i].child0 = i-1; // the previous step's cluster sits on side 0
2056 dep[i].distfromtip = ll;
2063 topol[i][1] = (int *)realloc( topol[i][1], ( 2 ) * sizeof( int ) );
2064 topol[i][1][0] = mm;
2065 topol[i][1][1] = -1;
2067 topol[i][1] = (int *)realloc( topol[i][1], ( i + 2 ) * sizeof( int ) );
2068 intcpy( topol[i][1], topol[i-1][0] ); // mirrored case: side 1 of step i = both sides of step i-1
2069 intcat( topol[i][1], topol[i-1][1] );
2071 topol[i][0] = (int *)realloc( topol[i][0], ( 2 ) * sizeof( int ) );
2072 topol[i][0][0] = jm;
2073 topol[i][0][1] = -1;
2077 // reporterr( "step %d\n", i );
2078 // for( j=0; topol[i][0][j]!=-1; j++ ) reporterr( "%5d ", topol[i][0][j] );
2079 // reporterr( "\n", i );
2080 // for( j=0; topol[i][1][j]!=-1; j++ ) reporterr( "%5d ", topol[i][1][j] );
2081 // reporterr( "\n\n", i );
2089 dep[i].child1 = i-1; // the previous step's cluster sits on side 1
2091 dep[i].distfromtip = ll;
2097 posinit += sprintf( instanttree+posinit, "%s:%7.5f):%7.5f,", tree[im], ll-l, l );
2098 // reporterr( "instanttree (in loop) = %s\n", instanttree );
2100 if( i % 1000 == 0 ) reporterr( "\r%d/%d", i, nseq );
2101 // reporterr( "size = %d\n", ( strlen( tree[im] ) + strlen( tree[jm] ) + 100 ) * sizeof( char ) );
2102 // reporterr( "size = %d\n", ( strlen( tree[im] ) + strlen( tree[jm] ) + 100 ) );
2103 // reporterr( "treetmp = %p\n", treetmp );
2104 tt = realloc( treetmp, ( strlen( tree[im] ) + strlen( tree[jm] ) + 100 ) * sizeof( char ) ); // 22 extra bytes would be enough for "(:%7,:%7)", but a %7 field may also carry a minus sign
2107 reporterr( "Cannot allocate treetmp\n" );
2111 // reporterr( "i=%d\n", i );
2112 // reporterr( "part1=%s\n", tree[0] );
2113 // reporterr( "part2=%s\n", tree[i+1] );
2114 // reporterr( "size = %d, %d\n", strlen( tree[0] ), strlen( tree[i+1] ) );
2115 sprintf( treetmp, "(%s:%7.5f,%s:%7.5f)", tree[im], len[i][0], tree[jm], len[i][1] );
2118 tree[jm] = calloc( strlen( treetmp )+1, sizeof( char ) );
2120 if( tree[jm] == NULL )
2122 reporterr( "Cannot reallocate tree!\n" );
2125 strcpy( tree[jm], treetmp );
2132 posinit += sprintf( instanttree+posinit, "%s:%7.5f)", tree[jm], ll-l );
2133 fp = fopen( "infile.tree", "w" ); // NOTE(review): no NULL check appears between this call and the fprintf below
2134 // fprintf( fp, "%s;\n", treetmp );
2135 // fprintf( fp, "#by createchain\n" );
2136 fprintf( fp, "%s;\n", instanttree );
2138 FreeCharMtx( tree );
2140 free( instanttree );
2143 fp = fopen( "_guidetree", "w" );
2146 reporterr( "cannot open _guidetree\n" );
2149 for( i=0; i<nseq-1; i++ )
2150 fprintf( fp, "%d %d %f %f\n", topol[i][0][0]+1, topol[i][1][0]+1, len[i][0], len[i][1] ); // per step: first member of each side (1-based) and the two branch lengths
2157 void createchain( int nseq, int ***topol, double **len, char **name, int *nlen, Treedep *dep, int treeout, int shuffle, int seed ) // Second definition of createchain (presumably the alternative preprocessor branch): at step i, side 0 of topol holds order[0..i-1] plus im and side 1 holds the single new member jm. Fills topol, len and dep, writes the tree text to infile.tree and one line per step to _guidetree.
2166 // char *treetmp, *tt;
2167 char *nametmp, *nameptr, *tmpptr;
2175 nametmp = AllocateCharVec( 1000 ); // probably longer than needed
2176 tree = AllocateCharMtx( nseq, 0 );
2179 for( i=0; i<nseq; i++ )
2182 for( j=0; j<999; j++ ) nametmp[j] = 0;
2183 for( j=0; j<999; j++ )
2188 else if( isalnum( namec ) || namec == '/' || namec == '=' || namec == '-' || namec == '{' || namec == '}' ) // NOTE(review): isalnum() expects an unsigned char value or EOF; the type of namec is not visible here -- confirm
2194 // sprintf( tree[i], "%d_l=%d_%.20s", i+1, nlen[i], nametmp+1 );
2196 nameptr = strstr( nametmp, "_numo_e" ) + 8; // NOTE(review): NULL+8 if the marker is absent; presumably guarded by a condition not shown here -- confirm
2198 nameptr = nametmp + 1;
2200 if( (tmpptr=strstr( nameptr, "_oe_" )) ) nameptr = tmpptr + 4; // needed because '=' has been turned into '_'
2202 tree[i] = calloc( strlen( nametmp )+100, sizeof( char ) ); // +100 leaves room for the digits
2203 if( tree[i] == NULL )
2205 reporterr( "Cannot allocate tree!\n" );
2208 sprintf( tree[i], "\n%d_%.900s\n", i+1, nameptr ); // leaf label: newline, 1-based index, '_', at most 900 chars of the name, newline
2209 treelen += strlen( tree[i] ) + 20; // running size estimate for the whole tree string
2213 instanttree = calloc( treelen, sizeof( char ) );
2215 for( i=0; i<nseq-1; i++ )
2217 instanttree[i] = '('; // one opening parenthesis per merge step
2224 order = calloc( nseq, sizeof( int ) );
2225 for( i=0; i<nseq; i++ ) order[i] = i; // identity order by default
2228 if( shuffle ) stringshuffle( order, nseq );
2231 ll = l = 2.0 / nseq;
2233 for( i=0; i<nseq-1; i++ )
2238 topol[i][0] = (int *)realloc( topol[i][0], ( i + 2 ) * sizeof( int ) ); // i earlier members + im + the -1 terminator
2239 topol[i][1] = (int *)realloc( topol[i][1], ( 2 ) * sizeof( int ) ); // one member + the -1 terminator
2241 for( j=0; j<i; j++ )
2242 topol[i][0][j] = order[j];
2243 topol[i][0][i] = im;
2244 topol[i][0][i+1] = -1;
2246 topol[i][1][0] = jm;
2247 topol[i][1][1] = -1;
2255 dep[i].child0 = i-1; // the previous step's cluster sits on side 0
2257 dep[i].distfromtip = ll;
2264 posinit += sprintf( instanttree+posinit, "%s:%7.5f,", tree[im], len[i][0] );
2265 // reporterr( "instanttree = %s\n", instanttree );
2267 else if ( i == nseq-2 ) // last step: also emit the final member and close the tree
2269 posinit += sprintf( instanttree+posinit, "%s:%7.5f):%7.5f,", tree[im], len[i-1][1], len[i-1][0] );
2270 posinit += sprintf( instanttree+posinit, "%s:%7.5f)", tree[jm], len[i][1] );
2274 posinit += sprintf( instanttree+posinit, "%s:%7.5f):%7.5f,", tree[im], len[i-1][1], len[i-1][0] );
2275 // reporterr( "instanttree (in loop) = %s\n", instanttree );
2278 if( i % 1000 == 0 ) reporterr( "\r%d/%d", i, nseq );
2279 // reporterr( "size = %d\n", ( strlen( tree[im] ) + strlen( tree[jm] ) + 100 ) * sizeof( char ) );
2280 // reporterr( "size = %d\n", ( strlen( tree[im] ) + strlen( tree[jm] ) + 100 ) );
2281 // reporterr( "treetmp = %p\n", treetmp );
2282 tt = realloc( treetmp, ( strlen( tree[im] ) + strlen( tree[jm] ) + 100 ) * sizeof( char ) ); // 22 extra bytes would be enough for "(:%7,:%7)", but a %7 field may also carry a minus sign
2285 reporterr( "Cannot allocate treetmp\n" );
2289 // reporterr( "i=%d\n", i );
2290 // reporterr( "part1=%s\n", tree[0] );
2291 // reporterr( "part2=%s\n", tree[i+1] );
2292 // reporterr( "size = %d, %d\n", strlen( tree[0] ), strlen( tree[i+1] ) );
2293 sprintf( treetmp, "(%s:%7.5f,%s:%7.5f)", tree[im], len[i][0], tree[jm], len[i][1] );
2296 tree[jm] = calloc( strlen( treetmp )+1, sizeof( char ) );
2298 if( tree[jm] == NULL )
2300 reporterr( "Cannot reallocate tree!\n" );
2303 strcpy( tree[jm], treetmp );
2309 fp = fopen( "infile.tree", "w" ); // NOTE(review): no NULL check appears between this call and the fprintf below
2310 // fprintf( fp, "%s;\n", treetmp );
2311 // fprintf( fp, "#by createchain\n" );
2312 fprintf( fp, "%s;\n", instanttree );
2314 FreeCharMtx( tree );
2316 free( instanttree );
2319 fp = fopen( "_guidetree", "w" );
2322 reporterr( "cannot open _guidetree\n" );
2325 #if CANONICALTREEFORMAT
2326 for( i=0; i<nseq-1; i++ )
2327 fprintf( fp, "%d %d %f %f\n", topol[i][0][0]+1, topol[i][1][0]+1, len[i][0], len[i][1] ); // per step: first member of each side (1-based) and the two branch lengths
2330 for( i=0; i<nseq-1; i++ )
2332 jm = topol[i][1][0];
2336 fprintf( fp, "%d %d %f %f\n", k+1, jm+1, len[i][0], len[i][1] );
2340 fprintf( fp, "%d %d %f %f\n", jm+1, k+1, len[i][1], len[i][0] ); // same pair written in swapped order, with the lengths swapped to match
2350 void loadtree( int nseq, int ***topol, double **len, char **name, int *nlen, Treedep *dep, int treeout ) // Reads a guide tree from the file _guidetree, one merge step per loadtreeoneline() call, and fills topol and len (plus dep when it is non-NULL). Also assembles a parenthesised tree string that is written to infile.tree.
2352 int i, j, k, miniim, maxiim, minijm, maxijm;
2353 int *intpt, *intpt2;
2356 int im = -1, jm = -1;
2357 Bchain *acjmnext, *acjmprev;
2360 int *pt1, *pt2, *pt11, *pt22;
2365 char *nametmp, *nameptr, *tmpptr;
2371 fp = fopen( "_guidetree", "r" );
2374 reporterr( "cannot open _guidetree\n" );
2379 reporterr( "Loading a tree\n" );
2383 hist = AllocateIntVec( nseq );
2384 ac = (Bchain *)malloc( nseq * sizeof( Bchain ) );
2385 nmemar = AllocateIntVec( nseq );
2386 // treetmp = AllocateCharVec( nseq*50 );
2387 if( dep ) height = AllocateFloatVec( nseq ); // height[] exists only when the caller supplies dep
2393 nametmp = AllocateCharVec( 1000 ); // probably longer than needed
2394 // tree = AllocateCharMtx( nseq, nseq*50 );
2395 tree = AllocateCharMtx( nseq, 0 );
2397 for( i=0; i<nseq; i++ )
2399 for( j=0; j<999; j++ ) nametmp[j] = 0;
2400 for( j=0; j<999; j++ )
2405 else if( isalnum( namec ) || namec == '/' || namec == '=' || namec == '-' || namec == '{' || namec == '}' ) // NOTE(review): isalnum() expects an unsigned char value or EOF; the type of namec is not visible here -- confirm
2411 // sprintf( tree[i], "%d_l=%d_%.20s", i+1, nlen[i], nametmp+1 );
2413 nameptr = strstr( nametmp, "_numo_e" ) + 8; // NOTE(review): NULL+8 if the marker is absent; presumably guarded by a condition not shown here -- confirm
2415 nameptr = nametmp + 1;
2417 if( (tmpptr=strstr( nameptr, "_oe_" )) ) nameptr = tmpptr + 4; // needed because '=' has been turned into '_'
2419 tree[i] = calloc( strlen( nametmp )+100, sizeof( char ) ); // +100 leaves room for the digits
2420 if( tree[i] == NULL )
2422 reporterr( "Cannot allocate tree!\n" );
2425 sprintf( tree[i], "\n%d_%.900s\n", i+1, nameptr ); // leaf label: newline, 1-based index, '_', at most 900 chars of the name, newline
2430 for( i=0; i<nseq; i++ )
2432 ac[i].next = ac+i+1; // link the entries of ac[] into a doubly linked list in index order
2433 ac[i].prev = ac+i-1; // NOTE(review): for i == 0 this forms ac-1; presumably never dereferenced -- confirm
2436 ac[nseq-1].next = NULL; // end of the list
2439 for( i=0; i<nseq; i++ )
2446 for( k=0; k<nseq-1; k++ ) // one iteration per merge step
2448 if( k % 10 == 0 ) reporterr( "\r% 5d / %d", k, nseq );
2451 for( acpti=ac; acpti->next!=NULL; acpti=acpti->next )
2454 // reporterr( "k=%d i=%d\n", k, i );
2455 if( mindisfrom[i] < minscore ) // muscle
2458 minscore = mindisfrom[i];
2467 len[k][0] = len[k][1] = -1.0; // -1.0 marks "not given"; checked after loadtreeoneline() below
2468 loadtreeoneline( node, len[k], fp );
2472 // if( im > nseq-1 || jm > nseq-1 || tree[im] == NULL || tree[jm] == NULL )
2473 if( im > nseq-1 || jm > nseq-1 ) // NOTE(review): only the upper bound is tested on this line; negative indices are not rejected here
2475 reporterr( "\n\nCheck the guide tree.\n" );
2476 reporterr( "im=%d, jm=%d\n", im+1, jm+1 );
2477 reporterr( "Please use newick2mafft.rb to generate a tree file from a newick tree.\n\n" );
2482 if( len[k][0] == -1.0 || len[k][1] == -1.0 ) // a length still at the sentinel means the file gave no branch length
2484 reporterr( "\n\nERROR: Branch length is not given.\n" );
2488 if( len[k][0] < 0.0 ) len[k][0] = 0.0; // negative branch lengths are clamped to zero
2489 if( len[k][1] < 0.0 ) len[k][1] = 0.0;
2494 prevnode = hist[im];
2495 if( dep ) dep[k].child0 = prevnode;
2496 nmemim = nmemar[im];
2498 // reporterr( "prevnode = %d, nmemim = %d\n", prevnode, nmemim );
2500 intpt = topol[k][0] = (int *)realloc( topol[k][0], ( nmemim + 1 ) * sizeof( int ) ); // nmemim members + the -1 terminator
2501 if( prevnode == -1 )
2508 pt1 = topol[prevnode][0];
2509 pt2 = topol[prevnode][1];
2520 for( intpt2=pt11; *intpt2!=-1; ) // copy the -1 terminated member lists of the earlier node, pt11 first, then pt22
2521 *intpt++ = *intpt2++;
2522 for( intpt2=pt22; *intpt2!=-1; )
2523 *intpt++ = *intpt2++;
2528 nmemjm = nmemar[jm];
2529 prevnode = hist[jm];
2530 if( dep ) dep[k].child1 = prevnode;
2532 // reporterr( "prevnode = %d, nmemjm = %d\n", prevnode, nmemjm );
2534 intpt = topol[k][1] = (int *)realloc( topol[k][1], ( nmemjm + 1 ) * sizeof( int ) ); // nmemjm members + the -1 terminator
2537 reporterr( "Cannot reallocate topol\n" );
2540 if( prevnode == -1 )
2547 pt1 = topol[prevnode][0];
2548 pt2 = topol[prevnode][1];
2559 for( intpt2=pt11; *intpt2!=-1; ) // same copy as above, for the jm side
2560 *intpt++ = *intpt2++;
2561 for( intpt2=pt22; *intpt2!=-1; )
2562 *intpt++ = *intpt2++;
2567 // len[k][0] = ( minscore - tmptmplen[im] );
2568 // len[k][1] = ( minscore - tmptmplen[jm] );
2574 nmemar[im] = nmemim + nmemjm; // the merged cluster is kept under index im
2576 // mindisfrom[im] = 999.9;
2577 for( acpti=ac; acpti!=NULL; acpti=acpti->next )
2580 if( i != im && i != jm )
2609 treetmp = realloc( treetmp, strlen( tree[im] ) + strlen( tree[jm] ) + 100 ); // 22 extra bytes would be enough for "(:%7,:%7)", but a %7 field may also carry a minus sign
2612 reporterr( "Cannot allocate treetmp\n" );
2615 sprintf( treetmp, "(%s:%7.5f,%s:%7.5f)", tree[im], len[k][0], tree[jm], len[k][1] );
2618 tree[im] = calloc( strlen( treetmp )+1, sizeof( char ) );
2620 if( tree[im] == NULL )
2622 reporterr( "Cannot reallocate tree!\n" );
2625 strcpy( tree[im], treetmp );
2628 // reporterr( "im,jm=%d,%d\n", im, jm );
2629 acjmprev = ac[jm].prev; // unlink entry jm from the list of active clusters
2630 acjmnext = ac[jm].next;
2631 acjmprev->next = acjmnext;
2632 if( acjmnext != NULL )
2633 acjmnext->prev = acjmprev;
2634 // free( (void *)eff[jm] ); eff[jm] = NULL;
2636 #if 0 // muscle seems to miss this.
2637 for( acpti=ac; acpti!=NULL; acpti=acpti->next )
2640 if( nearest[i] == im )
2642 // reporterr( "calling setnearest\n" );
2643 // setnearest( nseq, ac, eff, mindisfrom+i, nearest+i, i );
2650 fprintf( stderr, "vSTEP-%03d:\n", k+1 );
2651 fprintf( stderr, "len0 = %f\n", len[k][0] );
2652 for( i=0; topol[k][0][i]>-1; i++ ) fprintf( stderr, " %03d", topol[k][0][i]+1 );
2653 fprintf( stderr, "\n" );
2654 fprintf( stderr, "len1 = %f\n", len[k][1] );
2655 for( i=0; topol[k][1][i]>-1; i++ ) fprintf( stderr, " %03d", topol[k][1][i]+1 );
2656 fprintf( stderr, "\n" );
2661 height[im] += len[k][0]; // for ig tree, 2015/Dec/25
2662 dep[k].distfromtip = height[im]; // for ig tree, 2015/Dec/25
2663 // reporterr( "##### dep[%d].distfromtip = %f\n\n", k, height[im] );
2666 // reporterr( "dep[%d].child0 = %d\n", k, dep[k].child0 );
2667 // reporterr( "dep[%d].child1 = %d\n", k, dep[k].child1 );
2668 // reporterr( "dep[%d].distfromtip = %f\n", k, dep[k].distfromtip );
2674 fp = fopen( "infile.tree", "w" ); // NOTE(review): no NULL check appears between this call and the fprintf below
2675 fprintf( fp, "%s;\n", treetmp );
2676 fprintf( fp, "#by loadtree\n" );
2678 FreeCharMtx( tree );
2685 free( (void *)nmemar );
2686 if( dep ) free( height );
2690 int check_guidetreefile( int *seed, int *npick, double *limitram ) // Reads the first line of the file _guidetree and recognises the keywords shuffle, pileup, auto, test, compact and "very compact"; any other content is reported as "loadtree". *seed / *npick are filled by sscanf for the keywords that carry numbers; *limitram starts at 10e9 bytes and is replaced by the size that follows "compact".
2700 *limitram = 10.0 * 1000 * 1000 * 1000; // 10GB
2701 fp = fopen( "_guidetree", "r" );
2704 reporterr( "cannot open _guidetree\n" );
2708 fgets( string, 999, fp ); // NOTE(review): the return value is ignored, so an empty file leaves `string` as it was -- confirm it is initialised
2711 if( !strncmp( string, "shuffle", 7 ) )
2713 sscanf( string+7, "%d", seed );
2714 reporterr( "shuffle, seed=%d\n", *seed );
2717 else if( !strncmp( string, "pileup", 6 ) )
2719 reporterr( "pileup.\n" );
2722 else if( !strncmp( string, "auto", 4 ) )
2724 sscanf( string+4, "%d %d", seed, npick );
2725 reporterr( "auto, seed=%d, npick=%d\n", *seed, *npick );
2728 reporterr( "Check npick\n" );
2733 else if( !strncmp( string, "test", 4 ) )
2735 sscanf( string+4, "%d %d", seed, npick );
2736 reporterr( "calc, seed=%d, npick=%d\n", *seed, *npick );
2739 reporterr( "Check npick\n" );
2744 else if( !strncmp( string, "compact", 7 ) )
2746 sizestring = string + 7; // text after the keyword: a number followed by a unit letter
2747 reporterr( "sizestring = %s\n", sizestring );
2748 if( strchr( sizestring, 'K' ) || strchr( sizestring, 'k' ) ) tanni = 1.0 * 1000; // kB (the first test used to repeat 'k', so an upper-case K was never recognised)
2749 else if( strchr( sizestring, 'M' ) || strchr( sizestring, 'm' ) ) tanni = 1.0 * 1000 * 1000; // MB
2750 else if( strchr( sizestring, 'G' ) || strchr( sizestring, 'g' ) ) tanni = 1.0 * 1000 * 1000 * 1000; // GB
2751 else if( strchr( sizestring, 'T' ) || strchr( sizestring, 't' ) ) tanni = 1.0 * 1000 * 1000 * 1000 * 1000; // TB
2754 reporterr( "\nSpecify initial ram usage by '--initialramusage xGB'\n\n\n" );
2757 sscanf( sizestring, "%lf", &tmpd );
2758 *limitram = tmpd * tanni; // numeric part times the unit multiplier = limit in bytes
2759 reporterr( "Initial RAM usage = %10.3fGB\n", *limitram/1000/1000/1000 );
2762 else if( !strncmp( string, "very compact", 12 ) )
2764 reporterr( "very compact.\n" );
2769 reporterr( "loadtree.\n" );
2775 static double sueff1, sueff05; // weights read by cluster_mix_double(); the constrained tree builder in this file sets them to 1 - sueff_global and sueff_global * 0.5
2776 //static double sueff1_double, sueff05_double;
2778 static double cluster_mix_double( double d1, double d2 ) // Combined distance: the smaller of d1 and d2 weighted by sueff1, plus the sum d1 + d2 weighted by sueff05.
2780 return( MIN( d1, d2 ) * sueff1 + ( d1 + d2 ) * sueff05 );
2782 static double cluster_average_double( double d1, double d2 ) // Combined distance: arithmetic mean of d1 and d2.
2784 return( ( d1 + d2 ) * 0.5 );
2786 static double cluster_minimum_double( double d1, double d2 ) // Combined distance: the smaller of d1 and d2.
2788 return( MIN( d1, d2 ) );
2791 static double cluster_mix_double( double d1, double d2 ) // Same formula with the sueff1_double / sueff05_double weights; NOTE(review): the declaration of those two variables is commented out in this file, so this definition is presumably in a disabled preprocessor branch -- confirm
2793 return( MIN( d1, d2 ) * sueff1_double + ( d1 + d2 ) * sueff05_double );
2795 static double cluster_average_double( double d1, double d2 ) // Combined distance: arithmetic mean of d1 and d2 (second definition of this name in the file; presumably an alternative preprocessor branch).
2797 return( ( d1 + d2 ) * 0.5 );
2799 static double cluster_minimum_double( double d1, double d2 ) // Combined distance: the smaller of d1 and d2 (second definition of this name in the file; presumably an alternative preprocessor branch).
2801 return( MIN( d1, d2 ) );
2805 static void increaseintergroupdistanceshalfmtx( double **eff, int ngroup, int **groups, int nseq ) // For every group with at least two members: finds the largest member-to-member distance (maxdist) in the half matrix eff, where pair (ss,sl) is stored at eff[ss][sl-ss], then visits every member-to-outsider distance that is below maxdist (presumably to raise it; the assignment itself is not visible here) and warns about such pairs closer than 0.5.
2808 int i, k, m, s1, s2, sl, ss;
2810 double maxdist, *dptr, dtmp;
2811 tft = calloc( nseq, sizeof( int * ) ); // per-sequence 0/1 membership flags; NOTE(review): sized with sizeof( int * ) although the entries are used as ints -- confirm the declared type
2812 others = calloc( nseq, sizeof( int * ) ); // indices of the sequences outside the current group; same sizing note as tft
2814 // for( m=0; m<nseq-1; m++ ) for( k=m+1; k<nseq; k++ )
2815 // reporterr( "mtx[%d][%d] originally = %f (maxdist=%f)\n", m, k, eff[m][k-m], maxdist );
2817 reporterr( "\n" ); // This is required.
2818 for( i=0; i<ngroup; i++ )
2820 if( groups[i][1] == -1 ) continue; // a group with a single member needs no adjustment
2822 for( m=0; m<nseq; m++ ) tft[m] = 0;
2823 for( m=0; (s1=groups[i][m])>-1; m++ ) tft[s1] = 1; // mark the members of group i
2824 for( m=0,k=0; m<nseq; m++ ) if( tft[m] == 0 ) others[k++] = m; // collect the non-members
2828 for( m=1; (s2=groups[i][m])>-1; m++ ) for( k=0; (s1=groups[i][k])>-1&&k<m; k++ ) // every unordered pair of members
2830 // reporterr( "m=%d, k=%d, s2=%d, s1=%d\n", m, k, s2, s1 );
2840 dtmp = eff[ss][sl-ss];
2841 if( dtmp > maxdist ) maxdist = dtmp;
2843 // reporterr( "maxdist = %f\n", maxdist );
2845 for( m=0; (s2=groups[i][m])>-1; m++ ) for( k=0; (s1=others[k])>-1; k++ ) // every member/outsider pair; relies on a -1 terminator in others[], presumably written by a line not shown here
2855 dptr = eff[ss] + sl-ss;
2856 if( *dptr < maxdist )
2858 if( *dptr < 0.5 && nwarned++ < 100 ) reporterr( "# Sequences %d and %d seem to be closely related, but are not in the same sub MSA (%d) in your setting.\n", s2+1, s1+1, i+1 );
2862 // for( m=0; m<nseq-1; m++ ) for( k=m+1; k<nseq; k++ )
2863 // reporterr( "mtx[%d][%d] after modification%d = %f (maxdist=%f)\n", m, k, i, eff[m][k-m], maxdist );
2865 if( nwarned > 100 ) reporterr( "# Sequenc.... (more pairs)\n" ); // only the first 100 warnings are printed in full
2871 static void increaseintergroupdistancesfullmtx( double **eff, int ngroup, int **groups, int nseq ) // Full-matrix counterpart of the half-matrix routine: entries are addressed as eff[ss][sl]. For every group with at least two members it finds the largest member-to-member distance (maxdist), then visits member-to-outsider distances below maxdist (presumably to raise them; the assignment is not visible here) and warns about such pairs closer than 0.5.
2874 int i, k, m, s1, s2, sl, ss;
2876 double maxdist, *dptr, dtmp;
2877 tft = calloc( nseq, sizeof( int * ) ); // per-sequence 0/1 membership flags; NOTE(review): sized with sizeof( int * ) although the entries are used as ints -- confirm the declared type
2878 others = calloc( nseq, sizeof( int * ) ); // indices of the sequences outside the current group; same sizing note as tft
2880 reporterr( "\n" ); // This is required.
2881 for( i=0; i<ngroup; i++ )
2883 if( groups[i][1] == -1 ) continue; // a group with a single member needs no adjustment
2885 for( m=0; m<nseq; m++ ) tft[m] = 0;
2886 for( m=0; (s1=groups[i][m])>-1; m++ ) tft[s1] = 1; // mark the members of group i
2887 for( m=0,k=0; m<nseq; m++ ) if( tft[m] == 0 ) others[k++] = m; // collect the non-members
2891 for( m=1; (s2=groups[i][m])>-1; m++ ) for( k=0; (s1=groups[i][k])>-1&&k<m; k++ ) // every unordered pair of members
2902 if( dtmp > maxdist ) maxdist = dtmp;
2905 // reporterr( "maxdist = %f\n", maxdist );
2907 for( m=0; (s2=groups[i][m])>-1; m++ ) for( k=0; (s1=others[k])>-1; k++ ) // every member/outsider pair; relies on a -1 terminator in others[], presumably written by a line not shown here
2917 dptr = eff[ss] + sl;
2918 if( *dptr < maxdist )
2920 if( *dptr < 0.5 && nwarned++ < 100 ) reporterr( "# Sequences %d and %d seem to be closely related, but are not in the same sub MSA (%d) in your setting.\n", s2+1, s1+1, i+1 );
2925 if( nwarned > 100 ) reporterr( "# Sequenc.... (more pairs)\n" ); // only the first 100 warnings are printed in full
2927 // for( m=0; m<nseq-1; m++ ) for( k=m+1; k<nseq; k++ )
2928 // reporterr( "mtx[%d][%d] after modification = %f (maxdist=%f)\n", m, k, eff[m][k], maxdist );
2933 void fixed_supg_double_realloc_nobk_halfmtx_treeout_constrained( int nseq, double **eff, int ***topol, double **len, char **name, int *nlen, Treedep *dep, int ngroup, int **groups, int efffree ) // Agglomerative clustering on the half distance matrix eff under group constraints: each step picks the closest pair of active clusters, rejects the pair (marks it inconsistent) when the merged member set would partially overlap one of `groups`, and otherwise records the merge in topol/len/dep and updates eff with the linkage function selected by treemethod. The resulting tree string is written to infile.tree.
2935 int i, j, k, miniim, maxiim, minijm, maxijm;
2936 int *intpt, *intpt2;
2939 double *tmptmplen = NULL; //static?
2940 int *hist = NULL; //static?
2941 Bchain *ac = NULL; //static?
2942 int im = -1, jm = -1;
2943 Bchain *acjmnext, *acjmprev;
2945 Bchain *acpti, *acptj;
2946 int *pt1, *pt2, *pt11, *pt22;
2947 int *nmemar; //static?
2950 int *nearest = NULL; // by D.Mathog, a guess
2951 double *mindisfrom = NULL; // by D.Mathog, a guess
2952 char **tree; //static?
2953 char *treetmp; //static?
2954 char *nametmp, *nameptr, *tmpptr; //static?
2956 double (*clusterfuncpt[1])(double,double);
2958 int *testtopol, **inconsistent;
2959 int **inconsistentpairlist;
2960 int ninconsistentpairs;
2962 int allinconsistent;
2965 increaseintergroupdistanceshalfmtx( eff, ngroup, groups, nseq ); // pre-process eff with the group information before clustering
2967 sueff1 = 1 - (double)sueff_global; // weights used by cluster_mix_double()
2968 sueff05 = (double)sueff_global * 0.5;
2969 if ( treemethod == 'X' ) // 'X' = mix, 'E' = average, 'q' = minimum linkage
2970 clusterfuncpt[0] = cluster_mix_double;
2971 else if ( treemethod == 'E' )
2972 clusterfuncpt[0] = cluster_average_double;
2973 else if ( treemethod == 'q' )
2974 clusterfuncpt[0] = cluster_minimum_double;
2977 reporterr( "Unknown treemethod, %c\n", treemethod );
2983 hist = AllocateIntVec( njob ); // NOTE(review): the work arrays are sized by the global njob while the loops below run to nseq -- assumes nseq <= njob; confirm
2984 tmptmplen = AllocateFloatVec( njob );
2985 ac = (Bchain *)malloc( njob * sizeof( Bchain ) );
2986 nmemar = AllocateIntVec( njob );
2987 mindisfrom = AllocateFloatVec( njob );
2988 nearest = AllocateIntVec( njob );
2989 // treetmp = AllocateCharVec( njob * ( B + 100 ) ); // too long?
2990 treetmp = NULL; // under review 2013/06/12
2991 nametmp = AllocateCharVec( 1000 ); // probably longer than needed
2992 // tree = AllocateCharMtx( njob, njob*600 );
2993 tree = AllocateCharMtx( njob, 0 );
2994 testtopol = AllocateIntVec( njob + 1 );
2995 inconsistent = AllocateIntMtx( njob, njob ); // wasteful
2996 // inconsistentpairlist = AllocateIntMtx( njob*(njob-1)/2+1, 2 ); // wasteful!
2997 inconsistentpairlist = AllocateIntMtx( 1, 2 );
2998 // reporterr( "allocating inconsistentpairlist\n" );
2999 warned = AllocateIntVec( ngroup );
3003 for( i=0; i<nseq; i++ )
3005 for( j=0; j<999; j++ ) nametmp[j] = 0;
3006 for( j=0; j<999; j++ )
3011 else if( isalnum( namec ) || namec == '/' || namec == '=' || namec == '-' || namec == '{' || namec == '}' ) // NOTE(review): isalnum() expects an unsigned char value or EOF; the type of namec is not visible here -- confirm
3017 // sprintf( tree[i], "%d_l=%d_%.20s", i+1, nlen[i], nametmp+1 );
3019 nameptr = strstr( nametmp, "_numo_e" ) + 8; // NOTE(review): NULL+8 if the marker is absent; presumably guarded by a condition not shown here -- confirm
3021 nameptr = nametmp + 1;
3023 if( (tmpptr=strstr( nameptr, "_oe_" )) ) nameptr = tmpptr + 4; // needed because '=' has been turned into '_'
3025 tree[i] = calloc( strlen( nametmp )+100, sizeof( char ) ); // +100 leaves room for the digits
3026 if( tree[i] == NULL )
3028 reporterr( "Cannot allocate tree!\n" );
3031 sprintf( tree[i], "\n%d_%.900s\n", i+1, nameptr ); // leaf label: newline, 1-based index, '_', at most 900 chars of the name, newline
3033 for( i=0; i<nseq; i++ )
3035 ac[i].next = ac+i+1; // link the entries of ac[] into a doubly linked list in index order
3036 ac[i].prev = ac+i-1; // NOTE(review): for i == 0 this forms ac-1; presumably never dereferenced -- confirm
3039 ac[nseq-1].next = NULL; // end of the list
3041 for( i=0; i<nseq; i++ ) setnearest( nseq, ac, eff, mindisfrom+i, nearest+i, i ); // muscle
3043 for( i=0; i<nseq; i++ ) tmptmplen[i] = 0.0;
3044 for( i=0; i<nseq; i++ )
3051 ninconsistentpairs = 0;
3052 for( k=0; k<nseq-1; k++ ) // one iteration per merge step
3054 if( k % 10 == 0 ) reporterr( "\r% 5d / %d", k, nseq );
3056 for( i=0; i<ninconsistentpairs; i++ ) inconsistent[inconsistentpairlist[i][0]][inconsistentpairlist[i][1]] = 0; // clear only the flags recorded during the previous step
3057 // for( acpti=ac; acpti->next!=NULL; acpti=acpti->next ) for( acptj=acpti->next; acptj!=NULL; acptj=acptj->next ) inconsistent[acpti->pos][acptj->pos] = 0; // slow!!!
3058 ninconsistentpairs = 0;
3066 for( acpti=ac; acpti->next!=NULL; acpti=acpti->next )
3069 // reporterr( "k=%d i=%d\n", k, i );
3070 if( mindisfrom[i] < minscore ) // muscle
3073 minscore = mindisfrom[i];
3085 for( acpti=ac; acpti->next!=NULL; acpti=acpti->next )
3088 // reporterr( "k=%d i=%d\n", k, i );
3089 for( acptj=acpti->next; acptj!=NULL; acptj=acptj->next )
3092 if( !inconsistent[i][j] && (tmpdouble=eff[i][j-i]) < minscore ) // pairs already rejected in this step are skipped
3094 minscore = tmpdouble;
3098 for( acptj=ac; (acptj&&acptj->pos!=i); acptj=acptj->next )
3101 if( !inconsistent[j][i] && (tmpdouble=eff[j][i-j]) < minscore )
3103 minscore = tmpdouble;
3111 allinconsistent = 1;
3112 for( acpti=ac; acpti->next!=NULL; acpti=acpti->next )
3114 for( acptj=acpti->next; acptj!=NULL; acptj=acptj->next )
3116 if( inconsistent[acpti->pos][acptj->pos] == 0 )
3118 allinconsistent = 0;
3125 if( allinconsistent ) // every remaining pair has been rejected
3127 reporterr( "\n\n\nPlease check whether the grouping is possible.\n\n\n" );
3132 prevnode = hist[im];
3133 if( prevnode == -1 )
3139 for( intpt2=topol[prevnode][0]; *intpt2!=-1; ) // gather the members of the candidate merge (im side)
3140 *intpt++ = *intpt2++;
3141 for( intpt2=topol[prevnode][1]; *intpt2!=-1; )
3142 *intpt++ = *intpt2++;
3145 prevnode = hist[jm];
3146 if( prevnode == -1 )
3152 for( intpt2=topol[prevnode][0]; *intpt2!=-1; ) // gather the members of the candidate merge (jm side)
3153 *intpt++ = *intpt2++;
3154 for( intpt2=topol[prevnode][1]; *intpt2!=-1; )
3155 *intpt++ = *intpt2++;
3158 // reporterr( "testtopol = \n" );
3159 // for( i=0; testtopol[i]>-1; i++ ) reporterr( " %03d", testtopol[i]+1 );
3160 // reporterr( "\n" );
3162 for( i=0; i<ngroup; i++ )
3164 // reporterr( "groups[%d] = \n", i );
3165 // for( j=0; groups[i][j]>-1; j++ ) reporterr( " %03d", groups[i][j]+1 );
3166 // reporterr( "\n" );
3167 if( overlapmember( groups[i], testtopol ) )
3169 if( !includemember( testtopol, groups[i] ) && !includemember( groups[i], testtopol ) ) // overlap without either set containing the other: the merge conflicts with group i
3174 reporterr( "\n###################################################################\n" );
3175 reporterr( "# WARNING: Group %d is forced to be a monophyletic cluster.\n", i+1 );
3176 reporterr( "###################################################################\n" );
3178 inconsistent[im][jm] = 1; // reject this pair for the current step
3180 inconsistentpairlist = realloc( inconsistentpairlist, (ninconsistentpairs+1)*sizeof( int * ) ); // NOTE(review): the realloc result overwrites the only pointer and is not checked on this line
3181 inconsistentpairlist[ninconsistentpairs] = malloc( sizeof( int ) * 2 ); // NOTE(review): replaces whatever row pointer was stored at this index before; no free of the old row is visible -- confirm
3182 reporterr( "reallocating inconsistentpairlist, size=%d\n", ninconsistentpairs+1 );
3183 inconsistentpairlist[ninconsistentpairs][0] = im;
3184 inconsistentpairlist[ninconsistentpairs][1] = jm;
3185 ninconsistentpairs++;
3192 // reporterr( "OK\n" );
3198 prevnode = hist[im];
3199 if( dep ) dep[k].child0 = prevnode;
3200 nmemim = nmemar[im];
3201 intpt = topol[k][0] = (int *)realloc( topol[k][0], ( nmemim + 1 ) * sizeof( int ) ); // nmemim members + the -1 terminator
3202 if( prevnode == -1 )
3209 pt1 = topol[prevnode][0];
3210 pt2 = topol[prevnode][1];
3221 for( intpt2=pt11; *intpt2!=-1; ) // copy the -1 terminated member lists of the earlier node, pt11 first, then pt22
3222 *intpt++ = *intpt2++;
3223 for( intpt2=pt22; *intpt2!=-1; )
3224 *intpt++ = *intpt2++;
3228 prevnode = hist[jm];
3229 if( dep ) dep[k].child1 = prevnode;
3230 nmemjm = nmemar[jm];
3231 intpt = topol[k][1] = (int *)realloc( topol[k][1], ( nmemjm + 1 ) * sizeof( int ) ); // nmemjm members + the -1 terminator
3234 reporterr( "Cannot reallocate topol\n" );
3237 if( prevnode == -1 )
3244 pt1 = topol[prevnode][0];
3245 pt2 = topol[prevnode][1];
3256 for( intpt2=pt11; *intpt2!=-1; ) // same copy as above, for the jm side
3257 *intpt++ = *intpt2++;
3258 for( intpt2=pt22; *intpt2!=-1; )
3259 *intpt++ = *intpt2++;
3265 len[k][0] = ( minscore - tmptmplen[im] ); // branch length = merge score minus the score stored for that cluster at its previous merge
3266 len[k][1] = ( minscore - tmptmplen[jm] );
3267 if( len[k][0] < 0.0 ) len[k][0] = 0.0; // negative branch lengths are clamped to zero
3268 if( len[k][1] < 0.0 ) len[k][1] = 0.0;
3270 if( dep ) dep[k].distfromtip = minscore;
3271 // reporterr( "\n##### dep[%d].distfromtip = %f\n", k, minscore );
3273 tmptmplen[im] = minscore;
3276 nmemar[im] = nmemim + nmemjm; // the merged cluster is kept under index im
3278 mindisfrom[im] = 999.9; // 999.9 is used as a "large" placeholder before the minimum for im is recomputed below
3279 eff[im][jm-im] = 999.9;
3280 for( acpti=ac; acpti!=NULL; acpti=acpti->next )
3283 if( i != im && i != jm )
3306 eff0 = eff[miniim][maxiim-miniim]; // distance between cluster i and im
3307 eff1 = eff[minijm][maxijm-minijm]; // distance between cluster i and jm
3309 tmpdouble = eff[miniim][maxiim-miniim] =
3310 MIN( eff0, eff1 ) * sueff1 + ( eff0 + eff1 ) * sueff05;
3312 tmpdouble = eff[miniim][maxiim-miniim] =
3313 (clusterfuncpt[0])( eff0, eff1 ); // distance from i to the merged cluster, via the selected linkage function
3316 if( tmpdouble < mindisfrom[i] )
3318 mindisfrom[i] = tmpdouble;
3321 if( tmpdouble < mindisfrom[im] )
3323 mindisfrom[im] = tmpdouble;
3326 if( nearest[i] == jm )
3334 treetmp = realloc( treetmp, strlen( tree[im] ) + strlen( tree[jm] ) + 100 ); // 22 extra bytes would be enough for "(:%7,:%7)", but a %7 field may also carry a minus sign
3337 reporterr( "Cannot allocate treetmp\n" );
3340 sprintf( treetmp, "(%s:%7.5f,%s:%7.5f)", tree[im], len[k][0], tree[jm], len[k][1] );
3343 tree[im] = calloc( strlen( treetmp )+1, sizeof( char ) );
3345 if( tree[im] == NULL )
3347 reporterr( "Cannot reallocate tree!\n" );
3350 strcpy( tree[im], treetmp );
3352 acjmprev = ac[jm].prev; // unlink entry jm from the list of active clusters
3353 acjmnext = ac[jm].next;
3354 acjmprev->next = acjmnext;
3355 if( acjmnext != NULL )
3356 acjmnext->prev = acjmprev;
3359 free( (void *)eff[jm] ); eff[jm] = NULL; // row jm of the caller's matrix is released here (presumably only when efffree is set; the guard is not visible)
3362 #if 1 // muscle seems to miss this.
3363 for( acpti=ac; acpti!=NULL; acpti=acpti->next )
3366 if( nearest[i] == im )
3378 if( eff[miniim][maxiim-miniim] > mindisfrom[i] ) // the stored minimum for i no longer matches its distance to im: recompute the nearest neighbour
3379 setnearest( nseq, ac, eff, mindisfrom+i, nearest+i, i );
3386 reporterr( "\noSTEP-%03d:\n", k+1 );
3387 reporterr( "len0 = %f\n", len[k][0] );
3388 for( i=0; topol[k][0][i]>-1; i++ ) reporterr( " %03d", topol[k][0][i]+1 );
3390 reporterr( "len1 = %f\n", len[k][1] );
3391 for( i=0; topol[k][1][i]>-1; i++ ) reporterr( " %03d", topol[k][1][i]+1 );
3392 reporterr( "\n\n" );
3395 fp = fopen( "infile.tree", "w" ); // NOTE(review): no NULL check appears between this call and the fprintf below
3396 fprintf( fp, "%s\n", treetmp );
3403 free( (void *)tmptmplen ); tmptmplen = NULL;
3404 free( hist ); hist = NULL;
3405 free( (char *)ac ); ac = NULL;
3406 free( (void *)nmemar ); nmemar = NULL;
3410 FreeIntMtx( inconsistent );
3411 FreeIntMtx( inconsistentpairlist );
// makecompositiontable_global: walks the vector `pointt`, reading one entry
// per iteration into `point`, until the END_OF_VEC sentinel is reached.
// NOTE(review): presumably each entry is used as an index into `table` to
// accumulate a composition count -- confirm against the loop body.
3415 void makecompositiontable_global( int *table, int *pointt )
3419 while( ( point = *pointt++ ) != END_OF_VEC )
// Argument bundle passed (as void *) to msaresetnearestthread() and
// kmerresetnearestthread(); both cast their argument back to
// resetnearestthread_arg_t * and read their inputs from it.
3423 typedef struct _resetnearestthread_arg
3441 #ifdef enablemultithread
// Exists only in multithread builds; the workers lock/unlock it only when `para` is nonzero.
3442 pthread_mutex_t *mutex;
3444 } resetnearestthread_arg_t;
// Worker with a pthread-style signature ( void * argument, void * result ).
// Unpacks a resetnearestthread_arg_t and, for an entry i whose recorded nearest
// neighbour is `im` and whose stored distance partmtx[im][i] exceeds mindist[i],
// calls msaresetnearest() with pointers to mindist[i] and nearest[i]
// (presumably so that both can be refreshed -- confirm in msaresetnearest).
// The shared list cursor *acpt is advanced while targ->mutex is held; locking
// happens only in multithread builds and only when `para` is nonzero.
3446 static void *msaresetnearestthread( void *arg )
3448 resetnearestthread_arg_t *targ = (resetnearestthread_arg_t *)arg;
3449 // int thread_no = targ->thread_no;
3450 int para = targ->para;
3452 int nseq = targ->nseq;
3453 double **partmtx = targ->partmtx;
3454 double *mindist = targ->mindist;
3455 int *nearest = targ->nearest;
3456 char **seq = targ->seq;
3457 int **skiptable = targ->skiptable;
3458 int *tselfscore = targ->tselfscore;
3459 double *result = targ->result;
3460 int *joblist = targ->joblist;
3461 Bchain **acpt = targ->acpt;
3462 Bchain *ac = targ->ac;
// Take the lock before touching the shared cursor *acpt.
3472 #ifdef enablemultithread
3473 if( para ) pthread_mutex_lock( targ->mutex );
// First unlock: release path taken before the cursor is advanced.
3477 #ifdef enablemultithread
3478 if( para ) pthread_mutex_unlock( targ->mutex );
// NOTE(review): looks like a cleanup/reset call of commonsextet_p -- confirm its NULL,NULL semantics.
3480 commonsextet_p( NULL, NULL );
// Advance the shared cursor to the next list element, then release the lock.
3484 *acpt = (*acpt)->next;
3486 #ifdef enablemultithread
3487 if( para ) pthread_mutex_unlock( targ->mutex );
// Only entries that currently point at `im` as their nearest neighbour are candidates.
3490 if( nearest[i] == im )
// A reset is requested only when the distance to `im` is larger than the recorded minimum.
3492 if( partmtx[im][i] > mindist[i] )
3494 msaresetnearest( nseq, ac, partmtx, mindist+i, nearest+i, i, seq, skiptable, tselfscore, result, joblist );
// k-mer counterpart of msaresetnearestthread(): same pthread-style signature and
// the same locking pattern around the shared cursor *acpt.
// For an entry i whose nearest neighbour is `im` and whose stored distance
// partmtx[im][i] exceeds mindist[i], a temporary composition table is built
// from pointt[i] (when pointt is non-NULL) and handed to kmerresetnearest()
// together with pointers to mindist[i] and nearest[i].
3500 static void *kmerresetnearestthread( void *arg )
3502 resetnearestthread_arg_t *targ = (resetnearestthread_arg_t *)arg;
3503 // int thread_no = targ->thread_no;
3504 int para = targ->para;
3506 int nseq = targ->nseq;
3507 double **partmtx = targ->partmtx;
3508 double *mindist = targ->mindist;
3509 int *nearest = targ->nearest;
3510 int *tselfscore = targ->tselfscore;
3511 int **pointt = targ->pointt;
3512 int *nlen = targ->nlen;
3513 double *result = targ->result;
3514 int *joblist = targ->joblist;
3515 Bchain **acpt = targ->acpt;
3516 Bchain *ac = targ->ac;
// Take the lock before touching the shared cursor *acpt.
3528 #ifdef enablemultithread
3529 if( para ) pthread_mutex_lock( targ->mutex );
// First unlock: release path taken before the cursor is advanced.
3533 #ifdef enablemultithread
3534 if( para ) pthread_mutex_unlock( targ->mutex );
// NOTE(review): looks like a cleanup/reset call of commonsextet_p -- confirm its NULL,NULL semantics.
3536 commonsextet_p( NULL, NULL );
// Advance the shared cursor to the next list element, then release the lock.
3540 *acpt = (*acpt)->next;
3542 #ifdef enablemultithread
3543 if( para ) pthread_mutex_unlock( targ->mutex );
3546 if( nearest[i] == im )
3548 if( partmtx[im][i] > mindist[i] )
3550 if( pointt ) // kmer
// Zero-initialised table of tsize ints, filled from the entries of pointt[i].
3552 singlettable1 = (int *)calloc( tsize, sizeof( int ) );
3553 makecompositiontable_global( singlettable1, pointt[i] );
3555 kmerresetnearest( nseq, ac, partmtx, mindist+i, nearest+i, i, tselfscore, pointt, nlen, singlettable1, result, joblist );
// Only the free() is governed by the `if`; the assignment to NULL that follows on the same line always runs.
3556 if( pointt ) free( singlettable1 ); singlettable1 = NULL;// kmer
3557 if( pointt ) commonsextet_p( NULL, NULL );
// Argument bundle passed (as void *) to the *distarrthreadjoblist workers
// ( verycompactkmer-, kmer-, verycompactmsa-, msa- ); each of them casts its
// argument back to compactdistarrthread_arg_t * and reads its inputs from it.
3564 typedef struct _compactdistarrthread_arg
3585 #ifdef enablemultithread
// Exists only in multithread builds; the workers lock/unlock it only when `para` is nonzero.
3586 pthread_mutex_t *mutex;
3588 } compactdistarrthread_arg_t;
// Distance-array worker, k-mer variant that never consults partmtx (the
// partmtx lookups are commented out below).
// Claims BLOCKSIZE-sized slices of joblist[] by advancing the shared counter
// *posshared (under targ->mutex when `para` is set in multithread builds).
// For every claimed entry i other than im and jm it computes the distances
// im-i and jm-i with distcompact(), combines them with cluster_mix_double(),
// stores the result in newarr[i] and lowers mindist[i] when the new value is smaller.
3590 static void *verycompactkmerdistarrthreadjoblist( void *arg ) // also used when enablemultithread == 0
3592 compactdistarrthread_arg_t *targ = (compactdistarrthread_arg_t *)arg;
// Local njob is the number of joblist entries supplied through the argument struct.
3593 int njob = targ->njob;
3594 int para = targ->para;
3597 // int thread_no = targ->thread_no;
3598 int *nlen = targ->nlen;
3599 int **pointt = targ->pointt;
3600 int *table1 = targ->table1;
3601 int *table2 = targ->table2;
3602 int *tselfscore = targ->tselfscore;
3603 int *joblist = targ->joblist;
3604 int *posshared = targ->posshared;
3605 double *mindist = targ->mindist;
3606 int *nearest = targ->nearest;
3607 // double **partmtx = targ->partmtx;
3608 double *newarr = targ->newarr;
3609 int i, posinjoblist, n;
3615 // for( acpti=ac; acpti!=NULL; acpti=acpti->next )
// Lock while reading and advancing the shared position.
3619 #ifdef enablemultithread
3620 if( para ) pthread_mutex_lock( targ->mutex );
3622 if( *posshared >= njob ) // with block-wise claiming the test must be >=
3624 #ifdef enablemultithread
3625 if( para ) pthread_mutex_unlock( targ->mutex );
// NOTE(review): looks like a cleanup/reset call of commonsextet_p -- confirm its NULL,NULL semantics.
3627 commonsextet_p( NULL, NULL );
// Claim the next block: remember where it starts, then move the shared position past it.
3630 posinjoblist = *posshared;
3631 *posshared += BLOCKSIZE;
3632 #ifdef enablemultithread
3633 if( para ) pthread_mutex_unlock( targ->mutex );
// Process at most BLOCKSIZE entries, stopping early at the end of the job list.
3636 for( n=0; n<BLOCKSIZE&&posinjoblist<njob; n++ )
3638 i = joblist[posinjoblist++];
// The two entries im and jm themselves are skipped.
3640 if( i == im ) continue;
3641 if( i == jm ) continue;
3643 // if( partmtx[im] )
3644 // tmpdist1 = partmtx[im][i];
3645 // else if( partmtx[i] )
3646 // tmpdist1 = partmtx[i][im];
// table1 is paired with im and table2 with jm in the two calls below.
3648 tmpdist1 = distcompact( nlen[im], nlen[i], table1, pointt[i], tselfscore[im], tselfscore[i] );
3650 // if( partmtx[jm] )
3651 // tmpdist2 = partmtx[jm][i];
3652 // else if( partmtx[i] )
3653 // tmpdist2 = partmtx[i][jm];
3655 tmpdist2 = distcompact( nlen[jm], nlen[i], table2, pointt[i], tselfscore[jm], tselfscore[i] );
3659 // tmpdist1 = distcompact_msa( seq[im], seq[i], skiptable[im], skiptable[i], tselfscore[im], tselfscore[i] );
3660 // tmpdist2 = distcompact_msa( seq[jm], seq[i], skiptable[jm], skiptable[i], tselfscore[jm], tselfscore[i] );
3664 // tmpdist1 = distcompact( nlen[im], nlen[i], table1, pointt[i], tselfscore[im], tselfscore[i] );
3665 // tmpdist2 = distcompact( nlen[jm], nlen[i], table2, pointt[i], tselfscore[jm], tselfscore[i] );
// Combine the two distances and publish the result for entry i.
3667 tmpdouble = cluster_mix_double( tmpdist1, tmpdist2 );
3668 newarr[i] = tmpdouble;
3670 // if( partmtx[i] ) partmtx[i][im] = partmtx[i][jm] = newarr[i];
3672 if( tmpdouble < mindist[i] )
3674 mindist[i] = tmpdouble;
3678 // if( tmpdouble < mindist[im] ) // cannot be done here
3680 // mindist[im] = tmpdouble;
3684 if( nearest[i] == jm )
// Distance-array worker, k-mer variant that uses partmtx as a cache.
// Claims BLOCKSIZE-sized slices of joblist[] by advancing the shared counter
// *posshared (under targ->mutex when `para` is set in multithread builds).
// For every claimed entry i other than im and jm it obtains the distances
// im-i and jm-i -- from partmtx when a row is available, otherwise via
// distcompact() -- combines them with cluster_mix_double(), stores the result
// in newarr[i], writes it back into row partmtx[i] when that row exists, and
// lowers mindist[i] when the new value is smaller.
3692 static void *kmerdistarrthreadjoblist( void *arg ) // also used when enablemultithread == 0
3694 compactdistarrthread_arg_t *targ = (compactdistarrthread_arg_t *)arg;
// Local njob is the number of joblist entries supplied through the argument struct.
3695 int njob = targ->njob;
3696 int para = targ->para;
3699 // int thread_no = targ->thread_no;
3700 int *nlen = targ->nlen;
3701 int **pointt = targ->pointt;
3702 int *table1 = targ->table1;
3703 int *table2 = targ->table2;
3704 int *tselfscore = targ->tselfscore;
3705 int *joblist = targ->joblist;
3706 int *posshared = targ->posshared;
3707 double *mindist = targ->mindist;
3708 int *nearest = targ->nearest;
3709 double **partmtx = targ->partmtx;
3710 double *newarr = targ->newarr;
3711 int i, posinjoblist, n;
3717 // for( acpti=ac; acpti!=NULL; acpti=acpti->next )
// Lock while reading and advancing the shared position.
3721 #ifdef enablemultithread
3722 if( para ) pthread_mutex_lock( targ->mutex );
3724 if( *posshared >= njob ) // with block-wise claiming the test must be >=
3726 #ifdef enablemultithread
3727 if( para ) pthread_mutex_unlock( targ->mutex );
// NOTE(review): looks like a cleanup/reset call of commonsextet_p -- confirm its NULL,NULL semantics.
3729 commonsextet_p( NULL, NULL );
// Claim the next block: remember where it starts, then move the shared position past it.
3732 posinjoblist = *posshared;
3733 *posshared += BLOCKSIZE;
3734 #ifdef enablemultithread
3735 if( para ) pthread_mutex_unlock( targ->mutex );
// Process at most BLOCKSIZE entries, stopping early at the end of the job list.
3738 for( n=0; n<BLOCKSIZE&&posinjoblist<njob; n++ )
3740 i = joblist[posinjoblist++];
// The two entries im and jm themselves are skipped.
3742 if( i == im ) continue;
3743 if( i == jm ) continue;
// Distance im-i: cached value from row im, else from row i, else recomputed from k-mer tables.
3746 tmpdist1 = partmtx[im][i];
3747 else if( partmtx[i] )
3748 tmpdist1 = partmtx[i][im];
3750 tmpdist1 = distcompact( nlen[im], nlen[i], table1, pointt[i], tselfscore[im], tselfscore[i] );
// Distance jm-i: same lookup order as above.
3753 tmpdist2 = partmtx[jm][i];
3754 else if( partmtx[i] )
3755 tmpdist2 = partmtx[i][jm];
3757 tmpdist2 = distcompact( nlen[jm], nlen[i], table2, pointt[i], tselfscore[jm], tselfscore[i] );
3761 // tmpdist1 = distcompact_msa( seq[im], seq[i], skiptable[im], skiptable[i], tselfscore[im], tselfscore[i] );
3762 // tmpdist2 = distcompact_msa( seq[jm], seq[i], skiptable[jm], skiptable[i], tselfscore[jm], tselfscore[i] );
3766 // tmpdist1 = distcompact( nlen[im], nlen[i], table1, pointt[i], tselfscore[im], tselfscore[i] );
3767 // tmpdist2 = distcompact( nlen[jm], nlen[i], table2, pointt[i], tselfscore[jm], tselfscore[i] );
// Combine the two distances and publish the result for entry i.
3769 tmpdouble = cluster_mix_double( tmpdist1, tmpdist2 );
3770 newarr[i] = tmpdouble;
// Keep row i of the cache consistent: both the im and jm columns receive the merged distance.
3772 if( partmtx[i] ) partmtx[i][im] = partmtx[i][jm] = newarr[i];
3774 if( tmpdouble < mindist[i] )
3776 mindist[i] = tmpdouble;
3780 // if( tmpdouble < mindist[im] ) // cannot be done here
3782 // mindist[im] = tmpdouble;
3786 if( nearest[i] == jm )
// Distance-array worker, MSA variant that never consults partmtx (the partmtx
// lookups are commented out below).
// Claims BLOCKSIZE-sized slices of joblist[] by advancing the shared counter
// *posshared (under targ->mutex when `para` is set in multithread builds).
// For every claimed entry i other than im and jm it computes the distances
// im-i and jm-i with distcompact_msa(), combines them with
// cluster_mix_double(), stores the result in newarr[i] and lowers mindist[i]
// when the new value is smaller.
3794 static void *verycompactmsadistarrthreadjoblist( void *arg ) // also used when enablemultithread == 0
3796 compactdistarrthread_arg_t *targ = (compactdistarrthread_arg_t *)arg;
// Local njob is the number of joblist entries supplied through the argument struct.
3797 int njob = targ->njob;
3798 int para = targ->para;
3801 // int thread_no = targ->thread_no;
3802 int *tselfscore = targ->tselfscore;
3803 char **seq = targ->seq;
3804 int **skiptable = targ->skiptable;
3805 int *joblist = targ->joblist;
3806 int *posshared = targ->posshared;
3807 double *mindist = targ->mindist;
3808 int *nearest = targ->nearest;
3809 // double **partmtx = targ->partmtx;
3810 double *newarr = targ->newarr;
3811 int i, posinjoblist, n;
3817 // for( acpti=ac; acpti!=NULL; acpti=acpti->next )
// Lock while reading and advancing the shared position.
3822 #ifdef enablemultithread
3823 if( para ) pthread_mutex_lock( targ->mutex );
3825 if( *posshared >= njob ) // with block-wise claiming the test must be >=
3827 #ifdef enablemultithread
3828 if( para ) pthread_mutex_unlock( targ->mutex );
// NOTE(review): looks like a cleanup/reset call of commonsextet_p -- confirm its NULL,NULL semantics.
3830 commonsextet_p( NULL, NULL );
// Claim the next block: remember where it starts, then move the shared position past it.
3833 posinjoblist = *posshared;
3834 *posshared += BLOCKSIZE;
3835 #ifdef enablemultithread
3836 if( para ) pthread_mutex_unlock( targ->mutex );
// Process at most BLOCKSIZE entries, stopping early at the end of the job list.
3839 for( n=0; n<BLOCKSIZE&&posinjoblist<njob; n++ )
3841 i = joblist[posinjoblist++];
// The two entries im and jm themselves are skipped.
3843 if( i == im ) continue;
3844 if( i == jm ) continue;
3846 // if( partmtx[im] )
3847 // tmpdist1 = partmtx[im][i];
3848 // else if( partmtx[i] )
3849 // tmpdist1 = partmtx[i][im];
3851 tmpdist1 = distcompact_msa( seq[im], seq[i], skiptable[im], skiptable[i], tselfscore[im], tselfscore[i] );
3853 // if( partmtx[jm] )
3854 // tmpdist2 = partmtx[jm][i];
3855 // else if( partmtx[i] )
3856 // tmpdist2 = partmtx[i][jm];
3858 tmpdist2 = distcompact_msa( seq[jm], seq[i], skiptable[jm], skiptable[i], tselfscore[jm], tselfscore[i] );
// Combine the two distances and publish the result for entry i.
3860 tmpdouble = cluster_mix_double( tmpdist1, tmpdist2 );
3861 newarr[i] = tmpdouble;
3863 // if( partmtx[i] ) partmtx[i][im] = partmtx[i][jm] = newarr[i];
3865 if( tmpdouble < mindist[i] )
3867 mindist[i] = tmpdouble;
3871 // if( tmpdouble < mindist[im] ) // cannot be done here
3873 // mindist[im] = tmpdouble;
3877 if( nearest[i] == jm )
// Distance-array worker, MSA variant that uses partmtx as a cache.
// Claims BLOCKSIZE-sized slices of joblist[] by advancing the shared counter
// *posshared (under targ->mutex when `para` is set in multithread builds).
// For every claimed entry i other than im and jm it obtains the distances
// im-i and jm-i -- from partmtx when a row is available, otherwise via
// distcompact_msa() -- combines them with cluster_mix_double(), stores the
// result in newarr[i], writes it back into row partmtx[i] when that row
// exists, and lowers mindist[i] when the new value is smaller.
3885 static void *msadistarrthreadjoblist( void *arg ) // also used when enablemultithread == 0
3887 compactdistarrthread_arg_t *targ = (compactdistarrthread_arg_t *)arg;
// Local njob is the number of joblist entries supplied through the argument struct.
3888 int njob = targ->njob;
3889 int para = targ->para;
3892 // int thread_no = targ->thread_no;
3893 int *tselfscore = targ->tselfscore;
3894 char **seq = targ->seq;
3895 int **skiptable = targ->skiptable;
3896 int *joblist = targ->joblist;
3897 int *posshared = targ->posshared;
3898 double *mindist = targ->mindist;
3899 int *nearest = targ->nearest;
3900 double **partmtx = targ->partmtx;
3901 double *newarr = targ->newarr;
3902 int i, posinjoblist, n;
3908 // for( acpti=ac; acpti!=NULL; acpti=acpti->next )
// Lock while reading and advancing the shared position.
3913 #ifdef enablemultithread
3914 if( para ) pthread_mutex_lock( targ->mutex );
3916 if( *posshared >= njob ) // with block-wise claiming the test must be >=
3918 #ifdef enablemultithread
3919 if( para ) pthread_mutex_unlock( targ->mutex );
// NOTE(review): looks like a cleanup/reset call of commonsextet_p -- confirm its NULL,NULL semantics.
3921 commonsextet_p( NULL, NULL );
// Claim the next block: remember where it starts, then move the shared position past it.
3924 posinjoblist = *posshared;
3925 *posshared += BLOCKSIZE;
3926 #ifdef enablemultithread
3927 if( para ) pthread_mutex_unlock( targ->mutex );
// Process at most BLOCKSIZE entries, stopping early at the end of the job list.
3930 for( n=0; n<BLOCKSIZE&&posinjoblist<njob; n++ )
3932 i = joblist[posinjoblist++];
// The two entries im and jm themselves are skipped.
3934 if( i == im ) continue;
3935 if( i == jm ) continue;
// Distance im-i: cached value from row im, else from row i, else recomputed from the sequences.
3938 tmpdist1 = partmtx[im][i];
3939 else if( partmtx[i] )
3940 tmpdist1 = partmtx[i][im];
3942 tmpdist1 = distcompact_msa( seq[im], seq[i], skiptable[im], skiptable[i], tselfscore[im], tselfscore[i] );
// Distance jm-i: same lookup order as above.
3945 tmpdist2 = partmtx[jm][i];
3946 else if( partmtx[i] )
3947 tmpdist2 = partmtx[i][jm];
3949 tmpdist2 = distcompact_msa( seq[jm], seq[i], skiptable[jm], skiptable[i], tselfscore[jm], tselfscore[i] );
// Combine the two distances and publish the result for entry i.
3951 tmpdouble = cluster_mix_double( tmpdist1, tmpdist2 );
3952 newarr[i] = tmpdouble;
// Keep row i of the cache consistent: both the im and jm columns receive the merged distance.
3954 if( partmtx[i] ) partmtx[i][im] = partmtx[i][jm] = newarr[i];
3956 if( tmpdouble < mindist[i] )
3958 mindist[i] = tmpdouble;
3962 // if( tmpdouble < mindist[im] ) // cannot be done here
3964 // mindist[im] = tmpdouble;
3968 if( nearest[i] == jm )
// Builds a guide tree in nseq-1 merge steps. At each step the active entry
// with the smallest mindist[] value is selected, the pair ( im, jm ) is merged,
// and the distances from the merged cluster to every other active entry are
// produced by the selected `distarrfunc` worker into a fresh array `newarr`.
//
//   partmtx          per-entry distance rows; a row may be NULL. All rows are
//                    freed before returning; the outer array is left to the caller.
//   nearest, mindist per-entry nearest neighbour and its distance, updated in place.
//   pointt           when non-NULL ( k-mer mode ) composition tables for im and jm
//                    are built from it at every step.
//   seq, skiptable, tselfscore, nlen
//                    forwarded unchanged to the distance / reset workers.
//   topol, len       per-step outputs: member lists of the two merged clusters
//                    ( each list terminated by -1 ) and the two branch lengths.
//   name             source of the leaf labels used in the Newick-style strings
//                    (presumably -- the copy loop body should be checked).
//   dep              optional; when non-NULL receives child0, child1 and
//                    distfromtip for every step.
//   howcompact       2 selects the "verycompact" workers, sets resetnearestfunc
//                    to NULL and skips the nearest-reset pass of every step.
//   treeout, memsave NOTE(review): memsave appears to choose between the 2-int
//                    and the full-size topol reallocations -- confirm.
//
// Only treemethod 'X' ( cluster_mix_double ) is handled; any other value reaches
// the "Unknown treemethod" report.
3976 void compacttree_memsaveselectable( int nseq, double **partmtx, int *nearest, double *mindist, int **pointt, int *tselfscore, char **seq, int **skiptable, int ***topol, double **len, char **name, int *nlen, Treedep *dep, int treeout, int howcompact, int memsave )
3979 // int miniim, maxiim, minijm, maxijm;
3980 int *intpt, *intpt2;
3981 // double tmpdouble;
3982 // double eff1, eff0;
3983 double *tmptmplen = NULL; //static?
3984 int *hist = NULL; //static?
3985 Bchain *ac = NULL; //static?
3986 int im = -1, jm = -1;
3987 Bchain *acjmnext, *acjmprev;
3990 int *pt1, *pt2, *pt11, *pt22;
3991 int *nmemar; //static?
3994 char **tree; //static?
3995 char *treetmp; //static?
3996 char *nametmp, *nameptr, *tmpptr; //static?
3998 double (*clusterfuncpt[1])(double,double);
4000 int *singlettable1 = NULL;
4001 int *singlettable2 = NULL;
4003 void *(*distarrfunc)( void * );
4004 void *(*resetnearestfunc)( void * );
4007 compactdistarrthread_arg_t *distarrarg;
4008 resetnearestthread_arg_t *resetarg;
4009 int *joblist, nactive, posshared;
4013 sueff1 = 1 - (double)sueff_global;
4014 sueff05 = (double)sueff_global * 0.5;
4015 if ( treemethod == 'X' )
4016 clusterfuncpt[0] = cluster_mix_double;
4019 reporterr( "Unknown treemethod, %c\n", treemethod );
// Worker selection: howcompact == 2 uses the variants that never touch partmtx
// and has no reset worker; otherwise the caching variants plus a reset worker are used.
4023 if( howcompact == 2 )
4027 // distarrfunc = verycompactmsadistarrthread;
4028 distarrfunc = verycompactmsadistarrthreadjoblist;
4029 resetnearestfunc = NULL;
4033 // distarrfunc = verycompactkmerdistarrthread;
4034 distarrfunc = verycompactkmerdistarrthreadjoblist;
4035 resetnearestfunc = NULL;
4042 distarrfunc = msadistarrthreadjoblist;
4043 resetnearestfunc = msaresetnearestthread;
4047 distarrfunc = kmerdistarrthreadjoblist;
4048 resetnearestfunc = kmerresetnearestthread;
// One argument slot per thread, and at least one so the serial path can reuse slot 0.
4051 distarrarg = calloc( MAX( nthread, 1 ), sizeof( compactdistarrthread_arg_t ) );
4052 resetarg = calloc( MAX( nthread, 1 ), sizeof( resetnearestthread_arg_t ) );
4053 joblist = calloc( njob, sizeof( int ) );
// `result` is allocated only when the reset pass can run ( howcompact != 2 ).
4054 if( howcompact != 2 ) result = calloc( njob, sizeof( double ) );
4059 hist = AllocateIntVec( njob );
4060 tmptmplen = AllocateFloatVec( njob );
4061 ac = (Bchain *)malloc( njob * sizeof( Bchain ) );
4062 nmemar = AllocateIntVec( njob );
4065 treetmp = NULL; // under review 2013/06/12
4066 nametmp = AllocateCharVec( 1000 ); // too long
4067 tree = AllocateCharMtx( njob, 0 );
// Build one leaf label per sequence in tree[i].
4074 for( i=0; i<nseq; i++ )
4076 for( j=0; j<999; j++ ) nametmp[j] = 0;
4077 for( j=0; j<999; j++ )
4082 else if( isalnum( namec ) || namec == '/' || namec == '=' || namec == '-' || namec == '{' || namec == '}' )
4088 // sprintf( tree[i], "%d_l=%d_%.20s", i+1, nlen[i], nametmp+1 );
// NOTE(review): if strstr() returns NULL here, adding 8 yields an invalid pointer -- confirm a guard precedes this line.
4090 nameptr = strstr( nametmp, "_numo_e" ) + 8;
4092 nameptr = nametmp + 1;
4094 if( (tmpptr=strstr( nameptr, "_oe_" )) ) nameptr = tmpptr + 4; // because = was mapped to _
4096 tree[i] = calloc( strlen( nametmp )+100, sizeof( char ) ); // +100 leaves room for the digits
4097 if( tree[i] == NULL )
4099 reporterr( "Cannot allocate tree!\n" );
4102 sprintf( tree[i], "\n%d_%.900s\n", i+1, nameptr );
// Link the Bchain elements into a doubly linked list in index order; the last element terminates it.
4106 for( i=0; i<nseq; i++ )
4108 ac[i].next = ac+i+1;
4109 ac[i].prev = ac+i-1;
4112 ac[nseq-1].next = NULL;
4114 // for( i=0; i<nseq; i++ ) setnearest( nseq, ac, eff, mindisfrom+i, nearest+i, i ); // muscle
4116 for( i=0; i<nseq; i++ ) tmptmplen[i] = 0.0;
4117 for( i=0; i<nseq; i++ )
// Count how many partmtx rows are currently allocated.
4123 for( i=0,numfilled=0; i<nseq; i++ ) if( partmtx[i] ) numfilled++;
// Main loop: one merge per iteration.
4125 for( k=0; k<nseq-1; k++ )
4127 if( k % 100 == 0 ) reporterr( "\r% 5d / %d", k, nseq );
4129 // for( i=0,j=0; i<nseq; i++ ) if( partmtx[i] ) j++;
4130 // if( k% 100 == 0 ) reporterr( "numfilled=%d, filledinpartmtx=%d, numempty=%d\n", numfilled, j, nseq-k-numfilled );
// Scan the active list (all but its last element) for the smallest mindist value.
4133 for( acpti=ac; acpti->next!=NULL; acpti=acpti->next )
4136 // printf( "k=%d i=%d, mindist[i]=%f\n", k, i, mindist[i] );
4137 if( mindist[i] < minscore ) // muscle
4140 minscore = mindist[i];
4143 // printf( "minscore=%f\n", minscore );
4145 // printf( "im=%d\n", im );
4146 // printf( "jm=%d\n", jm );
// Bookkeeping of allocated rows: row im will be (re)created unless howcompact == 2, row jm will be dropped.
4154 if( partmtx[im] == NULL && howcompact != 2 ) numfilled++;
4155 if( partmtx[jm] != NULL ) numfilled--;
// First child ( im side ): record the previous node and size the member list topol[k][0].
4157 prevnode = hist[im];
4158 if( dep ) dep[k].child0 = prevnode;
4159 nmemim = nmemar[im];
4161 intpt = topol[k][0] = (int *)realloc( topol[k][0], ( 2 ) * sizeof( int ) ); // memsave
4163 intpt = topol[k][0] = (int *)realloc( topol[k][0], ( nmemim + 1 ) * sizeof( int ) ); // memsave
4164 if( prevnode == -1 )
4171 pt1 = topol[prevnode][0];
4172 pt2 = topol[prevnode][1];
// Copy the two -1 terminated member lists of the previous node, one after the other.
4190 for( intpt2=pt11; *intpt2!=-1; )
4191 *intpt++ = *intpt2++;
4192 for( intpt2=pt22; *intpt2!=-1; )
4193 *intpt++ = *intpt2++;
// Second child ( jm side ): same procedure for topol[k][1].
4198 prevnode = hist[jm];
4199 if( dep ) dep[k].child1 = prevnode;
4200 nmemjm = nmemar[jm];
4202 intpt = topol[k][1] = (int *)realloc( topol[k][1], ( 2 ) * sizeof( int ) ); // memsave
4204 intpt = topol[k][1] = (int *)realloc( topol[k][1], ( nmemjm + 1 ) * sizeof( int ) ); // memsave
4207 reporterr( "Cannot reallocate topol\n" );
4210 if( prevnode == -1 )
4217 pt1 = topol[prevnode][0];
4218 pt2 = topol[prevnode][1];
4236 for( intpt2=pt11; *intpt2!=-1; )
4237 *intpt++ = *intpt2++;
4238 for( intpt2=pt22; *intpt2!=-1; )
4239 *intpt++ = *intpt2++;
4246 // printf( "minscore = %f, tmptmplen[im] = %f, tmptmplen[jm] = %f\n", minscore, tmptmplen[im], tmptmplen[jm] );
// Branch lengths: merge height minus the height already accumulated on each side.
4248 len[k][0] = ( minscore - tmptmplen[im] );
4249 len[k][1] = ( minscore - tmptmplen[jm] );
4251 if( dep ) dep[k].distfromtip = minscore;
4252 // reporterr( "\n##### dep[%d].distfromtip = %f\n", k, minscore );
4254 tmptmplen[im] = minscore;
4257 nmemar[im] = nmemim + nmemjm;
// Reset the minimum of the merged entry to a large value; it is lowered again from newarr below.
4258 mindist[im] = 999.9;
4260 if( pointt ) // kmer
// Composition tables for the two merged entries, consumed by the k-mer distance workers.
4262 singlettable1 = (int *)calloc( tsize, sizeof( int ) );
4263 singlettable2 = (int *)calloc( tsize, sizeof( int ) );
4264 makecompositiontable_global( singlettable1, pointt[im] );
4265 makecompositiontable_global( singlettable2, pointt[jm] );
// Fresh, zero-initialised array receiving the distances from the merged cluster.
4268 newarr = calloc( nseq, sizeof( double ) );
4270 // nthreadtree = MAX( 1, nthread );
4271 nthreadtree = nthread;
// Flatten the active list into joblist[]; nactive ends up as the number of active entries.
4274 for( acpti=ac,nactive=0; acpti!=NULL; acpti=acpti->next ) joblist[nactive++] = acpti->pos; // slightly wasteful...
// Threaded path: every worker receives the same inputs and the shared position counter.
4277 #ifdef enablemultithread
4278 if( nthreadtree > 0 )
4280 compactdistarrthread_arg_t *targ;
4282 pthread_mutex_t mutex;
4285 // targ = calloc( nthreadtree, sizeof( compactdistarrthread_arg_t ) );
4287 handle = calloc( nthreadtree, sizeof( pthread_t ) );
4288 pthread_mutex_init( &mutex, NULL );
4290 if( k % 100 == 0 ) reporterr( " (%d threads, nactive=%d, nfilled=%d) \r", nthreadtree, nactive, numfilled );
4291 for( i=0; i<nthreadtree; i++ )
4294 targ[i].njob = nactive;
4295 // targ[i].thread_no = i;
4298 targ[i].tselfscore = tselfscore;
4299 targ[i].nlen = nlen;
4301 targ[i].skiptable = skiptable;
4302 targ[i].pointt = pointt;
4303 targ[i].table1 = singlettable1;
4304 targ[i].table2 = singlettable2;
4305 targ[i].joblist = joblist;
4306 targ[i].posshared = &posshared;
4307 targ[i].mindist = mindist;
4308 targ[i].nearest = nearest;
4309 targ[i].newarr = newarr;
4310 targ[i].partmtx = partmtx;
4311 targ[i].mutex = &mutex;
4313 pthread_create( handle+i, NULL, distarrfunc, (void *)(targ+i) );
4316 for( j=0; j<nthreadtree; j++ ) pthread_join( handle[j], NULL );
4317 pthread_mutex_destroy( &mutex );
// After the workers finish, mindist[im] is derived serially from newarr.
4322 for( acpti=ac; acpti!=NULL; acpti=acpti->next ) // for stability
4325 if( i != im && i != jm )
4327 // if( partmtx[i] ) partmtx[i][im] = partmtx[i][jm] = newarr[i]; // could also be done in parallel.
4328 // if( newarr[i] < mindist[i] )
4330 // mindist[i] = newarr[i];
4333 if( newarr[i] < mindist[im] )
4335 mindist[im] = newarr[i];
4338 // if( nearest[i] == jm )
// Serial path: the same worker function is called directly with a single argument slot.
4349 if( k % 100 == 0 ) reporterr( " (serial, nactive=%d, nfilled=%d) \r", nactive, numfilled );
4350 compactdistarrthread_arg_t *targ;
4353 // targ = calloc( 1, sizeof( compactdistarrthread_arg_t ) );
4356 for( i=0; i<1; i++ )
4359 targ[i].njob = nactive;
4360 // targ[i].thread_no = i;
4363 targ[i].tselfscore = tselfscore;
4364 targ[i].nlen = nlen;
4366 targ[i].skiptable = skiptable;
4367 targ[i].pointt = pointt;
4368 targ[i].table1 = singlettable1;
4369 targ[i].table2 = singlettable2;
4370 targ[i].joblist = joblist;
4371 targ[i].posshared = &posshared;
4372 targ[i].mindist = mindist;
4373 targ[i].nearest = nearest;
4374 targ[i].newarr = newarr;
4375 targ[i].partmtx = partmtx;
4377 distarrfunc( targ+i );
4378 // pthread_create( handle, NULL, distarrfunc, (void *)(targ) );
4385 for( acpti=ac; acpti!=NULL; acpti=acpti->next ) // for stability
4388 if( i != im && i != jm )
4390 // if( partmtx[i] ) partmtx[i][im] = partmtx[i][jm] = newarr[i]; // could also be done in parallel.
4391 // if( newarr[i] < mindist[i] )
4393 // mindist[i] = newarr[i];
4396 if( newarr[i] < mindist[im] )
4398 mindist[im] = newarr[i];
4401 // if( nearest[i] == jm )
4408 // printf( "im=%d, jm=%d\n", im, jm );
4410 printf( "matrix = \n" );
4411 for( i=0; i<njob; i++ )
4413 if( partmtx[i] ) for( j=0; j<njob; j++ ) printf( "%f ", partmtx[i][j] );
4414 else printf( "nai" );
4421 // reporterr( "at step %d,", k );
// Drop the old rows of im and jm; on each line only the free() is conditional, the NULL assignment always runs.
4425 if( partmtx[im] ) free( partmtx[im] ); partmtx[im] = NULL;
4426 if( partmtx[jm] ) free( partmtx[jm] ); partmtx[jm] = NULL;
4427 if( howcompact == 2 )
// newarr becomes the new row of the merged entry (ownership moves into partmtx).
4434 partmtx[im] = newarr;
4440 free( singlettable1 );
4441 free( singlettable2 );
4442 singlettable1 = NULL;
4443 singlettable2 = NULL;
// Compose the merged subtree string "(left:len,right:len)" in treetmp, then store a copy in tree[im].
4448 treetmp = realloc( treetmp, strlen( tree[im] ) + strlen( tree[jm] ) + 100 ); // 22 would be enough for (:%7,:%7); %7 may include a minus sign
4451 reporterr( "Cannot allocate treetmp\n" );
4454 sprintf( treetmp, "(%s:%7.5f,%s:%7.5f)", tree[im], len[k][0], tree[jm], len[k][1] );
4457 tree[im] = calloc( strlen( treetmp )+1, sizeof( char ) );
4459 if( tree[im] == NULL )
4461 reporterr( "Cannot reallocate tree!\n" );
4464 strcpy( tree[im], treetmp );
// Unlink jm from the doubly linked list of active entries.
4467 acjmprev = ac[jm].prev;
4468 acjmnext = ac[jm].next;
4469 acjmprev->next = acjmnext;
4470 if( acjmnext != NULL )
4471 acjmnext->prev = acjmprev;
4473 #if 0 // muscle seems to miss this.
4475 for( acpti=ac; acpti!=NULL; acpti=acpti->next )
4478 // printf( "reset nearest? i=%d, k=%d, nearest[i]=%d, im=%d, mindist=%f\n", i, k, nearest[i], im, mindist[i] );
4479 if( nearest[i] == im )
4481 // printf( "reset nearest, i=%d, k=%d\n", i, k );
4482 if( partmtx[im][i] > mindist[i] )
4485 // printf( "go\n" );
4486 if( pointt ) // kmer
4488 singlettable1 = (int *)calloc( tsize, sizeof( int ) );
4489 makecompositiontable_global( singlettable1, pointt[i] );
4491 resetnearest( nseq, ac, partmtx, mindist+i, nearest+i, i, seq, skiptable, tselfscore, pointt, nlen, singlettable1 );
4492 if( pointt ) free( singlettable1 ); singlettable1 = NULL;// kmer
4493 if( pointt ) commonsextet_p( NULL, NULL );
4497 // reporterr( "nwork = %d\n", nwork );
// No reset worker exists in verycompact mode, so the rest of the step is skipped.
4500 if( howcompact == 2 ) continue;
// The leading constant 0 disables the threaded reset path; the serial call further down is what runs.
4503 if( 0 && nthreadtree > 0 )
4505 resetnearestthread_arg_t *targ;
4507 pthread_mutex_t mutex;
4511 // targ = calloc( nthreadtree, sizeof( resetnearestthread_arg_t ) );
4513 handle = calloc( nthreadtree, sizeof( pthread_t ) );
4514 pthread_mutex_init( &mutex, NULL );
4516 for( i=0; i<nthreadtree; i++ )
4519 targ[i].nseq = nseq;
4521 targ[i].partmtx = partmtx;
4522 targ[i].mindist = mindist;
4523 targ[i].nearest = nearest;
4525 targ[i].skiptable = skiptable;
4526 targ[i].tselfscore = tselfscore;
4527 targ[i].pointt = pointt;
4528 targ[i].nlen = nlen;
4529 targ[i].acpt = &acshared;
4531 targ[i].mutex = &mutex;
4533 pthread_create( handle+i, NULL, resetnearestfunc, (void *)(targ+i) );
4536 for( j=0; j<nthreadtree; j++ ) pthread_join( handle[j], NULL );
4537 pthread_mutex_destroy( &mutex );
4546 resetnearestthread_arg_t *targ;
4547 // targ = calloc( 1, sizeof( resetnearestthread_arg_t ) );
4551 targ[0].nseq = nseq;
4553 targ[0].partmtx = partmtx;
4554 targ[0].mindist = mindist;
4555 targ[0].nearest = nearest;
4557 targ[0].skiptable = skiptable;
4558 targ[0].tselfscore = tselfscore;
4559 targ[0].pointt = pointt;
4560 targ[0].nlen = nlen;
4561 targ[0].result = result;
4562 targ[0].joblist = joblist;
4563 targ[0].acpt = &acshared;
4566 resetnearestfunc( targ );
4574 printf( "\nooSTEP-%03d:\n", k+1 );
4575 printf( "len0 = %f\n", len[k][0] );
4576 for( i=0; topol[k][0][i]>-1; i++ ) printf( " %03d", topol[k][0][i]+1 );
4578 printf( "len1 = %f\n", len[k][1] );
4579 for( i=0; topol[k][1][i]>-1; i++ ) printf( " %03d", topol[k][1][i]+1 );
// Write the last composed tree string to "infile.tree".
// NOTE(review): no NULL check on fopen() is visible before fprintf() -- confirm.
4585 fp = fopen( "infile.tree", "w" );
4586 fprintf( fp, "%s\n", treetmp );
// Release every remaining partmtx row.
4590 for( im=0; im<nseq; im++ ) // there is no real need to iterate with im.
4592 if( partmtx[im] ) free( partmtx[im] ); partmtx[im] = NULL;
4594 // if( partmtx ) free( partmtx ); partmtx = NULL; // freed by the parent (caller)
4602 free( (void *)tmptmplen ); tmptmplen = NULL;
4603 free( hist ); hist = NULL;
4604 free( (char *)ac ); ac = NULL;
4605 free( (void *)nmemar ); nmemar = NULL;
4606 if( singlettable1 ) free( singlettable1 );
4607 if( singlettable2 ) free( singlettable2 );
4611 if( result ) free( result );
// Builds a guide tree in nseq-1 merge steps from a half distance matrix.
// `eff` is addressed as eff[min][max-min] for a pair of indices. At each step
// the active entry with the smallest mindisfrom[] value is selected, the pair
// ( im, jm ) is merged, and the distance from the merged cluster to every other
// active entry is written back into the im slot of `eff`.
//
//   eff        half distance matrix, updated in place; row jm is freed after a merge
//              ( NOTE(review): presumably only when `efffree` is set -- confirm ).
//   topol, len per-step outputs: member lists of the two merged clusters
//              ( reallocated to 2 ints each in this memsave variant ) and branch lengths.
//   name       source of the leaf labels used in the Newick-style strings
//              (presumably -- the copy loop body should be checked).
//   dep        optional; when non-NULL receives child0, child1 and distfromtip per step.
//
// treemethod selects the linkage function: 'X' cluster_mix_double,
// 'E' cluster_average_double, 'q' cluster_minimum_double; anything else reaches
// the "Unknown treemethod" report.
4614 void fixed_musclesupg_double_realloc_nobk_halfmtx_treeout_memsave( int nseq, double **eff, int ***topol, double **len, char **name, int *nlen, Treedep *dep, int efffree )
4617 int i, j, k, miniim, maxiim, minijm, maxijm;
4621 double *tmptmplen = NULL; //static?
4622 int *hist = NULL; //static?
4623 Bchain *ac = NULL; //static?
// NOTE(review): im starts at 1 here, whereas compacttree_memsaveselectable() starts it at -1 -- confirm this is intended.
4624 int im = 1, jm = -1;
4625 Bchain *acjmnext, *acjmprev;
4628 int *pt1, *pt2, *pt11;
4629 int *nmemar; //static?
4632 int *nearest = NULL; // by D.Mathog, a guess
4633 double *mindisfrom = NULL; // by D.Mathog, a guess
4634 char **tree; //static?
4635 char *treetmp; //static?
4636 char *nametmp, *nameptr, *tmpptr; //static?
4638 double (*clusterfuncpt[1])(double,double);
4642 sueff1 = 1 - (double)sueff_global;
4643 sueff05 = (double)sueff_global * 0.5;
4644 if ( treemethod == 'X' )
4645 clusterfuncpt[0] = cluster_mix_double;
4646 else if ( treemethod == 'E' )
4647 clusterfuncpt[0] = cluster_average_double;
4648 else if ( treemethod == 'q' )
4649 clusterfuncpt[0] = cluster_minimum_double;
4652 reporterr( "Unknown treemethod, %c\n", treemethod );
4658 hist = AllocateIntVec( njob );
4659 tmptmplen = AllocateFloatVec( njob );
4660 ac = (Bchain *)malloc( njob * sizeof( Bchain ) );
4661 nmemar = AllocateIntVec( njob );
4662 mindisfrom = AllocateFloatVec( njob );
4663 nearest = AllocateIntVec( njob );
4664 // treetmp = AllocateCharVec( njob * ( B + 100 ) ); // too long?
4665 treetmp = NULL; // under review 2013/06/12
4666 nametmp = AllocateCharVec( 1000 ); // too long
4667 // tree = AllocateCharMtx( njob, njob*600 );
4668 tree = AllocateCharMtx( njob, 0 );
// Build one leaf label per sequence in tree[i].
4672 for( i=0; i<nseq; i++ )
4674 for( j=0; j<999; j++ ) nametmp[j] = 0;
4675 for( j=0; j<999; j++ )
4680 else if( isalnum( namec ) || namec == '/' || namec == '=' || namec == '-' || namec == '{' || namec == '}' )
4686 // sprintf( tree[i], "%d_l=%d_%.20s", i+1, nlen[i], nametmp+1 );
// NOTE(review): if strstr() returns NULL here, adding 8 yields an invalid pointer -- confirm a guard precedes this line.
4688 nameptr = strstr( nametmp, "_numo_e" ) + 8;
4690 nameptr = nametmp + 1;
4692 if( (tmpptr=strstr( nameptr, "_oe_" )) ) nameptr = tmpptr + 4; // because = was mapped to _
4694 tree[i] = calloc( strlen( nametmp )+100, sizeof( char ) ); // +100 leaves room for the digits
4695 if( tree[i] == NULL )
4697 reporterr( "Cannot allocate tree!\n" );
4700 sprintf( tree[i], "\n%d_%.900s\n", i+1, nameptr );
// Link the Bchain elements into a doubly linked list in index order; the last element terminates it.
4702 for( i=0; i<nseq; i++ )
4704 ac[i].next = ac+i+1;
4705 ac[i].prev = ac+i-1;
4708 ac[nseq-1].next = NULL;
// Initialise nearest[] / mindisfrom[] for every entry from the distance matrix.
4710 for( i=0; i<nseq; i++ ) setnearest( nseq, ac, eff, mindisfrom+i, nearest+i, i ); // muscle
4713 for( i=0; i<nseq; i++ ) tmptmplen[i] = 0.0;
4714 for( i=0; i<nseq; i++ )
// Main loop: one merge per iteration.
4721 for( k=0; k<nseq-1; k++ )
4723 if( k % 10 == 0 ) reporterr( "\r% 5d / %d", k, nseq );
// Scan the active list (all but its last element) for the smallest mindisfrom value.
4726 for( acpti=ac; acpti->next!=NULL; acpti=acpti->next )
4729 // printf( "k=%d i=%d, mindist[i]=%f\n", k, i, mindisfrom[i] );
4730 if( mindisfrom[i] < minscore ) // muscle
4733 minscore = mindisfrom[i];
4737 // printf( "minscore=%f\n", minscore );
4739 // printf( "im=%d\n", im );
4740 // printf( "jm=%d\n", jm );
// First child ( im side ): record the previous node; the member list is resized to 2 ints.
4747 prevnode = hist[im];
4748 if( dep ) dep[k].child0 = prevnode;
4749 nmemim = nmemar[im];
4750 intpt = topol[k][0] = (int *)realloc( topol[k][0], ( 2 ) * sizeof( int ) ); // memsave
4751 if( prevnode == -1 )
4758 pt1 = topol[prevnode][0];
4759 pt2 = topol[prevnode][1];
4774 for( intpt2=pt11; *intpt2!=-1; )
4775 *intpt++ = *intpt2++;
4776 for( intpt2=pt22; *intpt2!=-1; )
4777 *intpt++ = *intpt2++;
// Second child ( jm side ): same procedure for topol[k][1].
4782 prevnode = hist[jm];
4783 if( dep ) dep[k].child1 = prevnode;
4784 nmemjm = nmemar[jm];
4785 intpt = topol[k][1] = (int *)realloc( topol[k][1], ( 2 ) * sizeof( int ) ); // memsave
4788 reporterr( "Cannot reallocate topol\n" );
4791 if( prevnode == -1 )
4798 pt1 = topol[prevnode][0];
4799 pt2 = topol[prevnode][1];
4814 for( intpt2=pt11; *intpt2!=-1; )
4815 *intpt++ = *intpt2++;
4816 for( intpt2=pt22; *intpt2!=-1; )
4817 *intpt++ = *intpt2++;
4824 // printf( "minscore = %f, tmptmplen[im] = %f, tmptmplen[jm] = %f\n", minscore, tmptmplen[im], tmptmplen[jm] );
// Branch lengths: merge height minus the height already accumulated on each side.
4825 len[k][0] = ( minscore - tmptmplen[im] );
4826 len[k][1] = ( minscore - tmptmplen[jm] );
4828 if( dep ) dep[k].distfromtip = minscore;
4829 // reporterr( "\n##### dep[%d].distfromtip = %f\n", k, minscore );
4831 tmptmplen[im] = minscore;
4834 nmemar[im] = nmemim + nmemjm;
// Reset the minimum of the merged entry to a large value; it is lowered again in the loop below.
4836 mindisfrom[im] = 999.9;
// Recompute the distance from the merged cluster to every other active entry.
4837 for( acpti=ac; acpti!=NULL; acpti=acpti->next )
4840 if( i != im && i != jm )
// Old distances of i to im and to jm, read through the half-matrix addressing.
4863 eff0 = eff[miniim][maxiim-miniim];
4864 eff1 = eff[minijm][maxijm-minijm];
4866 tmpdouble = eff[miniim][maxiim-miniim] =
4867 MIN( eff0, eff1 ) * sueff1 + ( eff0 + eff1 ) * sueff05;
4869 tmpdouble = eff[miniim][maxiim-miniim] =
4870 (clusterfuncpt[0])( eff0, eff1 );
4871 // printf( "tmpdouble=%f, eff0=%f, eff1=%f\n", tmpdouble, eff0, eff1 );
4873 if( tmpdouble < mindisfrom[i] )
4875 mindisfrom[i] = tmpdouble;
4878 if( tmpdouble < mindisfrom[im] )
4880 mindisfrom[im] = tmpdouble;
4883 if( nearest[i] == jm )
4889 // printf( "im=%d, jm=%d\n", im, jm );
4891 printf( "matrix = \n" );
4892 for( i=0; i<njob; i++ )
4894 for( j=0; j<njob; j++ )
4906 printf( "%f ", eff[minijm][maxijm-minijm] );
// Compose the merged subtree string "(left:len,right:len)" in treetmp, then store a copy in tree[im].
4912 treetmp = realloc( treetmp, strlen( tree[im] ) + strlen( tree[jm] ) + 100 ); // 22 would be enough for (:%7,:%7); %7 may include a minus sign
4915 reporterr( "Cannot allocate treetmp\n" );
4918 sprintf( treetmp, "(%s:%7.5f,%s:%7.5f)", tree[im], len[k][0], tree[jm], len[k][1] );
4921 tree[im] = calloc( strlen( treetmp )+1, sizeof( char ) );
4923 if( tree[im] == NULL )
4925 reporterr( "Cannot reallocate tree!\n" );
4928 strcpy( tree[im], treetmp );
// Unlink jm from the doubly linked list of active entries.
4930 acjmprev = ac[jm].prev;
4931 acjmnext = ac[jm].next;
4932 acjmprev->next = acjmnext;
4933 if( acjmnext != NULL )
4934 acjmnext->prev = acjmprev;
4937 free( (void *)eff[jm] ); eff[jm] = NULL; // to be revived later
// Entries whose nearest neighbour was im may need a fresh nearest-neighbour search.
4940 #if 1 // muscle seems to miss this.
4941 for( acpti=ac; acpti!=NULL; acpti=acpti->next )
4944 // printf( "reset nearest? i=%d, k=%d, nearest[i]=%d, im=%d, mindist=%f\n", i, k, nearest[i], im, mindisfrom[i] );
4945 if( nearest[i] == im )
4947 // printf( "reset nearest, i=%d, k=%d\n", i, k );
4958 if( eff[miniim][maxiim-miniim] > mindisfrom[i] )
4960 // printf( "go\n" );
4961 setnearest( nseq, ac, eff, mindisfrom+i, nearest+i, i );
4966 reporterr( "CHUUI!\n" );
4971 printf( "\nooSTEP-%03d:\n", k+1 );
4972 printf( "len0 = %f\n", len[k][0] );
4973 for( i=0; topol[k][0][i]>-1; i++ ) printf( " %03d", topol[k][0][i]+1 );
4975 printf( "len1 = %f\n", len[k][1] );
4976 for( i=0; topol[k][1][i]>-1; i++ ) printf( " %03d", topol[k][1][i]+1 );
// Write the last composed tree string to "infile.tree".
// NOTE(review): no NULL check on fopen() is visible before fprintf() -- confirm.
4980 fp = fopen( "infile.tree", "w" );
4981 fprintf( fp, "%s\n", treetmp );
4988 free( (void *)tmptmplen ); tmptmplen = NULL;
4989 free( hist ); hist = NULL;
4990 free( (char *)ac ); ac = NULL;
4991 free( (void *)nmemar ); nmemar = NULL;
// Builds a guide tree by repeatedly merging the pair of live clusters with the
// smallest distance, and writes the resulting tree text to "infile.tree".
//
//  nseq    - number of sequences; nseq-1 merge steps are performed.
//  eff     - distance matrix addressed as eff[lo][hi-lo] (lo < hi). It is
//            overwritten in place with cluster distances, and the row eff[jm]
//            of the absorbed cluster is freed after each merge.
//  topol   - topol[k][0] / topol[k][1] are realloc'ed and filled with the member
//            lists of the two clusters joined at step k (lists end with -1).
//  len     - len[k][0] / len[k][1] receive the branch lengths of step k.
//  name    - sequence names; presumably the source of the per-character value
//            `namec` used to build the labels -- TODO confirm.
//  nlen    - only referenced from a commented-out sprintf below.
//  dep     - optional (may be NULL); when given, child0, child1 and distfromtip
//            are stored for every step.
//  efffree - NOTE(review): presumably controls whether eff rows are released;
//            confirm against the guard around the free() below.
4996 void fixed_musclesupg_double_realloc_nobk_halfmtx_treeout( int nseq, double **eff, int ***topol, double **len, char **name, int *nlen, Treedep *dep, int efffree )
4998 int i, j, k, miniim, maxiim, minijm, maxijm;
4999 int *intpt, *intpt2;
5002 double *tmptmplen = NULL; //static?
5003 int *hist = NULL; //static?
5004 Bchain *ac = NULL; //static?
5005 int im = -1, jm = -1;
5006 Bchain *acjmnext, *acjmprev;
5009 int *pt1, *pt2, *pt11, *pt22;
5010 int *nmemar; //static?
5013 int *nearest = NULL; // by D.Mathog, a guess
5014 double *mindisfrom = NULL; // by D.Mathog, a guess
5015 char **tree; //static?
5016 char *treetmp; //static?
5017 char *nametmp, *nameptr, *tmpptr; //static?
5019 double (*clusterfuncpt[1])(double,double);
// Weights for the MIN/sum mixing formula used when merging distances.
5023 sueff1 = 1 - (double)sueff_global;
5024 sueff05 = (double)sueff_global * 0.5;
// Select the cluster-distance function from the global treemethod.
5025 if ( treemethod == 'X' )
5026 clusterfuncpt[0] = cluster_mix_double;
5027 else if ( treemethod == 'E' )
5028 clusterfuncpt[0] = cluster_average_double;
5029 else if ( treemethod == 'q' )
5030 clusterfuncpt[0] = cluster_minimum_double;
5033 reporterr( "Unknown treemethod, %c\n", treemethod );
// Work arrays are sized by the global njob, while the loops below run to nseq.
5039 hist = AllocateIntVec( njob );
5040 tmptmplen = AllocateFloatVec( njob );
// NOTE(review): the malloc result is not checked before the next statement.
5041 ac = (Bchain *)malloc( njob * sizeof( Bchain ) );
5042 nmemar = AllocateIntVec( njob );
5043 mindisfrom = AllocateFloatVec( njob );
5044 nearest = AllocateIntVec( njob );
5045 // treetmp = AllocateCharVec( njob * ( B + 100 ) ); // too long?
5046 treetmp = NULL; // under review 2013/06/12
5047 nametmp = AllocateCharVec( 1000 ); // too long
5048 // tree = AllocateCharMtx( njob, njob*600 );
5049 tree = AllocateCharMtx( njob, 0 );
// Build the leaf label tree[i] for every sequence from a cleaned-up name.
5053 for( i=0; i<nseq; i++ )
5055 for( j=0; j<999; j++ ) nametmp[j] = 0;
5056 for( j=0; j<999; j++ )
// Alphanumerics and / = - { } are handled by this branch.
5061 else if( isalnum( namec ) || namec == '/' || namec == '=' || namec == '-' || namec == '{' || namec == '}' )
5067 // sprintf( tree[i], "%d_l=%d_%.20s", i+1, nlen[i], nametmp+1 );
// Two alternative starting points for the printed name; the selecting
// condition is presumably a test for the "_numo_e" marker -- TODO confirm.
5069 nameptr = strstr( nametmp, "_numo_e" ) + 8;
5071 nameptr = nametmp + 1;
5073 if( (tmpptr=strstr( nameptr, "_oe_" )) ) nameptr = tmpptr + 4; // needed because of the '=' -> '_' substitution
5075 tree[i] = calloc( strlen( nametmp )+100, sizeof( char ) ); // +100 to leave room for the number
5076 if( tree[i] == NULL )
5078 reporterr( "Cannot allocate tree!\n" );
5081 sprintf( tree[i], "\n%d_%.900s\n", i+1, nameptr );
// Link all entries of ac into one chain of live clusters.
5083 for( i=0; i<nseq; i++ )
5085 ac[i].next = ac+i+1;
5086 ac[i].prev = ac+i-1;
5089 ac[nseq-1].next = NULL;
// Initialise nearest neighbour and its distance for every entry.
5091 for( i=0; i<nseq; i++ ) setnearest( nseq, ac, eff, mindisfrom+i, nearest+i, i ); // muscle
5093 for( i=0; i<nseq; i++ ) tmptmplen[i] = 0.0;
5094 for( i=0; i<nseq; i++ )
// Main loop: one merge per iteration.
5101 for( k=0; k<nseq-1; k++ )
5103 if( k % 10 == 0 ) reporterr( "\r% 5d / %d", k, nseq );
// Scan the chain for the smallest cached nearest-neighbour distance.
5106 for( acpti=ac; acpti->next!=NULL; acpti=acpti->next )
5109 // reporterr( "k=%d i=%d\n", k, i );
5110 if( mindisfrom[i] < minscore ) // muscle
5113 minscore = mindisfrom[i];
// First child: member list of cluster im.
5123 prevnode = hist[im];
5124 if( dep ) dep[k].child0 = prevnode;
5125 nmemim = nmemar[im];
// NOTE(review): no NULL check follows this realloc, unlike the one for topol[k][1].
5126 intpt = topol[k][0] = (int *)realloc( topol[k][0], ( nmemim + 1 ) * sizeof( int ) );
5127 if( prevnode == -1 )
// Otherwise concatenate the two member lists of the earlier node prevnode.
5134 pt1 = topol[prevnode][0];
5135 pt2 = topol[prevnode][1];
5146 for( intpt2=pt11; *intpt2!=-1; )
5147 *intpt++ = *intpt2++;
5148 for( intpt2=pt22; *intpt2!=-1; )
5149 *intpt++ = *intpt2++;
// Second child: member list of cluster jm.
5153 prevnode = hist[jm];
5154 if( dep ) dep[k].child1 = prevnode;
5155 nmemjm = nmemar[jm];
5156 intpt = topol[k][1] = (int *)realloc( topol[k][1], ( nmemjm + 1 ) * sizeof( int ) );
5159 reporterr( "Cannot reallocate topol\n" );
5162 if( prevnode == -1 )
5169 pt1 = topol[prevnode][0];
5170 pt2 = topol[prevnode][1];
5181 for( intpt2=pt11; *intpt2!=-1; )
5182 *intpt++ = *intpt2++;
5183 for( intpt2=pt22; *intpt2!=-1; )
5184 *intpt++ = *intpt2++;
// Branch length = merge height minus the height already recorded for the child.
5190 len[k][0] = ( minscore - tmptmplen[im] );
5191 len[k][1] = ( minscore - tmptmplen[jm] );
5193 if( dep ) dep[k].distfromtip = minscore;
5194 // reporterr( "\n##### dep[%d].distfromtip = %f\n", k, minscore );
5196 tmptmplen[im] = minscore;
5199 nmemar[im] = nmemim + nmemjm;
// Reset im's cached minimum, then recompute distances from the merged cluster
// (stored under im) to every other live entry.
5201 mindisfrom[im] = 999.9;
5202 for( acpti=ac; acpti!=NULL; acpti=acpti->next )
5205 if( i != im && i != jm )
// Half-matrix addressing: row = smaller index, column = index difference.
5228 eff0 = eff[miniim][maxiim-miniim];
5229 eff1 = eff[minijm][maxijm-minijm];
// Two alternative update formulas follow; presumably only one is compiled -- TODO confirm.
5231 tmpdouble = eff[miniim][maxiim-miniim] =
5232 MIN( eff0, eff1 ) * sueff1 + ( eff0 + eff1 ) * sueff05;
5234 tmpdouble = eff[miniim][maxiim-miniim] =
5235 (clusterfuncpt[0])( eff0, eff1 );
5239 if( tmpdouble < mindisfrom[i] )
5241 mindisfrom[i] = tmpdouble;
5244 if( tmpdouble < mindisfrom[im] )
5246 mindisfrom[im] = tmpdouble;
5249 if( nearest[i] == jm )
// Build the text "(left:len,right:len)" for the merged cluster in treetmp.
5256 treetmp = realloc( treetmp, strlen( tree[im] ) + strlen( tree[jm] ) + 100 ); // 22 would be enough for (:%7,:%7); %7 may be negative
5259 reporterr( "Cannot allocate treetmp\n" );
5262 sprintf( treetmp, "(%s:%7.5f,%s:%7.5f)", tree[im], len[k][0], tree[jm], len[k][1] );
// Replace tree[im] with a fresh copy of the combined text.
5265 tree[im] = calloc( strlen( treetmp )+1, sizeof( char ) );
5267 if( tree[im] == NULL )
5269 reporterr( "Cannot reallocate tree!\n" );
5272 strcpy( tree[im], treetmp );
// Unlink jm from the chain of live clusters.
5274 acjmprev = ac[jm].prev;
5275 acjmnext = ac[jm].next;
5276 acjmprev->next = acjmnext;
5277 if( acjmnext != NULL )
5278 acjmnext->prev = acjmprev;
// Row jm of the distance matrix is released here.
5281 free( (void *)eff[jm] ); eff[jm] = NULL;
// Entries whose cached nearest neighbour is im may need a fresh search.
5284 #if 1 // muscle seems to miss this.
5285 for( acpti=ac; acpti!=NULL; acpti=acpti->next )
5288 if( nearest[i] == im )
5300 if( eff[miniim][maxiim-miniim] > mindisfrom[i] )
5301 setnearest( nseq, ac, eff, mindisfrom+i, nearest+i, i );
5305 reporterr( "chuui!\n" );
// Per-step dump of branch lengths and member lists.
5310 printf( "\nooSTEP-%03d:\n", k+1 );
5311 printf( "len0 = %f\n", len[k][0] );
5312 for( i=0; topol[k][0][i]>-1; i++ ) printf( " %03d", topol[k][0][i]+1 );
5314 printf( "len1 = %f\n", len[k][1] );
5315 for( i=0; topol[k][1][i]>-1; i++ ) printf( " %03d", topol[k][1][i]+1 );
// Write the tree text. NOTE(review): fp is used without a NULL check.
5319 fp = fopen( "infile.tree", "w" );
5320 fprintf( fp, "%s\n", treetmp );
// Release work arrays.
5327 free( (void *)tmptmplen ); tmptmplen = NULL;
5328 free( hist ); hist = NULL;
5329 free( (char *)ac ); ac = NULL;
5330 free( (void *)nmemar ); nmemar = NULL;
// Full-matrix counterpart of the tree builder: repeatedly merges the closest
// pair of live clusters and writes the resulting tree text to "infile.tree".
//
//  nseq  - number of sequences; nseq-1 merge steps are performed.
//  eff   - distance matrix addressed as eff[lo][hi] (lo < hi); overwritten in
//          place with cluster distances. Rows are not freed here (the free is
//          commented out below).
//  topol - topol[k][0] / topol[k][1] are filled with the member lists of the
//          two clusters joined at step k; they are used as passed in (the
//          realloc calls are commented out), so the caller must size them.
//  len   - len[k][0] / len[k][1] receive the branch lengths of step k.
//  name  - sequence names used to build the leaf labels.
//
// Several work pointers are function-static.
5335 void fixed_musclesupg_double_treeout( int nseq, double **eff, int ***topol, double **len, char **name )
5337 int i, j, k, miniim, maxiim, minijm, maxijm;
5338 int *intpt, *intpt2;
5341 static double *tmptmplen = NULL;
5342 static int *hist = NULL;
5343 static Bchain *ac = NULL;
5344 int im = -1, jm = -1;
5345 Bchain *acjmnext, *acjmprev;
5348 int *pt1, *pt2, *pt11, *pt22;
5352 int *nearest = NULL; // by D.Mathog, a guess
5353 double *mindisfrom = NULL; // by D.Mathog, a guess
5355 static char *treetmp;
5356 static char *nametmp, *nameptr, *tmpptr;
5358 double (*clusterfuncpt[1])(double,double);
// Weights for the MIN/sum mixing formula used when merging distances.
5362 sueff1 = 1.0 - sueff_global;
5363 sueff05 = sueff_global * 0.5;
// Select the cluster-distance function from the global treemethod.
5364 if ( treemethod == 'X' )
5365 clusterfuncpt[0] = cluster_mix_double;
5366 else if ( treemethod == 'E' )
5367 clusterfuncpt[0] = cluster_average_double;
5368 else if ( treemethod == 'q' )
5369 clusterfuncpt[0] = cluster_minimum_double;
5372 reporterr( "Unknown treemethod, %c\n", treemethod );
// Variant A: fixed-size text buffers (njob*150 per entry, 90-char names).
// NOTE(review): variants A and B below look like alternatives; presumably
// only one is compiled -- confirm against the surrounding conditionals.
5383 hist = AllocateIntVec( njob );
5384 tmptmplen = AllocateDoubleVec( njob );
// NOTE(review): the malloc result is not checked before the next statement.
5385 ac = (Bchain *)malloc( njob * sizeof( Bchain ) );
5386 nmemar = AllocateIntVec( njob );
5387 mindisfrom = AllocateDoubleVec( njob );
5388 nearest = AllocateIntVec( njob );
5389 treetmp = AllocateCharVec( njob*150 );
5390 nametmp = AllocateCharVec( 91 );
5391 tree = AllocateCharMtx( njob, njob*150 );
5393 for( i=0; i<nseq; i++ )
5395 for( j=0; j<90; j++ ) nametmp[j] = 0;
5396 for( j=0; j<90; j++ )
5398 if( name[i][j] == 0 )
// Alphanumeric characters are copied unchanged.
5400 else if( isalnum( name[i][j] ) )
5401 nametmp[j] = name[i][j];
5406 // sprintf( tree[i], "%d_%.60s", i+1, nametmp+1 );
5408 nameptr = strstr( nametmp, "_numo_e" ) + 8;
5410 nameptr = nametmp + 1;
5412 if( (tmpptr=strstr( nameptr, "_oe_" )) ) nameptr = tmpptr + 4; // needed because of the '=' -> '_' substitution
5414 sprintf( tree[i], "\n%d_%.60s\n", i+1, nameptr );
// Variant B: text buffers allocated per entry and grown on demand.
5421 hist = AllocateIntVec( njob );
5422 tmptmplen = AllocateDoubleVec( njob );
5423 ac = (Bchain *)malloc( njob * sizeof( Bchain ) );
5424 nmemar = AllocateIntVec( njob );
5425 mindisfrom = AllocateDoubleVec( njob );
5426 nearest = AllocateIntVec( njob );
5427 // treetmp = AllocateCharVec( njob * ( B + 100 ) ); // too long?
5428 treetmp = NULL; // under review 2013/06/12
5429 nametmp = AllocateCharVec( 1000 ); // too long
5430 // tree = AllocateCharMtx( njob, njob*600 );
5431 tree = AllocateCharMtx( njob, 0 );
5435 for( i=0; i<nseq; i++ )
5437 for( j=0; j<999; j++ ) nametmp[j] = 0;
5438 for( j=0; j<999; j++ )
// Alphanumerics and / = - { } are handled by this branch.
5443 else if( isalnum( namec ) || namec == '/' || namec == '=' || namec == '-' || namec == '{' || namec == '}' )
5449 // sprintf( tree[i], "%d_l=%d_%.20s", i+1, nlen[i], nametmp+1 );
5451 nameptr = strstr( nametmp, "_numo_e" ) + 8;
5453 nameptr = nametmp + 1;
5455 if( (tmpptr=strstr( nameptr, "_oe_" )) ) nameptr = tmpptr + 4; // needed because of the '=' -> '_' substitution
5457 tree[i] = calloc( strlen( nametmp )+100, sizeof( char ) ); // +100 to leave room for the number
5458 if( tree[i] == NULL )
5460 reporterr( "Cannot allocate tree!\n" );
5463 sprintf( tree[i], "\n%d_%.900s\n", i+1, nameptr );
// Link all entries of ac into one chain of live clusters.
5475 for( i=0; i<nseq; i++ )
5477 ac[i].next = ac+i+1;
5478 ac[i].prev = ac+i-1;
5481 ac[nseq-1].next = NULL;
// Initialise nearest neighbour and its distance for every entry.
5483 for( i=0; i<nseq; i++ ) setnearest_double_fullmtx( nseq, ac, eff, mindisfrom+i, nearest+i, i ); // muscle
5485 for( i=0; i<nseq; i++ ) tmptmplen[i] = 0.0;
5486 for( i=0; i<nseq; i++ )
// Main loop: one merge per iteration.
5493 for( k=0; k<nseq-1; k++ )
5495 if( k % 10 == 0 ) reporterr( "\r% 5d / %d", k, nseq );
// Scan the chain for the smallest cached nearest-neighbour distance.
5498 for( acpti=ac; acpti->next!=NULL; acpti=acpti->next )
5501 // reporterr( "k=%d i=%d\n", k, i );
5502 if( mindisfrom[i] < minscore ) // muscle
5505 minscore = mindisfrom[i];
// First child: member list of cluster im, written into the caller's buffer.
5515 prevnode = hist[im];
5516 nmemim = nmemar[im];
5517 // intpt = topol[k][0] = (int *)realloc( topol[k][0], ( nmemim + 1 ) * sizeof( int ) );
5518 intpt = topol[k][0];
5519 if( prevnode == -1 )
// Otherwise concatenate the two member lists of the earlier node prevnode.
5526 pt1 = topol[prevnode][0];
5527 pt2 = topol[prevnode][1];
5538 for( intpt2=pt11; *intpt2!=-1; )
5539 *intpt++ = *intpt2++;
5540 for( intpt2=pt22; *intpt2!=-1; )
5541 *intpt++ = *intpt2++;
// Second child: member list of cluster jm.
5545 prevnode = hist[jm];
5546 nmemjm = nmemar[jm];
5547 // intpt = topol[k][1] = (int *)realloc( topol[k][1], ( nmemjm + 1 ) * sizeof( int ) );
5548 intpt = topol[k][1];
5549 if( prevnode == -1 )
5556 pt1 = topol[prevnode][0];
5557 pt2 = topol[prevnode][1];
5568 for( intpt2=pt11; *intpt2!=-1; )
5569 *intpt++ = *intpt2++;
5570 for( intpt2=pt22; *intpt2!=-1; )
5571 *intpt++ = *intpt2++;
// Branch length = merge height minus the height already recorded for the child.
5577 len[k][0] = ( minscore - tmptmplen[im] );
5578 len[k][1] = ( minscore - tmptmplen[jm] );
5581 tmptmplen[im] = minscore;
5584 nmemar[im] = nmemim + nmemjm;
// Reset im's cached minimum, then recompute distances from the merged cluster
// (stored under im) to every other live entry.
5586 mindisfrom[im] = 999.9;
5587 for( acpti=ac; acpti!=NULL; acpti=acpti->next )
5590 if( i != im && i != jm )
// Full-matrix addressing: row = smaller index, column = larger index.
5613 eff0 = eff[miniim][maxiim];
5614 eff1 = eff[minijm][maxijm];
// Two alternative update formulas follow; presumably only one is compiled -- TODO confirm.
5616 tmpdouble = eff[miniim][maxiim] =
5617 MIN( eff0, eff1 ) * sueff1 + ( eff0 + eff1 ) * sueff05;
5619 tmpdouble = eff[miniim][maxiim] =
5620 (clusterfuncpt[0])( eff0, eff1 );
5622 if( tmpdouble < mindisfrom[i] )
5624 mindisfrom[i] = tmpdouble;
5627 if( tmpdouble < mindisfrom[im] )
5629 mindisfrom[im] = tmpdouble;
5632 if( nearest[i] == jm )
// Text for the merged cluster, fixed-buffer form (goes with variant A).
5639 sprintf( treetmp, "(%s:%7.5f,%s:%7.5f)", tree[im], len[k][0], tree[jm], len[k][1] );
5640 strcpy( tree[im], treetmp );
// Text for the merged cluster, grow-on-demand form (goes with variant B).
5642 treetmp = realloc( treetmp, strlen( tree[im] ) + strlen( tree[jm] ) + 100 ); // 22 would be enough for (:%7,:%7); %7 may be negative
5645 reporterr( "Cannot allocate treetmp\n" );
5648 sprintf( treetmp, "(%s:%7.5f,%s:%7.5f)", tree[im], len[k][0], tree[jm], len[k][1] );
5651 tree[im] = calloc( strlen( treetmp )+1, sizeof( char ) );
5653 if( tree[im] == NULL )
5655 reporterr( "Cannot reallocate tree!\n" );
5658 strcpy( tree[im], treetmp );
// Unlink jm from the chain of live clusters.
5661 acjmprev = ac[jm].prev;
5662 acjmnext = ac[jm].next;
5663 acjmprev->next = acjmnext;
5664 if( acjmnext != NULL )
5665 acjmnext->prev = acjmprev;
5666 // free( (void *)eff[jm] ); eff[jm] = NULL;
// Entries whose cached nearest neighbour is im may need a fresh search.
5668 #if 1 // muscle seems to miss this.
5669 for( acpti=ac; acpti!=NULL; acpti=acpti->next )
5672 if( nearest[i] == im )
5684 if( eff[miniim][maxiim] > mindisfrom[i] )
5685 setnearest_double_fullmtx( nseq, ac, eff, mindisfrom+i, nearest+i, i );
// Per-step dump of branch lengths and member lists.
5692 fprintf( stdout, "\nvSTEP-%03d:\n", k+1 );
5693 fprintf( stdout, "len0 = %f\n", len[k][0] );
5694 for( i=0; topol[k][0][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][0][i]+1 );
5695 fprintf( stdout, "\n" );
5696 fprintf( stdout, "len1 = %f\n", len[k][1] );
5697 for( i=0; topol[k][1][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][1][i]+1 );
5698 fprintf( stdout, "\n" );
// Write the tree text. NOTE(review): fp is used without a NULL check.
5701 fp = fopen( "infile.tree", "w" );
5702 fprintf( fp, "%s\n", treetmp );
5705 FreeCharMtx( tree );
// Release work arrays and clear the static pointers.
5712 free( (void *)tmptmplen ); tmptmplen = NULL;
5713 free( hist ); hist = NULL;
5714 free( (char *)ac ); ac = NULL;
5715 free( (void *)nmemar ); nmemar = NULL;
// Tree builder with grouping constraints: merges the closest pair of live
// clusters step by step, but rejects a candidate merge when the combined
// member set partially overlaps one of the given groups. Writes the resulting
// tree text to "infile.tree".
//
//  nseq   - number of sequences; nseq-1 merge steps are performed.
//  eff    - full distance matrix addressed as eff[lo][hi]; it is first passed
//           to increaseintergroupdistancesfullmtx() and then overwritten in
//           place with cluster distances.
//  topol  - topol[k][0] / topol[k][1] are filled with the member lists of the
//           two clusters joined at step k; used as passed in (the realloc calls
//           are commented out), so the caller must size them.
//  len    - len[k][0] / len[k][1] receive the branch lengths, clamped to >= 0.
//  name   - sequence names used to build the leaf labels.
//  ngroup - number of entries in groups.
//  groups - groups[i] is a member list tested with overlapmember() and
//           includemember() (iterated up to a negative terminator in the
//           commented-out debug code).
5720 void fixed_supg_double_treeout_constrained( int nseq, double **eff, int ***topol, double **len, char **name, int ngroup, int **groups )
5722 int i, j, k, miniim, maxiim, minijm, maxijm;
5723 int *intpt, *intpt2;
5726 static double *tmptmplen = NULL;
5727 static int *hist = NULL;
5728 static Bchain *ac = NULL;
5729 int im = -1, jm = -1;
5730 Bchain *acjmnext, *acjmprev;
5732 Bchain *acpti, *acptj;
5733 int *pt1, *pt2, *pt11, *pt22;
5737 int *nearest = NULL; // by D.Mathog, a guess
5738 double *mindisfrom = NULL; // by D.Mathog, a guess
5740 static char *treetmp;
5741 static char *nametmp, *nameptr, *tmpptr;
5743 double (*clusterfuncpt[1])(double,double);
5745 int *testtopol, **inconsistent;
5746 int **inconsistentpairlist;
5747 int ninconsistentpairs;
5749 int allinconsistent;
// Adjust the input distances for the grouping before clustering starts.
5752 increaseintergroupdistancesfullmtx( eff, ngroup, groups, nseq );
// Weights for the MIN/sum mixing formula used when merging distances.
5754 sueff1 = 1 - sueff_global;
5755 sueff05 = sueff_global * 0.5;
// Select the cluster-distance function from the global treemethod.
5756 if ( treemethod == 'X' )
5757 clusterfuncpt[0] = cluster_mix_double;
5758 else if ( treemethod == 'E' )
5759 clusterfuncpt[0] = cluster_average_double;
5760 else if ( treemethod == 'q' )
5761 clusterfuncpt[0] = cluster_minimum_double;
5764 reporterr( "Unknown treemethod, %c\n", treemethod );
// Variant A: fixed-size text buffers (njob*150 per entry, 90-char names).
// NOTE(review): variants A and B below look like alternatives; presumably
// only one is compiled -- confirm against the surrounding conditionals.
5775 hist = AllocateIntVec( njob );
5776 tmptmplen = AllocateDoubleVec( njob );
// NOTE(review): the malloc result is not checked before the next statement.
5777 ac = (Bchain *)malloc( njob * sizeof( Bchain ) );
5778 nmemar = AllocateIntVec( njob );
5779 mindisfrom = AllocateDoubleVec( njob );
5780 nearest = AllocateIntVec( njob );
5781 treetmp = AllocateCharVec( njob*150 );
5782 nametmp = AllocateCharVec( 91 );
5783 tree = AllocateCharMtx( njob, njob*150 );
5785 for( i=0; i<nseq; i++ )
5787 for( j=0; j<90; j++ ) nametmp[j] = 0;
5788 for( j=0; j<90; j++ )
5790 if( name[i][j] == 0 )
// Alphanumeric characters are copied unchanged.
5792 else if( isalnum( name[i][j] ) )
5793 nametmp[j] = name[i][j];
5798 // sprintf( tree[i], "%d_%.60s", i+1, nametmp+1 );
5800 nameptr = strstr( nametmp, "_numo_e" ) + 8;
5802 nameptr = nametmp + 1;
5804 if( (tmpptr=strstr( nameptr, "_oe_" )) ) nameptr = tmpptr + 4; // needed because of the '=' -> '_' substitution
5806 sprintf( tree[i], "\n%d_%.60s\n", i+1, nameptr );
// Variant B: text buffers allocated per entry and grown on demand, plus the
// work areas for the constraint check.
5813 hist = AllocateIntVec( njob );
5814 tmptmplen = AllocateDoubleVec( njob );
5815 ac = (Bchain *)malloc( njob * sizeof( Bchain ) );
5816 nmemar = AllocateIntVec( njob );
5817 mindisfrom = AllocateDoubleVec( njob );
5818 nearest = AllocateIntVec( njob );
5819 // treetmp = AllocateCharVec( njob * ( B + 100 ) ); // too long?
5820 treetmp = NULL; // under review 2013/06/12
5821 nametmp = AllocateCharVec( 1000 ); // too long
5822 // tree = AllocateCharMtx( njob, njob*600 );
5823 tree = AllocateCharMtx( njob, 0 );
5824 testtopol = AllocateIntVec( njob + 1 );
5825 inconsistent = AllocateIntMtx( njob, njob ); // wasteful
5826 // inconsistentpairlist = AllocateIntMtx( njob*(njob-1)/2+1, 2 ); // wasteful
5827 inconsistentpairlist = AllocateIntMtx( 1, 2 );
5828 warned = AllocateIntVec( ngroup );
5832 for( i=0; i<nseq; i++ )
5834 for( j=0; j<999; j++ ) nametmp[j] = 0;
5835 for( j=0; j<999; j++ )
// Alphanumerics and / = - { } are handled by this branch.
5840 else if( isalnum( namec ) || namec == '/' || namec == '=' || namec == '-' || namec == '{' || namec == '}' )
5846 // sprintf( tree[i], "%d_l=%d_%.20s", i+1, nlen[i], nametmp+1 );
5848 nameptr = strstr( nametmp, "_numo_e" ) + 8;
5850 nameptr = nametmp + 1;
5852 if( (tmpptr=strstr( nameptr, "_oe_" )) ) nameptr = tmpptr + 4; // needed because of the '=' -> '_' substitution
5854 tree[i] = calloc( strlen( nametmp )+100, sizeof( char ) ); // +100 to leave room for the number
5855 if( tree[i] == NULL )
5857 reporterr( "Cannot allocate tree!\n" );
5860 sprintf( tree[i], "\n%d_%.900s\n", i+1, nameptr );
// Link all entries of ac into one chain of live clusters.
5872 for( i=0; i<nseq; i++ )
5874 ac[i].next = ac+i+1;
5875 ac[i].prev = ac+i-1;
5878 ac[nseq-1].next = NULL;
// Initialise nearest neighbour and its distance for every entry.
5880 for( i=0; i<nseq; i++ ) setnearest_double_fullmtx( nseq, ac, eff, mindisfrom+i, nearest+i, i ); // muscle
5882 for( i=0; i<nseq; i++ ) tmptmplen[i] = 0.0;
5883 for( i=0; i<nseq; i++ )
5890 ninconsistentpairs = 0;
// Main loop: one accepted merge per iteration.
5891 for( k=0; k<nseq-1; k++ )
5893 if( k % 10 == 0 ) reporterr( "\r% 5d / %d", k, nseq );
5897 // for( acpti=ac; acpti->next!=NULL; acpti=acpti->next ) for( acptj=acpti->next; acptj!=NULL; acptj=acptj->next ) inconsistent[acpti->pos][acptj->pos] = 0;
// Clear only the flags set during the previous step, using the recorded pair
// list, then empty the list.
// NOTE(review): confirm that the rows malloc'ed for the list are released
// somewhere before the counter is reset.
5898 for( i=0; i<ninconsistentpairs; i++ ) inconsistent[inconsistentpairlist[i][0]][inconsistentpairlist[i][1]] = 0;
5899 ninconsistentpairs = 0;
// Candidate search via the cached nearest-neighbour distances.
5907 for( acpti=ac; acpti->next!=NULL; acpti=acpti->next )
5910 // reporterr( "k=%d i=%d\n", k, i );
5911 if( mindisfrom[i] < minscore ) // muscle
5914 minscore = mindisfrom[i];
// Candidate search over all live pairs, skipping pairs flagged inconsistent.
5926 for( acpti=ac; acpti->next!=NULL; acpti=acpti->next )
5929 // reporterr( "k=%d i=%d\n", k, i );
5930 for( acptj=acpti->next; acptj!=NULL; acptj=acptj->next )
5933 if( !inconsistent[i][j] && (tmpdouble=eff[i][j]) < minscore )
5935 minscore = tmpdouble;
5939 for( acptj=ac; (acptj&&acptj->pos!=i); acptj=acptj->next )
5942 if( !inconsistent[j][i] && (tmpdouble=eff[j][i]) < minscore )
5944 minscore = tmpdouble;
// Detect the dead end where every remaining live pair is flagged inconsistent.
5951 allinconsistent = 1;
5952 for( acpti=ac; acpti->next!=NULL; acpti=acpti->next )
5954 for( acptj=acpti->next; acptj!=NULL; acptj=acptj->next )
5956 if( inconsistent[acpti->pos][acptj->pos] == 0 )
5958 allinconsistent = 0;
5965 if( allinconsistent )
5967 reporterr( "\n\n\nPlease check whether the grouping is possible.\n\n\n" );
// Collect the members of im and jm (presumably into testtopol -- TODO confirm)
// so that the candidate merge can be tested against the groups.
5972 prevnode = hist[im];
5973 if( prevnode == -1 )
5979 for( intpt2=topol[prevnode][0]; *intpt2!=-1; )
5980 *intpt++ = *intpt2++;
5981 for( intpt2=topol[prevnode][1]; *intpt2!=-1; )
5982 *intpt++ = *intpt2++;
5985 prevnode = hist[jm];
5986 if( prevnode == -1 )
5992 for( intpt2=topol[prevnode][0]; *intpt2!=-1; )
5993 *intpt++ = *intpt2++;
5994 for( intpt2=topol[prevnode][1]; *intpt2!=-1; )
5995 *intpt++ = *intpt2++;
5998 // reporterr( "testtopol = \n" );
5999 // for( i=0; testtopol[i]>-1; i++ ) reporterr( " %03d", testtopol[i]+1 );
6000 // reporterr( "\n" );
// A candidate is rejected when it overlaps a group while neither set contains
// the other.
6002 for( i=0; i<ngroup; i++ )
6004 // reporterr( "groups[%d] = \n", i );
6005 // for( j=0; groups[i][j]>-1; j++ ) reporterr( " %03d", groups[i][j]+1 );
6006 // reporterr( "\n" );
6007 if( overlapmember( testtopol, groups[i] ) )
6009 if( !includemember( testtopol, groups[i] ) && !includemember( groups[i], testtopol ) )
6014 reporterr( "\n###################################################################\n" );
6015 reporterr( "# WARNING: Group %d is forced to be a monophyletic cluster.\n", i+1 );
6016 reporterr( "###################################################################\n" );
// Flag the pair and remember it so the flag can be cleared at the next step.
6018 inconsistent[im][jm] = 1;
// NOTE(review): neither the realloc nor the malloc result is checked before use.
6020 inconsistentpairlist = realloc( inconsistentpairlist, (ninconsistentpairs+1)*sizeof( int * ) );
6021 inconsistentpairlist[ninconsistentpairs] = malloc( sizeof( int ) * 2 );
6022 inconsistentpairlist[ninconsistentpairs][0] = im;
6023 inconsistentpairlist[ninconsistentpairs][1] = jm;
6024 ninconsistentpairs++;
6031 // reporterr( "OK\n" );
// Accepted merge. First child: member list of cluster im.
6041 prevnode = hist[im];
6042 nmemim = nmemar[im];
6043 // intpt = topol[k][0] = (int *)realloc( topol[k][0], ( nmemim + 1 ) * sizeof( int ) );
6044 intpt = topol[k][0];
6045 if( prevnode == -1 )
// Otherwise concatenate the two member lists of the earlier node prevnode.
6052 pt1 = topol[prevnode][0];
6053 pt2 = topol[prevnode][1];
6064 for( intpt2=pt11; *intpt2!=-1; )
6065 *intpt++ = *intpt2++;
6066 for( intpt2=pt22; *intpt2!=-1; )
6067 *intpt++ = *intpt2++;
// Second child: member list of cluster jm.
6071 prevnode = hist[jm];
6072 nmemjm = nmemar[jm];
6073 // intpt = topol[k][1] = (int *)realloc( topol[k][1], ( nmemjm + 1 ) * sizeof( int ) );
6074 intpt = topol[k][1];
6075 if( prevnode == -1 )
6082 pt1 = topol[prevnode][0];
6083 pt2 = topol[prevnode][1];
6094 for( intpt2=pt11; *intpt2!=-1; )
6095 *intpt++ = *intpt2++;
6096 for( intpt2=pt22; *intpt2!=-1; )
6097 *intpt++ = *intpt2++;
// Branch length = merge height minus the child's recorded height; negative
// values are clamped to zero.
6103 len[k][0] = ( minscore - tmptmplen[im] );
6104 len[k][1] = ( minscore - tmptmplen[jm] );
6105 if( len[k][0] < 0.0 ) len[k][0] = 0.0;
6106 if( len[k][1] < 0.0 ) len[k][1] = 0.0;
6109 tmptmplen[im] = minscore;
6112 nmemar[im] = nmemim + nmemjm;
// Reset im's cached minimum and overwrite the merged pair's own distance.
6114 mindisfrom[im] = 999.9;
6115 eff[im][jm] = 999.9;
6116 // eff[im][jm-im] = 999.9; // bug??
// Recompute distances from the merged cluster (stored under im) to every
// other live entry.
6118 for( acpti=ac; acpti!=NULL; acpti=acpti->next )
6121 if( i != im && i != jm )
// Full-matrix addressing: row = smaller index, column = larger index.
6144 eff0 = eff[miniim][maxiim];
6145 eff1 = eff[minijm][maxijm];
// Two alternative update formulas follow; presumably only one is compiled -- TODO confirm.
6147 tmpdouble = eff[miniim][maxiim] =
6148 MIN( eff0, eff1 ) * sueff1 + ( eff0 + eff1 ) * sueff05;
6150 tmpdouble = eff[miniim][maxiim] =
6151 (clusterfuncpt[0])( eff0, eff1 );
6155 if( tmpdouble < mindisfrom[i] )
6157 mindisfrom[i] = tmpdouble;
6160 if( tmpdouble < mindisfrom[im] )
6162 mindisfrom[im] = tmpdouble;
6165 if( nearest[i] == jm )
// Text for the merged cluster, fixed-buffer form (goes with variant A).
6173 sprintf( treetmp, "(%s:%7.5f,%s:%7.5f)", tree[im], len[k][0], tree[jm], len[k][1] );
6174 strcpy( tree[im], treetmp );
// Text for the merged cluster, grow-on-demand form (goes with variant B).
6176 treetmp = realloc( treetmp, strlen( tree[im] ) + strlen( tree[jm] ) + 100 ); // 22 would be enough for (:%7,:%7); %7 may be negative
6179 reporterr( "Cannot allocate treetmp\n" );
6182 sprintf( treetmp, "(%s:%7.5f,%s:%7.5f)", tree[im], len[k][0], tree[jm], len[k][1] );
6185 tree[im] = calloc( strlen( treetmp )+1, sizeof( char ) );
6187 if( tree[im] == NULL )
6189 reporterr( "Cannot reallocate tree!\n" );
6192 strcpy( tree[im], treetmp );
// Unlink jm from the chain of live clusters.
6195 acjmprev = ac[jm].prev;
6196 acjmnext = ac[jm].next;
6197 acjmprev->next = acjmnext;
6198 if( acjmnext != NULL )
6199 acjmnext->prev = acjmprev;
6200 // free( (void *)eff[jm] ); eff[jm] = NULL;
// Entries whose cached nearest neighbour is im may need a fresh search.
6202 #if 1 // muscle seems to miss this.
6203 for( acpti=ac; acpti!=NULL; acpti=acpti->next )
6206 if( nearest[i] == im )
6218 if( eff[miniim][maxiim] > mindisfrom[i] )
6219 setnearest_double_fullmtx( nseq, ac, eff, mindisfrom+i, nearest+i, i );
// Per-step dump of branch lengths and member lists.
6226 fprintf( stdout, "\ncSTEP-%03d:\n", k+1 );
6227 fprintf( stdout, "len0 = %f\n", len[k][0] );
6228 for( i=0; topol[k][0][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][0][i]+1 );
6229 fprintf( stdout, "\n" );
6230 fprintf( stdout, "len1 = %f\n", len[k][1] );
6231 for( i=0; topol[k][1][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][1][i]+1 );
6232 fprintf( stdout, "\n" );
// Write the tree text. NOTE(review): fp is used without a NULL check.
6235 fp = fopen( "infile.tree", "w" );
6236 fprintf( fp, "%s\n", treetmp );
6239 FreeCharMtx( tree );
// Release work arrays and clear the static pointers.
6246 free( (void *)tmptmplen ); tmptmplen = NULL;
6247 free( hist ); hist = NULL;
6248 free( (char *)ac ); ac = NULL;
6249 free( (void *)nmemar ); nmemar = NULL;
6253 FreeIntMtx( inconsistent );
6254 FreeIntMtx( inconsistentpairlist );
// Memory-saving tree builder: repeatedly merges the closest pair of live
// clusters, keeping only a two-int buffer for each side of every step.
//
//  nseq        - number of sequences; nseq-1 merge steps are performed.
//  eff         - distance matrix addressed as eff[lo][hi-lo] (lo < hi). It is
//                overwritten in place, and the row eff[jm] of the absorbed
//                cluster is freed after each merge.
//  topol       - topol[k][0] / topol[k][1] are realloc'ed to hold 2 ints each.
//  len         - len[k][0] / len[k][1] receive the branch lengths of step k.
//  dep         - optional (may be NULL); when given, child0, child1 and
//                distfromtip are stored for every step.
//  progressout - when non-zero, progress is reported through reporterr().
//  efffree     - NOTE(review): presumably controls whether eff rows are
//                released; confirm against the guard around the free() below.
6258 void fixed_musclesupg_double_realloc_nobk_halfmtx_memsave( int nseq, double **eff, int ***topol, double **len, Treedep *dep, int progressout, int efffree )
6260 int i, j, k, miniim, maxiim, minijm, maxijm;
6264 double *tmptmplen = NULL; // static TLS -> local, 2012/02/25
6265 int *hist = NULL; // static TLS -> local, 2012/02/25
6266 Bchain *ac = NULL; // static TLS -> local, 2012/02/25
6267 int im = -1, jm = -1;
6268 Bchain *acjmnext, *acjmprev;
6271 int *pt1, *pt2, *pt11;
6272 int *nmemar; // static TLS -> local, 2012/02/25
6275 int *nearest = NULL; // by Mathog, a guess
6276 double *mindisfrom = NULL; // by Mathog, a guess
6277 double (*clusterfuncpt[1])(double,double);
// Weights for the MIN/sum mixing formula used when merging distances.
6280 sueff1 = 1 - (double)sueff_global;
6281 sueff05 = (double)sueff_global * 0.5;
// Select the cluster-distance function from the global treemethod.
6282 if ( treemethod == 'X' )
6283 clusterfuncpt[0] = cluster_mix_double;
6284 else if ( treemethod == 'E' )
6285 clusterfuncpt[0] = cluster_average_double;
6286 else if ( treemethod == 'q' )
6287 clusterfuncpt[0] = cluster_minimum_double;
6290 reporterr( "Unknown treemethod, %c\n", treemethod );
// Work arrays are sized by the global njob, while the loops below run to nseq.
6296 hist = AllocateIntVec( njob );
6297 tmptmplen = AllocateFloatVec( njob );
// NOTE(review): the malloc result is not checked before the next statement.
6298 ac = (Bchain *)malloc( njob * sizeof( Bchain ) );
6299 nmemar = AllocateIntVec( njob );
6300 mindisfrom = AllocateFloatVec( njob );
6301 nearest = AllocateIntVec( njob );
// Link all entries of ac into one chain of live clusters.
6305 for( i=0; i<nseq; i++ )
6307 ac[i].next = ac+i+1;
6308 ac[i].prev = ac+i-1;
6311 ac[nseq-1].next = NULL;
// Initialise nearest neighbour and its distance for every entry.
6313 for( i=0; i<nseq; i++ ) setnearest( nseq, ac, eff, mindisfrom+i, nearest+i, i ); // muscle
6315 for( i=0; i<nseq; i++ ) tmptmplen[i] = 0.0;
6316 for( i=0; i<nseq; i++ )
6322 if( progressout ) reporterr( "\n" );
// Main loop: one merge per iteration.
6323 for( k=0; k<nseq-1; k++ )
6325 if( progressout && k % 10 == 0 ) reporterr( "\r% 5d / %d", k, nseq );
// Scan the chain for the smallest cached nearest-neighbour distance.
6328 for( acpti=ac; acpti->next!=NULL; acpti=acpti->next )
6331 // reporterr( "k=%d i=%d\n", k, i );
6332 if( mindisfrom[i] < minscore ) // muscle
6335 minscore = mindisfrom[i];
// First child of step k.
6345 prevnode = hist[im];
6346 if( dep ) dep[k].child0 = prevnode;
6347 nmemim = nmemar[im];
// Only two ints are reserved instead of nmemim+1.
// NOTE(review): no NULL check follows this realloc, unlike the one for topol[k][1].
6348 intpt = topol[k][0] = (int *)realloc( topol[k][0], ( 2 ) * sizeof( int ) ); // memsave
6349 // intpt = topol[k][0] = (int *)realloc( topol[k][0], ( nmemim + 1 ) * sizeof( int ) );
6350 if( prevnode == -1 )
6357 pt1 = topol[prevnode][0];
6358 pt2 = topol[prevnode][1];
// NOTE(review): these loops copy whole -1-terminated lists, which would not
// fit a two-int buffer; presumably they are compiled out in this variant --
// confirm.
6373 for( intpt2=pt11; *intpt2!=-1; )
6374 *intpt++ = *intpt2++;
6375 for( intpt2=pt22; *intpt2!=-1; )
6376 *intpt++ = *intpt2++;
// Second child of step k.
6381 prevnode = hist[jm];
6382 if( dep ) dep[k].child1 = prevnode;
6383 nmemjm = nmemar[jm];
6384 intpt = topol[k][1] = (int *)realloc( topol[k][1], ( 2 ) * sizeof( int ) );
6385 // intpt = topol[k][1] = (int *)realloc( topol[k][1], ( nmemjm + 1 ) * sizeof( int ) );
6388 reporterr( "Cannot reallocate topol\n" );
6391 if( prevnode == -1 )
6398 pt1 = topol[prevnode][0];
6399 pt2 = topol[prevnode][1];
6414 for( intpt2=pt11; *intpt2!=-1; )
6415 *intpt++ = *intpt2++;
6416 for( intpt2=pt22; *intpt2!=-1; )
6417 *intpt++ = *intpt2++;
// Branch length = merge height minus the height already recorded for the child.
6424 len[k][0] = ( minscore - tmptmplen[im] );
6425 len[k][1] = ( minscore - tmptmplen[jm] );
6427 if( dep ) dep[k].distfromtip = minscore;
6429 tmptmplen[im] = minscore;
6432 nmemar[im] = nmemim + nmemjm;
// Reset im's cached minimum, then recompute distances from the merged cluster
// (stored under im) to every other live entry.
6434 mindisfrom[im] = 999.9;
6435 for( acpti=ac; acpti!=NULL; acpti=acpti->next )
6438 if( i != im && i != jm )
// Half-matrix addressing: row = smaller index, column = index difference.
6461 eff0 = eff[miniim][maxiim-miniim];
6462 eff1 = eff[minijm][maxijm-minijm];
// The assignment is completed by one of the two expressions below; presumably
// only one of them is compiled -- TODO confirm.
6463 tmpdouble = eff[miniim][maxiim-miniim] =
6465 MIN( eff0, eff1 ) * sueff1 + ( eff0 + eff1 ) * sueff05;
6467 (clusterfuncpt[0])( eff0, eff1 );
6469 if( tmpdouble < mindisfrom[i] )
6471 mindisfrom[i] = tmpdouble;
6474 if( tmpdouble < mindisfrom[im] )
6476 mindisfrom[im] = tmpdouble;
6479 if( nearest[i] == jm )
6486 // reporterr( "im,jm=%d,%d\n", im, jm );
// Unlink jm from the chain of live clusters.
6487 acjmprev = ac[jm].prev;
6488 acjmnext = ac[jm].next;
6489 acjmprev->next = acjmnext;
6490 if( acjmnext != NULL )
6491 acjmnext->prev = acjmprev;
// Row jm of the distance matrix is released here.
6494 free( (void *)eff[jm] ); eff[jm] = NULL;
// Entries whose cached nearest neighbour is im may need a fresh search.
6497 #if 1 // muscle seems to miss this.
6498 for( acpti=ac; acpti!=NULL; acpti=acpti->next )
6501 if( nearest[i] == im )
6513 if( eff[miniim][maxiim-miniim] > mindisfrom[i] )
6514 setnearest( nseq, ac, eff, mindisfrom+i, nearest+i, i );
// Per-step dump of branch lengths and member lists.
6521 fprintf( stdout, "vSTEP-%03d:\n", k+1 );
6522 fprintf( stdout, "len0 = %f\n", len[k][0] );
6523 for( i=0; topol[k][0][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][0][i]+1 );
6524 fprintf( stdout, "\n" );
6525 fprintf( stdout, "len1 = %f\n", len[k][1] );
6526 for( i=0; topol[k][1][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][1][i]+1 );
6527 fprintf( stdout, "\n" );
// Release work arrays.
6530 free( (void *)tmptmplen ); tmptmplen = NULL;
6531 free( hist ); hist = NULL;
6532 free( (char *)ac ); ac = NULL;
6533 free( (void *)nmemar ); nmemar = NULL;
// Tree builder on a half distance matrix: repeatedly merges the closest pair
// of live clusters and records full member lists for every step.
//
//  nseq        - number of sequences; nseq-1 merge steps are performed.
//  eff         - distance matrix addressed as eff[lo][hi-lo] (lo < hi). It is
//                overwritten in place, and the row eff[jm] of the absorbed
//                cluster is freed after each merge.
//  topol       - topol[k][0] / topol[k][1] are realloc'ed to member count + 1
//                ints and filled with the member lists (lists end with -1).
//  len         - len[k][0] / len[k][1] receive the branch lengths of step k.
//  dep         - optional (may be NULL); when given, child0, child1 and
//                distfromtip are stored for every step.
//  progressout - when non-zero, progress is reported through reporterr().
//  efffree     - NOTE(review): presumably controls whether eff rows are
//                released; confirm against the guard around the free() below.
6537 void fixed_musclesupg_double_realloc_nobk_halfmtx( int nseq, double **eff, int ***topol, double **len, Treedep *dep, int progressout, int efffree )
6539 int i, j, k, miniim, maxiim, minijm, maxijm;
6540 int *intpt, *intpt2;
6543 double *tmptmplen = NULL; // static TLS -> local, 2012/02/25
6544 int *hist = NULL; // static TLS -> local, 2012/02/25
6545 Bchain *ac = NULL; // static TLS -> local, 2012/02/25
6546 int im = -1, jm = -1;
6547 Bchain *acjmnext, *acjmprev;
6550 int *pt1, *pt2, *pt11, *pt22;
6551 int *nmemar; // static TLS -> local, 2012/02/25
6554 int *nearest = NULL; // by Mathog, a guess
6555 double *mindisfrom = NULL; // by Mathog, a guess
6556 double (*clusterfuncpt[1])(double,double);
// Weights for the MIN/sum mixing formula used when merging distances.
6559 sueff1 = 1 - (double)sueff_global;
6560 sueff05 = (double)sueff_global * 0.5;
// Select the cluster-distance function from the global treemethod.
6561 if ( treemethod == 'X' )
6562 clusterfuncpt[0] = cluster_mix_double;
6563 else if ( treemethod == 'E' )
6564 clusterfuncpt[0] = cluster_average_double;
6565 else if ( treemethod == 'q' )
6566 clusterfuncpt[0] = cluster_minimum_double;
6569 reporterr( "Unknown treemethod, %c\n", treemethod );
// Work arrays are sized by the global njob, while the loops below run to nseq.
6575 hist = AllocateIntVec( njob );
6576 tmptmplen = AllocateFloatVec( njob );
// NOTE(review): the malloc result is not checked before the next statement.
6577 ac = (Bchain *)malloc( njob * sizeof( Bchain ) );
6578 nmemar = AllocateIntVec( njob );
6579 mindisfrom = AllocateFloatVec( njob );
6580 nearest = AllocateIntVec( njob );
// Link all entries of ac into one chain of live clusters.
6584 for( i=0; i<nseq; i++ )
6586 ac[i].next = ac+i+1;
6587 ac[i].prev = ac+i-1;
6590 ac[nseq-1].next = NULL;
// Initialise nearest neighbour and its distance for every entry.
6592 for( i=0; i<nseq; i++ ) setnearest( nseq, ac, eff, mindisfrom+i, nearest+i, i ); // muscle
6594 for( i=0; i<nseq; i++ ) tmptmplen[i] = 0.0;
6595 for( i=0; i<nseq; i++ )
6601 if( progressout ) reporterr( "\n" );
// Main loop: one merge per iteration.
6602 for( k=0; k<nseq-1; k++ )
6604 if( progressout && k % 10 == 0 ) reporterr( "\r% 5d / %d", k, nseq );
// Scan the chain for the smallest cached nearest-neighbour distance.
6607 for( acpti=ac; acpti->next!=NULL; acpti=acpti->next )
6610 // reporterr( "k=%d i=%d\n", k, i );
6611 if( mindisfrom[i] < minscore ) // muscle
6614 minscore = mindisfrom[i];
// First child: member list of cluster im.
6624 prevnode = hist[im];
6625 if( dep ) dep[k].child0 = prevnode;
6626 nmemim = nmemar[im];
// NOTE(review): no NULL check follows this realloc, unlike the one for topol[k][1].
6627 intpt = topol[k][0] = (int *)realloc( topol[k][0], ( nmemim + 1 ) * sizeof( int ) );
6628 if( prevnode == -1 )
// Otherwise concatenate the two member lists of the earlier node prevnode.
6635 pt1 = topol[prevnode][0];
6636 pt2 = topol[prevnode][1];
6647 for( intpt2=pt11; *intpt2!=-1; )
6648 *intpt++ = *intpt2++;
6649 for( intpt2=pt22; *intpt2!=-1; )
6650 *intpt++ = *intpt2++;
// Second child: member list of cluster jm.
6654 prevnode = hist[jm];
6655 if( dep ) dep[k].child1 = prevnode;
6656 nmemjm = nmemar[jm];
6657 intpt = topol[k][1] = (int *)realloc( topol[k][1], ( nmemjm + 1 ) * sizeof( int ) );
6660 reporterr( "Cannot reallocate topol\n" );
6663 if( prevnode == -1 )
6670 pt1 = topol[prevnode][0];
6671 pt2 = topol[prevnode][1];
6682 for( intpt2=pt11; *intpt2!=-1; )
6683 *intpt++ = *intpt2++;
6684 for( intpt2=pt22; *intpt2!=-1; )
6685 *intpt++ = *intpt2++;
// Branch length = merge height minus the height already recorded for the child.
6691 len[k][0] = ( minscore - tmptmplen[im] );
6692 len[k][1] = ( minscore - tmptmplen[jm] );
6694 if( dep ) dep[k].distfromtip = minscore;
6696 tmptmplen[im] = minscore;
6699 nmemar[im] = nmemim + nmemjm;
// Reset im's cached minimum, then recompute distances from the merged cluster
// (stored under im) to every other live entry.
6701 mindisfrom[im] = 999.9;
6702 for( acpti=ac; acpti!=NULL; acpti=acpti->next )
6705 if( i != im && i != jm )
// Half-matrix addressing: row = smaller index, column = index difference.
6728 eff0 = eff[miniim][maxiim-miniim];
6729 eff1 = eff[minijm][maxijm-minijm];
// The assignment is completed by one of the two expressions below; presumably
// only one of them is compiled -- TODO confirm.
6730 tmpdouble = eff[miniim][maxiim-miniim] =
6732 MIN( eff0, eff1 ) * sueff1 + ( eff0 + eff1 ) * sueff05;
6734 (clusterfuncpt[0])( eff0, eff1 );
6736 if( tmpdouble < mindisfrom[i] )
6738 mindisfrom[i] = tmpdouble;
6741 if( tmpdouble < mindisfrom[im] )
6743 mindisfrom[im] = tmpdouble;
6746 if( nearest[i] == jm )
6753 // reporterr( "im,jm=%d,%d\n", im, jm );
// Unlink jm from the chain of live clusters.
6754 acjmprev = ac[jm].prev;
6755 acjmnext = ac[jm].next;
6756 acjmprev->next = acjmnext;
6757 if( acjmnext != NULL )
6758 acjmnext->prev = acjmprev;
// Row jm of the distance matrix is released here.
6761 free( (void *)eff[jm] ); eff[jm] = NULL;
// Entries whose cached nearest neighbour is im may need a fresh search.
6764 #if 1 // muscle seems to miss this.
6765 for( acpti=ac; acpti!=NULL; acpti=acpti->next )
6768 if( nearest[i] == im )
6780 if( eff[miniim][maxiim-miniim] > mindisfrom[i] )
6781 setnearest( nseq, ac, eff, mindisfrom+i, nearest+i, i );
// Per-step dump of branch lengths and member lists.
6788 fprintf( stdout, "vSTEP-%03d:\n", k+1 );
6789 fprintf( stdout, "len0 = %f\n", len[k][0] );
6790 for( i=0; topol[k][0][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][0][i]+1 );
6791 fprintf( stdout, "\n" );
6792 fprintf( stdout, "len1 = %f\n", len[k][1] );
6793 for( i=0; topol[k][1][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][1][i]+1 );
6794 fprintf( stdout, "\n" );
// Release work arrays.
6797 free( (void *)tmptmplen ); tmptmplen = NULL;
6798 free( hist ); hist = NULL;
6799 free( (char *)ac ); ac = NULL;
6800 free( (void *)nmemar ); nmemar = NULL;
/*
 * veryfastsupg_double_loadtree( nseq, eff, topol, len, name )
 *
 * Visible behaviour: opens "_guidetree" for reading, builds one text label
 * per sequence in tree[i], then runs nseq-1 steps.  Each step calls
 * loadtreeoneline(), fills topol[k][0..1] and len[k][0..1], updates eff[][],
 * unlinks jm from the ac[] chain and joins tree[im] and tree[jm] into one
 * parenthesised string.  The last string is written to "infile.tree".
 *
 * NOTE(review): this listing is partial -- every line starts with its
 * original line number, and short lines, braces and preprocessor/comment
 * delimiters are not shown -- so some visible statements may be inactive
 * and the exact nesting cannot be confirmed from here.
 */
6813 void veryfastsupg_double_loadtree( int nseq, double **eff, int ***topol, double **len, char **name )
6815 int i, j, k, miniim, maxiim, minijm, maxijm;
6816 int *intpt, *intpt2;
6823 int im = -1, jm = -1;
6824 int prevnode, acjmnext, acjmprev;
6825 int *pt1, *pt2, *pt11, *pt22;
6829 char *nametmp, *nameptr, *tmpptr; //static?
// Guide-tree input file; the message below is the open-failure report.
6832 fp = fopen( "_guidetree", "r" );
6835 reporterr( "cannot open _guidetree\n" );
6842 // treetmp = AllocateCharVec( njob*50 );
6844 // tree = AllocateCharMtx( njob, njob*50 );
// Work buffers are sized with njob (not declared in this block), while the
// loops below iterate up to the nseq parameter.
6845 tree = AllocateCharMtx( njob, 0 );
6846 nametmp = AllocateCharVec( 1000 ); // too long
6847 hist = AllocateIntVec( njob );
6848 ac = (Achain *)malloc( njob * sizeof( Achain ) );
// Build one label per sequence: nametmp is cleared, then filled character by
// character (the fill statements themselves are not visible).
6851 for( i=0; i<nseq; i++ )
6853 for( j=0; j<999; j++ ) nametmp[j] = 0;
6854 for( j=0; j<999; j++ )
// Characters tested here: alphanumerics and / = - { }
6859 else if( isalnum( namec ) || namec == '/' || namec == '=' || namec == '-' || namec == '{' || namec == '}' )
6865 // sprintf( tree[i], "%d_l=%d_%.20s", i+1, nlen[i], nametmp+1 );
// NOTE(review): if strstr() returns NULL here, adding 8 to it is invalid;
// the condition guarding this line is not visible -- confirm the marker is
// always present when this branch runs.
6867 nameptr = strstr( nametmp, "_numo_e" ) + 8;
6869 nameptr = nametmp + 1;
6871 if( (tmpptr=strstr( nameptr, "_oe_" )) ) nameptr = tmpptr + 4; // because '=' is converted to '_'
6873 tree[i] = calloc( strlen( nametmp )+100, sizeof( char ) ); // +100 leaves room for the number
6874 if( tree[i] == NULL )
6876 reporterr( "Cannot allocate tree!\n" );
// Label format: newline, 1-based index, underscore, up to 900 name chars, newline.
6879 sprintf( tree[i], "\n%d_%.900s\n", i+1, nameptr );
6882 for( i=0; i<nseq; i++ )
// -1 marks the end of the ac[] index chain.
6888 ac[nseq-1].next = -1;
6890 for( i=0; i<nseq; i++ ) hist[i] = -1;
// Main loop: one merge step per iteration, progress printed every 10 steps.
6893 for( k=0; k<nseq-1; k++ )
6895 if( k % 10 == 0 ) reporterr( "%d / %d\r", k, nseq );
// Scan over all chain pairs (i,j) comparing eff[i][j] with minscore.
// NOTE(review): both this scan and the loadtreeoneline() call below are
// visible; presumably only one of them is active -- confirm.
6899 for( i=0; ac[i].next!=-1; i=ac[i].next )
6901 for( j=ac[i].next; j!=-1; j=ac[j].next )
6903 tmpdouble = eff[i][j];
6904 if( tmpdouble < minscore )
6906 minscore = tmpdouble;
// lenfl[] is preset to -1.0 so that missing branch lengths can be detected below.
6912 lenfl[0] = lenfl[1] = -1.0;
6913 loadtreeoneline( node, lenfl, fp );
6916 minscore = eff[im][jm];
// Reject out-of-range indices or members whose tree string is already gone.
6918 if( im > nseq-1 || jm > nseq-1 || tree[im] == NULL || tree[jm] == NULL )
6920 reporterr( "\n\nCheck the guide tree.\n" );
6921 reporterr( "im=%d, jm=%d\n", im+1, jm+1 );
6922 reporterr( "Please use newick2mafft.rb to generate a tree file from a newick tree.\n\n" );
6927 // reporterr( "im=%d, jm=%d, minscore = %f\n", im, jm, minscore );
6930 if( lenfl[0] == -1.0 || lenfl[1] == -1.0 )
6932 reporterr( "\n\nWARNING: Branch length is not given.\n" );
// Negative branch lengths are clamped to zero.
6936 if( lenfl[0] < 0.0 ) lenfl[0] = 0.0;
6937 if( lenfl[1] < 0.0 ) lenfl[1] = 0.0;
6940 // reporterr( "im=%d, jm=%d\n", im, jm );
// Member list of side 0: copied from two -1 terminated lists (pt11, pt22),
// which are assigned in lines not shown here.
6942 intpt = topol[k][0];
6943 prevnode = hist[im];
6944 if( prevnode == -1 )
6951 pt1 = topol[prevnode][0];
6952 pt2 = topol[prevnode][1];
6963 for( intpt2=pt11; *intpt2!=-1; )
6964 *intpt++ = *intpt2++;
6965 for( intpt2=pt22; *intpt2!=-1; )
6966 *intpt++ = *intpt2++;
// Member list of side 1, built the same way from hist[jm].
6970 intpt = topol[k][1];
6971 prevnode = hist[jm];
6972 if( prevnode == -1 )
6979 pt1 = topol[prevnode][0];
6980 pt2 = topol[prevnode][1];
6991 for( intpt2=pt11; *intpt2!=-1; )
6992 *intpt++ = *intpt2++;
6993 for( intpt2=pt22; *intpt2!=-1; )
6994 *intpt++ = *intpt2++;
// Two alternative assignments of len[k][] are visible; presumably they sit
// in different branches -- confirm which one is active.
7001 len[k][0] = minscore - tmptmplen[im];
7002 len[k][1] = minscore - tmptmplen[jm];
7004 len[k][0] = lenfl[0];
7005 len[k][1] = lenfl[1];
// For every other chain member i, replace its distance to im by a blend of
// the minimum and the mean of (i,im) and (i,jm), weighted by sueff_global.
7011 for( i=0; i!=-1; i=ac[i].next )
7013 if( i != im && i != jm )
7036 eff0 = eff[miniim][maxiim];
7037 eff1 = eff[minijm][maxijm];
7038 eff[miniim][maxiim] =
7039 MIN( eff0, eff1 ) * ( 1.0 - sueff_global ) +
7040 ( eff0 + eff1 ) * 0.5 * sueff_global;
// Unlink jm from the chain; the successor is only touched when it exists.
7043 acjmprev = ac[jm].prev;
7044 acjmnext = ac[jm].next;
7045 ac[acjmprev].next = acjmnext;
7046 if( acjmnext != -1 )
7047 ac[acjmnext].prev = acjmprev;
// Join the two subtree strings into "(a:len0,b:len1)" and store it in tree[im].
7050 treetmp = realloc( treetmp, strlen( tree[im] ) + strlen( tree[jm] ) + 100 ); // 22 would be enough for (:%7,:%7); %7 may be negative
7053 reporterr( "Cannot allocate treetmp\n" );
7056 sprintf( treetmp, "(%s:%7.5f,%s:%7.5f)", tree[im], len[k][0], tree[jm], len[k][1] );
7059 tree[im] = calloc( strlen( treetmp )+1, sizeof( char ) );
7061 if( tree[im] == NULL )
7063 reporterr( "Cannot reallocate tree!\n" );
7066 strcpy( tree[im], treetmp );
7068 // sprintf( treetmp, "(%s:%7.5f,%s:%7.5f)", tree[im], len[k][0], tree[jm], len[k][1] );
7069 // strcpy( tree[im], treetmp );
// Per-step dump of branch lengths and member lists to stdout.
7072 fprintf( stdout, "STEP-%03d:\n", k+1 );
7073 fprintf( stdout, "len0 = %f\n", len[k][0] );
7074 for( i=0; topol[k][0][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][0][i] );
7075 fprintf( stdout, "\n" );
7076 fprintf( stdout, "len1 = %f\n", len[k][1] );
7077 for( i=0; topol[k][1][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][1][i] );
7078 fprintf( stdout, "\n" );
// Write the final tree string.
// NOTE(review): the fopen() result is used on the very next line without a
// NULL check, and fp is the same variable used for "_guidetree" -- confirm
// the input stream was closed in a line not shown here.
7084 fp = fopen( "infile.tree", "w" );
7085 fprintf( fp, "%s\n", treetmp );
7086 // fprintf( fp, "by veryfastsupg_double_loadtree\n" );
7093 FreeCharMtx( tree );
7099 // reporterr( "reconstructing eff[][]\n" ); // hat2 always exists, so this is not needed.
// Zero eff[][] and then, for every step, add len[k][0]+len[k][1] to both
// eff[im][jm] and eff[jm][im] for each cross pair of members.
7100 for( k=0; k<nseq; k++ ) for( i=0; i<nseq; i++ ) eff[i][k] = 0.0;
7101 for( k=0; k<nseq-1; k++ )
7103 reporterr( "len[k][0], len[k][1] = %f, %f\n", len[k][0], len[k][1] );
7104 for( i=0; (im=topol[k][0][i])>-1; i++ )
7106 reporterr( " %03d", im );
7108 fprintf( stdout, "\n" );
7109 for( i=0; (jm=topol[k][1][i])>-1; i++ )
7111 reporterr( " %03d", jm );
7113 for( i=0; (im=topol[k][0][i])>-1; i++ ) for( j=0; (jm=topol[k][1][j])>-1; j++ )
7115 eff[im][jm] += len[k][0] + len[k][1];
7116 eff[jm][im] += len[k][0] + len[k][1];
/*
 * veryfastsupg_double( nseq, eff, topol, len )
 *
 * Visible behaviour: allocates hist, tmptmplen and ac (njob entries each)
 * and runs nseq-1 steps.  Each step scans eff[i][j] along the ac[] chain
 * for a smaller value than minscore, fills topol[k][0..1], sets
 * len[k][*] = minscore - tmptmplen[*], stores minscore in tmptmplen[im],
 * recomputes eff[miniim][maxiim] and unlinks jm from the chain.  The work
 * buffers are freed at the end.
 *
 * NOTE(review): this listing is partial (line-number prefixes; short lines,
 * braces and preprocessor/comment delimiters are not shown), so nesting and
 * whether each visible statement is active cannot be confirmed from here.
 */
7123 void veryfastsupg_double( int nseq, double **eff, int ***topol, double **len )
7125 int i, j, k, miniim, maxiim, minijm, maxijm;
7126 int *intpt, *intpt2;
7129 static double *tmptmplen = NULL;
7130 static int *hist = NULL;
7131 static Achain *ac = NULL;
7133 int im = -1, jm = -1;
7134 int prevnode, acjmnext, acjmprev;
7135 int *pt1, *pt2, *pt11, *pt22;
// Buffers are sized with njob (not declared in this block); loops use nseq.
7138 hist = AllocateIntVec( njob );
7139 tmptmplen = (double *)malloc( njob * sizeof( double ) );
7140 ac = (Achain *)malloc( njob * sizeof( Achain ) );
7143 for( i=0; i<nseq; i++ )
// -1 marks the end of the ac[] index chain.
7149 ac[nseq-1].next = -1;
7151 for( i=0; i<nseq; i++ ) tmptmplen[i] = 0.0;
7152 for( i=0; i<nseq; i++ ) hist[i] = -1;
// Main loop: one merge step per iteration, progress printed every 10 steps.
7155 for( k=0; k<nseq-1; k++ )
7157 if( k % 10 == 0 ) reporterr( "%d / %d\r", k, nseq );
// Search all chain pairs (i,j) for the smallest eff[i][j].
7160 for( i=0; ac[i].next!=-1; i=ac[i].next )
7162 for( j=ac[i].next; j!=-1; j=ac[j].next )
7164 tmpdouble = eff[i][j];
7165 if( tmpdouble < minscore )
7167 minscore = tmpdouble;
7173 // reporterr( "im=%d, jm=%d\n", im, jm );
// Member list of side 0: copied from two -1 terminated lists (pt11, pt22),
// which are assigned in lines not shown here.
7175 intpt = topol[k][0];
7176 prevnode = hist[im];
7177 if( prevnode == -1 )
7184 pt1 = topol[prevnode][0];
7185 pt2 = topol[prevnode][1];
7196 for( intpt2=pt11; *intpt2!=-1; )
7197 *intpt++ = *intpt2++;
7198 for( intpt2=pt22; *intpt2!=-1; )
7199 *intpt++ = *intpt2++;
// Member list of side 1, built the same way from hist[jm].
7203 intpt = topol[k][1];
7204 prevnode = hist[jm];
7205 if( prevnode == -1 )
7212 pt1 = topol[prevnode][0];
7213 pt2 = topol[prevnode][1];
7224 for( intpt2=pt11; *intpt2!=-1; )
7225 *intpt++ = *intpt2++;
7226 for( intpt2=pt22; *intpt2!=-1; )
7227 *intpt++ = *intpt2++;
// Branch lengths are the step's minscore minus the value stored for each side.
7233 len[k][0] = minscore - tmptmplen[im];
7234 len[k][1] = minscore - tmptmplen[jm];
7236 tmptmplen[im] = minscore;
// For every other chain member i, replace its distance to im by a blend of
// the minimum and the mean of (i,im) and (i,jm), weighted by sueff_global.
7240 for( i=0; i!=-1; i=ac[i].next )
7242 if( i != im && i != jm )
7265 eff0 = eff[miniim][maxiim];
7266 eff1 = eff[minijm][maxijm];
7267 eff[miniim][maxiim] =
7268 MIN( eff0, eff1 ) * ( 1.0 - sueff_global ) +
7269 ( eff0 + eff1 ) * 0.5 * sueff_global;
// Unlink jm from the chain; the successor is only touched when it exists.
7272 acjmprev = ac[jm].prev;
7273 acjmnext = ac[jm].next;
7274 ac[acjmprev].next = acjmnext;
7275 if( acjmnext != -1 )
7276 ac[acjmnext].prev = acjmprev;
// Per-step dump of branch lengths and member lists to stdout.
7278 fprintf( stdout, "STEP-%03d:\n", k+1 );
7279 fprintf( stdout, "len0 = %f\n", len[k][0] );
7280 for( i=0; topol[k][0][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][0][i] );
7281 fprintf( stdout, "\n" );
7282 fprintf( stdout, "len1 = %f\n", len[k][1] );
7283 for( i=0; topol[k][1][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][1][i] );
7284 fprintf( stdout, "\n" );
// Release the work buffers and reset the static pointers.
7289 free( (void *)tmptmplen ); tmptmplen = NULL;
7290 free( hist ); hist = NULL;
7291 free( (char *)ac ); ac = NULL;
/*
 * veryfastsupg_double_outtree( nseq, eff, topol, len, name )
 * (marked "not used" by the original author)
 *
 * Visible behaviour: picks a two-argument combining function according to
 * treemethod ('X', 'E' or 'q'), builds a label per sequence, then runs
 * nseq-1 steps.  Each step searches eff[][] along the ac[] chain, fills
 * topol[k][0..1] and len[k][0..1], updates eff[][] through the selected
 * function, unlinks jm and joins the tree strings.  The last string is
 * written to "infile.tree".
 *
 * NOTE(review): this listing is partial (line-number prefixes; short lines,
 * braces and preprocessor/comment delimiters are not shown), so nesting and
 * whether each visible statement is active cannot be confirmed from here.
 */
7296 void veryfastsupg_double_outtree( int nseq, double **eff, int ***topol, double **len, char **name ) // not used
7298 int i, j, k, miniim, maxiim, minijm, maxijm;
7299 int *intpt, *intpt2;
7302 static double *tmptmplen = NULL;
7303 static int *hist = NULL;
7304 static Achain *ac = NULL;
7307 static char *treetmp;
7308 static char *nametmp;
7310 int im = -1, jm = -1;
7311 int prevnode, acjmnext, acjmprev;
7312 int *pt1, *pt2, *pt11, *pt22;
7313 double (*clusterfuncpt[1])(double,double);
// sueff1 / sueff05 are derived from sueff_global; their declarations and
// readers are not visible in this block.
7316 sueff1 = 1 - sueff_global;
7317 sueff05 = sueff_global * 0.5;
// Select the distance-combining function from treemethod.
7318 if ( treemethod == 'X' )
7319 clusterfuncpt[0] = cluster_mix_double;
7320 else if ( treemethod == 'E' )
7321 clusterfuncpt[0] = cluster_average_double;
7322 else if ( treemethod == 'q' )
7323 clusterfuncpt[0] = cluster_minimum_double;
7326 reporterr( "Unknown treemethod, %c\n", treemethod );
// Fixed-size work buffers: njob*50 characters per tree string.
7332 treetmp = AllocateCharVec( njob*50 );
7333 tree = AllocateCharMtx( njob, njob*50 );
7334 hist = AllocateIntVec( njob );
7335 tmptmplen = (double *)malloc( njob * sizeof( double ) );
7336 ac = (Achain *)malloc( njob * sizeof( Achain ) );
7337 nametmp = AllocateCharVec( 31 );
7340 // for( i=0; i<nseq; i++ ) sprintf( tree[i], "%d", i+1 );
// Build one label per sequence from the first 30 characters of name[i];
// alphanumeric characters are copied as they are.
7341 for( i=0; i<nseq; i++ )
7343 for( j=0; j<30; j++ ) nametmp[j] = 0;
7344 for( j=0; j<30; j++ )
7346 if( isalnum( name[i][j] ) )
7347 nametmp[j] = name[i][j];
// Label format: 1-based index, underscore, up to 20 chars starting at nametmp+1.
7352 sprintf( tree[i], "%d_%.20s", i+1, nametmp+1 );
7355 for( i=0; i<nseq; i++ )
// -1 marks the end of the ac[] index chain.
7361 ac[nseq-1].next = -1;
7363 for( i=0; i<nseq; i++ ) tmptmplen[i] = 0.0;
7364 for( i=0; i<nseq; i++ ) hist[i] = -1;
// Main loop: one merge step per iteration, progress printed every 10 steps.
7367 for( k=0; k<nseq-1; k++ )
7369 if( k % 10 == 0 ) reporterr( "%d / %d\r", k, nseq );
// Search all chain pairs (i,j) for the smallest eff[i][j].
7372 for( i=0; ac[i].next!=-1; i=ac[i].next )
7374 for( j=ac[i].next; j!=-1; j=ac[j].next )
7376 tmpdouble = eff[i][j];
7377 if( tmpdouble < minscore )
7379 minscore = tmpdouble;
7385 // reporterr( "im=%d, jm=%d\n", im, jm );
// Member list of side 0: copied from two -1 terminated lists (pt11, pt22),
// which are assigned in lines not shown here.
7387 intpt = topol[k][0];
7388 prevnode = hist[im];
7389 if( prevnode == -1 )
7396 pt1 = topol[prevnode][0];
7397 pt2 = topol[prevnode][1];
7408 for( intpt2=pt11; *intpt2!=-1; )
7409 *intpt++ = *intpt2++;
7410 for( intpt2=pt22; *intpt2!=-1; )
7411 *intpt++ = *intpt2++;
// Member list of side 1, built the same way from hist[jm].
7415 intpt = topol[k][1];
7416 prevnode = hist[jm];
7417 if( prevnode == -1 )
7424 pt1 = topol[prevnode][0];
7425 pt2 = topol[prevnode][1];
7436 for( intpt2=pt11; *intpt2!=-1; )
7437 *intpt++ = *intpt2++;
7438 for( intpt2=pt22; *intpt2!=-1; )
7439 *intpt++ = *intpt2++;
// Branch lengths are the step's minscore minus the value stored for each side.
7445 len[k][0] = minscore - tmptmplen[im];
7446 len[k][1] = minscore - tmptmplen[jm];
7448 tmptmplen[im] = minscore;
// For every other chain member i, combine (i,im) and (i,jm) through the
// function selected above and store the result as the new (i,im) distance.
7452 for( i=0; i!=-1; i=ac[i].next )
7454 if( i != im && i != jm )
7477 eff0 = eff[miniim][maxiim];
7478 eff1 = eff[minijm][maxijm];
7479 eff[miniim][maxiim] =
7480 (clusterfuncpt[0])( eff0, eff1 );
// Unlink jm from the chain; the successor is only touched when it exists.
7483 acjmprev = ac[jm].prev;
7484 acjmnext = ac[jm].next;
7485 ac[acjmprev].next = acjmnext;
7486 if( acjmnext != -1 )
7487 ac[acjmnext].prev = acjmprev;
// Join the two subtree strings into "(a:len0,b:len1)".
// NOTE(review): treetmp and tree[im] have the fixed sizes allocated above and
// no length check is visible before these writes -- confirm they cannot overflow.
7489 sprintf( treetmp, "(%s:%7.5f,%s:%7.5f)", tree[im], len[k][0], tree[jm], len[k][1] );
7490 strcpy( tree[im], treetmp );
// Per-step dump of branch lengths and member lists to stdout.
7492 fprintf( stdout, "STEP-%03d:\n", k+1 );
7493 fprintf( stdout, "len0 = %f\n", len[k][0] );
7494 for( i=0; topol[k][0][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][0][i] );
7495 fprintf( stdout, "\n" );
7496 fprintf( stdout, "len1 = %f\n", len[k][1] );
7497 for( i=0; topol[k][1][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][1][i] );
7498 fprintf( stdout, "\n" );
// Write the final tree string.
// NOTE(review): the fopen() result is used on the next line without a NULL check.
7501 fpout = fopen( "infile.tree", "w" );
7502 fprintf( fpout, "%s\n", treetmp );
7503 // fprintf( fpout, "by veryfastsupg_double_outtree\n" );
// Release work buffers; frees of treetmp / nametmp are not visible here.
7507 free( (void *)tmptmplen ); tmptmplen = NULL;
7508 free( hist ); hist = NULL;
7509 free( (char *)ac ); ac = NULL;
7510 FreeCharMtx( tree );
/*
 * veryfastsupg( nseq, oeff, topol, len )
 *
 * Visible behaviour: copies oeff[][] into an integer matrix eff[][] scaled
 * by INTMTXSCALE (rounded by adding 0.5), then runs nseq-1 steps.  Each
 * step compares chain pairs against minscore, fills topol[k][0..1], sets
 * len[k][*] = minscoref - tmptmplen[*], recomputes eff[miniim][maxiim] and
 * unlinks jm from the ac[] chain.  Work buffers are freed at the end.
 *
 * NOTE(review): this listing is partial (line-number prefixes; short lines,
 * braces and preprocessor/comment delimiters are not shown), so nesting and
 * whether each visible statement is active cannot be confirmed from here.
 */
7516 void veryfastsupg( int nseq, double **oeff, int ***topol, double **len )
7518 int i, j, k, miniim, maxiim, minijm, maxijm;
7519 int *intpt, *intpt2;
7522 static double *tmptmplen = NULL;
7523 static int **eff = NULL;
7524 static int *hist = NULL;
7525 static Achain *ac = NULL;
7528 int im = -1, jm = -1;
7529 int prevnode, acjmnext, acjmprev;
7530 int *pt1, *pt2, *pt11, *pt22;
// Buffers are sized with njob (not declared in this block); loops use nseq.
7533 eff = AllocateIntMtx( njob, njob );
7534 hist = AllocateIntVec( njob );
7535 tmptmplen = (double *)malloc( njob * sizeof( double ) );
7536 ac = (Achain *)malloc( njob * sizeof( Achain ) );
// Integer copy of the distances: scale by INTMTXSCALE and round.
7539 for( i=0; i<nseq; i++ )
7541 for( j=0; j<nseq; j++ )
7543 eff[i][j] = (int)( oeff[i][j] * INTMTXSCALE + 0.5 );
7547 for( i=0; i<nseq; i++ )
// -1 marks the end of the ac[] index chain.
7553 ac[nseq-1].next = -1;
7555 for( i=0; i<nseq; i++ ) tmptmplen[i] = 0.0;
7556 for( i=0; i<nseq; i++ ) hist[i] = -1;
// Main loop: one merge step per iteration, progress printed every 10 steps.
7559 for( k=0; k<nseq-1; k++ )
7561 if( k % 10 == 0 ) reporterr( "%d / %d\r", k, nseq );
// Initial upper bound for the minimum search, in scaled integer units.
7563 minscore = INTMTXSCALE*4;
7564 for( i=0; ac[i].next!=-1; i=ac[i].next )
7566 for( j=ac[i].next; j!=-1; j=ac[j].next )
7569 if( tmpint < minscore )
// Convert the scaled integer minimum back to a double and halve it.
7576 minscoref = (double)minscore * 0.5 / ( INTMTXSCALE );
7578 // reporterr( "im=%d, jm=%d\n", im, jm );
// Member list of side 0: copied from two -1 terminated lists (pt11, pt22),
// which are assigned in lines not shown here.
7581 intpt = topol[k][0];
7582 prevnode = hist[im];
7583 if( prevnode == -1 )
7590 pt1 = topol[prevnode][0];
7591 pt2 = topol[prevnode][1];
7602 for( intpt2=pt11; *intpt2!=-1; )
7603 *intpt++ = *intpt2++;
7604 for( intpt2=pt22; *intpt2!=-1; )
7605 *intpt++ = *intpt2++;
// Member list of side 1, built the same way from hist[jm].
7609 intpt = topol[k][1];
7610 prevnode = hist[jm];
7611 if( prevnode == -1 )
7618 pt1 = topol[prevnode][0];
7619 pt2 = topol[prevnode][1];
7630 for( intpt2=pt11; *intpt2!=-1; )
7631 *intpt++ = *intpt2++;
7632 for( intpt2=pt22; *intpt2!=-1; )
7633 *intpt++ = *intpt2++;
// NOTE(review): these lines read pair[][], which has no visible declaration
// in this function; presumably an alternative (inactive) way of building the
// member lists -- confirm.
7637 intpt = topol[k][0];
7638 for( i=0; i<nseq; i++ )
7639 if( pair[im][i] > -2 )
7643 intpt = topol[k][1];
7644 for( i=0; i<nseq; i++ )
7645 if( pair[jm][i] > -2 )
// Branch lengths are minscoref minus the value stored for each side.
7650 len[k][0] = minscoref - tmptmplen[im];
7651 len[k][1] = minscoref - tmptmplen[jm];
7653 tmptmplen[im] = minscoref;
// For every other chain member i, blend the minimum and the mean of (i,im)
// and (i,jm), weighted by sueff_global.
// NOTE(review): eff is declared int** above and the expression contains
// double constants, so the stored value is converted to int (the original
// "int??" remarks flag the same doubt).
7657 for( i=0; i!=-1; i=ac[i].next )
7659 if( i != im && i != jm )
7682 eff0 = eff[miniim][maxiim];
7683 eff1 = eff[minijm][maxijm];
7684 eff[miniim][maxiim] =
7685 MIN( eff0, eff1 ) * ( 1.0 - sueff_global ) + // int??
7686 ( eff0 + eff1 ) * 0.5 * sueff_global; // int??
// Unlink jm from the chain; the successor is only touched when it exists.
7689 acjmprev = ac[jm].prev;
7690 acjmnext = ac[jm].next;
7691 ac[acjmprev].next = acjmnext;
7692 if( acjmnext != -1 )
7693 ac[acjmnext].prev = acjmprev;
// Per-step dump of branch lengths and member lists to stdout.
7695 fprintf( stdout, "STEP-%03d:\n", k+1 );
7696 fprintf( stdout, "len0 = %f\n", len[k][0] );
7697 for( i=0; topol[k][0][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][0][i] );
7698 fprintf( stdout, "\n" );
7699 fprintf( stdout, "len1 = %f\n", len[k][1] );
7700 for( i=0; topol[k][1][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][1][i] );
7701 fprintf( stdout, "\n" );
// Release the work buffers and reset the static pointers.
7705 FreeIntMtx( eff ); eff = NULL;
7706 free( (void *)tmptmplen ); tmptmplen = NULL;
7707 free( hist ); hist = NULL;
7708 free( (char *)ac ); ac = NULL;
/*
 * fastsupg( nseq, oeff, topol, len )
 *
 * Visible behaviour: copies oeff[][] into a work matrix eff[][], keeps a
 * membership table pair[][] (pair[i][i] = 1 initially) and runs nseq-1
 * steps.  Each step searches eff[][] along the ac[] chain, fills
 * topol[k][0..1] from pair[][], sets len[k][*] = minscore - tmplen[*],
 * merges row jm of pair[][] into row im, recomputes eff[miniim][maxiim]
 * and unlinks jm from the chain.
 *
 * NOTE(review): this listing is partial (line-number prefixes; short lines,
 * braces and preprocessor/comment delimiters are not shown), so nesting and
 * whether each visible statement is active cannot be confirmed from here.
 */
7712 void fastsupg( int nseq, double **oeff, int ***topol, double **len )
7714 int i, j, k, miniim, maxiim, minijm, maxijm;
// Two declaration styles for eff / pair are visible (arrays here, static
// pointers below); presumably alternatives chosen by directives not shown.
7716 double eff[nseq][nseq];
7717 char pair[njob][njob];
7719 static double *tmplen;
7723 static double **eff = NULL;
7724 static char **pair = NULL;
7727 int im = -1, jm = -1;
7730 eff = AllocateFloatMtx( njob, njob );
7731 pair = AllocateCharMtx( njob, njob );
7732 tmplen = AllocateFloatVec( njob );
7733 ac = (Achain *)calloc( njob, sizeof( Achain ) );
// Work copy of the input distances.
7737 for( i=0; i<nseq; i++ )
7739 for( j=0; j<nseq; j++ )
7741 eff[i][j] = (double)oeff[i][j];
7745 for( i=0; i<nseq; i++ )
// -1 marks the end of the ac[] index chain.
7751 ac[nseq-1].next = -1;
7753 for( i=0; i<nseq; i++ ) tmplen[i] = 0.0;
// pair[][] starts as the identity: each row contains only itself.
7754 for( i=0; i<nseq; i++ ) for( j=0; j<nseq; j++ ) pair[i][j] = 0;
7755 for( i=0; i<nseq; i++ ) pair[i][i] = 1;
// Main loop: one merge step per iteration, progress printed every 10 steps.
7758 for( k=0; k<nseq-1; k++ )
7760 if( k % 10 == 0 ) reporterr( "%d / %d\r", k, nseq );
// Search all chain pairs (i,j) for the smallest eff[i][j].
7763 for( i=0; ac[i].next!=-1; i=ac[i].next )
7764 // for( i=0; i<nseq-1; i++ )
7766 for( j=ac[i].next; j!=-1; j=ac[j].next )
7767 // for( j=i+1; j<nseq; j++ )
7769 tmpdouble = eff[i][j];
7770 if( tmpdouble < minscore )
7772 minscore = tmpdouble;
7778 // reporterr( "im=%d, jm=%d\n", im, jm );
// Member lists: indices whose pair[im][] / pair[jm][] entry is positive.
7780 intpt = topol[k][0];
7781 for( i=0; i<nseq; i++ )
7782 if( pair[im][i] > 0 )
7786 intpt = topol[k][1];
7787 for( i=0; i<nseq; i++ )
7788 if( pair[jm][i] > 0 )
// Branch lengths are the step's minscore minus the value stored for each side.
7794 len[k][0] = (double)minscore - tmplen[im];
7795 len[k][1] = (double)minscore - tmplen[jm];
7797 tmplen[im] = (double)minscore;
// Merge row jm into row im, then clear row jm.
7799 for( i=0; i<nseq; i++ ) pair[im][i] += ( pair[jm][i] > 0 );
7800 for( i=0; i<nseq; i++ ) pair[jm][i] = 0;
7802 // for( i=0; i<nseq; i++ )
// For every other chain member i, replace its distance to im by a blend of
// the minimum and the mean of (i,im) and (i,jm), weighted by sueff_global.
7803 for( i=0; i!=-1; i=ac[i].next )
7805 if( i != im && i != jm )
7828 eff0 = eff[miniim][maxiim];
7829 eff1 = eff[minijm][maxijm];
7830 eff[miniim][maxiim] =
7831 MIN( eff0, eff1 ) * ( 1.0 - sueff_global ) +
7832 ( eff0 + eff1 ) * 0.5 * sueff_global;
7833 // eff[minijm][maxijm] = 9999.0;
// Unlink jm from the chain.
// NOTE(review): ac[jm].next is used as an index without a visible test
// against -1 (the end marker set above); if jm is the last chain element the
// second line writes ac[-1].prev -- confirm whether a guard exists.
7836 ac[ac[jm].prev].next = ac[jm].next;
7837 ac[ac[jm].next].prev = ac[jm].prev;
7838 // eff[im][jm] = 9999.0;
// Per-step dump of branch lengths and member lists through reporterr.
7840 reporterr( "STEP-%03d:\n", k+1 );
7841 reporterr( "len0 = %f\n", len[k][0] );
7842 for( i=0; topol[k][0][i]>-1; i++ ) reporterr( " %03d", topol[k][0][i] );
7844 reporterr( "len1 = %f\n", len[k][1] );
7845 for( i=0; topol[k][1][i]>-1; i++ ) reporterr( " %03d", topol[k][1][i] );
// The releases of the work buffers are commented out in the original.
7851 // FreeFloatMtx( eff );
7852 // FreeCharMtx( pair );
7853 // FreeFloatVec( tmplen );
/*
 * supg( nseq, oeff, topol, len )
 *
 * Visible behaviour: copies oeff[][] into eff[][], keeps a membership table
 * pair[][] and runs nseq-1 steps.  Each step searches the upper triangle of
 * eff[][] for the smallest value, fills topol[k][0..1] from pair[][], sets
 * len[k][*] = minscore/2 - tmplen[*], merges row jm of pair[][] into row im,
 * recomputes eff[miniim][maxiim] and overwrites the retired entries with 9999.0.
 *
 * NOTE(review): this listing is partial (line-number prefixes; short lines,
 * braces and preprocessor/comment delimiters are not shown), so nesting and
 * whether each visible statement is active cannot be confirmed from here.
 */
7856 void supg( int nseq, double **oeff, int ***topol, double **len )
7858 int i, j, k, miniim, maxiim, minijm, maxijm;
// Two declaration styles for eff / pair are visible (arrays here, static
// pointers below); presumably alternatives chosen by directives not shown.
7860 double eff[nseq][nseq];
7861 char pair[njob][njob];
7863 static double *tmplen;
7865 double **doubleptpt;
7869 static double **eff = NULL;
7870 static char **pair = NULL;
7873 eff = AllocateFloatMtx( njob, njob );
7874 pair = AllocateCharMtx( njob, njob );
7875 tmplen = AllocateFloatVec( njob );
// Work copy of the input distances.
7880 for( i=0; i<nseq; i++ )
7882 for( j=0; j<nseq; j++ )
7884 eff[i][j] = (double)oeff[i][j];
7887 for( i=0; i<nseq; i++ ) tmplen[i] = 0.0;
// pair[][] starts as the identity: each row contains only itself.
7888 for( i=0; i<nseq; i++ ) for( j=0; j<nseq; j++ ) pair[i][j] = 0;
7889 for( i=0; i<nseq; i++ ) pair[i][i] = 1;
// Main loop: one merge step per iteration.
7891 for( k=0; k<nseq-1; k++ )
7893 double minscore = 9999.0;
7894 int im = -1, jm = -1;
// Pointer walk over the upper triangle: for row i the scan starts at column i+1.
7898 for( i=0; i<nseq-1; i++ )
7900 doublept = *doubleptpt++ + i + 1;
7901 for( j=i+1; j<nseq; j++ )
7903 tmpdouble = *doublept++;
7904 if( tmpdouble < minscore )
7906 minscore = tmpdouble;
// Member lists: indices whose pair[im][] / pair[jm][] entry is positive.
7911 intpt = topol[k][0];
7912 for( i=0; i<nseq; i++ )
7913 if( pair[im][i] > 0 )
7917 intpt = topol[k][1];
7918 for( i=0; i<nseq; i++ )
7919 if( pair[jm][i] > 0 )
// Branch lengths are half the step's minscore minus the value stored for each side.
7923 len[k][0] = (double)minscore / 2.0 - tmplen[im];
7924 len[k][1] = (double)minscore / 2.0 - tmplen[jm];
7926 tmplen[im] = (double)minscore / 2.0;
// Merge row jm into row im, then clear row jm.
7928 for( i=0; i<nseq; i++ ) pair[im][i] += ( pair[jm][i] > 0 );
7929 for( i=0; i<nseq; i++ ) pair[jm][i] = 0;
// For every other index i, blend the minimum and the mean of (i,im) and
// (i,jm), weighted by sueff_global.
7931 for( i=0; i<nseq; i++ )
7933 if( i != im && i != jm )
7958 miniim = MIN( i, im );
7959 maxiim = MAX( i, im );
7960 minijm = MIN( i, jm );
7961 maxijm = MAX( i, jm );
7964 eff0 = eff[miniim][maxiim];
7965 eff1 = eff[minijm][maxijm];
7966 eff[miniim][maxiim] =
7967 MIN( eff0, eff1 ) * ( 1.0 - sueff_global ) +
7968 ( eff0 + eff1 ) * 0.5 * sueff_global;
// The same expression written without the eff0/eff1 temporaries; presumably
// an alternative branch -- confirm which one is active.
7970 MIN( eff[miniim][maxiim], eff[minijm][maxijm] ) * ( 1.0 - sueff_global ) +
7971 ( eff[miniim][maxiim] + eff[minijm][maxijm] ) * 0.5 * sueff_global;
// 9999.0 (the initial minscore) is stored in the retired entries.
7973 eff[minijm][maxijm] = 9999.0;
7974 eff[im][jm] = 9999.0;
// Per-step dump of branch lengths and member lists via printf.
7978 printf( "STEP-%03d:\n", k+1 );
7979 printf( "len0 = %f\n", len[k][0] );
7980 for( i=0; topol[k][0][i]>-1; i++ ) printf( " %03d", topol[k][0][i] );
7982 printf( "len1 = %f\n", len[k][1] );
7983 for( i=0; topol[k][1][i]>-1; i++ ) printf( " %03d", topol[k][1][i] );
/*
 * spg( nseq, oeff, topol, len )
 *
 * Visible behaviour: copies oeff[][] into eff[][], keeps a membership table
 * pair[][] and runs nseq-1 steps.  Each step searches the upper triangle of
 * eff[][] for the smallest value, writes the members of im and jm (with a
 * -1 terminator) into topol[k][0] and topol[k][1], sets
 * len[k][*] = minscore/2 - tmplen[*], merges row jm of pair[][] into row im
 * and replaces each (i,im) distance by the smaller of (i,im) and (i,jm).
 *
 * NOTE(review): this listing is partial (line-number prefixes; short lines,
 * braces and preprocessor/comment delimiters are not shown), so nesting and
 * whether each visible statement is active cannot be confirmed from here.
 */
7989 void spg( int nseq, double **oeff, int ***topol, double **len )
// Two declaration styles for eff / pair are visible; presumably alternatives
// chosen by directives not shown.
7994 double eff[nseq][nseq];
7995 char pair[njob][njob];
7997 double **eff = NULL;
8001 eff = AllocateDoubleMtx( njob, njob );
8002 pair = AllocateCharMtx( njob, njob );
// Work copy of the distances; tmplen[] and pair[][] are reset.
8006 for( i=0; i<nseq; i++ ) for( j=0; j<nseq; j++ ) eff[i][j] = oeff[i][j];
8007 for( i=0; i<nseq; i++ ) tmplen[i] = 0.0;
8008 for( i=0; i<nseq; i++ ) for( j=0; j<nseq; j++ ) pair[i][j] = 0;
8009 for( i=0; i<nseq; i++ ) pair[i][i] = 1;
// Main loop: one merge step per iteration.
8011 for( k=0; k<nseq-1; k++ )
8013 double minscore = 9999.0;
8014 int im = -1, jm = -1;
// Search the upper triangle for the smallest distance.
8017 for( i=0; i<nseq-1; i++ ) for( j=i+1; j<nseq; j++ )
8019 if( eff[i][j] < minscore )
8021 minscore = eff[i][j];
// Side 0 members, terminated by -1.
8025 for( i=0, count=0; i<nseq; i++ )
8026 if( pair[im][i] > 0 )
8028 topol[k][0][count] = i;
8031 topol[k][0][count] = -1;
// Side 1 members, terminated by -1.
8032 for( i=0, count=0; i<nseq; i++ )
8033 if( pair[jm][i] > 0 )
8035 topol[k][1][count] = i;
8038 topol[k][1][count] = -1;
// Branch lengths are half the step's minscore minus the value stored for each side.
8040 len[k][0] = minscore / 2.0 - tmplen[im];
8041 len[k][1] = minscore / 2.0 - tmplen[jm];
8043 tmplen[im] = minscore / 2.0;
// Merge row jm into row im, then clear row jm.
8045 for( i=0; i<nseq; i++ ) pair[im][i] += ( pair[jm][i] > 0 );
8046 for( i=0; i<nseq; i++ ) pair[jm][i] = 0;
// Keep the smaller of (i,im) and (i,jm); store 9999.0 in the retired entries.
8048 for( i=0; i<nseq; i++ )
8050 if( i != im && i != jm )
8052 eff[MIN(i,im)][MAX(i,im)] =
8053 MIN( eff[MIN(i,im)][MAX(i,im)], eff[MIN(i,jm)][MAX(i,jm)] );
8054 eff[MIN(i,jm)][MAX(i,jm)] = 9999.0;
8056 eff[im][jm] = 9999.0;
// Per-step dump of branch lengths and member lists via printf.
8059 printf( "STEP-%03d:\n", k+1 );
8060 printf( "len0 = %f\n", len[k][0] );
8061 for( i=0; topol[k][0][i]>-1; i++ ) printf( " %03d", topol[k][0][i] );
8063 printf( "len1 = %f\n", len[k][1] );
8064 for( i=0; topol[k][1][i]>-1; i++ ) printf( " %03d", topol[k][1][i] );
// ipower( x, n ): only the signature is visible in this listing; the original
// remark states the precondition n > 0.  Presumably returns x raised to the
// n-th power -- TODO confirm against the body, which is not shown.
8070 double ipower( double x, int n ) /* n > 0 */
/*
 * countnode( nseq, topol, node )
 *
 * Visible behaviour: for each of the first nseq-2 steps, increments a
 * per-sequence counter rootnode[] for every member on either side, then
 * stores rootnode[s1] + rootnode[s2] - 1 in node[min][max] for every cross
 * pair (s1 from side 0, s2 from side 1).  For the last step (nseq-2) the
 * stored value is rootnode[s1] + rootnode[s2].  Only the upper triangle of
 * node[][] is written here (see the original remark on the signature).
 *
 * NOTE(review): this listing is partial (line-number prefixes; short lines
 * and braces are not shown); the condition guarding the "Too few sequence"
 * report is not visible.
 */
8083 void countnode( int nseq, int ***topol, double **node ) /* node[j][i] != node[i][j] */
8085 int i, j, k, s1, s2;
// Fixed-size counter array; M is defined outside this block.
8086 static double rootnode[M];
8090 reporterr( "Too few sequence for countnode: nseq = %d\n", nseq );
8094 for( i=0; i<nseq; i++ ) rootnode[i] = 0;
8095 for( i=0; i<nseq-2; i++ )
// Count this step for every member of both sides.
8097 for( j=0; topol[i][0][j]>-1; j++ )
8098 rootnode[topol[i][0][j]]++;
8099 for( j=0; topol[i][1][j]>-1; j++ )
8100 rootnode[topol[i][1][j]]++;
// Record the value for each cross pair of this step.
8101 for( j=0; topol[i][0][j]>-1; j++ )
8103 s1 = topol[i][0][j];
8104 for( k=0; topol[i][1][k]>-1; k++ )
8106 s2 = topol[i][1][k];
8107 node[MIN(s1,s2)][MAX(s1,s2)] = rootnode[s1] + rootnode[s2] - 1;
// Last step: same cross-pair loop, without the "- 1".
8111 for( j=0; topol[nseq-2][0][j]>-1; j++ )
8113 s1 = topol[nseq-2][0][j];
8114 for( k=0; topol[nseq-2][1][k]>-1; k++ )
8116 s2 = topol[nseq-2][1][k];
8117 node[MIN(s1,s2)][MAX(s1,s2)] = rootnode[s1] + rootnode[s2];
/*
 * countnode_int( nseq, topol, node )
 *
 * Visible behaviour: same counting scheme as the double variant -- for the
 * first nseq-2 steps rootnode[] is incremented for every member of both
 * sides and node[min][max] = rootnode[s1] + rootnode[s2] - 1 for each cross
 * pair; for the last step the "- 1" is omitted.  Afterwards the upper
 * triangle is mirrored into the lower one, so node[][] ends up symmetric.
 *
 * NOTE(review): this listing is partial (line-number prefixes; short lines
 * and braces are not shown); the declaration of rootnode is not visible, and
 * the trailing reporterr dump may be disabled by directives not shown.
 */
8122 void countnode_int( int nseq, int ***topol, int **node ) /* node[i][j] == node[j][i] */
8124 int i, j, k, s1, s2;
8127 for( i=0; i<nseq; i++ ) rootnode[i] = 0;
8128 for( i=0; i<nseq-2; i++ )
// Count this step for every member of both sides.
8130 for( j=0; topol[i][0][j]>-1; j++ )
8131 rootnode[topol[i][0][j]]++;
8132 for( j=0; topol[i][1][j]>-1; j++ )
8133 rootnode[topol[i][1][j]]++;
// Record the value for each cross pair of this step.
8134 for( j=0; topol[i][0][j]>-1; j++ )
8136 s1 = topol[i][0][j];
8137 for( k=0; topol[i][1][k]>-1; k++ )
8139 s2 = topol[i][1][k];
8140 node[MIN(s1,s2)][MAX(s1,s2)] = rootnode[s1] + rootnode[s2] - 1;
// Last step: same cross-pair loop, without the "- 1".
8144 for( j=0; topol[nseq-2][0][j]>-1; j++ )
8146 s1 = topol[nseq-2][0][j];
8147 for( k=0; topol[nseq-2][1][k]>-1; k++ )
8149 s2 = topol[nseq-2][1][k];
8150 node[MIN(s1,s2)][MAX(s1,s2)] = rootnode[s1] + rootnode[s2];
// Mirror the upper triangle into the lower triangle.
8153 for( i=0; i<nseq-1; i++ ) for( j=i+1; j<nseq; j++ )
8154 node[j][i] = node[i][j];
// Dump of the full matrix through reporterr.
8156 reporterr( "node[][] in countnode_int" );
8157 for( i=0; i<nseq; i++ )
8159 for( j=0; j<nseq; j++ )
8161 reporterr( "%#3d", node[i][j] );
/*
 * counteff_simple_double( nseq, topol, len, node )
 *
 * Visible behaviour: for each of the nseq-1 steps, adds len[i][side] * eff[s]
 * to rootnode[s] for every member s of that side; then adds GETA3 to each
 * rootnode[i], sums them into total and writes node[i] = rootnode[i] / total,
 * so the node[] values add up to 1.
 *
 * NOTE(review): this listing is partial (line-number prefixes; short lines,
 * braces and preprocessor/comment delimiters are not shown).  The
 * initialisation and updates of eff[] are not visible, and statements such as
 * "rootnode[s] *= 0.5" and the reporterr dumps may be inactive -- confirm.
 */
8168 void counteff_simple_double( int nseq, int ***topol, double **len, double *node )
// Fixed-size work arrays; M is defined outside this block.
8172 static double rootnode[M];
8173 static double eff[M];
8176 for( i=0; i<nseq; i++ ){
8177 reporterr( "len0 = %f\n", len[i][0] );
8178 reporterr( "len1 = %f\n", len[i][1] );
8181 for( i=0; i<nseq; i++ )
// Accumulate the weighted branch length of every step into its members.
8189 for( i=0; i<nseq-1; i++ )
8191 for( j=0; (s1=topol[i][0][j]) > -1; j++ )
8193 rootnode[s1] += (double)len[i][0] * eff[s1];
8196 rootnode[s1] *= 0.5;
8200 for( j=0; (s2=topol[i][1][j]) > -1; j++ )
8202 rootnode[s2] += (double)len[i][1] * eff[s2];
8205 rootnode[s2] *= 0.5;
// Add the constant offset GETA3 to every accumulated value.
8210 for( i=0; i<nseq; i++ )
8213 rootnode[i] += GETA3;
8216 reporterr( "### rootnode for %d = %f\n", i, rootnode[i] );
// Normalise by the total.
8221 for( i=0; i<nseq; i++ )
8223 total += rootnode[i];
8229 for( i=0; i<nseq; i++ )
8231 node[i] = rootnode[i] / total;
8235 reporterr( "weight array in counteff_simple\n" );
8236 for( i=0; i<nseq; i++ )
8237 reporterr( "%f\n", node[i] );
/*
 * counteff_simple_double_nostatic_memsave( nseq, topol, len, dep, node )
 *
 * Visible behaviour: allocates rootnode, eff and a 2 x (nseq+1) integer
 * matrix localmem; warns about negative branch lengths; for each of the
 * nseq-1 steps fills localmem[0] and localmem[1] through topolorder() and
 * adds len[i][side] * eff[s] to rootnode[s] for every listed member s; then
 * adds GETA3, sums into total and writes node[i] = rootnode[i] / total.
 *
 * NOTE(review): this listing is partial (line-number prefixes; short lines,
 * braces and preprocessor/comment delimiters are not shown).  The updates of
 * eff[], the reaction to negative lengths, and the frees of rootnode / eff
 * are not visible; "rootnode[s] *= 0.5" and the reporterr dumps may be
 * inactive -- confirm.
 */
8243 void counteff_simple_double_nostatic_memsave( int nseq, int ***topol, double **len, Treedep *dep, double *node )
8252 rootnode = AllocateDoubleVec( nseq );
8253 eff = AllocateDoubleVec( nseq );
8254 localmem = AllocateIntMtx( 2, nseq+1 );
// NOTE(review): this loop runs to nseq while the step loops below run to
// nseq-1; confirm len[] has at least nseq rows.
8256 for( i=0; i<nseq; i++ ) // 2014/06/07, avoid negative eff.
8258 if( len[i][0] < 0.0 )
8260 reporterr( "WARNING: negative branch length %f, step %d-0\n", len[i][0], i );
8263 if( len[i][1] < 0.0 )
8265 reporterr( "WARNING: negative branch length %f, step %d-1\n", len[i][1], i );
// Dump of every step's member lists and branch lengths.
8270 for( i=0; i<nseq-1; i++ )
8272 reporterr( "\nstep %d, group 0\n", i );
8273 for( j=0; topol[i][0][j]!=-1; j++) reporterr( "%3d ", topol[i][0][j] );
8274 reporterr( "\n", i );
8275 reporterr( "step %d, group 1\n", i );
8276 for( j=0; topol[i][1][j]!=-1; j++) reporterr( "%3d ", topol[i][1][j] );
8277 reporterr( "\n", i );
8278 reporterr( "len0 = %f\n", len[i][0] );
8279 reporterr( "len1 = %f\n", len[i][1] );
8282 for( i=0; i<nseq; i++ )
8290 for( i=0; i<nseq-1; i++ )
// Member lists are obtained through topolorder() into localmem[0] / [1]
// (each pre-terminated with -1) instead of being read from topol[i][*] directly.
8292 localmem[0][0] = -1;
8294 topolorder( njob, localmem[0], &posinmem, topol, dep, i, 0 );
8295 localmem[1][0] = -1;
8297 topolorder( njob, localmem[1], &posinmem, topol, dep, i, 1 );
// Accumulate the weighted branch length of this step into its members.
8299 for( j=0; (s1=localmem[0][j]) > -1; j++ )
8301 rootnode[s1] += (double)len[i][0] * eff[s1];
8304 rootnode[s1] *= 0.5;
8308 for( j=0; (s2=localmem[1][j]) > -1; j++ )
8310 rootnode[s2] += (double)len[i][1] * eff[s2];
8313 rootnode[s2] *= 0.5;
// Add the constant offset GETA3 to every accumulated value.
8318 for( i=0; i<nseq; i++ )
8321 rootnode[i] += GETA3;
8324 reporterr( "### rootnode for %d = %f\n", i, rootnode[i] );
// Normalise by the total.
8329 for( i=0; i<nseq; i++ )
8331 total += rootnode[i];
8337 for( i=0; i<nseq; i++ )
8339 node[i] = rootnode[i] / total;
8343 reporterr( "weight array in counteff_simple\n" );
8344 for( i=0; i<nseq; i++ )
8345 reporterr( "%f\n", node[i] );
8351 FreeIntMtx( localmem );
/*
 * counteff_simple_double_nostatic( nseq, topol, len, node )
 *
 * Visible behaviour: allocates rootnode and eff (nseq doubles each); warns
 * about negative branch lengths; for each of the nseq-1 steps adds
 * len[i][side] * eff[s] to rootnode[s] for every member s of that side; then
 * adds GETA3, sums into total and writes node[i] = rootnode[i] / total.
 *
 * NOTE(review): this listing is partial (line-number prefixes; short lines,
 * braces and preprocessor/comment delimiters are not shown).  The updates of
 * eff[], the reaction to negative lengths, and the frees of rootnode / eff
 * are not visible; "rootnode[s] *= 0.5" and the reporterr dumps may be
 * inactive -- confirm.
 */
8354 void counteff_simple_double_nostatic( int nseq, int ***topol, double **len, double *node )
8361 rootnode = AllocateDoubleVec( nseq );
8362 eff = AllocateDoubleVec( nseq );
// NOTE(review): this loop runs to nseq while the step loops below run to
// nseq-1; confirm len[] has at least nseq rows.
8364 for( i=0; i<nseq; i++ ) // 2014/06/07, avoid negative eff.
8366 if( len[i][0] < 0.0 )
8368 reporterr( "WARNING: negative branch length %f, step %d-0\n", len[i][0], i );
8371 if( len[i][1] < 0.0 )
8373 reporterr( "WARNING: negative branch length %f, step %d-1\n", len[i][1], i );
// Dump of every step's member lists and branch lengths.
8378 for( i=0; i<nseq-1; i++ )
8380 reporterr( "\nstep %d, group 0\n", i );
8381 for( j=0; topol[i][0][j]!=-1; j++) reporterr( "%3d ", topol[i][0][j] );
8382 reporterr( "\n", i );
8383 reporterr( "step %d, group 1\n", i );
8384 for( j=0; topol[i][1][j]!=-1; j++) reporterr( "%3d ", topol[i][1][j] );
8385 reporterr( "\n", i );
8386 reporterr( "len0 = %f\n", len[i][0] );
8387 reporterr( "len1 = %f\n", len[i][1] );
8390 for( i=0; i<nseq; i++ )
// Accumulate the weighted branch length of every step into its members.
8398 for( i=0; i<nseq-1; i++ )
8400 for( j=0; (s1=topol[i][0][j]) > -1; j++ )
8402 rootnode[s1] += (double)len[i][0] * eff[s1];
8405 rootnode[s1] *= 0.5;
8409 for( j=0; (s2=topol[i][1][j]) > -1; j++ )
8411 rootnode[s2] += (double)len[i][1] * eff[s2];
8414 rootnode[s2] *= 0.5;
// Add the constant offset GETA3 to every accumulated value.
8419 for( i=0; i<nseq; i++ )
8422 rootnode[i] += GETA3;
8425 reporterr( "### rootnode for %d = %f\n", i, rootnode[i] );
// Normalise by the total.
8430 for( i=0; i<nseq; i++ )
8432 total += rootnode[i];
8438 for( i=0; i<nseq; i++ )
8440 node[i] = rootnode[i] / total;
8444 reporterr( "weight array in counteff_simple\n" );
8445 for( i=0; i<nseq; i++ )
8446 reporterr( "%f\n", node[i] );
/*
 * counteff_simple( nseq, topol, len, node )
 *
 * Visible behaviour: for each of the nseq-1 steps, adds len[i][side] * eff[s]
 * to rootnode[s] for every member s of that side; then adds GETA3 to each
 * rootnode[i], sums them into total and writes node[i] = rootnode[i] / total.
 *
 * NOTE(review): this listing is partial (line-number prefixes; short lines,
 * braces and preprocessor/comment delimiters are not shown).  Both static
 * arrays and heap allocations of rootnode / eff are visible, presumably as
 * alternatives; the updates of eff[] are not visible, and
 * "rootnode[s] *= 0.5" and the reporterr dumps may be inactive -- confirm.
 */
8454 void counteff_simple( int nseq, int ***topol, double **len, double *node )
8459 static double rootnode[M];
8460 static double eff[M];
8464 rootnode = AllocateDoubleVec( nseq );
8465 eff = AllocateDoubleVec( nseq );
8469 for( i=0; i<nseq; i++ ){
8470 reporterr( "len0 = %f\n", len[i][0] );
8471 reporterr( "len1 = %f\n", len[i][1] );
8474 for( i=0; i<nseq; i++ )
// Accumulate the weighted branch length of every step into its members.
8482 for( i=0; i<nseq-1; i++ )
8484 for( j=0; (s1=topol[i][0][j]) > -1; j++ )
8486 rootnode[s1] += len[i][0] * eff[s1];
8489 rootnode[s1] *= 0.5;
8493 for( j=0; (s2=topol[i][1][j]) > -1; j++ )
8495 rootnode[s2] += len[i][1] * eff[s2];
8498 rootnode[s2] *= 0.5;
// Add the constant offset GETA3 to every accumulated value.
8503 for( i=0; i<nseq; i++ )
8506 rootnode[i] += GETA3;
8509 reporterr( "### rootnode for %d = %f\n", i, rootnode[i] );
// Normalise by the total.
8514 for( i=0; i<nseq; i++ )
8516 total += rootnode[i];
8522 for( i=0; i<nseq; i++ )
8524 node[i] = rootnode[i] / total;
8528 reporterr( "weight array in counteff_simple\n" );
8529 for( i=0; i<nseq; i++ )
8530 reporterr( "%f\n", node[i] );
/*
 * counteff( nseq, topol, len, node )
 *
 * Two computations of node[][] are visible:
 *   1. a count-based one: per-step counters in rootnode[], node[min][max]
 *      set from rootnode[s1] + rootnode[s2] (minus 1 except for the last
 *      step), then mapped through ipower( 0.5, count ) + geta2 and mirrored
 *      into the lower triangle;
 *   2. a length-based one: rootnode[s] accumulates len[i][side] * eff[s],
 *      GETA3 is added, and node[i][j] = rootnode[i] * rootnode[j] with
 *      node[i][i] = rootnode[i].
 *
 * NOTE(review): this listing is partial (line-number prefixes; short lines,
 * braces and preprocessor/comment delimiters are not shown).  How the two
 * parts are selected, the condition in front of ErrorExit( "mix error" ) and
 * the updates of eff[] are not visible; some statements may be inactive.
 */
8541 void counteff( int nseq, int ***topol, double **len, double **node )
8543 int i, j, k, s1, s2;
8558 ErrorExit( "mix error" );
// Part 1: count-based values.
8565 for( i=0; i<nseq; i++ ) rootnode[i] = 0;
8566 for( i=0; i<nseq-2; i++ )
8568 for( j=0; topol[i][0][j]>-1; j++ )
8569 rootnode[topol[i][0][j]]++;
8570 for( j=0; topol[i][1][j]>-1; j++ )
8571 rootnode[topol[i][1][j]]++;
8572 for( j=0; topol[i][0][j]>-1; j++ )
8574 s1 = topol[i][0][j];
8575 for( k=0; topol[i][1][k]>-1; k++ )
8577 s2 = topol[i][1][k];
8578 node[MIN(s1,s2)][MAX(s1,s2)] = rootnode[s1] + rootnode[s2] - 1;
// Last step: same cross-pair loop, without the "- 1".
8582 for( j=0; topol[nseq-2][0][j]>-1; j++ )
8584 s1 = topol[nseq-2][0][j];
8585 for( k=0; topol[nseq-2][1][k]>-1; k++ )
8587 s2 = topol[nseq-2][1][k];
8588 node[MIN(s1,s2)][MAX(s1,s2)] = rootnode[s1] + rootnode[s2];
// Map each count through ipower( 0.5, count ) + geta2, then mirror the
// upper triangle into the lower triangle.
8591 for( i=0; i<nseq-1; i++ ) for( j=i+1; j<nseq; j++ )
8592 node[i][j] = ipower( 0.5, (int)node[i][j] ) + geta2;
8593 for( i=0; i<nseq-1; i++ ) for( j=i+1; j<nseq; j++ )
8594 node[j][i] = node[i][j];
// Part 2: branch-length-based values.
8600 for( i=0; i<nseq; i++ ){
8601 reporterr( "len0 = %f\n", len[i][0] );
8602 reporterr( "len1 = %f\n", len[i][1] );
8605 for( i=0; i<nseq; i++ )
8613 for( i=0; i<nseq-1; i++ )
8615 for( j=0; (s1=topol[i][0][j]) > -1; j++ )
8617 rootnode[s1] += len[i][0] * eff[s1];
8620 rootnode[s1] *= 0.5;
8624 for( j=0; (s2=topol[i][1][j]) > -1; j++ )
8626 rootnode[s2] += len[i][1] * eff[s2];
8629 rootnode[s2] *= 0.5;
// Add the constant offset GETA3 to every accumulated value.
8634 for( i=0; i<nseq; i++ )
8637 rootnode[i] += GETA3;
8640 reporterr( "rootnode for %d = %f\n", i, rootnode[i] );
// Off-diagonal entries are products of two values; the diagonal holds the
// value itself (the condition separating the two lines is not visible).
8643 for( i=0; i<nseq; i++ )
8645 for( j=0; j<nseq; j++ )
8647 node[i][j] = (double)rootnode[i] * rootnode[j];
8648 else node[i][i] = rootnode[i];
// Dump of the resulting matrix via printf.
8653 printf( "weight matrix in counteff\n" );
8654 for( i=0; i<nseq; i++ )
8656 for( j=0; j<nseq; j++ )
8658 printf( "%f ", node[i][j] );
/*
 * score_calcp( seq1, seq2, len )
 *
 * Visible behaviour: walks the first len columns of the two sequences,
 * skips columns where both hold '-', and adds amino_dis[ms1][ms2] to
 * tmpscore.  When one side holds '-', penalty is added as well and the
 * following run of '-' on that side is consumed while amino_dis values keep
 * being added.
 *
 * NOTE(review): this listing is partial (line-number prefixes; short lines
 * and braces are not shown).  The declaration of len2 and the return
 * statement are not visible; presumably tmpscore is returned -- confirm.
 */
8665 double score_calcp( char *seq1, char *seq2, int len )
// Characters are held as unsigned char so they can index amino_dis[][].
8668 unsigned char ms1, ms2;
8673 for( k=0; k<len; k++ )
8675 ms1 = (unsigned char)seq1[k];
8676 ms2 = (unsigned char)seq2[k];
// Columns that are gaps in both sequences do not contribute.
8677 if( ms1 == '-' && ms2 == '-' ) continue;
8678 tmpscore += (double)amino_dis[ms1][ms2];
// Gap in seq1: add penalty once, then advance through the gap run.
8680 if( ms1 == (int)'-' )
8682 tmpscore += (double)penalty;
8683 tmpscore += (double)amino_dis[ms1][ms2];
8684 while( (ms1=(unsigned char)seq1[++k]) == '-' )
8685 tmpscore += (double)amino_dis[ms1][ms2];
8687 if( k >len2 ) break;
// Gap in seq2: same treatment.
8690 if( ms2 == (int)'-' )
8692 tmpscore += (double)penalty;
8693 tmpscore += (double)amino_dis[ms1][ms2];
8694 while( (ms2=(unsigned char)seq2[++k]) == '-' )
8695 tmpscore += (double)amino_dis[ms1][ms2];
8697 if( k > len2 ) break;
/*
 * score_calc1( seq1, seq2 )
 *
 * Visible behaviour: over the length of seq1, sums amino_dis[][] for the
 * columns where neither sequence holds '-', and divides the sum by count
 * when count is non-zero.
 *
 * NOTE(review): the increment of count and the return statement are not
 * visible in this partial listing; presumably count is the number of such
 * columns and score is returned -- confirm.
 */
8704 double score_calc1( char *seq1, char *seq2 ) /* method 1 */
8709 int len = strlen( seq1 );
8711 for( k=0; k<len; k++ )
// Only columns without a gap on either side contribute.
8713 if( seq1[k] != '-' && seq2[k] != '-' )
8715 score += (double)amino_dis[(unsigned char)seq1[k]][(unsigned char)seq2[k]];
// Division is skipped when count is zero.
8719 if( count ) score /= (double)count;
/*
 * substitution_nid( seq1, seq2 )
 *
 * Visible behaviour: over the length of seq1, counts in s12 the columns
 * where neither sequence holds '-' and the two characters are equal.
 *
 * NOTE(review): the declaration of s12 and the return statement are not
 * visible in this partial listing; presumably s12 is returned -- confirm.
 */
8724 double substitution_nid( char *seq1, char *seq2 )
8728 int len = strlen( seq1 );
8731 for( k=0; k<len; k++ )
8732 if( seq1[k] != '-' && seq2[k] != '-' )
// The comparison yields 1 for identical characters, 0 otherwise.
8733 s12 += ( seq1[k] == seq2[k] );
8735 // fprintf( stdout, "s12 = %f\n", s12 );
/*
 * substitution_score( seq1, seq2 )
 *
 * Visible behaviour: over the length of seq1, sums amino_dis[][] into s12
 * for the columns where neither sequence holds '-'.
 *
 * NOTE(review): the declaration of s12 and the return statement are not
 * visible in this partial listing; presumably s12 is returned -- confirm.
 */
8739 double substitution_score( char *seq1, char *seq2 )
8743 int len = strlen( seq1 );
8746 for( k=0; k<len; k++ )
8747 if( seq1[k] != '-' && seq2[k] != '-' )
// Characters are cast to unsigned char before indexing the table.
8748 s12 += amino_dis[(unsigned char)seq1[k]][(unsigned char)seq2[k]];
8750 // fprintf( stdout, "s12 = %f\n", s12 );
/*
 * substitution_hosei( seq1, seq2 )
 *
 * Two bodies are visible (an index loop over strlen( seq1 ) and a pointer
 * loop that stops at the terminator of seq1); presumably alternatives chosen
 * by preprocessor directives not shown.  Both count mismatching characters
 * in columns where neither sequence holds '-', divide by count when it is
 * non-zero, and replace a score below 0.95 by -log( 1.0 - score ).
 *
 * NOTE(review): the handling of scores >= 0.95, the increment of count and
 * the return statement are not visible in this partial listing.
 */
8754 double substitution_hosei( char *seq1, char *seq2 ) /* method 1 */
// First visible body: index-based loop.
8760 int len = strlen( seq1 );
8762 for( k=0; k<len; k++ )
8764 if( seq1[k] != '-' && seq2[k] != '-' )
8766 score += (double)( seq1[k] != seq2[k] );
8770 if( count ) score /= (double)count;
8772 if( score < 0.95 ) score = - log( 1.0 - score );
// Second visible body: pointer-based loop with an integer mismatch counter.
8783 while( (s1=*seq1++) )
8786 if( s1 == '-' ) continue;
8787 if( s2 == '-' ) continue;
8788 iscore += ( s1 != s2 );
8791 if( count ) score = (double)iscore / count;
8793 if( score < 0.95 ) score = - log( 1.0 - score );
/*
 * substitution( seq1, seq2 )
 *
 * Visible behaviour: over the length of seq1, counts mismatching characters
 * in columns where neither sequence holds '-', and divides by count when it
 * is non-zero.
 *
 * NOTE(review): the increment of count and the return statement are not
 * visible in this partial listing; presumably score is returned -- confirm.
 */
8799 double substitution( char *seq1, char *seq2 ) /* method 1 */
8804 int len = strlen( seq1 );
8806 for( k=0; k<len; k++ )
8808 if( seq1[k] != '-' && seq2[k] != '-' )
// The comparison yields 1 for a mismatch, 0 otherwise.
8810 score += (double)( seq1[k] != seq2[k] );
8814 if( count ) score /= (double)count;
/*
 * treeconstruction( seq, nseq, topol, len, eff )
 *
 * Visible behaviour: fills the upper triangle of eff[][] with a pairwise
 * value per sequence pair, calls a tree-building routine on it and then
 * counteff(), which receives eff as its output matrix.
 *
 * NOTE(review): this listing is partial (line-number prefixes; short lines,
 * braces and preprocessor/comment delimiters are not shown).  Two pairwise
 * functions (score_calc1, substitution_hosei) and three builders (upg, upg2,
 * spg) are visible; presumably alternatives -- confirm which are active.
 * The reporterr dumps may be inactive as well.
 */
8820 void treeconstruction( char **seq, int nseq, int ***topol, double **len, double **eff )
// Pairwise value for every i < j.
8828 for( i=0; i<nseq-1; i++ ) for( j=i+1; j<nseq; j++ )
8831 eff[i][j] = (double)score_calc1( seq[i], seq[j] );
8833 eff[i][j] = (double)substitution_hosei( seq[i], seq[j] );
8835 reporterr( "%f\n", eff[i][j] );
8839 reporterr( "distance matrix\n" );
8840 for( i=0; i<nseq; i++ )
8842 for( j=0; j<nseq; j++ )
8844 reporterr( "%f ", eff[i][j] );
// Tree building followed by the weight computation.
8850 upg( nseq, eff, topol, len );
8851 upg2( nseq, eff, topol, len );
8853 spg( nseq, eff, topol, len );
8854 counteff( nseq, topol, len, eff );
8859 for( i=0; i<nseq; i++ ) for( j=0; j<nseq; j++ )
8863 reporterr( "weight matrix\n" );
8864 for( i=0; i<nseq; i++ )
8866 for( j=0; j<nseq; j++ )
8868 reporterr( "%f ", eff[i][j] );
/*
 * bscore_calc( seq, s, eff )
 *
 * Visible behaviour: for every pair i < j of the first s sequences, walks
 * the columns of the alignment (length taken from seq[0]), adds
 * cob * penalty * eff[i][j] and amino_dis[][] * eff[i][j] to score, and
 * counts in nglen the columns where neither sequence holds '-'.  Returns
 * score / nglen + 400.0 * !scoremtx.
 *
 * NOTE(review): this listing is partial (line-number prefixes; short lines
 * and braces are not shown); the computation of cob from gb1/gb2/gc1/gc2 is
 * not visible.  Each product is cast to long before being added, so
 * fractional parts are dropped at that point.
 */
8875 double bscore_calc( char **seq, int s, double **eff ) /* algorithm B */
8878 int gb1, gb2, gc1, gc2;
8881 int len = strlen( seq[0] );
8886 for( i=0; i<s-1; i++ ) for( j=i+1; j<s; j++ )
8888 double efficient = eff[i][j];
8892 for( k=0; k<len; k++ )
// gc1 / gc2 flag a gap in the current column of each sequence.
8897 gc1 = ( seq[i][k] == '-' );
8898 gc2 = ( seq[j][k] == '-' );
8919 score += (long)cob * penalty * efficient;
8920 score += (long)amino_dis[(unsigned char)seq[i][k]][(unsigned char)seq[j][k]] * efficient;
// Counts columns that are gap-free in both sequences.
8921 nglen += ( !gc1 * !gc2 );
8924 return( (double)score / nglen + 400.0 * !scoremtx );
/*
 * AllocateTmpSeqs: allocates scratch sequence buffers and returns them
 * through the two output pointers.
 *
 *  *mseq2pt receives AllocateCharMtx( njob, locnlenmax+1 )
 *  *mseq1pt receives AllocateCharVec( locnlenmax+1 )
 *
 * njob is read from file scope. The +1 presumably leaves room for the
 * string terminator.
 */
8927 void AllocateTmpSeqs( char ***mseq2pt, char **mseq1pt, int locnlenmax )
8929 *mseq2pt = AllocateCharMtx( njob, locnlenmax+1 );
8930 *mseq1pt = AllocateCharVec( locnlenmax+1 );
/*
 * FreeTmpSeqs: releases a pair of scratch buffers; mseq2 goes through
 * FreeCharMtx() and mseq1 through free(). Presumably the counterpart of
 * AllocateTmpSeqs().
 */
8933 void FreeTmpSeqs( char **mseq2, char *mseq1 )
8935 FreeCharMtx( mseq2 );
8936 free( (char *)mseq1 );
/*
 * gappick0: walks seq one character at a time until its terminating NUL.
 * The loop body is not visible in this excerpt; judging by the name and
 * signature it presumably copies the non-gap characters of seq into aseq
 * (TODO confirm against the full source).
 */
8940 void gappick0( char *aseq, char *seq )
8942 for( ; *seq != 0; seq++ )
/*
 * isallgap: walks seq one character at a time until its terminating NUL.
 * The loop body and the return statements are not visible in this
 * excerpt; by its name it presumably reports whether seq consists only of
 * gap characters (TODO confirm against the full source).
 */
8951 int isallgap( char *seq )
8953 for( ; *seq != 0; seq++ )
/*
 * gappick: builds in mseq2 a copy of the alignment aseq and fills effarr
 * with weights taken from row s of eff.
 *
 * Visible steps:
 *  - for each column i, allgap is the product of ( aseq[j][i] == '-' )
 *    over all j != s, i.e. 1 only if every sequence other than s has a gap
 *    in that column;
 *  - characters aseq[j][i] are copied to mseq2[countjob][count] (the
 *    conditions guarding the copy and the counter updates are hidden;
 *    presumably sequence s and all-gap columns are left out);
 *  - the first nseq-1 rows of mseq2 are NUL-terminated at count;
 *  - effarr[countjob] = eff[s][i] (the guard on i is hidden);
 *  - effarr is printed to stdout, presumably as debug output.
 */
8961 void gappick( int nseq, int s, char **aseq, char **mseq2,
8962 double **eff, double *effarr )
8964 int i, j, count, countjob, len, allgap;
8965 len = strlen( aseq[0] );
8966 for( i=0, count=0; i<len; i++ )
8969 for( j=0; j<nseq; j++ ) if( j != s ) allgap *= ( aseq[j][i] == '-' );
8972 for( j=0, countjob=0; j<nseq; j++ )
8976 mseq2[countjob][count] = aseq[j][i];
8983 for( i=0; i<nseq-1; i++ ) mseq2[i][count] = 0;
8985 for( i=0, countjob=0; i<nseq; i++ )
8989 effarr[countjob] = eff[s][i];
8994 fprintf( stdout, "effarr in gappick s = %d\n", s+1 );
8995 for( i=0; i<countjob; i++ )
8996 fprintf( stdout, " %f", effarr[i] );
/*
 * commongappick_record: compacts the alignment seq in place by copying
 * column i to column count (count <= i).
 *
 * The column loop runs through i == len inclusive, so the terminating NUL
 * of each row is visited as well; since NUL != '-', that column is not
 * seen as a gap column by the break-based scan.
 * Two gap tests are visible (a product of gap flags and a scan that breaks
 * at the first non-gap character); the lines choosing between them, the
 * condition guarding the copy and every use of the map parameter are
 * hidden in this excerpt.
 */
9001 void commongappick_record( int nseq, char **seq, int *map )
9004 int len = strlen( seq[0] );
9007 for( i=0, count=0; i<=len; i++ )
9011 for( j=0; j<nseq; j++ )
9012 allgap *= ( seq[j][i] == '-' );
9015 for( j=0; j<nseq; j++ )
9016 if( seq[j][i] != '-' ) break;
9019 for( j=0; j<nseq; j++ )
9021 seq[j][count] = seq[j][i];
/*
 * commongappick: compacts the alignment seq in place, column by column.
 *
 * Three implementations are visible, separated by "-----" lines:
 *  1. 9036-9057: record in mapfromnewtoold the indices of the columns to
 *     keep, then rewrite every row as seq[j][i] = seq[j][map[i]];
 *  2. 9061-9085: build mapfromoldtonew (-1 for a column not kept) and
 *     move each kept character to its new position;
 *  3. 9088-9102: copy column i to column count directly.
 * The "-----" lines are not valid C, so the code around them is presumably
 * excluded by preprocessor lines that are hidden in this excerpt; which
 * variant is compiled cannot be determined from here.
 *
 * All column loops run through i == len inclusive, so the terminating NUL
 * (which is != '-') is scanned like any other column.
 * The map arrays are calloc'ed with len+1 ints and freed before the end of
 * their variant; no NULL check on calloc is visible.
 */
9030 void commongappick( int nseq, char **seq )
9033 int len = strlen( seq[0] );
9036 int *mapfromnewtoold;
9038 mapfromnewtoold = calloc( len+1, sizeof( int ) );
9040 for( i=0, count=0; i<=len; i++ )
9042 for( j=0; j<nseq; j++ )
9043 if( seq[j][i] != '-' ) break;
9046 mapfromnewtoold[count++] = i;
9049 // mapfromnewtoold[count] = -1; // not needed
9050 for( j=0; j<nseq; j++ )
9052 for( i=0; i<count; i++ )
9054 seq[j][i] = seq[j][mapfromnewtoold[i]];
9057 free( mapfromnewtoold );
9059 --------------------------
9061 int *mapfromoldtonew;
9064 mapfromoldtonew = calloc( len+1, sizeof( int ) );
9065 for( i=0; i<=len; i++ ) mapfromoldtonew[i] = -1;
9067 for( i=0, count=0; i<=len; i++ )
9069 for( j=0; j<nseq; j++ )
9070 if( seq[j][i] != '-' ) break;
9073 mapfromoldtonew[i] = count;
9077 for( j=0; j<nseq; j++ )
9079 for( i=0; i<=len; i++ )
9081 if( (pos=mapfromoldtonew[i]) != -1 )
9082 seq[j][pos] = seq[j][i];
9085 free( mapfromoldtonew );
9086 --------------------------
9088 for( i=0, count=0; i<=len; i++ )
9092 for( j=0; j<nseq; j++ )
9093 allgap *= ( seq[j][i] == '-' );
9096 for( j=0; j<nseq; j++ )
9097 if( seq[j][i] != '-' ) break;
9100 for( j=0; j<nseq; j++ )
9102 seq[j][count] = seq[j][i];
/*
 * commongaprecord: writes one marker character per alignment column into
 * originallygapped and NUL-terminates it at index len.
 *
 * For each column i < len the rows are scanned until the first non-gap
 * character. The marker is '-' or 'o'; the condition selecting between
 * them is hidden, presumably '-' when the scan found no residue in any row
 * (TODO confirm). originallygapped must hold at least len+1 bytes.
 */
9112 void commongaprecord( int nseq, char **seq, char *originallygapped )
9115 int len = strlen( seq[0] );
9117 for( i=0; i<len; i++ )
9119 for( j=0; j<nseq; j++ )
9120 if( seq[j][i] != '-' ) break;
9122 originallygapped[i] = '-';
9124 originallygapped[i] = 'o';
9126 originallygapped[len] = 0;
/*
 * score_calc0: dispatches to a scoring routine according to the
 * file-scope setting scmtd and stores the result in tmp (the return
 * statement is not visible in this excerpt).
 */
9130 double score_calc0( char **seq, int s, double **eff, int ex )
// NOTE(review): the second test is a plain `if`, not `else if`. When
// scmtd == 4 the score_calc4() result is computed and then overwritten by
// the final `else` branch, so score_calc5() decides the value for every
// scmtd. Confirm whether this is intended.
9134 if( scmtd == 4 ) tmp = score_calc4( seq, s, eff, ex );
9135 if( scmtd == 5 ) tmp = score_calc5( seq, s, eff, ex );
9136 else tmp = score_calc5( seq, s, eff, ex );
/*
 * score_m_1: weighted score of sequence ex against each of the other
 * njob sequences.
 *
 * For every i != ex the pair weight is eff[MIN(i,ex)][MAX(i,ex)]. Per
 * column a gap term cob * penalty and the amino_dis entry of the two
 * characters are added (both scaled by the weight) and nglen counts the
 * columns where both sequences hold a residue. The result is
 * score / nglen + 400.0 * !scoremtx.
 *
 * The computation of cob and the initial values of score and nglen are
 * hidden in this excerpt.
 */
9143 double score_m_1( char **seq, int ex, double **eff )
9146 int len = strlen( seq[0] );
9147 int gb1, gb2, gc1, gc2;
9154 for( i=0; i<njob; i++ )
// The weight is read before the i == ex check, so eff[ex][ex] is read
// (and discarded) once.
9156 double efficient = eff[MIN(i,ex)][MAX(i,ex)];
9157 if( i == ex ) continue;
9161 for( k=0; k<len; k++ )
9166 gc1 = ( seq[i][k] == '-' );
9167 gc2 = ( seq[ex][k] == '-' );
9188 score += (double)cob * penalty * efficient;
// NOTE(review): amino_dis is indexed with plain char here, whereas
// bscore_calc() casts to unsigned char; where char is signed, bytes >= 0x80
// would give a negative index. Confirm the input alphabet.
9189 score += (double)amino_dis[seq[i][k]][seq[ex][k]] * efficient;
9191 nglen += ( !gc1 * !gc2 );
9193 if( !gc1 && !gc2 ) fprintf( stdout, "%f\n", score );
9196 return( (double)score / nglen + 400.0 * !scoremtx );
/*
 * sitescore: produces three per-column score strings for the alignment
 * seq, encoding each score as a letter ( ch + 0x61, i.e. 'a' + ch ).
 *
 *  sco1: for column i, the sum over all pairs j < k with residues in both
 *        rows of amino_dis + 400 * !scoremtx, divided by count when
 *        count > 0; ch = (int)( tmp/100.0 - 0.000001 ).
 *  sco2: the same sum with every pair scaled by eff[j][k]; after the
 *        division the value is shifted by -400 * !scoremtx, doubled and
 *        clamped at 0 before the same letter encoding.
 *  sco3: computed only for WIN <= i < len-WIN from a window
 *        j = i-WIN .. i+WIN (the accumulation is hidden); the first and
 *        last WIN positions are filled with '-'.
 *
 * sprintf( buf+i, "%c", ... ) writes the letter at i and a terminating NUL
 * at i+1, so each output buffer needs room for the terminator.
 * The increments of count are not visible in this excerpt.
 */
9201 void sitescore( char **seq, double **eff, char sco1[], char sco2[], char sco3[] )
9204 int len = strlen( seq[0] );
9210 for( i=0; i<len; i++ )
9212 tmp = 0.0; count = 0;
9213 for( j=0; j<njob-1; j++ ) for( k=j+1; k<njob; k++ )
9216 if( seq[j][i] != '-' && seq[k][i] != '-' )
// NOTE(review): plain char used as a table index (no unsigned char cast);
// see the casts used in bscore_calc().
9219 tmp += amino_dis[seq[j][i]][seq[k][i]] + 400 * !scoremtx;
9223 if( count > 0.0 ) tmp /= count;
9225 ch = (int)( tmp/100.0 - 0.000001 );
9226 sprintf( sco1+i, "%c", ch+0x61 );
9230 for( i=0; i<len; i++ )
9232 tmp = 0.0; count = 0;
9233 for( j=0; j<njob-1; j++ ) for( k=j+1; k<njob; k++ )
9236 if( seq[j][i] != '-' && seq[k][i] != '-' )
9239 tmp += eff[j][k] * ( amino_dis[seq[j][i]][seq[k][i]] + 400 * !scoremtx );
9243 if( count > 0.0 ) tmp /= count;
9245 tmp = ( tmp - 400 * !scoremtx ) * 2;
9246 if( tmp < 0 ) tmp = 0;
9247 ch = (int)( tmp/100.0 - 0.000001 );
9248 sprintf( sco2+i, "%c", ch+0x61 );
9253 for( i=WIN; i<len-WIN; i++ )
9256 for( j=i-WIN; j<=i+WIN; j++ )
9260 for( j=0; j<njob; j++ )
9262 if( seq[j][i] == '-' )
9269 ch = (int)( tmp/100.0 - 0.0000001 );
9270 sprintf( sco3+i, "%c", ch+0x61 );
9272 for( i=0; i<WIN; i++ ) sco3[i] = '-';
9273 for( i=len-WIN; i<len; i++ ) sco3[i] = '-';
/*
 * strins: in-place insertion involving str1 and str2, done with two
 * backward-running loops.
 *
 *  - first loop: while str2 >= bk+len1, each byte is overwritten by the
 *    byte len1 positions before it, i.e. existing content is shifted
 *    towards higher addresses by len1;
 *  - second loop: the remaining positions down to bk are filled from str1,
 *    also walking backwards.
 *
 * The initialisation of bk and the repositioning of str1/str2 before the
 * loops are hidden in this excerpt. Presumably the effect is to prepend
 * str1 to str2, which requires str2's buffer to hold len1+len2+1 bytes
 * (TODO confirm).
 */
9278 void strins( char *str1, char *str2 )
9281 int len1 = strlen( str1 );
9282 int len2 = strlen( str2 );
9288 while( str2 >= bk+len1 ) { *str2 = *(str2-len1); str2--;} // by D.Mathog
9289 while( str2 >= bk ) { *str2-- = *str1--; }
/*
 * isaligned: returns 0 as soon as one of seq[1..nseq-1] differs in length
 * from seq[0]. The return value for the all-equal case is not visible in
 * this excerpt (presumably non-zero).
 */
9292 int isaligned( int nseq, char **seq )
9295 int len = strlen( seq[0] );
9296 for( i=1; i<nseq; i++ )
// strlen() yields size_t, so this compares an unsigned value with int len.
9298 if( strlen( seq[i] ) != len ) return( 0 );
/*
 * score_calc_for_score: average pairwise score of an alignment.
 *
 * For every pair i < j, columns where both rows hold '-' are skipped; the
 * amino_dis entry of the two characters is added to tmpscore; when one row
 * starts a gap, penalty - n_dis[0][24] is added and the index is advanced
 * over the rest of that gap run (stopping once k > len-2). Each pair
 * contributes tmpscore / c, and the total is divided by the number of
 * pairs, nseq * (nseq-1) / 2.
 *
 * The assignments of mseq1/mseq2, the updates of c and the bodies of the
 * inner while loops are hidden in this excerpt. The printf() calls are
 * presumably debug output.
 */
9303 double score_calc_for_score( int nseq, char **seq )
9306 int len = strlen( seq[0] );
9309 char *mseq1, *mseq2;
9312 for( i=0; i<nseq-1; i++ )
9314 for( j=i+1; j<nseq; j++ )
9320 for( k=0; k<len; k++ )
9322 if( mseq1[k] == '-' && mseq2[k] == '-' ) continue;
9323 tmpscore += amino_dis[(unsigned char)mseq1[k]][(unsigned char)mseq2[k]];
9325 if( mseq1[k] == '-' )
9327 tmpscore += penalty - n_dis[0][24];
9328 while( mseq1[++k] == '-' )
9331 if( k > len-2 ) break;
9334 if( mseq2[k] == '-' )
9336 tmpscore += penalty - n_dis[0][24];
9337 while( mseq2[++k] == '-' )
9340 if( k > len-2 ) break;
9344 score += (double)tmpscore / (double)c;
9346 printf( "tmpscore in mltaln9.c = %f\n", tmpscore );
9347 printf( "tmpscore / c = %f\n", tmpscore/(double)c );
9351 reporterr( "raw score = %f\n", score );
9352 score /= (double)nseq * ( nseq-1.0 ) / 2.0;
9355 printf( "score in mltaln9.c = %f\n", score );
9357 return( (double)score );
/*
 * doublencpy: the body is not visible in this excerpt. By its name and
 * signature it presumably copies len doubles from vec2 into vec1
 * (TODO confirm the direction against the full source).
 */
9360 void doublencpy( double *vec1, double *vec2, int len )
/*
 * score_calc_a: weighted sum-of-pairs score over the first s sequences.
 *
 * For every pair i < j < s with weight eff[i][j], each column adds
 * 0.5 * cob * penalty * weight and the weighted amino_dis entry of the two
 * characters; nglen counts the columns where both rows hold a residue.
 * The result is score / nglen + 400.0 * !scoremtx.
 *
 * The computation of cob from the gap flags is hidden in this excerpt.
 */
9366 double score_calc_a( char **seq, int s, double **eff ) /* algorithm A+ */
9369 int gb1, gb2, gc1, gc2;
9372 int len = strlen( seq[0] );
9377 for( i=0; i<s-1; i++ ) for( j=i+1; j<s; j++ )
9379 double efficient = eff[i][j];
9383 for( k=0; k<len; k++ )
9388 gc1 = ( seq[i][k] == '-' );
9389 gc2 = ( seq[j][k] == '-' );
9422 score += 0.5 * (double)cob * penalty * efficient;
9423 score += (double)amino_dis[(unsigned char)seq[i][k]][(unsigned char)seq[j][k]] * (double)efficient;
9424 nglen += ( !gc1 * !gc2 );
9427 return( (double)score / nglen + 400.0 * !scoremtx );
/*
 * score_calc_s: weighted sum-of-pairs score over the first s sequences;
 * the original header comment marks it as not used.
 *
 * Same accumulation as score_calc_a(), but the returned value is
 * score / nglen + 400.0, with the offset added unconditionally rather
 * than scaled by !scoremtx.
 *
 * The computation of cob from the gap flags is hidden in this excerpt.
 */
9431 double score_calc_s( char **seq, int s, double **eff ) /* algorithm S, not used */
9434 int gb1, gb2, gc1, gc2;
9437 int len = strlen( seq[0] );
9442 for( i=0; i<s-1; i++ ) for( j=i+1; j<s; j++ )
9444 double efficient = eff[i][j];
9448 for( k=0; k<len; k++ )
9453 gc1 = ( seq[i][k] == '-' );
9454 gc2 = ( seq[j][k] == '-' );
9489 score += 0.5 * (double)cob * penalty * efficient;
// NOTE(review): the second index is cast to int, not unsigned char as the
// first one is; where char is signed, bytes >= 0x80 would index negatively.
9490 score += (double)amino_dis[(unsigned char)seq[i][k]][(int)seq[j][k]] * (double)efficient;
9491 nglen += ( !gc1 * !gc2 );
9494 return( (double)score / nglen + 400.0 );
/*
 * score_calc_for_score_s: unweighted sum-of-pairs score over the first s
 * sequences.
 *
 * For every pair i < j < s each column adds 0.5 * cob * penalty and the
 * amino_dis entry of the two characters; nglen counts the columns where
 * both rows hold a residue. The result is score / nglen + 400.0.
 *
 * The computation of cob is hidden in this excerpt; the reporterr() calls
 * are presumably debug output.
 */
9497 double score_calc_for_score_s( int s, char **seq ) /* algorithm S */
9500 int gb1, gb2, gc1, gc2;
9503 int len = strlen( seq[0] );
9508 for( i=0; i<s-1; i++ ) for( j=i+1; j<s; j++ )
9513 for( k=0; k<len; k++ )
9518 gc1 = ( seq[i][k] == '-' );
9519 gc2 = ( seq[j][k] == '-' );
9554 score += 0.5 * (double)cob * penalty;
// NOTE(review): the first index is cast to int, not unsigned char as the
// second one is; where char is signed, bytes >= 0x80 would index negatively.
9555 score += (double)amino_dis[(int)seq[i][k]][(unsigned char)seq[j][k]];
9556 nglen += ( !gc1 * !gc2 );
9559 reporterr( "i = %d, j=%d\n", i+1, j+1 );
9560 reporterr( "score = %f\n", score );
9563 return( (double)score / nglen + 400.0 );
/*
 * SSPscore___: unnormalised score of sequence ex against every other one
 * of the first s sequences.
 *
 * i is fixed to ex and j runs over 0..s-1, skipping j == ex. Each column
 * adds 0.5 * cob * penalty and the amino_dis entry of the two characters.
 * nglen is accumulated but, as the original comment says, not used: the
 * raw score is returned.
 *
 * The computation of cob is hidden in this excerpt; the reporterr() calls
 * are presumably debug output.
 */
9566 double SSPscore___( int s, char **seq, int ex ) /* algorithm S */
9569 int gb1, gb2, gc1, gc2;
9572 int len = strlen( seq[0] );
9577 i=ex; for( j=0; j<s; j++ )
9580 if( j == ex ) continue;
9584 for( k=0; k<len; k++ )
9589 gc1 = ( seq[i][k] == '-' );
9590 gc2 = ( seq[j][k] == '-' );
9625 score += 0.5 * (double)cob * penalty;
9626 score += (double)amino_dis[(unsigned char)seq[i][k]][(unsigned char)seq[j][k]];
9627 nglen += ( !gc1 * !gc2 ); /* not used */
9630 reporterr( "i = %d, j=%d\n", i+1, j+1 );
9631 reporterr( "score = %f\n", score );
9634 return( (double)score );
/*
 * SSPscore: unnormalised sum-of-pairs score over the first s sequences.
 *
 * For every pair i < j < s each column adds 0.5 * cob * penalty and the
 * amino_dis entry of the two characters. nglen is accumulated but, as the
 * original comment says, not used: the raw score is returned.
 *
 * The computation of cob is hidden in this excerpt; the reporterr() calls
 * are presumably debug output.
 */
9637 double SSPscore( int s, char **seq ) /* algorithm S */
9640 int gb1, gb2, gc1, gc2;
9643 int len = strlen( seq[0] );
9648 for( i=0; i<s-1; i++ ) for( j=i+1; j<s; j++ )
9653 for( k=0; k<len; k++ )
9658 gc1 = ( seq[i][k] == '-' );
9659 gc2 = ( seq[j][k] == '-' );
9694 score += 0.5 * (double)cob * penalty;
9695 score += (double)amino_dis[(unsigned char)seq[i][k]][(unsigned char)seq[j][k]];
9696 nglen += ( !gc1 * !gc2 ); /* not used */
9699 reporterr( "i = %d, j=%d\n", i+1, j+1 );
9700 reporterr( "score = %f\n", score );
9703 return( (double)score );
/*
 * DSPscore: unnormalised pairwise score over the first s sequences.
 *
 * For every pair i < j, columns where both rows hold '-' are skipped and
 * the amino_dis entry of the two characters is added to tmpscore. When one
 * row starts a gap, penalty is added and, while that gap run continues,
 * the amino_dis entry of each further column is added as well (stopping
 * once k > len-2). tmpscore is added to score without any division.
 *
 * The assignments of mseq1/mseq2 and the return statement are hidden in
 * this excerpt.
 */
9708 double DSPscore( int s, char **seq ) /* not method 3 */
9712 int len = strlen( seq[0] );
9715 char *mseq1, *mseq2;
9723 for( i=0; i<s-1; i++ )
9725 for( j=i+1; j<s; j++ )
9730 for( k=0; k<len; k++ )
9732 if( mseq1[k] == '-' && mseq2[k] == '-' ) continue;
9733 tmpscore += amino_dis[(unsigned char)mseq1[k]][(unsigned char)mseq2[k]];
9735 if( mseq1[k] == '-' )
9737 tmpscore += penalty;
9738 while( mseq1[++k] == '-' )
9739 tmpscore += amino_dis[(unsigned char)mseq1[k]][(unsigned char)mseq2[k]];
9741 if( k > len-2 ) break;
9744 if( mseq2[k] == '-' )
9746 tmpscore += penalty;
9747 while( mseq2[++k] == '-' )
9748 tmpscore += amino_dis[(unsigned char)mseq1[k]][(unsigned char)mseq2[k]];
9750 if( k > len-2 ) break;
9754 score += (double)tmpscore;
/*
 * SEGMENTSIZE: a run longer than this is closed and the neighbouring
 * segments are flagged with skipForeward / skipBackward = 1.
 */
9762 #define SEGMENTSIZE 150
/*
 * searchAnchors: scans the alignment for windows whose summed column score
 * exceeds a threshold and records them in seg[].
 *
 * Visible steps:
 *  - stra is a static per-column buffer, re-allocated when the alignment
 *    is longer than alloclen; threshold is computed in the same place as
 *    (int)divThreshold / 100.0 * 600.0 * divWinSize;
 *  - stra[i] is the sum of n_dis over sequence pairs (k < kcyc, j > k) in
 *    column i, divided by nseq * (nseq-1) / 2;
 *  - score is a sliding sum of divWinSize consecutive stra values, updated
 *    incrementally for i = 1 .. len-divWinSize-1;
 *  - a run ends when score <= threshold or length > SEGMENTSIZE; the
 *    segment then gets center = ( start + end + divWinSize ) / 2 and
 *    score = cumscore;
 *  - ErrorExit() is called when value > MAXSEG - 3.
 *
 * The function keeps state in statics, so it is not reentrant. The
 * assignment of kcyc, the start/end bookkeeping and the return value are
 * hidden in this excerpt.
 */
9764 int searchAnchors( int nseq, char **seq, Segment *seg )
9772 static double *stra = NULL;
9773 static int alloclen = 0;
9775 static double threshold;
9777 len = strlen( seq[0] );
9778 if( alloclen < len )
9782 FreeDoubleVec( stra );
9786 threshold = (int)divThreshold / 100.0 * 600.0 * divWinSize;
9788 stra = AllocateDoubleVec( len );
9792 for( i=0; i<len; i++ )
9796 for( k=0; k<kcyc; k++ ) for( j=k+1; j<nseq; j++ )
9797 stra[i] += n_dis[(int)amino_n[(unsigned char)seq[k][i]]][(int)amino_n[(unsigned char)seq[j][i]]];
9798 stra[i] /= (double)nseq * ( nseq-1 ) / 2;
9801 (seg+0)->skipForeward = 0;
9802 (seg+1)->skipBackward = 0;
9806 length = 0; /* modified at 01/09/11 */
9807 for( j=0; j<divWinSize; j++ ) score += stra[j];
9808 for( i=1; i<len-divWinSize; i++ )
9810 score = score - stra[i-1] + stra[i+divWinSize-1];
9812 reporterr( "%d %f ? %f", i, score, threshold );
9813 if( score > threshold ) reporterr( "YES\n" );
9814 else reporterr( "NO\n" );
9817 if( score > threshold )
9829 if( score <= threshold || length > SEGMENTSIZE )
9834 seg->center = ( seg->start + seg->end + divWinSize ) / 2 ;
9835 seg->score = cumscore;
9837 reporterr( "%d-%d length = %d\n", seg->start, seg->end, length );
9839 if( length > SEGMENTSIZE )
9841 (seg+0)->skipForeward = 1;
9842 (seg+1)->skipBackward = 1;
9846 (seg+0)->skipForeward = 0;
9847 (seg+1)->skipBackward = 0;
9854 if( value > MAXSEG - 3 ) ErrorExit( "TOO MANY SEGMENTS!");
9861 seg->center = ( seg->start + seg->end + divWinSize ) / 2 ;
9862 seg->score = cumscore;
9864 reporterr( "%d-%d length = %d\n", seg->start, seg->end, length );
/*
 * dontcalcimportance_target: sets the importance of every LocalHom entry
 * of the first ntarget rows directly from its opt value, without building
 * a position-specific profile.
 *
 * nogaplen[i] = seqlen( seq[i] ) is computed for all nseq sequences. Every
 * localhom[i][j] is the head of a list linked through ->next. Two
 * assignments are visible for each node (opt / overlapaa and
 * opt / MIN( nogaplen[i], nogaplen[j] )); the lines selecting between them
 * are hidden, so they are presumably alternatives.
 *
 * eff is not referenced in the visible lines. The release of nogaplen is
 * not visible either (TODO confirm it is freed).
 */
9871 void dontcalcimportance_target( int nseq, double *eff, char **seq, LocalHom **localhom, int ntarget )
9877 nogaplen = AllocateIntVec( nseq );
9879 for( i=0; i<nseq; i++ )
9881 nogaplen[i] = seqlen( seq[i] );
9882 // reporterr( "nogaplen[%d] = %d\n", i, nogaplen[i] );
9885 for( i=0; i<ntarget; i++ )
9887 for( j=0; j<nseq; j++ )
9889 for( ptr=localhom[i]+j; ptr; ptr=ptr->next )
9891 // reporterr( "i,j=%d,%d,ptr=%p\n", i, j, ptr );
9893 ptr->importance = ptr->opt / ptr->overlapaa;
9894 // ptr->fimportance = (double)ptr->importance;
9896 ptr->importance = ptr->opt / MIN( nogaplen[i], nogaplen[j] );
/*
 * dontcalcimportance: sets the importance of every LocalHom entry of all
 * nseq x nseq lists directly from its opt value, without building a
 * position-specific profile.
 *
 * nogaplen[i] = seqlen( seq[i] ). Every localhom[i][j] is the head of a
 * list linked through ->next. Two assignments are visible for each node
 * (opt / overlapaa and opt / MIN( nogaplen[i], nogaplen[j] )); the lines
 * selecting between them are hidden, so they are presumably alternatives.
 *
 * eff is not referenced in the visible lines. The release of nogaplen is
 * not visible either (TODO confirm it is freed).
 */
9903 void dontcalcimportance( int nseq, double *eff, char **seq, LocalHom **localhom )
9909 nogaplen = AllocateIntVec( nseq );
9911 for( i=0; i<nseq; i++ )
9913 nogaplen[i] = seqlen( seq[i] );
9914 // reporterr( "nogaplen[%d] = %d\n", i, nogaplen[i] );
9917 for( i=0; i<nseq; i++ )
9919 for( j=0; j<nseq; j++ )
9921 for( ptr=localhom[i]+j; ptr; ptr=ptr->next )
9923 // reporterr( "i,j=%d,%d,ptr=%p\n", i, j, ptr );
9925 ptr->importance = ptr->opt / ptr->overlapaa;
9926 // ptr->fimportance = (double)ptr->importance;
9928 ptr->importance = ptr->opt / MIN( nogaplen[i], nogaplen[j] );
/*
 * dontcalcimportance_firstone: sets the importance of LocalHom entries for
 * rows i < nseq1 directly from opt.
 *
 * nogaplen[i] = seqlen( seq[i] ) is computed for all nseq sequences. For
 * each list node two assignments are visible: opt * 0.5 (marked by the
 * author as an arbitrary choice) and opt / MIN( nogaplen[i], nogaplen[j] );
 * the lines selecting between them are hidden, so they are presumably
 * alternatives.
 *
 * The definitions of nseq1 and of the column index j are hidden in this
 * excerpt. eff is not referenced in the visible lines.
 */
9936 void dontcalcimportance_firstone( int nseq, double *eff, char **seq, LocalHom **localhom )
9943 nogaplen = AllocateIntVec( nseq );
9944 for( i=0; i<nseq; i++ )
9946 nogaplen[i] = seqlen( seq[i] );
9947 // reporterr( "nogaplen[%d] = %d\n", i, nogaplen[i] );
9952 for( i=0; i<nseq1; i++ )
9956 for( ptr=localhom[i]+j; ptr; ptr=ptr->next )
9958 // reporterr( "i,j=%d,%d,ptr=%p\n", i, j, ptr );
9960 // ptr->importance = ptr->opt / ptr->overlapaa;
9961 ptr->importance = ptr->opt * 0.5; // arbitrary choice
9962 // ptr->fimportance = (double)ptr->importance;
9963 // reporterr( "i=%d, j=%d, importance = %f, opt=%f\n", i, j, ptr->fimportance, ptr->opt );
9965 ptr->importance = ptr->opt / MIN( nogaplen[i], nogaplen[j] );
/*
 * calcimportance_target: assigns an importance weight to the LocalHom
 * entries of the ntarget target rows, localhom[ti][j].
 *
 * Visible steps:
 *  1. nogaplen[i] = seqlen( seq[i] ); ieff[i] is eff[i], or 0.0 for a
 *     sequence without residues, and is then divided by the sum totaleff.
 *  2. For each target ti (sequence index i = targetmapr[ti]) the
 *     per-position profile importance[0..nlenmax-1] is zeroed and then
 *     accumulated over start1..end1 of every list node with opt != -1.
 *     Three different increments are visible; the lines selecting between
 *     them are hidden, so they are presumably alternatives.
 *  3. Each node's importance becomes tmpdouble * opt, where tmpdouble is
 *     the profile summed over the node's range and divided by len (the
 *     assignment of len is hidden). Two layouts of this step are visible:
 *     per node (10052-10065) and per list (10071-10086).
 *  4. For two targets ti < tj the lists localhom[ti][j] and localhom[tj][i]
 *     are walked in lock-step and both importances are replaced by their
 *     mean. For partners that are not targets ( targetmap[j] == -1 ) the
 *     importance is multiplied by 0.5.
 *
 * The printf()/reporterr() dumps are presumably debug output guarded by
 * hidden preprocessor lines. The initialisation of totaleff and the
 * release of nogaplen and ieff are not visible in this excerpt; only
 * free( importance ) is.
 */
9976 void calcimportance_target( int nseq, int ntarget, double *eff, char **seq, LocalHom **localhom, int *targetmap, int *targetmapr )
9978 int i, j, pos, len, ti, tj;
9979 double *importance; // static -> local, 2012/02/25
9981 double *ieff, totaleff; // may be moved into counteff_simple_double
9982 int *nogaplen; // static -> local, 2012/02/25
9985 importance = AllocateDoubleVec( nlenmax );
9986 nogaplen = AllocateIntVec( nseq );
9987 ieff = AllocateDoubleVec( nseq );
9990 for( i=0; i<nseq; i++ )
9992 nogaplen[i] = seqlen( seq[i] );
9993 // reporterr( "nogaplen[] = %d\n", nogaplen[i] );
9994 if( nogaplen[i] == 0 ) ieff[i] = 0.0;
9995 else ieff[i] = eff[i];
9996 totaleff += ieff[i];
// NOTE(review): if every sequence is empty, totaleff stays at its initial
// value and this division may divide by zero; confirm callers exclude it.
9998 for( i=0; i<nseq; i++ ) ieff[i] /= totaleff;
9999 for( i=0; i<nseq; i++ ) printf( "eff[%d] = %30.25f\n", i, ieff[i] );
10002 for( i=0; i<nseq; i++ ) for( j=0; j<nseq; j++ )
10004 tmpptr = localhom[i]+j;
10005 reporterr( "%d-%d\n", i, j );
10008 reporterr( "reg1=%d-%d, reg2=%d-%d, opt=%f\n", tmpptr->start1, tmpptr->end1, tmpptr->start2, tmpptr->end2, tmpptr->opt );
10009 } while( tmpptr=tmpptr->next );
10014 // for( i=0; i<nseq; i++ )
10015 for( ti=0; ti<ntarget; ti++ )
10017 i = targetmapr[ti];
10018 // reporterr( "i = %d\n", i );
10019 for( pos=0; pos<nlenmax; pos++ )
10020 importance[pos] = 0.0;
10021 for( j=0; j<nseq; j++ )
10023 if( i == j ) continue;
10024 // tmpptr = localhom[ti]+j;
10025 for( tmpptr = localhom[ti]+j; tmpptr; tmpptr=tmpptr->next )
10027 if( tmpptr->opt == -1 ) continue;
10028 for( pos=tmpptr->start1; pos<=tmpptr->end1; pos++ )
10031 // if( pos == 0 ) reporterr( "hit! i=%d, j=%d, pos=%d\n", i, j, pos );
10032 importance[pos] += ieff[j];
10034 importance[pos] += ieff[j] * tmpptr->opt / MIN( nogaplen[i], nogaplen[j] );
10035 importance[pos] += ieff[j] * tmpptr->opt / tmpptr->overlapaa;
10041 reporterr( "position specific importance of seq %d:\n", i );
10042 for( pos=0; pos<nlenmax; pos++ )
10043 reporterr( "%d: %f\n", pos, importance[pos] );
10046 for( j=0; j<nseq; j++ )
10048 // reporterr( "i=%d, j=%d\n", i, j );
10049 if( i == j ) continue;
10050 if( localhom[ti][j].opt == -1.0 ) continue;
10052 for( tmpptr = localhom[ti]+j; tmpptr; tmpptr=tmpptr->next )
10054 if( tmpptr->opt == -1.0 ) continue;
10057 for( pos=tmpptr->start1; pos<=tmpptr->end1; pos++ )
10059 tmpdouble += importance[pos];
10063 tmpdouble /= (double)len;
10065 tmpptr->importance = tmpdouble * tmpptr->opt;
10066 // tmpptr->fimportance = (double)tmpptr->importance;
10071 for( tmpptr = localhom[ti]+j; tmpptr; tmpptr=tmpptr->next )
10073 if( tmpptr->opt == -1.0 ) continue;
10074 for( pos=tmpptr->start1; pos<=tmpptr->end1; pos++ )
10076 tmpdouble += importance[pos];
10081 tmpdouble /= (double)len;
10083 for( tmpptr = localhom[ti]+j; tmpptr; tmpptr=tmpptr->next )
10085 if( tmpptr->opt == -1.0 ) continue;
10086 tmpptr->importance = tmpdouble * tmpptr->opt;
// NOTE(review): the two unnumbered lines after 10087 look like the tail of
// that comment in ISO-2022-JP with its escape bytes rendered as text; it
// appears to decode to Japanese "nakatta koto ni suru" (roughly "treat it
// as if it never happened"). Left untouched.
10087 // tmpptr->importance = tmpptr->opt / tmpptr->overlapaa; //
\e$B$J$+$C$?$3$H$K$9$k
\e(B
10091 // reporterr( "importance of match between %d - %d = %f\n", i, j, tmpdouble );
10096 printf( "before averaging:\n" );
10098 for( ti=0; ti<ntarget; ti++ ) for( j=0; j<nseq; j++ )
10100 i = targetmapr[ti];
10101 if( i == j ) continue;
10102 printf( "%d-%d\n", i, j );
10103 for( tmpptr = localhom[ti]+j; tmpptr; tmpptr=tmpptr->next )
10105 printf( "reg1=%d-%d, reg2=%d-%d, imp=%f -> %f opt=%30.25f\n", tmpptr->start1, tmpptr->end1, tmpptr->start2, tmpptr->end2, tmpptr->opt / tmpptr->overlapaa, eff[i] * tmpptr->importance, tmpptr->opt );
10111 // reporterr( "average?\n" );
10112 // for( i=0; i<nseq-1; i++ ) for( j=i+1; j<nseq; j++ )
10113 for( ti=0; ti<ntarget; ti++ ) for( tj=ti+1; tj<ntarget; tj++ )
10116 LocalHom *tmpptr1, *tmpptr2;
10118 i = targetmapr[ti];
10119 j = targetmapr[tj];
10120 // if( i == j ) continue;
10122 // reporterr( "i=%d, j=%d\n", i, j );
// Rows are indexed by target slot (ti, tj), columns by sequence index.
10124 tmpptr1 = localhom[ti]+j; tmpptr2 = localhom[tj]+i;
10125 for( ; tmpptr1 && tmpptr2; tmpptr1 = tmpptr1->next, tmpptr2 = tmpptr2->next)
10127 if( tmpptr1->opt == -1.0 || tmpptr2->opt == -1.0 )
10129 // reporterr( "WARNING: i=%d, j=%d, tmpptr1->opt=%f, tmpptr2->opt=%f\n", i, j, tmpptr1->opt, tmpptr2->opt );
10132 // reporterr( "## importances = %f, %f\n", tmpptr1->importance, tmpptr2->importance );
10133 imp = 0.5 * ( tmpptr1->importance + tmpptr2->importance );
10134 tmpptr1->importance = tmpptr2->importance = imp;
10135 // tmpptr1->fimportance = tmpptr2->fimportance = (double)imp;
10137 // reporterr( "## importance = %f\n", tmpptr1->importance );
10141 #if 0 // commented out, 2012/02/10
10142 if( ( tmpptr1 && !tmpptr2 ) || ( !tmpptr1 && tmpptr2 ) )
10144 reporterr( "ERROR: i=%d, j=%d\n", i, j );
10150 for( ti=0; ti<ntarget; ti++ ) for( j=0; j<nseq; j++ )
10155 i = targetmapr[ti];
10156 if( i == j ) continue;
// Only partners that are not targets themselves are handled here.
10157 if( targetmap[j] != -1 ) continue;
10159 // reporterr( "i=%d, j=%d\n", i, j );
10161 tmpptr1 = localhom[ti]+j;
10162 for( ; tmpptr1; tmpptr1 = tmpptr1->next )
10164 if( tmpptr1->opt == -1.0 )
10166 // reporterr( "WARNING: i=%d, j=%d, tmpptr1->opt=%f, tmpptr2->opt=%f\n", i, j, tmpptr1->opt, tmpptr2->opt );
10169 // reporterr( "## importances = %f, %f\n", tmpptr1->importance, tmpptr2->importance );
10170 imp = 0.5 * ( tmpptr1->importance );
10171 // imp = 1.0 * ( tmpptr1->importance );
10172 tmpptr1->importance = imp;
10173 // tmpptr1->fimportance = (double)imp;
10175 // reporterr( "## importance = %f\n", tmpptr1->importance );
10179 #if 0 // commented out, 2012/02/10
10180 if( ( tmpptr1 && !tmpptr2 ) || ( !tmpptr1 && tmpptr2 ) )
10182 reporterr( "ERROR: i=%d, j=%d\n", i, j );
10189 printf( "after averaging:\n" );
10191 for( ti=0; ti<ntarget; ti++ ) for( j=0; j<nseq; j++ )
10193 i = targetmapr[ti];
10194 if( i == j ) continue;
10195 printf( "%d-%d\n", i, j );
10196 for( tmpptr = localhom[ti]+j; tmpptr; tmpptr=tmpptr->next )
10199 printf( "%d-%d, reg1=%d-%d, reg2=%d-%d, imp=%f -> %f opt=%f\n", i, j, tmpptr->start1, tmpptr->end1, tmpptr->start2, tmpptr->end2, tmpptr->opt / tmpptr->overlapaa, tmpptr->importance, tmpptr->opt );
10204 free( importance );
/*
 * calcimportance_half: assigns importance weights to LocalHom entries that
 * are addressed with an offset column index.
 *
 * The list for the pair (i, j) is addressed as localhom[i]+j-i and the
 * reverse direction as localhom[j]+i-j. The conditions choosing between
 * the two forms are hidden; presumably the first is used for i < j and the
 * second for i > j, so that each pair is stored once (TODO confirm).
 *
 * Visible steps:
 *  1. nogaplen[i] = seqlen( seq[i] ); ieff[i] is eff[i], or 0.0 for a
 *     sequence without residues, and is then divided by the sum totaleff.
 *  2. For each sequence i the profile importance[0..nlenmax-1] is zeroed
 *     and accumulated over start1..end1 (forward lists) or start2..end2
 *     (reverse lists) of every node with opt != -1. Three different
 *     increments are visible per direction; they are presumably
 *     alternatives selected by hidden lines.
 *  3. A node's forward weight is stored in ->importance and its reverse
 *     weight in ->rimportance, each as tmpdouble * opt with tmpdouble the
 *     profile summed over the range and divided by len (the assignment of
 *     len is hidden).
 *  4. For i < j both fields of every node are replaced by their mean.
 *
 * The printf()/reporterr() dumps are presumably debug output guarded by
 * hidden preprocessor lines. The initialisation of totaleff and the
 * release of nogaplen and ieff are not visible in this excerpt.
 */
10209 void calcimportance_half( int nseq, double *eff, char **seq, LocalHom **localhom )
10211 int i, j, pos, len;
10212 double *importance; // static -> local, 2012/02/25
10214 double *ieff, totaleff; // may be moved into counteff_simple_double
10215 int *nogaplen; // static -> local, 2012/02/25
10218 importance = AllocateDoubleVec( nlenmax );
10219 nogaplen = AllocateIntVec( nseq );
10220 ieff = AllocateDoubleVec( nseq );
10223 for( i=0; i<nseq; i++ )
10225 nogaplen[i] = seqlen( seq[i] );
10226 // reporterr( "nogaplen[] = %d\n", nogaplen[i] );
10227 if( nogaplen[i] == 0 ) ieff[i] = 0.0;
10228 else ieff[i] = eff[i];
10229 totaleff += ieff[i];
10231 for( i=0; i<nseq; i++ ) ieff[i] /= totaleff;
10232 // for( i=0; i<nseq; i++ ) reporterr( "eff[%d] = %f\n", i, ieff[i] );
10235 for( i=0; i<nseq; i++ ) for( j=0; j<nseq; j++ )
10237 tmpptr = localhom[i]+j;
10238 reporterr( "%d-%d\n", i, j );
10241 reporterr( "reg1=%d-%d, reg2=%d-%d, opt=%f\n", tmpptr->start1, tmpptr->end1, tmpptr->start2, tmpptr->end2, tmpptr->opt );
10242 } while( tmpptr=tmpptr->next );
10247 for( i=0; i<nseq; i++ )
10249 // reporterr( "i = %d\n", i );
10250 for( pos=0; pos<nlenmax; pos++ )
10251 importance[pos] = 0.0;
10252 for( j=0; j<nseq; j++ )
10254 if( i == j ) continue;
10258 for( tmpptr = localhom[i]+j-i; tmpptr; tmpptr=tmpptr->next )
10260 if( tmpptr->opt == -1 ) continue;
10261 for( pos=tmpptr->start1; pos<=tmpptr->end1; pos++ )
10264 // if( pos == 0 ) reporterr( "hit! i=%d, j=%d, pos=%d\n", i, j, pos );
10265 importance[pos] += ieff[j];
10267 importance[pos] += ieff[j] * tmpptr->opt / MIN( nogaplen[i], nogaplen[j] );
10268 importance[pos] += ieff[j] * tmpptr->opt / tmpptr->overlapaa;
// Reverse direction: sequence i is the second member of the stored pair,
// so its residue range is start2..end2.
10275 for( tmpptr = localhom[j]+i-j; tmpptr; tmpptr=tmpptr->next )
10277 if( tmpptr->opt == -1 ) continue;
10278 for( pos=tmpptr->start2; pos<=tmpptr->end2; pos++ )
10281 // if( pos == 0 ) reporterr( "hit! i=%d, j=%d, pos=%d\n", i, j, pos );
10282 importance[pos] += ieff[j];
10284 importance[pos] += ieff[j] * tmpptr->opt / MIN( nogaplen[i], nogaplen[j] );
10285 importance[pos] += ieff[j] * tmpptr->opt / tmpptr->overlapaa;
10292 reporterr( "position specific importance of seq %d:\n", i );
10293 for( pos=0; pos<nlenmax; pos++ )
10294 reporterr( "%d: %f\n", pos, importance[pos] );
10297 for( j=0; j<nseq; j++ )
10299 // reporterr( "i=%d, j=%d\n", i, j );
10300 if( i == j ) continue;
10304 if( localhom[i][j-i].opt == -1.0 ) continue;
10306 for( tmpptr = localhom[i]+j-i; tmpptr; tmpptr=tmpptr->next )
10308 if( tmpptr->opt == -1.0 ) continue;
10311 for( pos=tmpptr->start1; pos<=tmpptr->end1; pos++ )
10313 tmpdouble += importance[pos];
10317 tmpdouble /= (double)len;
10319 tmpptr->importance = tmpdouble * tmpptr->opt;
10320 // tmpptr->fimportance = (double)tmpptr->importance;
10325 if( localhom[j][i-j].opt == -1.0 ) continue;
10327 for( tmpptr = localhom[j]+i-j; tmpptr; tmpptr=tmpptr->next )
10329 if( tmpptr->opt == -1.0 ) continue;
10332 for( pos=tmpptr->start2; pos<=tmpptr->end2; pos++ )
10334 tmpdouble += importance[pos];
10338 tmpdouble /= (double)len;
10340 tmpptr->rimportance = tmpdouble * tmpptr->opt;
10341 // tmpptr->fimportance = (double)tmpptr->importance;
10345 // reporterr( "importance of match between %d - %d = %f\n", i, j, tmpdouble );
10350 printf( "before averaging:\n" );
10352 for( i=0; i<nseq; i++ ) for( j=0; j<nseq; j++ )
10354 if( i == j ) continue;
10358 printf( "%d-%d\n", i, j );
10359 for( tmpptr = localhom[i]+j-i; tmpptr; tmpptr=tmpptr->next )
10361 printf( "reg1=%d-%d, reg2=%d-%d, imp=%f -> %f opt=%f\n", tmpptr->start1, tmpptr->end1, tmpptr->start2, tmpptr->end2, tmpptr->opt / tmpptr->overlapaa, eff[i] * tmpptr->importance, tmpptr->opt );
10366 printf( "%d-%d\n", i, j );
10367 for( tmpptr = localhom[j]+i-j; tmpptr; tmpptr=tmpptr->next )
10369 printf( "reg1=%d-%d, reg2=%d-%d, imp=%f -> %f opt=%f\n", tmpptr->start2, tmpptr->end2, tmpptr->start1, tmpptr->end1, tmpptr->opt / tmpptr->overlapaa, eff[i] * tmpptr->rimportance, tmpptr->opt );
10376 // reporterr( "average?\n" );
10377 for( i=0; i<nseq-1; i++ ) for( j=i+1; j<nseq; j++ )
10382 // reporterr( "i=%d, j=%d\n", i, j );
10384 tmpptr1 = localhom[i]+j-i;
10385 for( ; tmpptr1; tmpptr1 = tmpptr1->next)
10387 if( tmpptr1->opt == -1.0 )
10389 // reporterr( "WARNING: i=%d, j=%d, tmpptr1->opt=%f, tmpptr2->opt=%f\n", i, j, tmpptr1->opt, tmpptr2->opt );
10392 // reporterr( "## importances = %f, %f\n", tmpptr1->importance, tmpptr2->importance );
10393 imp = 0.5 * ( tmpptr1->importance + tmpptr1->rimportance );
10394 tmpptr1->importance = tmpptr1->rimportance = imp;
10395 // tmpptr1->fimportance = tmpptr2->fimportance = (double)imp;
10397 // reporterr( "## importance = %f\n", tmpptr1->importance );
10401 #if 0 // commented out, 2012/02/10
10402 if( ( tmpptr1 && !tmpptr2 ) || ( !tmpptr1 && tmpptr2 ) )
10404 reporterr( "ERROR: i=%d, j=%d\n", i, j );
10411 printf( "after averaging:\n" );
10413 for( i=0; i<nseq; i++ ) for( j=0; j<nseq; j++ )
10415 if( i < j ) for( tmpptr = localhom[i]+j-i; tmpptr; tmpptr=tmpptr->next )
10417 if( tmpptr->end1 && tmpptr->start1 != -1 )
10418 printf( "%d-%d, reg1=%d-%d, reg2=%d-%d, imp=%f -> %f opt=%f\n", i, j, tmpptr->start1, tmpptr->end1, tmpptr->start2, tmpptr->end2, tmpptr->opt / tmpptr->overlapaa, tmpptr->importance, tmpptr->opt );
10420 else for( tmpptr = localhom[j]+i-j; tmpptr; tmpptr=tmpptr->next )
10422 if( tmpptr->end2 && tmpptr->start2 != -1 )
10423 printf( "%d-%d, reg1=%d-%d, reg2=%d-%d, imp=%f -> %f opt=%f\n", i, j, tmpptr->start2, tmpptr->end2, tmpptr->start1, tmpptr->end1, tmpptr->opt / tmpptr->overlapaa, tmpptr->importance, tmpptr->opt );
10428 free( importance );
/*
 * calcimportance: assigns an importance weight to every LocalHom entry of
 * the full nseq x nseq table localhom[i][j].
 *
 * Visible steps:
 *  1. nogaplen[i] = seqlen( seq[i] ); ieff[i] is eff[i], or 0.0 for a
 *     sequence without residues, and is then divided by the sum totaleff.
 *  2. For each sequence i the profile importance[0..nlenmax-1] is zeroed
 *     and accumulated over start1..end1 of every node with opt != -1 in
 *     the lists localhom[i][j], j != i. Three different increments are
 *     visible; they are presumably alternatives selected by hidden lines.
 *  3. Each node's importance becomes tmpdouble * opt, where tmpdouble is
 *     the profile summed over the node's range and divided by len (the
 *     assignment of len is hidden). Two layouts of this step are visible:
 *     per node (10507-10520) and per list (10526-10541).
 *  4. For i < j the lists localhom[i][j] and localhom[j][i] are walked in
 *     lock-step and both importances are replaced by their mean; the walk
 *     stops at the end of the shorter list.
 *
 * The printf()/reporterr() dumps are presumably debug output guarded by
 * hidden preprocessor lines. The initialisation of totaleff and the
 * release of nogaplen and ieff are not visible in this excerpt; only
 * free( importance ) is.
 */
10433 void calcimportance( int nseq, double *eff, char **seq, LocalHom **localhom )
10435 int i, j, pos, len;
10436 double *importance; // static -> local, 2012/02/25
10438 double *ieff, totaleff; // may be moved into counteff_simple_double
10439 int *nogaplen; // static -> local, 2012/02/25
10442 importance = AllocateDoubleVec( nlenmax );
10443 nogaplen = AllocateIntVec( nseq );
10444 ieff = AllocateDoubleVec( nseq );
10447 for( i=0; i<nseq; i++ )
10449 nogaplen[i] = seqlen( seq[i] );
10450 // reporterr( "nogaplen[] = %d\n", nogaplen[i] );
10451 if( nogaplen[i] == 0 ) ieff[i] = 0.0;
10452 else ieff[i] = eff[i];
10453 totaleff += ieff[i];
10455 for( i=0; i<nseq; i++ ) ieff[i] /= totaleff;
10456 // for( i=0; i<nseq; i++ ) reporterr( "eff[%d] = %f\n", i, ieff[i] );
10459 for( i=0; i<nseq; i++ ) for( j=0; j<nseq; j++ )
10461 tmpptr = localhom[i]+j;
10462 reporterr( "%d-%d\n", i, j );
10465 reporterr( "reg1=%d-%d, reg2=%d-%d, opt=%f\n", tmpptr->start1, tmpptr->end1, tmpptr->start2, tmpptr->end2, tmpptr->opt );
10466 } while( tmpptr=tmpptr->next );
10471 for( i=0; i<nseq; i++ )
10473 // reporterr( "i = %d\n", i );
10474 for( pos=0; pos<nlenmax; pos++ )
10475 importance[pos] = 0.0;
10476 for( j=0; j<nseq; j++ )
10478 if( i == j ) continue;
// This assignment is repeated by the for-initialiser on the next line.
10479 tmpptr = localhom[i]+j;
10480 for( tmpptr = localhom[i]+j; tmpptr; tmpptr=tmpptr->next )
10482 if( tmpptr->opt == -1 ) continue;
10483 for( pos=tmpptr->start1; pos<=tmpptr->end1; pos++ )
10486 // if( pos == 0 ) reporterr( "hit! i=%d, j=%d, pos=%d\n", i, j, pos );
10487 importance[pos] += ieff[j];
10489 importance[pos] += ieff[j] * tmpptr->opt / MIN( nogaplen[i], nogaplen[j] );
10490 importance[pos] += ieff[j] * tmpptr->opt / tmpptr->overlapaa;
10496 reporterr( "position specific importance of seq %d:\n", i );
10497 for( pos=0; pos<nlenmax; pos++ )
10498 reporterr( "%d: %f\n", pos, importance[pos] );
10501 for( j=0; j<nseq; j++ )
10503 // reporterr( "i=%d, j=%d\n", i, j );
10504 if( i == j ) continue;
10505 if( localhom[i][j].opt == -1.0 ) continue;
10507 for( tmpptr = localhom[i]+j; tmpptr; tmpptr=tmpptr->next )
10509 if( tmpptr->opt == -1.0 ) continue;
10512 for( pos=tmpptr->start1; pos<=tmpptr->end1; pos++ )
10514 tmpdouble += importance[pos];
10518 tmpdouble /= (double)len;
10520 tmpptr->importance = tmpdouble * tmpptr->opt;
10521 // tmpptr->fimportance = (double)tmpptr->importance;
10526 for( tmpptr = localhom[i]+j; tmpptr; tmpptr=tmpptr->next )
10528 if( tmpptr->opt == -1.0 ) continue;
10529 for( pos=tmpptr->start1; pos<=tmpptr->end1; pos++ )
10531 tmpdouble += importance[pos];
10536 tmpdouble /= (double)len;
10538 for( tmpptr = localhom[i]+j; tmpptr; tmpptr=tmpptr->next )
10540 if( tmpptr->opt == -1.0 ) continue;
10541 tmpptr->importance = tmpdouble * tmpptr->opt;
// NOTE(review): the two unnumbered lines after 10542 look like the tail of
// that comment in ISO-2022-JP with its escape bytes rendered as text; it
// appears to decode to Japanese "nakatta koto ni suru" (roughly "treat it
// as if it never happened"). Left untouched.
10542 // tmpptr->importance = tmpptr->opt / tmpptr->overlapaa; //
\e$B$J$+$C$?$3$H$K$9$k
\e(B
10546 // reporterr( "importance of match between %d - %d = %f\n", i, j, tmpdouble );
10551 printf( "before averaging:\n" );
10553 for( i=0; i<nseq; i++ ) for( j=0; j<nseq; j++ )
10555 if( i == j ) continue;
10556 printf( "%d-%d\n", i, j );
10557 for( tmpptr = localhom[i]+j; tmpptr; tmpptr=tmpptr->next )
10559 printf( "reg1=%d-%d, reg2=%d-%d, imp=%f -> %f opt=%f\n", tmpptr->start1, tmpptr->end1, tmpptr->start2, tmpptr->end2, tmpptr->opt / tmpptr->overlapaa, eff[i] * tmpptr->importance, tmpptr->opt );
10565 // reporterr( "average?\n" );
10566 for( i=0; i<nseq-1; i++ ) for( j=i+1; j<nseq; j++ )
10569 LocalHom *tmpptr1, *tmpptr2;
10571 // reporterr( "i=%d, j=%d\n", i, j );
10573 tmpptr1 = localhom[i]+j; tmpptr2 = localhom[j]+i;
10574 for( ; tmpptr1 && tmpptr2; tmpptr1 = tmpptr1->next, tmpptr2 = tmpptr2->next)
10576 if( tmpptr1->opt == -1.0 || tmpptr2->opt == -1.0 )
10578 // reporterr( "WARNING: i=%d, j=%d, tmpptr1->opt=%f, tmpptr2->opt=%f\n", i, j, tmpptr1->opt, tmpptr2->opt );
10581 // reporterr( "## importances = %f, %f\n", tmpptr1->importance, tmpptr2->importance );
10582 imp = 0.5 * ( tmpptr1->importance + tmpptr2->importance );
10583 tmpptr1->importance = tmpptr2->importance = imp;
10584 // tmpptr1->fimportance = tmpptr2->fimportance = (double)imp;
10586 // reporterr( "## importance = %f\n", tmpptr1->importance );
10590 #if 0 // commented out, 2012/02/10
10591 if( ( tmpptr1 && !tmpptr2 ) || ( !tmpptr1 && tmpptr2 ) )
10593 reporterr( "ERROR: i=%d, j=%d\n", i, j );
10600 printf( "after averaging:\n" );
10602 for( i=0; i<nseq; i++ ) for( j=0; j<nseq; j++ )
10604 for( tmpptr = localhom[i]+j; tmpptr; tmpptr=tmpptr->next )
10606 if( tmpptr->end1 && tmpptr->start1 != -1 )
10607 printf( "%d-%d, reg1=%d-%d, reg2=%d-%d, imp=%f -> %f opt=%f\n", i, j, tmpptr->start1, tmpptr->end1, tmpptr->start2, tmpptr->end2, tmpptr->opt / tmpptr->overlapaa, tmpptr->importance, tmpptr->opt );
10612 free( importance );
/*
 * addlocalhom2_e: records one extended local-homology region in the list
 * whose head is lh.
 *
 * pt is the node to append after. When pt differs from the head lh, a new
 * zero-initialised node is calloc'ed and linked as pt->next; otherwise the
 * head node itself is reused. The node then receives extended = interm and
 * overlapaa = overlp. The assignments of the start/end coordinates and of
 * opt, and the advance of pt to the new node, are hidden in this excerpt.
 *
 * No NULL check on calloc is visible. The reporterr() lines are presumably
 * debug output.
 */
10620 static void addlocalhom2_e( LocalHom *pt, LocalHom *lh, int sti, int stj, int eni, int enj, double opt, int overlp, int interm )
10622 // something is wrong somewhere (original author's remark)
10623 if( pt != lh ) // advance to a new node
10625 pt->next = (LocalHom *)calloc( 1, sizeof( LocalHom ) );
10630 else // use the head node as is
10635 // reporterr( "in addlocalhom2_e, pt = %p, pt->next = %p, interm=%d, sti-eni-stj-enj=%d %d %d %d\n", pt, pt->next, interm, sti, eni, stj, enj );
10642 pt->extended = interm;
10643 pt->overlapaa = overlp;
10645 reporterr( "i: %d-%d\n", sti, eni );
10646 reporterr( "j: %d-%d\n", stj, enj );
10647 reporterr( "opt=%f\n", opt );
10648 reporterr( "overlp=%d\n", overlp );
// extendlocalhom2: derive extra local-homology segments for each pair (i,j), i<j, by
// going through a third sequence k.  For every k that passes the distance filter, the
// i-k and j-k LocalHom lists are written into the scratch vectors ini[] and inj[]
// (-1 = not covered).  A run ends where either vector is -1 or stops advancing by exactly
// one; the run is then passed to addlocalhom2_e() for localhom[i][j] and, mirrored, for
// localhom[j][i].  Added segments get opt = MIN(localhom[i][k].opt, localhom[j][k].opt),
// overlapaa = overlp and extended = k.
// NOTE(review): many lines (braces, some assignments, any preprocessor directives) are
// absent from this listing; the comments below describe only what is visible.
10652 void extendlocalhom2( int nseq, LocalHom **localhom, double **dist )
10656 int pi, pj, pk, len;
10657 int status, sti, stj;
10660 static int *ini = NULL; // scratch vectors; static, so they persist across calls
10661 static int *inj = NULL;
10664 sti = 0; // by D.Mathog, a guess
10665 stj = 0; // by D.Mathog, a guess
10669 ini = AllocateIntVec( nlenmax+1 );
10670 inj = AllocateIntVec( nlenmax+1 );
10674 for( i=0; i<nseq-1; i++ )
10676 for( j=i+1; j<nseq; j++ )
// NOTE(review): sai and numint are not declared in the visible part of this function;
// this random-sampling form of the k loop may belong to a disabled branch -- confirm.
10679 for( k=0; k<nseq; k++ ) sai[k] = 0;
10683 k = (int)( rnd() * nseq );
10684 if( k == i || k == j ) continue; // it would be better to also skip the ones already done
10685 if( numint-- == 0 ) break;
10686 if( sai[k] ) continue;
10689 for( k=0; k<nseq; k++ )
10692 // reporterr( "i=%d, j=%d, k=%d, dists = %f,%f,%f thrinter=%f\n", i, j, k, dist[i][j], dist[MIN(i,k)][MAX(i,k)], dist[MIN(j,k)][MAX(j,k)], thrinter );
10693 if( k == i || k == j ) continue; // it would be better to also skip the ones already done
// Skip k when either of its distances to i or j exceeds dist[i][j] * thrinter
// (dist is indexed with the smaller sequence number first).
10694 if( dist[MIN(i,k)][MAX(i,k)] > dist[i][j] * thrinter || dist[MIN(j,k)][MAX(j,k)] > dist[i][j] * thrinter ) continue;
// Reset both scratch vectors to -1 over all nlenmax+1 entries.
10695 ipt = ini; co = nlenmax+1;
10696 while( co-- ) *ipt++ = -1;
10697 ipt = inj; co = nlenmax+1;
10698 while( co-- ) *ipt++ = -1;
// Walk the i-k list; stop at the first node whose ->extended is > -1.
10702 for( pt=localhom[i]+k; pt; pt=pt->next )
10704 // reporterr( "i=%d,k=%d,st1:st2=%d:%d,pt=%p,extended=%p\n", i, k, pt->start1, pt->start2, pt, pt->extended );
10705 if( pt->opt == -1 )
10707 reporterr( "opt kainaide tbfast.c = %f\n", pt->opt );
10709 if( pt->extended > -1 ) break;
10712 len = pt->end1 - pt->start1 + 1;
// Write len consecutive values of pi through ipt (the initial values of ipt and pi are
// set on lines that are not visible here).
10714 while( len-- ) *ipt++ = pi++;
// Same walk for the j-k list.
10719 for( pt=localhom[j]+k; pt; pt=pt->next )
10721 if( pt->opt == -1 )
10723 reporterr( "opt kainaide tbfast.c = %f\n", pt->opt );
10725 if( pt->extended > -1 ) break;
10728 len = pt->end1 - pt->start1 + 1;
10730 while( len-- ) *ipt++ = pj++;
// Dumps of inj[] and ini[]; both loops also count positions covered in both vectors.
10734 reporterr( "i=%d,j=%d,k=%d\n", i, j, k );
10736 for( pk = 0; pk < nlenmax; pk++ )
10738 if( ini[pk] != -1 && inj[pk] != -1 ) overlp++;
10739 reporterr( " %d", inj[pk] );
10743 reporterr( "i=%d,j=%d,k=%d\n", i, j, k );
10745 for( pk = 0; pk < nlenmax; pk++ )
10747 if( ini[pk] != -1 && inj[pk] != -1 ) overlp++;
10748 reporterr( " %d", ini[pk] );
// overlp = number of positions below plim that are covered in both ini[] and inj[].
10754 for( pk = 0; pk < plim; pk++ )
10755 if( ini[pk] != -1 && inj[pk] != -1 ) overlp++;
10760 for( pk=0; pk<plim; pk++ )
10762 // reporterr( "%d %d: %d-%d\n", i, j, ini[pk], inj[pk] );
// The run ends when either vector is uncovered here or did not advance by exactly one.
10765 if( ini[pk] == -1 || inj[pk] == -1 || ini[pk-1] != ini[pk] - 1 || inj[pk-1] != inj[pk] - 1 ) // final processing of the run
10768 // reporterr( "end here!\n" );
10770 pt = localhom[i][j].last;
10771 // reporterr( "in ex (ba), pt = %p, nokori=%d, i,j,k=%d,%d,%d\n", pt, localhom[i][j].nokori, i, j, k );
10772 addlocalhom2_e( pt, localhom[i]+j, sti, stj, ini[pk-1], inj[pk-1], MIN( localhom[i][k].opt, localhom[j][k].opt ) * 1.0, overlp, k );
10773 // reporterr( "in ex, pt = %p, pt->next = %p, pt->next->next = %p\n", pt, pt->next, pt->next->next );
// Mirror entry for (j,i): start and end arguments are swapped.
10775 pt = localhom[j][i].last;
10776 // reporterr( "in ex (ba), pt = %p, pt->next = %p\n", pt, pt->next );
10777 // reporterr( "in ex (ba), pt = %p, pt->next = %p, k=%d\n", pt, pt->next, k );
10778 addlocalhom2_e( pt, localhom[j]+i, stj, sti, inj[pk-1], ini[pk-1], MIN( localhom[i][k].opt, localhom[j][k].opt ) * 1.0, overlp, k );
10779 // reporterr( "in ex, pt = %p, pt->next = %p, pt->next->next = %p\n", pt, pt->next, pt->next->next );
10782 if( !status ) // this is not an else.
10784 if( ini[pk] == -1 || inj[pk] == -1 ) continue;
10787 // reporterr( "start here!\n" );
10791 // if( status ) reporterr( "end here\n" );
10794 // fprintf( hat3p, "%d %d %d %6.3f %d %d %d %d %p\n", i, j, tmpptr->overlapaa, tmpptr->opt, tmpptr->start1, tmpptr->end1, tmpptr->start2, tmpptr->end2, tmpptr->next );
// NOTE(review): localhomtable, tmpptr and hat3p are not declared in the visible part of
// this function; this dump loop may belong to a disabled branch -- confirm.
10797 for( pt=localhomtable[i]+j; pt; pt=pt->next )
10799 if( tmpptr->opt == -1.0 ) continue;
10800 fprintf( hat3p, "%d %d %d %6.3f %d %d %d %d %p\n", i, j, tmpptr->overlapaa, tmpptr->opt, tmpptr->start1, tmpptr->end1, tmpptr->start2, tmpptr->end2, tmpptr->next );
// makelocal: scan the aligned pair s1/s2 column by column, accumulating
// amino_dis[][] - thr into score, and track the best-scoring stretch in
// maxstart/maxend.  Columns where either string has '-' are handled separately and
// skipped with the while loop below.  Returns maxstart.
// NOTE(review): the loop header, the score reset logic and whatever is done with
// maxend before the return are absent from this listing.
10807 int makelocal( char *s1, char *s2, int thr )
10809 int start, maxstart, maxend;
10817 maxend = 0; // by D.Mathog, a guess
10819 // reporterr( "thr = %d, \ns1 = %s\ns2 = %s\n", thr, s1, s2 );
10826 // reporterr( "*pt1 = %c*pt2 = %c\n", *pt1, *pt2 );
10827 if( *pt1 == '-' || *pt2 == '-' )
10829 // reporterr( "penalty = %d\n", penalty );
10831 while( *pt1 == '-' || *pt2 == '-' )
// Score of this column relative to the threshold; both pointers move on one column.
10838 score += ( amino_dis[(unsigned char)*pt1++][(unsigned char)*pt2++] - thr );
10839 // score += ( amino_dis[(int)*pt1++][(int)*pt2++] );
10840 if( score > maxscore )
10842 // reporterr( "score = %f\n", score );
10845 // reporterr( "## max! maxstart = %d, start = %d\n", maxstart, start );
10849 // reporterr( "## resetting, start = %d, maxstart = %d\n", start, maxstart );
10850 if( start == maxstart )
10853 // reporterr( "maxend = %d\n", maxend );
// If the current stretch is the best one, its end is the last column consumed
// (pt1 has already been advanced past it).
10859 if( start == maxstart )
10860 maxend = pt1 - s1 - 1;
10862 // reporterr( "maxstart = %d, maxend = %d, maxscore = %f\n", maxstart, maxend, maxscore );
10865 return( maxstart );
// resetlocalhom: visit every node of every LocalHom list lh[i]+j for all pairs i<j.
// NOTE(review): the per-node action is absent from this listing.
10868 void resetlocalhom( int nseq, LocalHom **lh )
10873 for( i=0; i<nseq-1; i++ ) for( j=i+1; j<nseq; j++ )
10875 for( pt=lh[i]+j; pt; pt=pt->next )
// gapireru: build res by walking the template gt one character at a time until its
// terminating NUL; the visible branch appends the gap symbol *newgapstr to res.
// NOTE(review): the branch condition and the use of ori are absent from this listing;
// presumably non-gap template positions copy the next character of ori -- confirm.
10881 void gapireru( char *res, char *ori, char *gt )
10884 while( (g = *gt++) )
10888 *res++ = *newgapstr;
// getkyokaigap: copy the character at column pos of each of the n sequences in s into
// g, one character per sequence, in order.
// NOTE(review): whether g is NUL-terminated afterwards is not visible in this listing.
10898 void getkyokaigap( char *g, char **s, int pos, int n )
10901 // while( n-- ) *g++ = '-';
10902 while( n-- ) *g++ = (*s++)[pos];
10904 // reporterr( "bk = %s\n", bk );
// new_OpeningGapCount: for each column, add the weight eff[j] of every sequence j for
// which the test (!gb * gc) holds, into ogcp[].  gc is the gap flag of the current
// column; it is seeded from sgappat[j] before the first column.
// The accumulation appears twice (index form, then pointer form).
// NOTE(review): the gb assignments are absent from this listing; gb presumably holds the
// previous column's gap flag, which would make this an opening-gap count.  The two forms
// are presumably alternatives under a preprocessor switch -- confirm.
10907 void new_OpeningGapCount( double *ogcp, int clus, char **seq, double *eff, int len, char *sgappat )
10914 for( i=0; i<len+1; i++ ) ogcp[i] = 0.0; // clears len+1 entries
10915 for( j=0; j<clus; j++ )
10917 feff = (double)eff[j];
10918 gc = ( sgappat[j] == '-' );
10919 for( i=0; i<len; i++ )
10922 gc = ( seq[j][i] == '-' );
10923 if( !gb * gc ) ogcp[i] += feff; // (!gb) * gc: nonzero only when gb == 0 and gc != 0
10936 while( i-- ) *fpt++ = 0.0;
10937 for( j=0; j<clus; j++ )
10939 feff = (double)eff[j];
10942 gc = ( sgappat[j] == '-' );
10947 gc = ( *spt++ == '-' );
10949 if( !gb * gc ) *fpt += feff; // (!gb) * gc: nonzero only when gb == 0 and gc != 0
// new_OpeningGapCount_zure: same accumulation as the test (!gb * gc) over the columns of
// each sequence, seeded from sgappat[j], plus one further test after the column loop in
// which gc is taken from egappat[j].
// The accumulation appears twice (index form, then pointer form).
// NOTE(review): the gb assignments are absent from this listing; gb presumably holds the
// previous gap flag.  The two forms are presumably preprocessor alternatives -- confirm.
10956 void new_OpeningGapCount_zure( double *ogcp, int clus, char **seq, double *eff, int len, char *sgappat, char *egappat )
10963 for( i=0; i<len+1; i++ ) ogcp[i] = 0.0; // clears len+1 entries
10964 for( j=0; j<clus; j++ )
10966 feff = (double)eff[j];
10967 gc = ( sgappat[j] == '-' );
10968 for( i=0; i<len; i++ )
10971 gc = ( seq[j][i] == '-' );
10972 if( !gb * gc ) ogcp[i] += feff; // (!gb) * gc: nonzero only when gb == 0 and gc != 0
// After the column loop: one more test using the end pattern.
10976 gc = ( egappat[j] == '-' );
10977 if( !gb * gc ) ogcp[i] += feff;
10990 while( i-- ) *fpt++ = 0.0;
10991 for( j=0; j<clus; j++ )
10993 feff = (double)eff[j];
10996 gc = ( sgappat[j] == '-' );
11001 gc = ( *spt++ == '-' );
11003 if( !gb * gc ) *fpt += feff; // (!gb) * gc: nonzero only when gb == 0 and gc != 0
11009 gc = ( egappat[j] == '-' );
11010 if( !gb * gc ) *fpt += feff;
// new_FinalGapCount_zure: for each column i, add eff[j] into fgcp[i] for every sequence
// j for which the test (gb * !gc) holds; gc is the gap flag of the current column and is
// seeded from sgappat[j].  After the column loop gc is taken from egappat[j] and the
// same test adds into fgcp[len].
// The accumulation appears twice (index form, then pointer form).
// NOTE(review): the gb assignments are absent from this listing; gb presumably holds the
// previous gap flag, which would make this a closing-gap count.  The two forms are
// presumably preprocessor alternatives -- confirm.
11016 void new_FinalGapCount_zure( double *fgcp, int clus, char **seq, double *eff, int len, char *sgappat, char *egappat )
11022 for( i=0; i<len+1; i++ ) fgcp[i] = 0.0; // clears len+1 entries
11023 for( j=0; j<clus; j++ )
11025 feff = (double)eff[j];
11026 gc = ( sgappat[j] == '-' );
11027 for( i=0; i<len; i++ )
11030 gc = ( seq[j][i] == '-' );
11032 if( gb * !gc ) fgcp[i] += feff; // gb * (!gc): nonzero only when gb != 0 and gc == 0
11037 gc = ( egappat[j] == '-' );
11039 if( gb * !gc ) fgcp[len] += feff;
11053 while( i-- ) *fpt++ = 0.0;
11054 for( j=0; j<clus; j++ )
11056 feff = (double)eff[j];
11059 gc = ( sgappat[j] == '-' );
11064 gc = ( *spt++ == '-' );
11066 if( gb * !gc ) *fpt += feff; // gb * (!gc): nonzero only when gb != 0 and gc == 0
11072 gc = ( egappat[j] == '-' );
11074 if( gb * !gc ) *fpt += feff;
// new_FinalGapCount: like the test (gb * !gc) of the _zure variant, but results are
// stored one column earlier: gc is seeded from seq[j][0], columns 1..len-1 add into
// fgcp[i-1], and the final test with gc taken from egappat[j] adds into fgcp[len-1].
// The accumulation appears twice (index form, then pointer form).
// NOTE(review): the gb assignments are absent from this listing; gb presumably holds the
// previous gap flag.  The two forms are presumably preprocessor alternatives -- confirm.
11080 void new_FinalGapCount( double *fgcp, int clus, char **seq, double *eff, int len, char *egappat )
11086 for( i=0; i<len; i++ ) fgcp[i] = 0.0; // clears len entries
11087 for( j=0; j<clus; j++ )
11089 feff = (double)eff[j];
11090 gc = ( seq[j][0] == '-' );
11091 for( i=1; i<len; i++ )
11094 gc = ( seq[j][i] == '-' );
11096 if( gb * !gc ) fgcp[i-1] += feff; // gb * (!gc): nonzero only when gb != 0 and gc == 0
11101 gc = ( egappat[j] == '-' );
11103 if( gb * !gc ) fgcp[len-1] += feff;
11117 while( i-- ) *fpt++ = 0.0;
11118 for( j=0; j<clus; j++ )
11120 feff = (double)eff[j];
11123 gc = ( *spt == '-' );
11128 gc = ( *++spt == '-' ); // pre-increment: reads the next column
11130 if( gb * !gc ) *fpt += feff; // gb * (!gc): nonzero only when gb != 0 and gc == 0
11136 gc = ( egappat[j] == '-' );
11138 if( gb * !gc ) *fpt += feff;
// st_OpeningGapAdd: update variant of the opening-gap count.  The last member of the
// cluster (index clus-1) is treated as the new one: neweff is its weight and
// orieff = 1.0 - neweff.  The visible accumulation adds feff into *fpt when the test
// (!gb * gc) holds; afterwards ogcp[] and all sequences are printed through reporterr.
// NOTE(review): how orieff is applied, which j values are processed (the full j loop is
// commented out) and the gb assignments are absent from this listing.
11145 void st_OpeningGapAdd( double *ogcp, int clus, char **seq, double *eff, int len )
11150 int newmem = clus-1;
11151 double neweff = eff[newmem];
11152 double orieff = 1.0 - neweff;
11157 // while( i-- ) *fpt++ = 0.0;
11160 // for( j=0; j<clus; j++ )
11162 feff = (double)eff[j];
11170 gc = ( *spt++ == '-' );
11172 if( !gb * gc ) *fpt += feff; // (!gb) * gc: nonzero only when gb == 0 and gc != 0
11179 for( i=0; i<len; i++ )
11180 reporterr( "ogcp[%d]=%f\n", i, ogcp[i] );
11181 for( i=0; i<clus; i++ )
11182 reporterr( "%s\n", seq[i] );
// st_OpeningGapCount: zero the output through fpt, then for each sequence j add its
// weight eff[j] into *fpt wherever the test (!gb * gc) holds, gc being the gap flag of
// the character read through spt.
// NOTE(review): the pointer set-up, the column loop and the gb assignments are absent
// from this listing; gb presumably holds the previous column's gap flag.
11187 void st_OpeningGapCount( double *ogcp, int clus, char **seq, double *eff, int len )
11196 while( i-- ) *fpt++ = 0.0;
11197 for( j=0; j<clus; j++ )
11199 feff = (double)eff[j];
11208 gc = ( *spt++ == '-' );
11210 if( !gb * gc ) *fpt += feff; // (!gb) * gc: nonzero only when gb == 0 and gc != 0
// st_FinalGapCount_zure: zero the output through fpt, then for each sequence j add its
// weight eff[j] into *fpt wherever the test (gb * !gc) holds.  gc is seeded from the
// first character read through spt and then taken from each following one (*++spt).
// The test is applied once more after the column loop.
// NOTE(review): the pointer set-up, the column loop header and the gb/gc assignments
// before the last test are absent from this listing.
11218 void st_FinalGapCount_zure( double *fgcp, int clus, char **seq, double *eff, int len )
11227 while( i-- ) *fpt++ = 0.0;
11228 for( j=0; j<clus; j++ )
11230 feff = (double)eff[j];
11233 gc = ( *spt == '-' );
11235 // for( i=1; i<len; i++ )
11239 gc = ( *++spt == '-' ); // pre-increment: reads the next column
11241 if( gb * !gc ) *fpt += feff; // gb * (!gc): nonzero only when gb != 0 and gc == 0
11250 if( gb * !gc ) *fpt += feff;
// st_FinalGapAdd: update variant of the final-gap count.  The last member of the
// cluster (index clus-1) is treated as the new one: neweff is its weight and
// orieff = 1.0 - neweff.  The visible accumulation adds feff into *fpt when the test
// (gb * !gc) holds, once per column and once more after the column loop.
// NOTE(review): how orieff is applied, which j values are processed (the full j loop is
// commented out) and the gb assignments are absent from this listing.
11256 void st_FinalGapAdd( double *fgcp, int clus, char **seq, double *eff, int len )
11261 int newmem = clus-1;
11262 double neweff = eff[newmem];
11263 double orieff = 1.0 - neweff;
11268 // while( i-- ) *fpt++ = 0.0;
11271 // for( j=0; j<clus; j++ )
11273 feff = (double)eff[j];
11276 gc = ( *spt == '-' );
11278 // for( i=1; i<len; i++ )
11283 gc = ( *++spt == '-' ); // pre-increment: reads the next column
11285 if( gb * !gc ) *fpt += feff; // gb * (!gc): nonzero only when gb != 0 and gc == 0
11295 if( gb * !gc ) *fpt += feff;
// st_FinalGapCount: zero the output through fpt, then for each sequence j add its
// weight eff[j] into *fpt wherever the test (gb * !gc) holds.  gc is seeded from the
// first character read through spt and then taken from each following one (*++spt).
// The test is applied once more after the column loop.
// NOTE(review): the pointer set-up, the column loop header and the gb/gc assignments
// before the last test are absent from this listing.
11301 void st_FinalGapCount( double *fgcp, int clus, char **seq, double *eff, int len )
11310 while( i-- ) *fpt++ = 0.0;
11311 for( j=0; j<clus; j++ )
11313 feff = (double)eff[j];
11316 gc = ( *spt == '-' );
11318 // for( i=1; i<len; i++ )
11322 gc = ( *++spt == '-' ); // pre-increment: reads the next column
11324 if( gb * !gc ) *fpt += feff; // gb * (!gc): nonzero only when gb != 0 and gc == 0
11333 if( gb * !gc ) *fpt += feff;
// getGapPattern: zero the output through fpt, accumulate eff[j] into *fpt wherever the
// test (gb * !gc) holds while walking each sequence through spt, apply the test once
// more with gc taken from egappat[j], and finally print fgcp[0..len-1] via reporterr.
// NOTE(review): egappat is not a parameter of this function and its declaration is not
// visible here; the parameter xxx is not used in any visible line.  The gb assignments
// and loop headers are absent from this listing.
11339 void getGapPattern( double *fgcp, int clus, char **seq, double *eff, int len, char *xxx )
11348 while( i-- ) *fpt++ = 0.0;
11349 for( j=0; j<clus; j++ )
11351 feff = (double)eff[j];
11354 gc = ( *spt == '-' );
11359 gc = ( *++spt == '-' ); // pre-increment: reads the next column
11361 if( gb * !gc ) *fpt += feff; // gb * (!gc): nonzero only when gb != 0 and gc == 0
11368 gc = ( egappat[j] == '-' );
11370 if( gb * !gc ) *fpt += feff;
11375 for( j=0; j<len; j++ )
11377 reporterr( "%d, %f\n", j, fgcp[j] );
// getdigapfreq_st: clear freq[0..len], then for each sequence test every column j>=1
// for a gap at both j and j-1.  The two boundary tests are switched off by the constant
// "0 &&" in their conditions.
// NOTE(review): the statements executed when a test succeeds are absent from this
// listing; presumably the sequence weight is added to freq at that column -- confirm.
11381 void getdigapfreq_st( double *freq, int clus, char **seq, double *eff, int len )
11385 for( i=0; i<len+1; i++ ) freq[i] = 0.0;
11386 for( i=0; i<clus; i++ )
11389 if( 0 && seq[i][0] == '-' ) // may be wrong
11391 for( j=1; j<len; j++ )
11393 if( seq[i][j] == '-' && seq[i][j-1] == '-' )
11396 if( 0 && seq[i][len-1] == '-' )
11399 // reporterr( "\ndigapf = \n" );
11400 // for( i=0; i<len+1; i++ ) reporterr( "%5.3f ", freq[i] );
// getdiaminofreq_x: clear freq[0..len+1], then for each sequence test the first column
// for a non-gap, every column j>=1 for a non-gap at both j and j-1, and the last column
// (len-1) for a non-gap.
// NOTE(review): the statements executed when a test succeeds are absent from this
// listing; presumably the sequence weight is added to freq at that position -- confirm.
11403 void getdiaminofreq_x( double *freq, int clus, char **seq, double *eff, int len )
11407 for( i=0; i<len+2; i++ ) freq[i] = 0.0;
11408 for( i=0; i<clus; i++ )
11411 if( seq[i][0] != '-' ) // correct
11413 for( j=1; j<len; j++ )
11415 if( seq[i][j] != '-' && seq[i][j-1] != '-' )
11418 if( 1 && seq[i][len-1] != '-' ) // when xxx is not used, only [len-1]
11421 // reporterr( "\ndiaaf = \n" );
11422 // for( i=0; i<len+1; i++ ) reporterr( "%5.3f ", freq[i] );
// getdiaminofreq_st: clear freq[0..len], then for each sequence test the first column
// for a non-gap and every column j>=1 for a non-gap at both j and j-1.  The last-column
// test is commented out in this variant.
// NOTE(review): the statements executed when a test succeeds are absent from this
// listing; presumably the sequence weight is added to freq at that position -- confirm.
11425 void getdiaminofreq_st( double *freq, int clus, char **seq, double *eff, int len )
11429 for( i=0; i<len+1; i++ ) freq[i] = 0.0;
11430 for( i=0; i<clus; i++ )
11433 if( seq[i][0] != '-' )
11435 for( j=1; j<len; j++ )
11437 if( seq[i][j] != '-' && seq[i][j-1] != '-' )
11440 // if( 1 && seq[i][len-1] != '-' ) // when xxx is not used, only [len-1]
11443 // reporterr( "\ndiaaf = \n" );
11444 // for( i=0; i<len+1; i++ ) reporterr( "%5.3f ", freq[i] );
// getdigapfreq_part: clear freq[0..len+1], then for each sequence i test
//   - start: gap in column 0 and '-' in sgappat[i];
//   - inside: gap at both columns j and j-1, for j>=1;
//   - end:   '-' in egappat[i] and gap in column len-1.
// NOTE(review): the statements executed when a test succeeds are absent from this
// listing; presumably the sequence weight is added to freq at that position -- confirm.
11447 void getdigapfreq_part( double *freq, int clus, char **seq, double *eff, int len, char *sgappat, char *egappat )
11451 for( i=0; i<len+2; i++ ) freq[i] = 0.0;
11452 for( i=0; i<clus; i++ )
11455 // if( seq[i][0] == '-' )
11456 if( seq[i][0] == '-' && sgappat[i] == '-' )
11458 for( j=1; j<len; j++ )
11460 if( seq[i][j] == '-' && seq[i][j-1] == '-' )
11463 // if( seq[i][len] == '-' && seq[i][len-1] == '-' ) // impossible when xxx is not used
11464 if( egappat[i] == '-' && seq[i][len-1] == '-' )
11467 // reporterr( "\ndigapf = \n" );
11468 // for( i=0; i<len+1; i++ ) reporterr( "%5.3f ", freq[i] );
// getdiaminofreq_part: clear freq[0..len+1], then for each sequence i test
//   - start: non-gap in column 0 and non-'-' in sgappat[i];
//   - inside: non-gap at both columns j and j-1, for j>=1;
//   - end:   non-'-' in egappat[i] and non-gap in column len-1.
// NOTE(review): the statements executed when a test succeeds are absent from this
// listing; presumably the sequence weight is added to freq at that position -- confirm.
11471 void getdiaminofreq_part( double *freq, int clus, char **seq, double *eff, int len, char *sgappat, char *egappat )
11475 for( i=0; i<len+2; i++ ) freq[i] = 0.0;
11476 for( i=0; i<clus; i++ )
11479 if( seq[i][0] != '-' && sgappat[i] != '-' )
11481 for( j=1; j<len; j++ )
11483 if( seq[i][j] != '-' && seq[i][j-1] != '-' )
11486 // if( 1 && seq[i][len-1] != '-' ) // when xxx is not used, only [len-1]
11487 if( egappat[i] != '-' && seq[i][len-1] != '-' ) // when xxx is not used, only [len-1]
11490 // reporterr( "\ndiaaf = \n" );
11491 // for( i=0; i<len+1; i++ ) reporterr( "%5.3f ", freq[i] );
// getgapfreq_zure_part: clear freq[0..len+1], then for each sequence i test sgap[i] for
// '-' and every column j for a gap.  The end-pattern handling is commented out.
// NOTE(review): the statements executed when a test succeeds are absent from this
// listing; presumably the sequence weight is added to freq, shifted by one position as
// the "zure" (shift) in the name suggests -- confirm.
11494 void getgapfreq_zure_part( double *freq, int clus, char **seq, double *eff, int len, char *sgap )
11498 for( i=0; i<len+2; i++ ) freq[i] = 0.0;
11499 for( i=0; i<clus; i++ )
11502 if( sgap[i] == '-' )
11504 for( j=0; j<len; j++ )
11506 if( seq[i][j] == '-' )
11509 // if( egap[i] == '-' )
11510 // freq[len+1] += feff;
11512 // reporterr( "\ngapf = \n" );
11513 // for( i=0; i<len+1; i++ ) reporterr( "%5.3f ", freq[i] );
// getgapfreq_zure: clear freq[0..len], then for each sequence test every column for a
// gap character.
// NOTE(review): the statement executed when the test succeeds is absent from this
// listing; presumably the sequence weight is added to freq, shifted by one position as
// the "zure" (shift) in the name suggests -- confirm.
11516 void getgapfreq_zure( double *freq, int clus, char **seq, double *eff, int len )
11520 for( i=0; i<len+1; i++ ) freq[i] = 0.0;
11521 for( i=0; i<clus; i++ )
11524 for( j=0; j<len; j++ )
11526 if( seq[i][j] == '-' )
11531 // reporterr( "\ngapf = \n" );
11532 // for( i=0; i<len+1; i++ ) reporterr( "%5.3f ", freq[i] );
// getgapfreq: clear freq[0..len], then for each sequence test every column for a gap
// character.
// NOTE(review): the statement executed when the test succeeds is absent from this
// listing; presumably the sequence weight is added to freq at that column -- confirm.
11535 void getgapfreq( double *freq, int clus, char **seq, double *eff, int len )
11539 for( i=0; i<len+1; i++ ) freq[i] = 0.0;
11540 for( i=0; i<clus; i++ )
11543 for( j=0; j<len; j++ )
11545 if( seq[i][j] == '-' )
11550 // reporterr( "\ngapf = \n" );
11551 // for( i=0; i<len+1; i++ ) reporterr( "%5.3f ", freq[i] );
// st_getGapPattern: build, for the len+1 positions 0..len, a table of gap lengths and
// their weighted frequencies.  Each pat[j] is an array of Gappat terminated by an entry
// whose len is -1.
//   Phase 1: existing tables are freed; then every sequence j is scanned (weight
//            feff = eff[j]).  When a gap length has to be recorded at a position, the
//            table is searched for that length; an unknown length grows the table with
//            realloc and starts at freq 0.0.  feff is then added to the entry's freq.
//   Phase 2: for each position, entry 0 receives the sum of the recorded freqs, and an
//            entry with len 0 and freq 1.0 - sum is placed before the terminator.
//            A separate path callocs a fresh 3-entry table.
// Fix: the allocation-failure message ended in the two characters 'n instead of the
// newline escape, so it ran together with the following message.
// NOTE(review): many lines (braces, gaplen bookkeeping, the failure test and the
// condition selecting the 3-entry path) are absent from this listing.
11554 void st_getGapPattern( Gappat **pat, int clus, char **seq, double *eff, int len )
11556 int i, j, k, gb, gc;
11567 if( *fpt ) free( *fpt );
11571 for( j=0; j<clus; j++ )
11573 // reporterr( "seq[%d] = %s\n", j, seq[j] );
11574 feff = (double)eff[j];
11577 *fpt = NULL; // differs when called from Falign.c
11582 for( i=0; i<len+1; i++ )
11585 // reporterr( "i=%d, gaplen = %d\n", i, gaplen );
// Position len (one past the last column) never counts as a gap.
11587 gc = ( i != len && *spt++ == '-' );
// Search the existing entries for this gap length; k stops at the match or terminator.
11596 if( *fpt ) for( ; (*fpt)[k].len != -1; k++ )
11598 if( (*fpt)[k].len == gaplen )
11600 // reporterr( "known\n" );
11608 *fpt = (Gappat *)realloc( *fpt, (k+3) * sizeof( Gappat ) ); // 1 before (total), 2 after (len0), terminator
11611 reporterr( "Cannot allocate gappattern!\n" );
11612 reporterr( "Use an approximate method, with the --mafft5 option.\n" );
11615 (*fpt)[k].freq = 0.0;
11616 (*fpt)[k].len = gaplen;
11617 (*fpt)[k+1].len = -1;
11618 (*fpt)[k+1].freq = 0.0; // not needed
11619 // reporterr( "gaplen=%d, Unknown, %f\n", gaplen, (*fpt)[k].freq );
11622 // reporterr( "adding pos %d, len=%d, k=%d, freq=%f->", i, gaplen, k, (*fpt)[k].freq );
11623 (*fpt)[k].freq += feff;
11624 // reporterr( "%f\n", (*fpt)[k].freq );
11632 for( j=0; j<len+1; j++ )
11636 // reporterr( "j=%d\n", j );
11637 // for( i=1; pat[j][i].len!=-1; i++ )
11638 // reporterr( "pos=%d, i=%d, len=%d, freq=%f\n", j, i, pat[j][i].len, pat[j][i].freq );
11640 pat[j][0].len = 0; // meaningless
11641 pat[j][0].freq = 0.0;
11642 for( i=1; pat[j][i].len!=-1;i++ )
11644 pat[j][0].freq += pat[j][i].freq;
11645 // reporterr( "totaling, i=%d, result = %f\n", i, pat[j][0].freq );
11647 // reporterr( "totaled, result = %f\n", pat[j][0].freq );
// i now indexes the old terminator: overwrite it with the len-0 entry and re-terminate.
11649 pat[j][i].freq = 1.0 - pat[j][0].freq;
11650 pat[j][i].len = 0; // meaningful
11651 pat[j][i+1].len = -1;
11655 pat[j] = (Gappat *)calloc( 3, sizeof( Gappat ) );
11656 pat[j][0].freq = 0.0;
11657 pat[j][0].len = 0; // meaningless
11659 pat[j][1].freq = 1.0 - pat[j][0].freq;
11660 pat[j][1].len = 0; // meaningful
11661 pat[j][2].len = -1;
// minimum: function wrapper around the MIN macro; returns MIN( i1, i2 ).
11667 static int minimum( int i1, int i2 )
11669 return MIN( i1, i2 );
// commongappickpairfast: variant of commongappickpair that also takes the vectors skip1
// and skip2.  skipped1/skipped2 start at 0, and each step computes
// skip = min( *skip1-skipped1, *skip2-skipped2 ).
// NOTE(review): the copy into r1/r2, the use of skip and the updates of skipped1/2 are
// absent from this listing.  scand1/scand2 are declared only in a commented-out line, so
// the MIN( scand1, scand2 ) form is presumably a disabled alternative to the minimum()
// call -- confirm.
11672 static void commongappickpairfast( char *r1, char *r2, char *i1, char *i2, int *skip1, int *skip2 )
11674 // char *i1bk = i1;
11675 int skip, skipped1, skipped2;
11676 // int skip, skipped1, skipped2, scand1, scand2;
11677 skipped1 = skipped2 = 0;
11678 // reporterr("\n");
11682 // fprintf( stderr, "i1 pos =%d\n", (int)(i1- i1bk) );
11683 // reporterr( "\nSkip cand %d-%d\n", *skip1-skipped1, *skip2-skipped2 );
11685 scand1 = *skip1-skipped1;
11686 scand2 = *skip2-skipped2;
11687 skip = MIN( scand1, scand2 );
11689 skip = minimum( *skip1-skipped1, *skip2-skipped2 );
11691 // reporterr( "Skip %d\n", skip );
11696 // fprintf( stderr, "i1 pos =%d, nlenmax=%d\n", (int)(i1- i1bk), nlenmax );
11698 // reporterr( "%d, %c-%c\n", i1-i1bk, *i1, *i2 );
11699 // if( *i1 == '-' && *i2 == '-' ) // unnecessary?
11701 // reporterr( "Error in commongappickpairfast" );
// commongappickpair: process the aligned pair i1/i2 into r1/r2, testing each column for
// a gap in both inputs.
// NOTE(review): the loop and both branches are absent from this listing; presumably
// columns where both inputs have '-' are dropped and all others are copied -- confirm.
11727 static void commongappickpair( char *r1, char *r2, char *i1, char *i2 )
11729 // strcpy( r1, i1 );
11730 // strcpy( r2, i2 );
11731 // return; // not SP
11734 if( *i1 == '-' && *i2 == '-' )
// naiveRpairscore: weighted sum, over all pairs (seq1[i], seq2[j]), of a column-by-column
// score.  Each column adds amino_dis[][] for the two characters plus 0.5 * pv, where pv
// is chosen from the gap state of the previous column (p1p, p2p) and of the current
// column (p1, p2).  Each pair's total valf is weighted by deff = eff1[i] * eff2[j] and
// accumulated into val, which is printed through reporterr.
// NOTE(review): the column loop headers, most pv assignments and the return statement
// are absent from this listing; the only visible assignments set pv = penal * 2.
11749 double naiveRpairscore( int n1, int n2, char **seq1, char **seq2, double *eff1, double *eff2, int penal )
11757 char *p1, *p2, *p1p, *p2p;
11759 for( i=0; i<n1; i++ ) for( j=0; j<n2; j++ )
11761 deff = eff1[i] * eff2[j];
11762 // reporterr( "feff %d-%d = %f\n", i, j, feff );
11763 // reporterr( "i1 = %s\n", seq1[i] );
11764 // reporterr( "i2 = %s\n", seq2[j] );
11765 // reporterr( "s1 = %s\n", s1 );
11766 // reporterr( "s2 = %s\n", s2 );
11767 // reporterr( "penal = %d\n", penal );
11770 p1 = seq1[i]; p2 = seq2[j];
// First column: only the two one-sided gap cases are tested.
11772 if( *p1 == '-' && *p2 != '-' )
11774 if( *p1 != '-' && *p2 == '-' )
11776 // if( pv ) reporterr( "Penal!, %f, %d-%d, pos1,pos2=%d,%d\n", pv * deff * 0.5, i, j, p1-seq1[i], p2-seq2[j] );
// Remember this column as "previous" before the pointers are advanced.
11777 p1p = p1; p2p = p2;
11778 valf += (double)amino_dis[(unsigned char)*p1++][(unsigned char)*p2++] + 0.5 * pv;
// Previous column: residue / residue.
11782 if( *p1p != '-' && *p2p != '-' )
11784 if( *p1 == '-' && *p2 != '-' )
11786 if( *p1 != '-' && *p2 == '-' )
11788 if( *p1 != '-' && *p2 != '-' )
11790 if( *p1 == '-' && *p2 == '-' )
// Previous column: gap / gap.
11793 if( *p1p == '-' && *p2p == '-' )
11795 if( *p1 == '-' && *p2 != '-' )
11798 if( *p1 != '-' && *p2 == '-' )
11801 if( *p1 != '-' && *p2 != '-' )
11803 if( *p1 == '-' && *p2 == '-' )
// Previous column: residue / gap.
11806 if( *p1p != '-' && *p2p == '-' )
11808 if( *p1 == '-' && *p2 != '-' )
11809 pv = penal * 2; // ??
11811 if( *p1 != '-' && *p2 == '-' )
11813 if( *p1 != '-' && *p2 != '-' )
11816 if( *p1 == '-' && *p2 == '-' )
// Previous column: gap / residue.
11820 if( *p1p == '-' && *p2p != '-' )
11822 if( *p1 == '-' && *p2 != '-' )
11824 if( *p1 != '-' && *p2 == '-' )
11825 pv = penal * 2; // ??
11827 if( *p1 != '-' && *p2 != '-' )
11830 if( *p1 == '-' && *p2 == '-' )
11834 // reporterr( "adding %c-%c, %d\n", *p1, *p2, amino_dis[*p1][*p2] );
11835 // if( pv ) reporterr( "Penal!, %f, %d-%d, pos1,pos2=%d,%d\n", pv * deff * 0.5, i, j, p1-seq1[i], p2-seq2[j] );
11836 valf += amino_dis[(unsigned char)*p1++][(unsigned char)*p2++] + 0.5 * pv;
11839 // reporterr( "valf = %d\n", valf );
11840 val += deff * ( valf );
11842 reporterr( "val = %f\n", val );
// naiveQpairscore: weighted sum, over all pairs (seq1[i], seq2[j]), of a column-by-column
// score.  Each column adds amino_dis[][] for the two characters plus 0.5 * pv, where pv
// is chosen from the gap state of the previous column (p1p, p2p) and of the current
// column (p1, p2).  Each pair's total valf is weighted by deff = eff1[i] * eff2[j] and
// accumulated into val, which is printed through reporterr.
// NOTE(review): the column loop headers, most pv assignments and the return statement
// are absent from this listing; the only visible assignments set pv = penal * 2.
11846 double naiveQpairscore( int n1, int n2, char **seq1, char **seq2, double *eff1, double *eff2, int penal )
11853 char *p1, *p2, *p1p, *p2p;
11856 for( i=0; i<n1; i++ ) for( j=0; j<n2; j++ )
11858 deff = eff1[i] * eff2[j];
11859 // reporterr( "feff %d-%d = %f\n", i, j, feff );
11860 // reporterr( "i1 = %s\n", seq1[i] );
11861 // reporterr( "i2 = %s\n", seq2[j] );
11862 // reporterr( "s1 = %s\n", s1 );
11863 // reporterr( "s2 = %s\n", s2 );
11864 // reporterr( "penal = %d\n", penal );
11867 p1 = seq1[i]; p2 = seq2[j];
// First column: only the two one-sided gap cases are tested.
11869 if( *p1 == '-' && *p2 != '-' )
11871 if( *p1 != '-' && *p2 == '-' )
11873 // if( pv ) reporterr( "Penal!, %f, %d-%d, pos1,pos2=%d,%d\n", pv * deff * 0.5, i, j, p1-seq1[i], p2-seq2[j] );
// Remember this column as "previous" before the pointers are advanced.
11874 p1p = p1; p2p = p2;
11875 valf += (double)amino_dis[(unsigned char)*p1++][(unsigned char)*p2++] + 0.5 * pv;
// Previous column: residue / residue.
11879 if( *p1p != '-' && *p2p != '-' )
11881 if( *p1 == '-' && *p2 != '-' )
11883 if( *p1 != '-' && *p2 == '-' )
11885 if( *p1 != '-' && *p2 != '-' )
11887 if( *p1 == '-' && *p2 == '-' )
// Previous column: gap / gap.
11890 if( *p1p == '-' && *p2p == '-' )
11892 if( *p1 == '-' && *p2 != '-' )
11895 if( *p1 != '-' && *p2 == '-' )
11898 if( *p1 != '-' && *p2 != '-' )
11900 if( *p1 == '-' && *p2 == '-' )
// Previous column: residue / gap.
11903 if( *p1p != '-' && *p2p == '-' )
11905 if( *p1 == '-' && *p2 != '-' )
11906 pv = penal * 2; // ??
11908 if( *p1 != '-' && *p2 == '-' )
11910 if( *p1 != '-' && *p2 != '-' )
11913 if( *p1 == '-' && *p2 == '-' )
// Previous column: gap / residue.
11917 if( *p1p == '-' && *p2p != '-' )
11919 if( *p1 == '-' && *p2 != '-' )
11921 if( *p1 != '-' && *p2 == '-' )
11922 pv = penal * 2; // ??
11924 if( *p1 != '-' && *p2 != '-' )
11927 if( *p1 == '-' && *p2 == '-' )
11931 // reporterr( "adding %c-%c, %d\n", *p1, *p2, amino_dis[*p1][*p2] );
11932 // if( pv ) reporterr( "Penal!, %f, %d-%d, pos1,pos2=%d,%d\n", pv * deff * 0.5, i, j, p1-seq1[i], p2-seq2[j] );
11933 valf += amino_dis[(unsigned char)*p1++][(unsigned char)*p2++] + 0.5 * pv;
11936 // reporterr( "valf = %d\n", valf );
11937 val += deff * ( valf );
11939 reporterr( "val = %f\n", val );
// naiveHpairscore: weighted sum, over all pairs (seq1[i], seq2[j]), of a column-by-column
// score.  Each column adds amino_dis[][] for the two characters plus 0.5 * pv, where pv
// is chosen from the gap state of the previous column (p1p, p2p) and of the current
// column (p1, p2).  Each pair's total valf is weighted by deff = eff1[i] * eff2[j] and
// accumulated into val, which is printed through reporterr.  A non-zero pv on the first
// column is also reported.
// NOTE(review): the column loop headers, all pv assignments and the return statement are
// absent from this listing.
11943 double naiveHpairscore( int n1, int n2, char **seq1, char **seq2, double *eff1, double *eff2, int penal )
11949 // double feff = 0.0; // by D.Mathog, a guess
11951 char *p1, *p2, *p1p, *p2p;
11953 for( i=0; i<n1; i++ ) for( j=0; j<n2; j++ )
11955 deff = eff1[i] * eff2[j];
11956 // reporterr( "i1 = %s\n", seq1[i] );
11957 // reporterr( "i2 = %s\n", seq2[j] );
11958 // reporterr( "s1 = %s\n", s1 );
11959 // reporterr( "s2 = %s\n", s2 );
11960 // reporterr( "penal = %d\n", penal );
11963 p1 = seq1[i]; p2 = seq2[j];
// First column: only the two one-sided gap cases are tested.
11965 if( *p1 == '-' && *p2 != '-' )
11967 if( *p1 != '-' && *p2 == '-' )
11969 if( pv ) reporterr( "Penal!, %f, %d-%d, pos1,pos2=%d,%d\n", pv * deff * 0.5, i, j, (int)(p1-seq1[i]), (int)(p2-seq2[j]) );
// Remember this column as "previous" before the pointers are advanced.
11970 p1p = p1; p2p = p2;
11971 valf += (double)amino_dis[(unsigned char)*p1++][(unsigned char)*p2++] + 0.5 * pv;
// Previous column: residue / residue.
11975 if( *p1p != '-' && *p2p != '-' )
11977 if( *p1 == '-' && *p2 != '-' )
11979 if( *p1 != '-' && *p2 == '-' )
11981 if( *p1 != '-' && *p2 != '-' )
11983 if( *p1 == '-' && *p2 == '-' )
// Previous column: gap / gap.
11986 if( *p1p == '-' && *p2p == '-' )
11988 if( *p1 == '-' && *p2 != '-' )
11991 if( *p1 != '-' && *p2 == '-' )
11994 if( *p1 != '-' && *p2 != '-' )
11996 if( *p1 == '-' && *p2 == '-' )
// Previous column: residue / gap.
11999 if( *p1p != '-' && *p2p == '-' )
12001 if( *p1 == '-' && *p2 != '-' )
12004 if( *p1 != '-' && *p2 == '-' )
12006 if( *p1 != '-' && *p2 != '-' )
12008 if( *p1 == '-' && *p2 == '-' )
// Previous column: gap / residue.
12012 if( *p1p == '-' && *p2p != '-' )
12014 if( *p1 == '-' && *p2 != '-' )
12016 if( *p1 != '-' && *p2 == '-' )
12019 if( *p1 != '-' && *p2 != '-' )
12021 if( *p1 == '-' && *p2 == '-' )
12025 // reporterr( "adding %c-%c, %d\n", *p1, *p2, amino_dis[*p1][*p2] );
12026 // if( pv ) reporterr( "Penal!, %f, %d-%d, pos1,pos2=%d,%d\n", pv * deff * 0.5, i, j, p1-seq1[i], p2-seq2[j] );
12027 valf += amino_dis[(unsigned char)*p1++][(unsigned char)*p2++] + 0.5 * pv;
12030 // reporterr( "valf = %d\n", valf );
12031 val += deff * ( valf );
12033 reporterr( "val = %f\n", val );
// naivepairscorefast: score one aligned pair.  Two zero-filled buffers of
// strlen(seq1)+1 bytes are calloc'ed and filled by commongappickpairfast() from
// seq1/seq2 using skip1/skip2.  The buffers are then scanned: the visible gap branches
// add penal to vali and skip the run of '-' in that sequence; the remaining branch adds
// amino_dis[][] for the two characters.
// NOTE(review): the branch conditions, the NULL checks and free() calls for s1/s2, and
// the return statement are absent from this listing.
12038 double naivepairscorefast( char *seq1, char *seq2, int *skip1, int *skip2, int penal )
12041 int len = strlen( seq1 );
12045 s1 = calloc( len+1, sizeof( char ) );
12046 s2 = calloc( len+1, sizeof( char ) );
12049 commongappickpairfast( s1, s2, seq1, seq2, skip1, skip2 );
12050 // commongappickpair( s1, s2, seq1, seq2 );
12051 // printf( "\n###s1 = %s\n", seq1 );
12052 // printf( "###s2 = %s\n", seq2 );
12053 // printf( "\n###i1 = %s\n", s1 );
12054 // printf( "###i2 = %s\n", s2 );
12055 // printf( "allocated size, len+1 = %d\n", len+1 );
12056 // printf( "###penal = %d\n", penal );
12063 // reporterr( "Penal! %c-%c in %d-%d, %f\n", *(p1-1), *(p2-1), i, j, feff );
12064 vali += (double)penal;
12065 // while( *p1 == '-' || *p2 == '-' )
12066 while( *p1 == '-' ) // SP
12075 // reporterr( "Penal! %c-%c in %d-%d, %f\n", *(p1-1), *(p2-1), i, j, feff );
12076 vali += (double)penal;
12077 // while( *p2 == '-' || *p1 == '-' )
12078 while( *p2 == '-' ) // SP
12085 // reporterr( "adding %c-%c, %d\n", *p1, *p2, amino_dis[*p1][*p2] );
12086 vali += (double)amino_dis[(unsigned char)*p1++][(unsigned char)*p2++];
12091 // reporterr( "###vali = %d\n", vali );
// naivepairscore11_dynmtx: score one aligned pair with a caller-supplied matrix.  Two
// zero-filled buffers of strlen(seq1)+1 bytes are calloc'ed and filled by
// commongappickpair() from seq1/seq2.  The buffers are then scanned: the visible gap
// branches add penal to vali and skip the run of '-' in that sequence; the remaining
// branch maps both characters through amino_n[] and adds mtx[c1][c2].
// NOTE(review): the branch conditions, the NULL checks and free() calls for s1/s2, and
// the return statement are absent from this listing.
12095 double naivepairscore11_dynmtx( double **mtx, char *seq1, char *seq2, int penal )
12098 int len = strlen( seq1 );
12099 char *s1, *s2, *p1, *p2;
12103 s1 = calloc( len+1, sizeof( char ) );
12104 s2 = calloc( len+1, sizeof( char ) );
12107 commongappickpair( s1, s2, seq1, seq2 );
12108 // reporterr( "###i1 = %s\n", s1 );
12109 // reporterr( "###i2 = %s\n", s2 );
12110 // reporterr( "###penal = %d\n", penal );
12117 // reporterr( "Penal! %c-%c in %d-%d, %f\n", *(p1-1), *(p2-1), i, j, feff );
12118 vali += (double)penal;
12119 // while( *p1 == '-' || *p2 == '-' )
12120 while( *p1 == '-' ) // SP
12129 // reporterr( "Penal! %c-%c in %d-%d, %f\n", *(p1-1), *(p2-1), i, j, feff );
12130 vali += (double)penal;
12131 // while( *p2 == '-' || *p1 == '-' )
12132 while( *p2 == '-' ) // SP
12139 // reporterr( "adding %c-%c, %d\n", *p1, *p2, amino_dis[*p1][*p2] );
// Character -> matrix index via amino_n[]; both pointers move on one column.
12140 c1 = amino_n[(unsigned char)*p1++];
12141 c2 = amino_n[(unsigned char)*p2++];
12142 vali += (double)mtx[c1][c2];
12147 // reporterr( "###vali = %d\n", vali );
// naivepairscore11: score one aligned pair with the global amino_dis matrix.  Two
// zero-filled buffers of strlen(seq1)+1 bytes are calloc'ed and filled by
// commongappickpair() from seq1/seq2.  The buffers are then scanned: the visible gap
// branches add penal to vali and skip the run of '-' in that sequence; the remaining
// branch adds amino_dis[][] for the two characters.
// NOTE(review): the branch conditions, the NULL checks and free() calls for s1/s2, and
// the return statement are absent from this listing.
12151 double naivepairscore11( char *seq1, char *seq2, int penal )
12154 int len = strlen( seq1 );
12155 char *s1, *s2, *p1, *p2;
12157 s1 = calloc( len+1, sizeof( char ) );
12158 s2 = calloc( len+1, sizeof( char ) );
12161 commongappickpair( s1, s2, seq1, seq2 );
12162 // reporterr( "###i1 = %s\n", s1 );
12163 // reporterr( "###i2 = %s\n", s2 );
12164 // reporterr( "###penal = %d\n", penal );
12171 // reporterr( "Penal! %c-%c in %d-%d, %f\n", *(p1-1), *(p2-1), i, j, feff );
12172 vali += (double)penal;
12173 // while( *p1 == '-' || *p2 == '-' )
12174 while( *p1 == '-' ) // SP
12183 // reporterr( "Penal! %c-%c in %d-%d, %f\n", *(p1-1), *(p2-1), i, j, feff );
12184 vali += (double)penal;
12185 // while( *p2 == '-' || *p1 == '-' )
12186 while( *p2 == '-' ) // SP
12193 // reporterr( "adding %c-%c, %d\n", *p1, *p2, amino_dis[*p1][*p2] );
12194 vali += (double)amino_dis[(unsigned char)*p1++][(unsigned char)*p2++];
12199 // reporterr( "###vali = %d\n", vali );
// naivepairscore: group-to-group form of the pair score.  Two buffers of
// strlen(seq1[0])+1 bytes are calloc'ed once and reused: for every pair
// (seq1[i], seq2[j]) commongappickpair() fills them, the pair is scored into vali
// (runs of '-' are skipped with the while loops, other columns add amino_dis[][]), and
// feff * vali with feff = eff1[i] * eff2[j] is accumulated into val.  val is printed
// through reporterr.
// NOTE(review): the gap-penalty statements, the branch conditions, the NULL checks and
// free() calls for s1/s2, and the return statement are absent from this listing.
12203 double naivepairscore( int n1, int n2, char **seq1, char **seq2, double *eff1, double *eff2, int penal )
12210 int len = strlen( seq1[0] );
12211 char *s1, *s2, *p1, *p2;
12212 s1 = calloc( len+1, sizeof( char ) );
12213 s2 = calloc( len+1, sizeof( char ) );
12215 for( i=0; i<n1; i++ ) for( j=0; j<n2; j++ )
12218 feff = eff1[i] * eff2[j];
12219 // reporterr( "feff %d-%d = %f\n", i, j, feff );
12220 commongappickpair( s1, s2, seq1[i], seq2[j] );
12221 // reporterr( "i1 = %s\n", seq1[i] );
12222 // reporterr( "i2 = %s\n", seq2[j] );
12223 // reporterr( "s1 = %s\n", s1 );
12224 // reporterr( "s2 = %s\n", s2 );
12225 // reporterr( "penal = %d\n", penal );
12232 // reporterr( "Penal! %c-%c in %d-%d, %f\n", *(p1-1), *(p2-1), i, j, feff );
12234 // while( *p1 == '-' || *p2 == '-' )
12235 while( *p1 == '-' ) // SP
12244 // reporterr( "Penal! %c-%c in %d-%d, %f\n", *(p1-1), *(p2-1), i, j, feff );
12246 // while( *p2 == '-' || *p1 == '-' )
12247 while( *p2 == '-' ) // SP
12254 // reporterr( "adding %c-%c, %d\n", *p1, *p2, amino_dis[*p1][*p2] );
12255 vali += amino_dis[(unsigned char)*p1++][(unsigned char)*p2++];
12257 // reporterr( "vali = %d\n", vali );
12258 val += feff * vali;
12262 reporterr( "val = %f\n", val );
// plainscore: accumulate naivepairscore11( s[i], s[j], penalty ) into v over all pairs
// with i < ilim and i < j < nseq, then print the penalty value through reporterr.
// NOTE(review): the initialisation of ilim and v and the return statement are absent
// from this listing; ilim is presumably nseq-1 -- confirm.
12267 double plainscore( int nseq, char **s )
12273 for( i=0; i<ilim; i++ ) for( j=i+1; j<nseq; j++ )
12275 v += (double)naivepairscore11( s[i], s[j], penalty );
12278 reporterr( "penalty = %d\n", penalty );
12284 int addonetip( int njobc, int ***topolc, double **lenc, double **iscorec, int ***topol, double **len, Treedep *dep, int treeout, Addtree *addtree, int iadd, char **name, int *alnleninnode, int *nogaplen, int noalign )
12286 int i, j, mem0, mem1, posinnew, m;
12289 double minscore, minscoreo, eff0, eff1, addedlen, tmpmin;
12290 int nearest, nearesto;
12293 int *additionaltopol;
12294 // double (*clusterfuncpt[1])(double,double);
12295 Bchain *ac, *acpt, *acori, *acnext, *acprev;
12297 char *neighborlist;
12299 int reflen, nearestnode, nogaplentoadd;
12300 int *topoldum0 = NULL;
12301 int *topoldum1 = NULL;
12304 int seqlengthcondition;
12305 double sueff1_double_local = 1.0 - sueff_global;
12306 double sueff05_double_local = sueff_global * 0.5;
12307 // char **tree; //static?
12308 // char *treetmp; //static?
12310 // for( i=0; i<njobc; i++ ) reporterr( "nogaplen of %d = %d\n", i+1, nogaplen[i] );
12314 // treetmp = AllocateCharVec( njob*150 );
12315 // tree = AllocateCharMtx( njob, njob*150 );
12317 // sueff1_double = 1.0 - sueff_global;
12318 // sueff05_double = sueff_global * 0.5;
12319 // if ( treemethod == 'X' )
12320 // clusterfuncpt[0] = cluster_mix_double;
12321 // else if ( treemethod == 'E' )
12322 // clusterfuncpt[0] = cluster_average_double;
12323 // else if ( treemethod == 'q' )
12324 // clusterfuncpt[0] = cluster_minimum_double;
12327 // reporterr( "Unknown treemethod, %c\n", treemethod );
12334 additionaltopol = (int *)calloc( 2, sizeof( int ) );
12335 leaf2node= (int *)calloc( norg, sizeof( int ) );
12338 neighborlist = calloc( norg * 30, sizeof( char ) );
12340 // for( i=0; i<njobc; i++ ) sprintf( tree[i], "%d", i+1 );
12343 reporterr( "Cannot allocate leaf2node.\n" );
12346 additionaltopol[0] = norg;
12347 additionaltopol[1] = -1;
12349 ac = (Bchain *)malloc( norg * sizeof( Bchain ) );
12350 for( i=0; i<norg; i++ )
12352 ac[i].next = ac+i+1;
12353 ac[i].prev = ac+i-1;
12356 ac[norg-1].next = NULL;
12359 acori = (Bchain *)malloc( 1 * sizeof( Bchain ) );
12362 ac[0].prev = acori;
12365 // for( i=0; i<nstep; i++ )
12367 // reporterr( "distfromtip = %f\n", dep[i].distfromtip );
12370 // for( i=0; i<norg; i++ )
12372 // reporterr( "disttofrag(%d,%d) = %f\n", i, njobc-1, iscorec[i][norg-i] );
12378 for( i=0; i<norg; i++ )
12380 tmpmin = iscorec[i][norg-i];
12381 if( minscore > tmpmin )
12387 nearesto = nearest;
12388 minscoreo = minscore;
12392 // for( i=0; i<njobc-1; i++ ) for( j=i+1; j<njobc; j++ )
12393 // reporterr( "iscorec[%d][%d] = %f\n", i, j, iscorec[i][j-i] );
12394 // reporterr( "nearest = %d\n", nearest+1 );
12395 // reporterr( "nearesto = %d\n", nearesto+1 );
12399 nogaplentoadd = nogaplen[norg];
12403 for( i=0; i<norg; i++ ) leaf2node[i] = -1;
12404 for( i=0; i<nstep; i++ )
12406 mem0 = topol[i][0][0];
12407 mem1 = topol[i][1][0];
12409 reporterr( "\n\nstep %d (old) \n", i );
12411 reporterr( "group0 = \n" );
12412 for( j=0; topol[i][0][j]>-1; j++ )
12414 reporterr( "%d ", topol[i][0][j]+1 );
12417 reporterr( "len=%f\n", len[i][0] );
12418 reporterr( "group1 = \n" );
12419 for( j=0; topol[i][1][j]>-1; j++ )
12421 reporterr( "%d ", topol[i][1][j]+1 );
12424 reporterr( "len=%f\n", len[i][1] );
12426 reporterr( "\n\n\nminscore = %f ? %f\n", minscore, dep[i].distfromtip*2 );
12427 reporterr( "i = %d\n", i );
12428 if( leaf2node[nearest] == -1 )
12430 reporterr( "nogaplen[nearest] = %d\n", nogaplen[nearest] );
12434 reporterr( "alnleninnode[leaf2node[nearest]] = %d\n", alnleninnode[leaf2node[nearest]] );
12435 reporterr( "leaf2node[nearest] = %d\n", leaf2node[nearest] );
12438 nearestnode = leaf2node[nearest];
12439 if( nearestnode == -1 )
12440 reflen = nogaplen[nearest];
12442 reflen = alnleninnode[nearestnode];
12443 // reflen = alnleninnode[i]; // BUG!!
12445 if( noalign ) seqlengthcondition = 1;
12446 else seqlengthcondition = ( nogaplentoadd <= reflen );
12448 //seqlengthcondition = 1; // CHUUI
12449 //seqlengthcondition = ( nogaplentoadd <= reflen ); // CHUUI
12451 if( repnorg == -1 && dep[i].distfromtip * 2 > minscore && seqlengthcondition ) // Keitouteki ichi ha fuseikaku.
12452 // if( repnorg == -1 && dep[i].distfromtip * 2 > minscore ) // Keitouteki ichi dake ga hitsuyouna baaiha kore wo tsukau.
12454 // reporterr( "INSERT HERE, %d-%d\n", nearest, norg );
12455 // reporterr( "nearest = %d\n", nearest );
12456 // reporterr( "\n\n\nminscore = %f\n", minscore );
12457 // reporterr( "distfromtip *2 = %f\n", dep[i].distfromtip * 2 );
12458 // reporterr( "nearest=%d, leaf2node[]=%d\n", nearest, leaf2node[nearest] );
12460 if( nearestnode == -1 )
12462 // reporterr( "INSERTING to 0!!!\n" );
12463 // reporterr( "lastlength = %d\n", nogaplen[norg] );
12464 // reporterr( "reflength = %d\n", nogaplen[nearest] );
12465 topolc[posinnew][0] = (int *)realloc( topolc[posinnew][0], ( 1 + 1 ) * sizeof( int ) );
12466 topolc[posinnew][0][0] = nearest;
12467 topolc[posinnew][0][1] = -1;
12469 addedlen = lenc[posinnew][0] = minscore / 2;
12474 // reporterr( "INSERTING to g, leaf2node = %d, cm=%d!!!\n", leaf2node[nearest], countmem(topol[leaf2node[nearest]][0] ) );
12475 // reporterr( "alnleninnode[i] = %d\n", alnleninnode[i] );
12476 // reporterr( "alnleninnode[leaf2node[nearest]] = %d\n", alnleninnode[leaf2node[nearest]] );
12478 topolc[posinnew][0] = (int *)realloc( topolc[posinnew][0], ( ( countmem( topol[nearestnode][0] ) + countmem( topol[nearestnode][1] ) + 1 ) * sizeof( int ) ) );
12479 // reporterr( "leaf2node[%d] = %d\n", nearest, leaf2node[nearest] );
12480 intcpy( topolc[posinnew][0], topol[nearestnode][0] );
12481 intcat( topolc[posinnew][0], topol[nearestnode][1] );
12482 // addedlen = lenc[posinnew][0] = minscore / 2 - len[nearestnode][0]; // bug!!
12483 addedlen = lenc[posinnew][0] = dep[i].distfromtip - minscore / 2; // 2014/06/10
12484 // fprintf( stderr, "addedlen = %f, dep[i].distfromtip = %f, len[nearestnode][0] = %f, minscore/2 = %f, lenc[posinnew][0] = %f\n", addedlen, dep[i].distfromtip, len[nearestnode][0], minscore/2, lenc[posinnew][0] );
12487 neighbor = lastmem( topolc[posinnew][0] );
12492 fp = fopen( "infile.tree", "a" ); // kyougou!!
12495 reporterr( "File error!\n" );
12498 fprintf( fp, "\n" );
12499 fprintf( fp, "%8d: %s\n", norg+iadd+1, name[norg+iadd] );
12500 fprintf( fp, " nearest sequence: %d\n", nearest + 1 );
12501 fprintf( fp, " distance: %f\n", minscore );
12502 fprintf( fp, " cousin: " );
12503 for( j=0; topolc[posinnew][0][j]!=-1; j++ )
12504 fprintf( fp, "%d ", topolc[posinnew][0][j]+1 );
12505 fprintf( fp, "\n" );
12508 addtree[iadd].nearest = nearesto;
12509 addtree[iadd].dist1 = minscoreo;
12510 addtree[iadd].dist2 = minscore;
12511 neighborlist[0] = 0;
12512 npt = neighborlist;
12513 for( j=0; topolc[posinnew][0][j]!=-1; j++ )
12515 sprintf( npt, "%d ", topolc[posinnew][0][j]+1 );
12516 npt += strlen( npt );
12518 addtree[iadd].neighbors = calloc( npt-neighborlist+1, sizeof( char ) );
12519 strcpy( addtree[iadd].neighbors, neighborlist );
12523 // reporterr( "INSERTING to 1!!!\n" );
12524 topolc[posinnew][1] = (int *)realloc( topolc[posinnew][1], ( 1 + 1 ) * sizeof( int ) );
12525 topolc[posinnew][1][0] = norg;
12526 topolc[posinnew][1][1] = -1;
12527 lenc[posinnew][1] = minscore / 2;
12529 // reporterr( "STEP %d (newnew)\n", posinnew );
12530 // for( j=0; topolc[posinnew][0][j]!=-1; j++ ) reporterr( " %d", topolc[posinnew][0][j]+1 );
12531 // reporterr( "\n len=%f\n", lenc[posinnew][0] );
12532 // for( j=0; topolc[posinnew][1][j]!=-1; j++ ) reporterr( " %d", topolc[posinnew][1][j]+1 );
12533 // reporterr( "\n len=%f\n", lenc[posinnew][1] );
12537 // reporterr( "STEP %d\n", posinnew );
12538 // for( j=0; topolc[posinnew][0][j]!=-1; j++ ) reporterr( " %d", topolc[posinnew][0][j] );
12539 // reporterr( "\n len=%f\n", lenc[i][0] );
12540 // for( j=0; topolc[posinnew][1][j]!=-1; j++ ) reporterr( " %d", topolc[posinnew][1][j] );
12541 // reporterr( "\n len=%f\n", lenc[i][1] );
12543 // im = topolc[posinnew][0][0];
12544 // jm = topolc[posinnew][1][0];
12545 // sprintf( treetmp, "(%s:%7.5f,%s:%7.5f)", tree[im], lenc[posinnew][0], tree[jm], lenc[posinnew][1] );
12546 // strcpy( tree[im], treetmp );
12551 // reporterr( "minscore = %f\n", minscore );
12552 // reporterr( "distfromtip = %f\n", dep[i].distfromtip );
12553 // reporterr( "Modify matrix, %d-%d\n", nearest, norg );
12554 eff0 = iscorec[mem0][norg-mem0];
12555 eff1 = iscorec[mem1][norg-mem1];
12557 // iscorec[mem0][norg-mem0] = (clusterfuncpt[0])( eff0, eff1 );
12558 iscorec[mem0][norg-mem0] = MIN( eff0, eff1 ) * sueff1_double_local + ( eff0 + eff1 ) * sueff05_double_local;
12559 iscorec[mem1][norg-mem1] = 9999.9; // sukoshi muda
12561 acprev = ac[mem1].prev;
12562 acnext = ac[mem1].next;
12563 acprev->next = acnext;
12564 if( acnext != NULL ) acnext->prev = acprev;
12566 if( ( nearest == mem1 || nearest == mem0 ) )
12569 // for( j=0; j<norg; j++ ) // sukoshi muda
12571 // if( minscore > iscorec[j][norg-j] )
12573 // minscore = iscorec[j][norg-j];
12577 // reporterr( "searching on modified ac " );
12578 for( acpt=acori->next; acpt!=NULL; acpt=acpt->next ) // sukoshi muda
12580 // reporterr( "." );
12582 tmpmin = iscorec[j][norg-j];
12583 if( minscore > tmpmin )
12589 // reporterr( "done\n" );
12592 // reporterr( "posinnew = %d\n", posinnew );
12595 if( topol[i][0][0] == repnorg )
12597 topolc[posinnew][0] = (int *)realloc( topolc[posinnew][0], ( countmem( topol[i][0] ) + 2 ) * sizeof( int ) );
12598 intcpy( topolc[posinnew][0], topol[i][0] );
12599 intcat( topolc[posinnew][0], additionaltopol );
12600 lenc[posinnew][0] = len[i][0] - addedlen; // 2014/6/10
12601 // fprintf( stderr, "i=%d, dep[i].distfromtip=%f\n", i, dep[i].distfromtip );
12602 // fprintf( stderr, "addedlen=%f, len[i][0]=%f, lenc[][0]=%f\n", addedlen, len[i][0], lenc[posinnew][0] );
12603 // fprintf( stderr, "lenc[][1] = %f\n", lenc[posinnew][0] );
12608 topolc[posinnew][0] = (int *)realloc( topolc[posinnew][0], ( countmem( topol[i][0] ) + 1 ) * sizeof( int ) );
12609 intcpy( topolc[posinnew][0], topol[i][0] );
12610 lenc[posinnew][0] = len[i][0];
12613 if( topol[i][1][0] == repnorg )
12615 topolc[posinnew][1] = (int *)realloc( topolc[posinnew][1], ( countmem( topol[i][1] ) + 2 ) * sizeof( int ) );
12616 intcpy( topolc[posinnew][1], topol[i][1] );
12617 intcat( topolc[posinnew][1], additionaltopol );
12618 lenc[posinnew][1] = len[i][1] - addedlen; // 2014/6/10
12619 // fprintf( stderr, "i=%d, dep[i].distfromtip=%f\n", i, dep[i].distfromtip );
12620 // fprintf( stderr, "addedlen=%f, len[i][1]=%f, lenc[][1]=%f\n", addedlen, len[i][1], lenc[posinnew][1] );
12621 // fprintf( stderr, "lenc[][1] = %f\n", lenc[posinnew][1] );
12624 repnorg = topolc[posinnew][0][0]; // juuyou
12628 topolc[posinnew][1] = (int *)realloc( topolc[posinnew][1], ( countmem( topol[i][1] ) + 1 ) * sizeof( int ) );
12629 intcpy( topolc[posinnew][1], topol[i][1] );
12630 lenc[posinnew][1] = len[i][1];
12633 // reporterr( "\nSTEP %d (new)\n", posinnew );
12634 // for( j=0; topolc[posinnew][0][j]!=-1; j++ ) reporterr( " %d", topolc[posinnew][0][j]+1 );
12635 // reporterr( "\n len=%f\n", lenc[posinnew][0] );
12636 // for( j=0; topolc[posinnew][1][j]!=-1; j++ ) reporterr( " %d", topolc[posinnew][1][j]+1 );
12637 // reporterr( "\n len=%f\n", lenc[posinnew][1] );
12639 // reporterr("\ni=%d\n####### leaf2node[nearest]= %d\n", i, leaf2node[nearest] );
12641 for( j=0; (m=topol[i][0][j])!=-1; j++ ) leaf2node[m] = i;
12642 for( j=0; (m=topol[i][1][j])!=-1; j++ ) leaf2node[m] = i;
12644 // reporterr("####### leaf2node[nearest]= %d\n", leaf2node[nearest] );
12646 // im = topolc[posinnew][0][0];
12647 // jm = topolc[posinnew][1][0];
12648 // sprintf( treetmp, "(%s:%7.5f,%s:%7.5f)", tree[im], lenc[posinnew][0], tree[jm], lenc[posinnew][1] );
12649 // strcpy( tree[im], treetmp );
12651 // reporterr( "%s\n", treetmp );
12659 topolo0 = topol[i][0];
12660 topolo1 = topol[i][1];
12665 // free( topol[i][0] );//?
12666 // free( topol[i][1] );//?
12667 // topol[i][0] = calloc( 2, sizeof( int ) );
12668 // topol[i][1] = calloc( 1, sizeof( int ) );
12669 // topol[i][0][0] = 0;
12670 // topol[i][0][1] = -1;
12671 // topol[i][1][0] = -1;
12673 topoldum0 = calloc( 2, sizeof( int ) );
12674 topoldum1 = calloc( 1, sizeof( int ) );
12679 topolo0 = topoldum0;
12680 topolo1 = topoldum1;
12682 if( repnorg == -1 )
12684 // topolc[posinnew][0] = (int *)realloc( topolc[posinnew][0], ( countmem( topol[i][0] ) + countmem( topol[i][1] ) + 1 ) * sizeof( int ) );
12685 // intcpy( topolc[posinnew][0], topol[i][0] );
12686 // intcat( topolc[posinnew][0], topol[i][1] );
12687 topolc[posinnew][0] = (int *)realloc( topolc[posinnew][0], ( countmem( topolo0 ) + countmem( topolo1 ) + 1 ) * sizeof( int ) );
12688 intcpy( topolc[posinnew][0], topolo0 );
12689 intcat( topolc[posinnew][0], topolo1 );
12690 // lenc[posinnew][0] = len[i][0] + len[i][1] - minscore / 2; // BUG!! 2014/06/07 ni hakken
12692 lenc[posinnew][0] = minscore / 2 - dep[nstep-1].distfromtip; // only when nstep>0, 2014/11/21
12694 lenc[posinnew][0] = minscore / 2;
12696 // reporterr( "\ndep[nstep-1].distfromtip = %f\n", dep[nstep-1].distfromtip );
12697 // reporterr( "lenc[][0] = %f\n", lenc[posinnew][0] );
12699 topolc[posinnew][1] = (int *)realloc( topolc[posinnew][1], 2 * sizeof( int ) );
12700 intcpy( topolc[posinnew][1], additionaltopol );
12701 lenc[posinnew][1] = minscore / 2;
12703 // neighbor = lastmem( topolc[posinnew][0] );
12704 neighbor = norg-1; // hakkirishita neighbor ga inai baai saigo ni hyouji
12709 fp = fopen( "infile.tree", "a" ); // kyougou!!
12712 reporterr( "File error!\n" );
12715 fprintf( fp, "\n" );
12716 fprintf( fp, "%8d: %s\n", norg+iadd+1, name[norg+iadd] );
12717 fprintf( fp, " nearest sequence: %d\n", nearest + 1 );
12718 fprintf( fp, " cousin: " );
12719 for( j=0; topolc[posinnew][0][j]!=-1; j++ )
12720 fprintf( fp, "%d ", topolc[posinnew][0][j]+1 );
12721 fprintf( fp, "\n" );
12724 addtree[iadd].nearest = nearesto;
12725 addtree[iadd].dist1 = minscoreo;
12726 addtree[iadd].dist2 = minscore;
12727 neighborlist[0] = 0;
12728 npt = neighborlist;
12729 for( j=0; topolc[posinnew][0][j]!=-1; j++ )
12731 sprintf( npt, "%d ", topolc[posinnew][0][j]+1 );
12732 npt += strlen( npt );
12734 addtree[iadd].neighbors = calloc( npt-neighborlist+1, sizeof( char ) );
12735 strcpy( addtree[iadd].neighbors, neighborlist );
12739 // reporterr( "STEP %d\n", posinnew );
12740 // for( j=0; topolc[posinnew][0][j]!=-1; j++ ) reporterr( " %d", topolc[posinnew][0][j] );
12741 // reporterr( "\n len=%f", lenc[posinnew][0] );
12742 // for( j=0; topolc[posinnew][1][j]!=-1; j++ ) reporterr( " %d", topolc[posinnew][1][j] );
12743 // reporterr( "\n len=%f\n", lenc[posinnew][1] );
12746 if( topoldum0 ) free( topoldum0 );
12747 if( topoldum1 ) free( topoldum1 );
12749 free( additionaltopol );
12752 if( treeout ) free( neighborlist );
12754 #if 0 // create a newick tree for CHECK
12759 treetmp = AllocateCharVec( njob*150 );
12760 tree = AllocateCharMtx( njob, njob*150 );
12761 for( i=0; i<njobc; i++ ) sprintf( tree[i], "%d", i+1 );
12763 for( i=0; i<njobc-1; i++ )
12765 reporterr( "\nSTEP %d\n", i );
12766 for( j=0; topolc[i][0][j]!=-1; j++ ) reporterr( " %d", topolc[i][0][j] );
12767 reporterr( "\n len=%f\n", lenc[i][0] );
12768 for( j=0; topolc[i][1][j]!=-1; j++ ) reporterr( " %d", topolc[i][1][j] );
12769 reporterr( "\n len=%f\n", lenc[i][1] );
12771 im = topolc[i][0][0];
12772 jm = topolc[i][1][0];
12773 sprintf( treetmp, "(%s:%7.5f,%s:%7.5f)", tree[im], lenc[i][0], tree[jm], lenc[i][1] );
12774 strcpy( tree[im], treetmp );
12778 reporterr( "%s\n", treetmp );
12779 FreeCharMtx( tree );
12783 return( neighbor );
// samemember (first of two definitions with this name in the listing):
// position-by-position comparison of two int lists.
//   mem, cand: int arrays; every loop here stops at the first element that is not > -1.
// Visible result: returns 0 as soon as mem[k] != cand[k] for some k.
// NOTE(review): the embedded line numbers jump (e.g. 12787 -> 12792), so the
// declarations, braces, any preprocessor guards and the final return statements
// are not visible here -- confirm them against the full file.
12787 int samemember( int *mem, int *cand )
// Diagnostic dump of both lists through reporterr.
12792 reporterr( "mem = " );
12793 for( i=0; mem[i]>-1; i++ ) reporterr( "%d ", mem[i] );
12796 reporterr( "cand = " );
12797 for( i=0; cand[i]>-1; i++ ) reporterr( "%d ", cand[i] );
// Walk both lists in lockstep (i and j advance together inside the comparison).
12801 for( i=0, j=0; mem[i]>-1; )
12803 if( mem[i++] != cand[j++] ) return( 0 );
// mem is exhausted here; test whether cand ends at the same position.
// The outcome of each branch is on lines not shown (presumably 1 / 0 -- TODO confirm).
12806 if( cand[j] == -1 )
// samemember (second definition with this name in the listing):
// order-insensitive comparison of two int lists.
//   mem, cand: int arrays; the counting and search loops stop at the first element not > -1.
// Visible result: returns 0 when the lengths differ or when some element of mem
// does not occur in cand.
// NOTE(review): declarations, braces and the final return statements are on lines
// not shown in this listing; how this definition is selected over the other
// samemember (preprocessor guard?) is also not visible.
12816 int samemember( int *mem, int *cand )
// Count the members of each list.
12821 nm = 0; for( i=0; mem[i]>-1; i++ ) nm++;
12822 nc = 0; for( i=0; cand[i]>-1; i++ ) nc++;
12824 if( nm != nc ) return( 0 );
// For every member of mem, scan cand linearly for an equal value.
12826 for( i=0; mem[i]>-1; i++ )
12828 for( j=0; cand[j]>-1; j++ )
12829 if( mem[i] == cand[j] ) break;
// The scan reached the -1 terminator without a match: mem[i] is missing from cand.
12830 if( cand[j] == -1 ) return( 0 );
// Diagnostic dump of both lists through reporterr.
// NOTE(review): the condition under which this is reached is not visible here.
12836 reporterr( "mem = " );
12837 for( i=0; mem[i]>-1; i++ ) reporterr( "%d ", mem[i] );
12840 reporterr( "cand = " );
12841 for( i=0; cand[i]>-1; i++ ) reporterr( "%d ", cand[i] );
// samemembern: like an order-insensitive list comparison, but the length of cand
// is passed explicitly as nc instead of being found from a terminator.
//   mem : int array scanned until the first element not > -1.
//   cand: int array; the search loop reads cand[0..nc-1].
//   nc  : number of entries of cand to consider.
// Visible result: returns 0 when the member count of mem differs from nc, or when
// some element of mem is not among the first nc entries of cand.
// NOTE(review): declarations, the update of nm and the final return are on lines
// not shown in this listing.
12853 int samemembern( int *mem, int *cand, int nc )
// Counting pass over mem with an early exit once nm exceeds nc
// (the increment of nm itself is not visible here -- TODO confirm).
12859 for( i=0; mem[i]>-1; i++ )
12862 if( nm > nc ) return( 0 );
12865 if( nm != nc ) return( 0 );
// For every member of mem, scan the first nc entries of cand for an equal value.
12867 for( i=0; mem[i]>-1; i++ )
12869 for( j=0; j<nc; j++ )
12870 if( mem[i] == cand[j] ) break;
// j == nc means the scan finished without a match.
12871 if( j == nc ) return( 0 );
// Diagnostic dump through reporterr. Unlike the search above, the cand dump stops
// at the first element not > -1 rather than after nc entries.
// NOTE(review): the condition under which this is reached is not visible here.
12877 reporterr( "mem = " );
12878 for( i=0; mem[i]>-1; i++ ) reporterr( "%d ", mem[i] );
12881 reporterr( "cand = " );
12882 for( i=0; cand[i]>-1; i++ ) reporterr( "%d ", cand[i] );
// includemember: tests whether every element of mem also occurs in cand.
//   mem, cand: int arrays; the loops stop at the first element not > -1.
// Visible result: returns 0 as soon as an element of mem is not found in cand.
// NOTE(review): declarations and the final return (presumably 1 -- TODO confirm)
// are on lines not shown in this listing.
12894 int includemember( int *mem, int *cand ) // mem in cand
// Diagnostic dump of both lists through reporterr.
// NOTE(review): whether this is guarded by a preprocessor conditional is not visible.
12899 reporterr( "mem = " );
12900 for( i=0; mem[i]>-1; i++ ) reporterr( "%d ", mem[i] );
12903 reporterr( "cand = " );
12904 for( i=0; cand[i]>-1; i++ ) reporterr( "%d ", cand[i] );
// For every member of mem, scan cand linearly for an equal value.
12908 for( i=0; mem[i]>-1; i++ )
12910 for( j=0; cand[j]>-1; j++ )
12911 if( mem[i] == cand[j] ) break;
// The scan reached the -1 terminator without a match: not included.
12912 if( cand[j] == -1 ) return( 0 );
12914 // reporterr( "INCLUDED! mem[0]=%d\n", mem[0] );
// overlapmember: tests whether two int lists share at least one value.
//   mem1, mem2: int arrays; both loops stop at the first element not > -1.
// Returns 1 on the first common value found (pairwise scan of all combinations).
// NOTE(review): the declarations and the fall-through return (presumably 0 --
// TODO confirm) are on lines not shown in this listing.
12918 int overlapmember( int *mem1, int *mem2 )
12922 for( i=0; mem1[i]>-1; i++ )
12923 for( j=0; mem2[j]>-1; j++ )
12924 if( mem1[i] == mem2[j] ) return( 1 );
// gapcount: per-column weighted gap tally.
//   seq : nseq strings, indexed as seq[j][i] for column i in 0..lgth-1.
//   eff : per-sequence weight, eff[j].
//   freq: output array (see note).
// For each column i, fr accumulates eff[j] over the sequences whose character at
// column i is '-'.
// NOTE(review): the initialisation of fr and the store into freq are on lines not
// shown in this listing (presumably freq[i] receives fr -- TODO confirm).
12927 void gapcount( double *freq, char **seq, int nseq, double *eff, int lgth )
12932 // for( i=0; i<lgth; i++ ) freq[i] = 0.0;
12935 for( i=0; i<lgth; i++ )
12938 for( j=0; j<nseq; j++ )
12940 if( seq[j][i] == '-' ) fr += eff[j];
12943 // reporterr( "freq[%d] = %f\n", i, freq[i] );
12945 // reporterr( "\n" );
// gapcountadd: updates an existing per-column array freq in place using one
// sequence seq[j] and its weight eff[j].
//   freq: in/out array of lgth doubles.
//   seq, eff: sequences and their weights; only index j is used on the visible lines.
// NOTE(review): the declaration/initialisation of j, the use of orifr and any
// statements between the visible ones are on lines not shown in this listing, so
// the complete update formula cannot be stated from here.
12949 void gapcountadd( double *freq, char **seq, int nseq, double *eff, int lgth )
// newfr is the weight of sequence j; orifr is its complement.
12953 double newfr = eff[j];
12954 double orifr = 1.0 - newfr;
12956 // for( i=0; i<lgth; i++ ) freq[i] = 0.0;
12958 // for( i=0; i<nseq; i++ )
12959 // reporterr( "%s\n", seq[i] );
12961 for( i=0; i<lgth; i++ )
12963 // reporterr( "freq[%d] = %f", i, freq[i] );
12964 freq[i] = 1.0 - freq[i]; // convert back (replace the stored value by its complement)
// A gap in sequence j at this column adds that sequence's weight.
12967 if( seq[j][i] == '-' ) freq[i] += newfr;
12968 // reporterr( "-> %f\n", i, freq[i] );
12970 // reporterr( "\n" );
// gapcountf: per-column weighted gap tally; the visible lines are the same
// computation as in gapcount.
//   seq : nseq strings, indexed as seq[j][i] for column i in 0..lgth-1.
//   eff : per-sequence weight, eff[j].
//   freq: output array (see note).
// For each column i, fr accumulates eff[j] over the sequences whose character at
// column i is '-'.
// NOTE(review): the initialisation of fr and the store into freq are on lines not
// shown in this listing; any difference from gapcount would be on those lines.
12973 void gapcountf( double *freq, char **seq, int nseq, double *eff, int lgth )
12978 // for( i=0; i<lgth; i++ ) freq[i] = 0.0;
12981 for( i=0; i<lgth; i++ )
12984 for( j=0; j<nseq; j++ )
12986 if( seq[j][i] == '-' ) fr += eff[j];
12989 // reporterr( "in gapcountf, freq[%d] = %f\n", i, freq[i] );
12991 // reporterr( "\n" );
// outgapcount: weighted gap tally over a single pattern string.
//   gappat: one character per sequence, gappat[j] for j in 0..nseq-1.
//   eff   : per-sequence weight, eff[j].
//   freq  : output (see note).
// fr accumulates eff[j] over the entries where gappat[j] is '-'.
// NOTE(review): the initialisation of fr and the store into freq are on lines not
// shown in this listing.
12995 void outgapcount( double *freq, int nseq, char *gappat, double *eff )
13001 for( j=0; j<nseq; j++ )
13003 if( gappat[j] == '-' ) fr += eff[j];
// dist2offset: maps a distance to a non-positive offset.
// Computes dist * 0.5 - specificityconsideration (a name defined outside this
// block) and clamps any positive result to 0.0.
// NOTE(review): the return statement is on a line not shown in this listing
// (presumably returns val -- TODO confirm).
13009 double dist2offset( double dist )
13011 double val = dist * 0.5 - specificityconsideration; // because dist ranges over 0..2
13012 // double val = dist * 1.0 - specificityconsideration; // because dist ranges over 0..2
13013 if( val > 0.0 ) val = 0.0;
// makedynamicmtx: builds the matrix out from in, shifted by an offset derived
// from the offset argument.
//   out, in: nalphabets x nalphabets matrices (indexed [i][j] with i, j < nalphabets).
//   offset : converted through dist2offset( offset * 2.0 ) before use.
// Steps visible here:
//   1. copy in to out;
//   2. if the converted offset is exactly 0.0, return with the plain copy;
//   3. otherwise set out[i][j] = in[i][j] + offset * 600 for every pair whose
//      symbols amino[i] and amino[j] are both different from '-'.
// NOTE(review): declarations, braces and any preprocessor guards are on lines not
// shown in this listing. In particular the re-scaling section at the end follows
// what looks like the normal end of the function and prints through reporterr; it
// is presumably disabled by a guard -- TODO confirm.
13017 void makedynamicmtx( double **out, double **in, double offset )
13022 offset = dist2offset( offset * 2.0 ); // offset 0..1 -> 0..2
13024 // if( offset > 0.0 ) offset = 0.0;
13025 // reporterr( "dynamic offset = %f\n", offset );
13027 for( i=0; i<nalphabets; i++ ) for( j=0; j<nalphabets; j++ )
13029 out[i][j] = in[i][j];
13031 if( offset == 0.0 ) return;
13033 for( i=0; i<nalphabets; i++ )
13035 ii = (int)amino[i];
13036 if( ii == '-' ) continue; // can occur in text mode
13037 for( j=0; j<nalphabets; j++ )
13039 jj = (int)amino[j];
13040 if( jj == '-' ) continue; // can occur in text mode
13041 out[i][j] = in[i][j] + offset * 600;
13042 // reporterr( "%c-%c: %f\n", ii, jj, out[i][j] );
13046 // reporterr( "offset = %f\n", offset );
13047 // reporterr( "out[W][W] = %f\n", out[amino_n['W']][amino_n['W']] );
13048 // reporterr( "out[A][A] = %f\n", out[amino_n['A']][amino_n['A']] );
13053 // Re-scale so that the average of the diagonal elements becomes 600.
13054 // The off-diagonal elements become too large.
// First pass: av is divided by nalphabets after this loop; how it is accumulated
// is on lines not shown.
13057 for( i=0; i<nalphabets; i++ )
13059 if( ii == '-' ) continue; // can occur in text mode
13062 av /= (double)nalphabets;
// Second pass: scale every non-gap entry by 600 / av and print it.
13064 for( i=0; i<nalphabets; i++ )
13066 if( amino[i] == '-' ) continue; // can occur in text mode
13067 for( j=0; j<nalphabets; j++ )
13069 if( amino[j] == '-' ) continue; // can occur in text mode
13070 out[i][j] = out[i][j] * 600 / av;
13071 reporterr( "%c-%c: %f\n", amino[i], amino[j], out[i][j] );
// FreeCommonIP: releases commonIP (a name defined outside this block) through
// FreeIntMtx when it is non-NULL.
// NOTE(review): whether commonIP is reset afterwards is on lines not shown in
// this listing.
13075 void FreeCommonIP()
13077 if( commonIP ) FreeIntMtx( commonIP );
// makeskiptable: for each of n sequences, allocates skip[i] and counts gap
// characters per position.
//   n   : number of sequences.
//   skip: output; skip[i] is set to a freshly calloc'ed array of nogaplen+1 ints
//         (the caller presumably frees it -- TODO confirm).
//   seq : n strings; '-' marks a gap.
// Visible behaviour: every '-' in seq[i] increments skip[i][posinseq].
// NOTE(review): the updates of posinseq (and gaplen), the release of nogapseq and
// the braces are on lines not shown in this listing.
// NOTE(review): the scratch buffer is sized from strlen( seq[0] ) only, so this
// assumes no seq[i] is longer than seq[0]; the calloc results are not checked on
// the visible lines.
13083 void makeskiptable( int n, int **skip, char **seq )
13086 int nogaplen, alnlen;
13087 int i, j, posinseq, gaplen;
13089 nogapseq = calloc( strlen( seq[0] )+1, sizeof( char ) );
13090 for( i=0; i<n; i++ )
// gappick0 fills nogapseq from seq[i]; its length is used as the ungapped length.
13092 gappick0( nogapseq, seq[i] );
13093 nogaplen = strlen( nogapseq );
13094 alnlen = strlen( seq[i] );
13095 skip[i] = calloc( nogaplen+1, sizeof( int ) );
13097 // reporterr( "%s\n", nogapseq );
13101 for( j=0; j<alnlen; j++ )
13103 if( seq[i][j] == '-' )
13105 skip[i][posinseq]++;
13112 // for( j=0; j<nogaplen+1; j++ )
13113 // reporterr( "%d ", skip[i][j] );
13114 // reporterr( "\n" );
// generatesubalignmentstable: walks the nseq-1 merge steps in topol/len and
// collects member lists of groups whose accumulated branch length crosses threshold.
//   nseq     : number of sequences; the step loop runs i = 0..nseq-2.
//   tablept  : in/out; *tablept is grown with realloc and receives one
//              calloc'ed, -1-terminated member list per hit, followed by NULL.
//   nsubpt   : in/out index of the next free slot in *tablept.
//   maxmempt : in/out; raised to the largest nmem seen.
//   topol    : topol[i][0] / topol[i][1] are the -1-terminated member lists of
//              the two sides of step i; element [0] is used as the representative.
//   len      : len[i][0] / len[i][1] are the lengths added for each side at step i.
//   threshold: a side is a hit when it has more than one member and its
//              accumulated length goes from <= threshold to > threshold at this step.
// NOTE(review): the increments of nmem and *nsubpt, the return values and the
// braces / preprocessor guards are on lines not shown in this listing.
// NOTE(review): the realloc element size is written as sizeof( char * ) although
// *tablept is an int **; the result of realloc/calloc is not checked on the
// visible lines, and realloc overwrites *tablept directly.
13120 int generatesubalignmentstable( int nseq, int ***tablept, int *nsubpt, int *maxmempt, int ***topol, double **len, double threshold )
13122 int i, j, rep0, rep1, nmem, mem;
13123 double distfromtip0, distfromtip1;
13124 double *distfromtip;
13125 reporterr( "\n\n\n" );
// Accumulated length per representative, zero-initialised by calloc.
13130 distfromtip = calloc( nseq, sizeof( double ) );
13131 for( i=0; i<nseq-1; i++ )
// Trace output for side 0 of this step.
13134 reporterr( "STEP %d\n", i );
13135 for( j=0; topol[i][0][j]!=-1; j++ )
13136 reporterr( "%3d ", topol[i][0][j] );
13138 reporterr( "len=%f\n", len[i][0] );
// Remember the value before this step, then add this step's length.
13141 rep0 = topol[i][0][0];
13142 distfromtip0 = distfromtip[rep0];
13143 distfromtip[rep0] += len[i][0];
13144 // reporterr( "distfromtip[%d] = %f->%f\n", rep0, distfromtip0, distfromtip[rep0] );
// Trace output for side 1 of this step.
13148 for( j=0; topol[i][1][j]!=-1; j++ )
13149 reporterr( "%3d ", topol[i][1][j] );
13151 reporterr( "len=%f\n", len[i][1] );
13154 rep1 = topol[i][1][0];
13155 distfromtip1 = distfromtip[rep1];
13156 distfromtip[rep1] += len[i][1];
13157 // reporterr( "distfromtip[%d] = %f->%f\n", rep1, distfromtip1, distfromtip[rep1] );
// Side 0: more than one member and the threshold was crossed during this step.
13159 if( topol[i][0][1] != -1 && distfromtip0 <= threshold && threshold < distfromtip[rep0] )
13161 // reporterr( "HIT 0!\n" );
// Make room for the new entry plus the NULL terminator.
13162 *tablept = realloc( *tablept, sizeof( char * ) * (*nsubpt+2) );
13163 for( j=0, nmem=0; (mem=topol[i][0][j])!=-1; j++ )
13165 // reporterr( "allocating %d\n", nmem+1 );
13166 (*tablept)[*nsubpt] = calloc( nmem+1, sizeof( int ) );
13167 (*tablept)[*nsubpt+1] = NULL;
13168 intcpy( (*tablept)[*nsubpt], topol[i][0] );
13169 if( *maxmempt < nmem ) *maxmempt = nmem;
// Side 1: same test and bookkeeping as side 0.
13173 if( topol[i][1][1] != -1 && distfromtip1 <= threshold && threshold < distfromtip[rep1] )
13175 // reporterr( "HIT 1!\n" );
13176 *tablept = realloc( *tablept, sizeof( char * ) * (*nsubpt+2) );
13177 for( j=0, nmem=0; (mem=topol[i][1][j])!=-1; j++ )
13179 // reporterr( "allocating %d\n", nmem+1 );
13180 (*tablept)[*nsubpt] = calloc( nmem+1, sizeof( int ) );
13181 (*tablept)[*nsubpt+1] = NULL;
13182 intcpy( (*tablept)[*nsubpt], topol[i][1] );
13183 if( *maxmempt < nmem ) *maxmempt = nmem;
// After all steps: branch on whether the total for index 0 stayed within threshold.
// distfromtip is freed on both paths; the values returned are not visible here.
13189 if( distfromtip[0] <= threshold )
13191 free( distfromtip );
13195 free( distfromtip );
// sumofpairsscore: accumulates a pairwise score over all unordered sequence pairs.
//   nseq: number of sequences.
//   seq : the sequences.
// For every pair (i, j) with j < i, adds naivepairscore11( seq[i], seq[j], penalty ) / 600
// to v; penalty is defined outside this block.
// NOTE(review): the declaration/initialisation of v and the return statement are
// on lines not shown in this listing. Whether the division by 600 is integer or
// floating point depends on the return type of naivepairscore11, not visible here.
13201 double sumofpairsscore( int nseq, char **seq )
13205 for( i=1; i<nseq; i++ )
13207 for( j=0; j<i; j++ )
13209 v += naivepairscore11( seq[i], seq[j], penalty ) / 600;
13212 // v /= ( (nseq-1) * nseq ) / 2;
// commonsextet_p: scans the END_OF_VEC-terminated list pointt against the
// per-index counts in table, using work buffers that persist between calls.
//   table : per-index counts, read as table[point]; passing NULL frees the work buffers.
//   pointt: list of indices terminated by END_OF_VEC.
// State kept across calls (static, TLS-qualified): memo (tsize ints), ct
// (MIN( maxl, tsize )+1 ints) and the cursor cp.
// NOTE(review): the returned value, the statements guarded by the conditions
// below, the guard around the allocations and the reset of memo are on lines not
// shown in this listing.
13216 int commonsextet_p( int *table, int *pointt )
13221 static TLS int *memo = NULL;
13222 static TLS int *ct = NULL;
13223 static TLS int *cp;
// Cleanup request: release the work buffers.
13225 if( table == NULL )
13227 if( memo ) free( memo );
13228 if( ct ) free( ct );
// Special case for a list whose first entry is -1 (handling not visible here).
13234 if( *pointt == -1 )
// Allocate the work buffers; allocation failure ends in ErrorExit.
13239 memo = (int *)calloc( tsize, sizeof( int ) );
13240 if( !memo ) ErrorExit( "Cannot allocate memo\n" );
13241 ct = (int *)calloc( MIN( maxl, tsize )+1, sizeof( int ) ); // caution!!
13242 if( !ct ) ErrorExit( "Cannot allocate ct\n" );
// Main scan: tmp is how many times this index was already seen in pointt.
13246 while( ( point = *pointt++ ) != END_OF_VEC )
13248 tmp = memo[point]++;
// This occurrence is still covered by the count in table.
13249 if( tmp < table[point] )
// First occurrence of this index: record it in the list written through cp.
13251 if( tmp == 0 ) *cp++ = point;
// Walk the recorded indices until END_OF_VEC (loop body not visible here).
13256 while( *cp != END_OF_VEC )
// distcompact_msa: distance between two sequences derived from a pairwise score.
//   seq1, seq2            : the two sequences.
//   skiptable1, skiptable2: passed through to naivepairscorefast.
//   ss1, ss2              : normalisers; the smaller one is the denominator (bunbo).
// value = ( 1 - score / min(ss1, ss2) ) * 2, capped at 10.0.
// NOTE(review): the declaration of value, any handling of bunbo == 0 and the
// return statement are on lines not shown in this listing.
13262 double distcompact_msa( char *seq1, char *seq2, int *skiptable1, int *skiptable2, int ss1, int ss2 ) // slow!
13264 int bunbo = MIN( ss1, ss2 );
13267 // reporterr( "msa-based dist\n" );
13272 value = ( 1.0 - (double)naivepairscorefast( seq1, seq2, skiptable1, skiptable2, penalty_dist ) / bunbo ) * 2.0; // 2014/Aug/15 fast
13273 if( value > 10 ) value = 10.0; // 2015/Mar/17
// distcompact: distance from the commonsextet_p count, corrected by a length factor.
//   len1, len2    : the two lengths; the larger becomes longer, the smaller shorter.
//   table1, point2: passed to commonsextet_p.
//   ss1, ss2      : normalisers; the smaller one divides the count.
// value = ( 1 - commonsextet_p(...) / min(ss1, ss2) ) * lenfac * 2, where
// lenfac = 1 / ( shorter/longer * lenfacd + lenfacb / ( longer + lenfacc ) + lenfaca )
// and lenfaca..lenfacd are defined outside this block.
// NOTE(review): the condition choosing between the two longer/shorter assignments
// and the value used when ss1 or ss2 is 0 are on lines not shown in this listing.
13278 double distcompact( int len1, int len2, int *table1, int *point2, int ss1, int ss2 )
13280 double longer, shorter, lenfac, value;
13284 longer=(double)len1;
13285 shorter=(double)len2;
13289 longer=(double)len2;
13290 shorter=(double)len1;
13292 lenfac = 1.0 / ( shorter / longer * lenfacd + lenfacb / ( longer + lenfacc ) + lenfaca );
13293 // reporterr( "lenfac=%f\n", lenfac );
13294 // reporterr( "commonsextet_p()=%d\n", commonsextet_p( table1, point2 ) );
13295 // reporterr( "ss1=%d, ss2=%d\n", ss1, ss2 );
13296 // reporterr( "val=%f\n", (1.0-(double)commonsextet_p( table1, point2 )/ss1) );
// Special case for a zero normaliser (this avoids the division below).
13298 if( ss1 == 0 || ss2 == 0 )
13301 value = ( 1.0 - (double)commonsextet_p( table1, point2 ) / MIN(ss1,ss2) ) * lenfac * 2.0;
13303 return( value ); // 2013/Oct/17 -> doubled
// movereg: converts the region bounds stored in tmpptr (start1/end1 for seq1,
// start2/end2 for seq2) into character offsets within the gapped strings.
//   seq1, seq2         : strings in which '-' is not counted.
//   tmpptr             : supplies start1, end1, start2, end2.
//   start1pt .. end2pt : outputs, written as ( pt - seqN ) - 1.
// Each scan advances pt one character at a time, counts the non-'-' characters in
// tmpint and stops when tmpint equals the wanted bound.
// NOTE(review): the loop headers, the initial values of pt and tmpint and the
// else branches are on lines not shown in this listing.
13306 static void movereg( char *seq1, char *seq2, LocalHom *tmpptr, int *start1pt, int *start2pt, int *end1pt, int *end2pt )
// seq1: locate start1.
13315 if( *pt++ != '-' ) tmpint++;
13316 if( tmpint == tmpptr->start1 ) break;
13318 *start1pt = (int)( pt - seq1 ) - 1;
// A one-residue region ends where it starts; otherwise keep scanning to end1.
13320 if( tmpptr->start1 == tmpptr->end1 ) *end1pt = *start1pt;
13325 // fprintf( stderr, "tmpint = %d, end1 = %d pos = %d\n", tmpint, tmpptr->end1, pt-seq1[i] );
13326 if( *pt++ != '-' ) tmpint++;
13327 if( tmpint == tmpptr->end1 ) break;
13329 *end1pt = (int)( pt - seq1 ) - 1;
// seq2: the same procedure with start2 / end2.
13336 if( *pt++ != '-' ) tmpint++;
13337 if( tmpint == tmpptr->start2 ) break;
13339 *start2pt = (int)( pt - seq2 ) - 1;
13340 if( tmpptr->start2 == tmpptr->end2 ) *end2pt = *start2pt;
13345 if( *pt++ != '-' ) tmpint++;
13346 if( tmpint == tmpptr->end2 ) break;
13348 *end2pt = (int)( pt - seq2 ) - 1;
// movereg_swap: same scan as movereg but with the roles of the stored bounds
// exchanged: seq1 is searched with tmpptr->start2 / end2 and seq2 with
// tmpptr->start1 / end1.
//   seq1, seq2         : strings in which '-' is not counted.
//   tmpptr             : supplies start1, end1, start2, end2.
//   start1pt .. end2pt : outputs, written as ( pt - seqN ) - 1.
// NOTE(review): the loop headers, the initial values of pt and tmpint and the
// else branches are on lines not shown in this listing.
13352 static void movereg_swap( char *seq1, char *seq2, LocalHom *tmpptr, int *start1pt, int *start2pt, int *end1pt, int *end2pt )
// seq1: locate start2.
13362 if( *pt++ != '-' ) tmpint++;
13363 if( tmpint == tmpptr->start2 ) break;
13365 *start1pt = (int)( pt - seq1 ) - 1;
// A one-residue region ends where it starts; otherwise keep scanning to end2.
13367 if( tmpptr->start2 == tmpptr->end2 ) *end1pt = *start1pt;
13372 // fprintf( stderr, "tmpint = %d, end1 = %d pos = %d\n", tmpint, tmpptr->end1, pt-seq1[i] );
13373 if( *pt++ != '-' ) tmpint++;
13374 if( tmpint == tmpptr->end2 ) break;
13376 *end1pt = (int)( pt - seq1 ) - 1;
// seq2: the same procedure with start1 / end1.
13383 if( *pt++ != '-' ) tmpint++;
13384 if( tmpint == tmpptr->start1 ) break;
13386 *start2pt = (int)( pt - seq2 ) - 1;
13387 if( tmpptr->start1 == tmpptr->end1 ) *end2pt = *start2pt;
13392 if( *pt++ != '-' ) tmpint++;
13393 if( tmpint == tmpptr->end1 ) break;
13395 *end2pt = (int)( pt - seq2 ) - 1;
13399 void fillimp( double **impmtx, double *imp, int clus1, int clus2, int lgth1, int lgth2, char **seq1, char **seq2, double *eff1, double *eff2, double *eff1_kozo, double *eff2_kozo, LocalHom ***localhom, char *swaplist, int forscore, int *orinum1, int *orinum2 )
13401 int i, j, k1, k2, start1, start2, end1, end2;
13402 double effij, effijx, effij_kozo;
13405 void (*movefunc)(char *, char *, LocalHom *, int *, int *, int *, int * );
13408 fprintf( stderr, "eff1 in _init_strict = \n" );
13409 for( i=0; i<clus1; i++ )
13410 fprintf( stderr, "eff1[] = %f\n", eff1[i] );
13411 for( i=0; i<clus2; i++ )
13412 fprintf( stderr, "eff2[] = %f\n", eff2[i] );
13415 for( i=0; i<lgth1; i++ ) for( j=0; j<lgth2; j++ )
13416 impmtx[i][j] = 0.0;
13417 effijx = 1.0 * fastathreshold;
13418 for( i=0; i<clus1; i++ )
13420 if( swaplist && swaplist[i] ) movefunc = movereg_swap;
13421 else movefunc = movereg;
13422 for( j=0; j<clus2; j++ )
13425 if( swaplist == NULL && orinum1 && orinum2 ) // muda.
13427 if( orinum1[i]>orinum2[j] )
13428 movefunc = movereg_swap;
13430 movefunc = movereg;
13433 // effij = eff1[i] * eff2[j] * effijx;
13434 effij = eff1[i] * eff2[j] * effijx;
13435 effij_kozo = eff1_kozo[i] * eff2_kozo[j] * effijx;
13436 tmpptr = localhom[i][j];
13439 // fprintf( stderr, "start1 = %d\n", tmpptr->start1 );
13440 // fprintf( stderr, "end1 = %d\n", tmpptr->end1 );
13441 // fprintf( stderr, "i = %d, seq1 = \n%s\n", i, seq1[i] );
13442 // fprintf( stderr, "j = %d, seq2 = \n%s\n", j, seq2[j] );
13444 movefunc( seq1[i], seq2[j], tmpptr, &start1, &start2, &end1, &end2 );
13447 // fprintf( stderr, "start1 = %d (%c), end1 = %d (%c), start2 = %d (%c), end2 = %d (%c)\n", start1, seq1[i][start1], end1, seq1[i][end1], start2, seq2[j][start2], end2, seq2[j][end2] );
13448 // fprintf( stderr, "step 0\n" );
13449 if( end1 - start1 != end2 - start2 )
13451 // fprintf( stderr, "CHUUI!!, start1 = %d, end1 = %d, start2 = %d, end2 = %d\n", start1, end1, start2, end2 );
13454 k1 = start1; k2 = start2;
13455 pt1 = seq1[i] + k1;
13456 pt2 = seq2[j] + k2;
13457 while( *pt1 && *pt2 )
13459 if( *pt1 != '-' && *pt2 != '-' )
13461 // Take care not to apply the weight twice.
13462 // impmtx[k1][k2] += tmpptr->wimportance * fastathreshold;
13463 // impmtx[k1][k2] += tmpptr->importance * effij;
13464 // impmtx[k1][k2] += tmpptr->fimportance * effij;
13465 if( tmpptr->korh == 'k' )
13466 impmtx[k1][k2] += tmpptr->importance * effij_kozo;
13468 impmtx[k1][k2] += tmpptr->importance * effij;
13469 // fprintf( stderr, "k1=%d, k2=%d, impalloclen=%d\n", k1, k2, impalloclen );
13470 // fprintf( stderr, "mark, %d (%c) - %d (%c) \n", k1, *pt1, k2, *pt2 );
13474 else if( *pt1 != '-' && *pt2 == '-' )
13476 // fprintf( stderr, "skip, %d (%c) - %d (%c) \n", k1, *pt1, k2, *pt2 );
13479 else if( *pt1 == '-' && *pt2 != '-' )
13481 // fprintf( stderr, "skip, %d (%c) - %d (%c) \n", k1, *pt1, k2, *pt2 );
13484 else if( *pt1 == '-' && *pt2 == '-' )
13486 // fprintf( stderr, "skip, %d (%c) - %d (%c) \n", k1, *pt1, k2, *pt2 );
13490 if( k1 > end1 || k2 > end2 ) break;
13492 tmpptr = tmpptr->next;
13497 printf( "orinum1=%d, orinum2=%d\n", *orinum1, *orinum2 );
13498 if( *orinum1 == 0 )
13500 fprintf( stdout, "impmtx = \n" );
13501 for( k2=0; k2<lgth2; k2++ )
13502 fprintf( stdout, "%6.3f ", (double)k2 );
13503 fprintf( stdout, "\n" );
13504 for( k1=0; k1<lgth1; k1++ )
13506 fprintf( stdout, "%d", k1 );
13507 for( k2=0; k2<lgth2; k2++ )
13508 fprintf( stdout, "%2.1f ", impmtx[k1][k2] );
13509 fprintf( stdout, "\n" );