7 #include "io_lib_header.h"
8 #include "util_lib_header.h"
9 #include "dp_lib_header.h"
10 #include "define_header.h"
20 static NT_node compute_fj_tree (NT_node T, Alignment *A, int limit, char *mode);
21 static NT_node compute_cw_tree (Alignment *A);
22 static NT_node compute_std_tree (Alignment *A, int n, char **arg);
23 static NT_node tree2fj_tree (NT_node T);
24 int tree_contains_duplicates (NT_node T);
25 int display_tree_duplicates (NT_node T);
27 static int compare_node1 ( int *b1, int *b2, int n);
28 static int compare_node2 ( int *b1, int *b2, int n);
29 static int find_seq_chain (Alignment *A, int **sim,int *used,int seq0,int seq1, int seq2,int chain_length, int limit, int max_chain, int *nseq);
30 int new_display_tree (NT_node T, int n);
31 NT_node display_code (NT_node T, int nseq, FILE *fp);
32 NT_node display_dist (NT_node T, int n, FILE *fp);
33 /*********************************************************************/
35 /* dpa_tree_manipulation */
37 /*********************************************************************/
38 static NT_node code_dpa_tree ( NT_node T, int **D);
39 NT_node collapse_sub_tree ( NT_node T,int nseq, int *list, char *new_name);
40 NT_node seq2dpa_tree (Sequence *S, char *mode)
45 CL=declare_constraint_list_simple (S);
46 CL->local_stderr=NULL;
49 CL->DM=cl2distance_matrix (CL,NOALN,(mode==NULL)?"ktup":mode, NULL, 0);
51 T=int_dist2nj_tree ( (CL->DM)->similarity_matrix, S->name, S->nseq, vtmpnam (NULL));
54 Tree=recode_tree (Tree, S);
55 Tree=reset_dist_tree (Tree, -1);
57 Tree=code_dpa_tree (Tree, (CL->DM)->similarity_matrix);
58 free_distance_matrix (CL->DM);
62 NT_node tree2dpa_tree (NT_node T, Alignment *A, char *mode)
64 /*This Function sets the branches with Length values used by DP*/
65 /*The tree must be rooted*/
71 T=reset_dist_tree (T, -1);
72 D=get_sim_aln_array (A,mode);
74 T=code_dpa_tree (T, D);
78 NT_node code_dpa_tree ( NT_node T, int **D)
93 nl=(T->left)->nseq;ll=(T->left)->lseq;
94 nr=(T->right)->nseq;lr=(T->right)->lseq;
96 for (tot=0,n=0, a=0; a< nl; a++)
97 for ( b=0; b< nr; b++, n++)
100 min=MIN(min,(D[ll[a]][lr[b]]));
102 /* T->dist=(mode==AVERAGE)?(tot/n):min:;*/
103 T->dist=(n>0)?tot/n:0;
105 code_dpa_tree ( T->right, D);
106 code_dpa_tree ( T->left, D);
110 static int group_number;
111 char *tree2Ngroup (Alignment *A, NT_node T, int max_n, char *fname, char *mat)
113 double top, bot, mid, pmid;
123 list=declare_char ( 2, 100);
124 sprintf (list[0], "%s",mat);
126 fprintf ( stderr, "\nCompute Phylogenetic tree [Matrix=%s]", mat);
127 T=compute_std_tree(A,1, list);
128 fprintf ( stderr, "\nCompute dpa tree");
129 T=tree2dpa_tree (T,A, mat);
137 n=tree2group_file (T,S,0, max_n, fname);
138 fprintf ( stderr, "\n#TrimTC: Split in %d Groups at a minimum of %d%% ID\n",n, (int)max_n);
144 if ( max_n>S->nseq)max_n=S->nseq;
148 n=tree2group_file(T, S,0, (int)mid,fname);
149 mid=dichotomy((double)n, (double)max_n,(pmid=mid), &bot, &top);
150 while (n!=max_n && (int)pmid!=(int)mid)
152 n=tree2group_file(T, S,0, (int)mid, fname);
153 mid=dichotomy((double)n, (double)max_n,(pmid=mid), &bot, &top);
155 fprintf ( stderr, "\nDONE2");
156 fprintf ( stderr, "\n#TrimTC: Split in %d Groups at a minimum of %d%% ID\n",n, (int)mid);
161 static int group_number;
162 int tree2group_file ( NT_node T,Sequence *S, int maxnseq, int minsim, char *name)
167 fp=vfopen (name, "w");
168 vfclose (tree2group (T, S,maxnseq,minsim, "tree2ngroup",fp));
170 return count_n_line_in_file(name);
174 FILE * tree2group ( NT_node T,Sequence *S, int maxnseq, int minsim,char *name, FILE *fp)
181 m=(maxnseq==0)?S->nseq:maxnseq;
186 if ( T->nseq<=m && T->dist>=d)
189 fprintf ( fp, ">%s_%d ", (name)?name:"", ++group_number);
190 for ( a=0; a< T->nseq; a++)
191 fprintf ( fp, "%s ", S->name[T->lseq[a]]);
193 if (!T->parent)group_number=0;
198 fp=tree2group (T->right, S, maxnseq, minsim, name,fp);
199 fp=tree2group (T->left, S, maxnseq, minsim, name,fp);
200 if (!T->parent)group_number=0;
208 NT_node tree2collapsed_tree (NT_node T, int n, char **string)
217 list=vcalloc (A->nseq, sizeof (char***));
218 nlist=vcalloc (A->nseq, sizeof (int));
225 for (l=0,a=0; a< n; a++)l+=strlen (string[a]);
226 buf=vcalloc ( 2*n+l+1, sizeof (char));
227 for (a=0; a< n; a++){buf=strcat (buf,string[a]), buf=strcat ( buf, " ");}
228 list[0]=string2list (buf);
231 else if ( file_exists (NULL,string[0]))
233 list=read_group (string[0]);
237 fprintf (stderr, "\nERROR: file <%s> does not exist [FATAL:%s]\n",string[0], PROGRAM);
238 myexit (EXIT_FAILURE);
247 for (b=0; b<A->nseq; b++)nlist[b]=0;
250 i=name_is_in_list (list[a][b], A->name, A->nseq, MAXNAMES);
253 T=collapse_sub_tree ( T,A->nseq,nlist,list[a][1]);
254 free_char (list[a], -1);
261 NT_node collapse_sub_tree ( NT_node T,int nseq, int *list, char *new_name)
269 while (a<nseq && list[a]==T->lseq2[a]){a++;}
272 sprintf ( T->name, "%s", new_name);
274 T->left=T->right=NULL;
279 collapse_sub_tree (T->right, nseq, list, new_name);
280 collapse_sub_tree (T->left, nseq, list, new_name);
286 /*********************************************************************/
291 /*********************************************************************/
292 NT_node remove_leaf ( NT_node T);
293 NT_node prune_root (NT_node T);
294 NT_node main_prune_tree ( NT_node T, Sequence *S)
296 T=prune_tree ( T, S);
300 NT_node prune_tree ( NT_node T, Sequence *S)
305 if (T->leaf && T->isseq && name_is_in_list (T->name,S->name, S->nseq, 100)==-1)
313 for (a=0; a< S->nseq; a++)
315 HERE ("prune pb ---%s", S->name[a]);
317 myexit (EXIT_FAILURE);
319 C=(P->right==T)?P->left:P->right;
320 PP=C->parent=P->parent;
322 if (PP && PP->right==P)PP->right=C;
323 else if (PP)PP->left=C;
326 if (T==P->right)P->right=NULL;
334 prune_tree (T->left, S);
335 prune_tree (T->right, S);
337 return prune_root(T);
340 NT_node prune_root (NT_node T)
342 //This function prunes the root if needed (and frees it).
343 if (T->parent)return T;
345 if (!T->right && T->left)
347 return prune_root (T->left);
349 else if (T->right && !T->left)
352 return prune_root (T->right);
359 /*********************************************************************/
361 /* tree comparison */
364 /*********************************************************************/
365 int main_compare_cog_tree (NT_node T1, char *cogfile)
368 int a, nbac, n=0, p, c, b;
371 array=file2list(cogfile, ";\n");
372 nbac=atoi(array[0][0])-2;
374 A=declare_aln2 (nbac+1, 10);
375 for (a=0; a<nbac; a++)
377 sprintf ( A->name[a], "%s", array[0][a+2]);
379 A->seq_al[a][1]='\0';
382 sprintf ( A->name[nbac], "cons");
389 while (array[n]!=NULL)
391 for (b=0; b<nbac; b++)
393 p=atoi (array[1][b+2]);p=(p==0)?'O':'I';
394 c=atoi (array[n][b+2]);c=(c==0)?'O':'I';
397 A->seq_al[b][2]='\0';
400 sprintf (A->file[0], "%s", array[n][1]);
402 main_compare_aln_tree (T1, A, stdout);
409 int main_compare_aln_tree (NT_node T1, Alignment *A, FILE *fp)
413 fprintf ( fp, "\nTOT_CLASH COG %s N %d", A->file[0], compare_aln_tree (T1, A, &n, fp));
418 int compare_aln_tree (NT_node T, Alignment *A, int *n, FILE *fp)
423 i=name_is_in_list (T->name, A->name, A->nseq, 100);
424 T->seqal=A->seq_al[i];
430 if (!(T->left )->seqal)compare_aln_tree (T->left, A,n, fp);
431 if (!(T->right)->seqal)compare_aln_tree (T->right, A,n, fp);
433 seq1=(T->left)->seqal;
434 seq2=(T->right)->seqal;
435 (T->left)->seqal=(T->right)->seqal=NULL;
438 if (strm (seq1, seq2))
446 if (seq1[0]!=seq2[0] && seq1[1]!=seq2[1])
448 fprintf ( fp, "\nNODE_CLASH: COG %s (%s,%s):(",A->file[0],seq1,seq2 );
449 display_leaf_below_node (T->left, fp);
450 fprintf ( fp, ");(");
451 display_leaf_below_node (T->right, fp);
460 //**********************************************************************
462 int compare_split (int *s1, int *s2, int l);
463 int get_split_size (int *s, int l);
465 int main_compare_splits ( NT_node T1, NT_node T2, char *mode,FILE *fp)
467 Sequence *S1, *S2, *S;
474 if ( tree_contains_duplicates (T1))
476 display_tree_duplicates (T1);
477 printf_exit (EXIT_FAILURE, stderr, "\nFirst Tree Contains Duplicated Sequences [main_compare_trees][FATAL:%s]", PROGRAM);
480 else if ( tree_contains_duplicates (T2))
482 display_tree_duplicates (T2);
483 printf_exit (EXIT_FAILURE, stderr, "\nSecond Tree Contains Duplicated Sequences [main_compare_trees]");
487 //Identify the commom Sequence Set
488 S1=tree2seq(T1, NULL);
491 S2=tree2seq(T2, NULL);
494 S=trim_seq ( S1, S2);
496 //Prune the trees and recode the subtree list
497 T1=prune_tree (T1, S);
498 T1=recode_tree(T1, S);
500 T2=prune_tree (T2, S);
501 T2=recode_tree(T2, S);
503 sl1=declare_int (10*S->nseq, S->nseq);
504 sl2=declare_int (10*S->nseq, S->nseq);
508 tree2split_list (T1, S->nseq, sl1, &n1);
509 tree2split_list (T2, S->nseq, sl2, &n2);
515 n=get_split_size (sl1[a], S->nseq);
516 for (best=0,b=0; b<n2; b++)
518 s=compare_split (sl1[a], sl2[b], S->nseq);
521 fprintf ( fp, "\n%4d %4d ", MIN(n,(S->nseq)), best);
522 for (b=0; b<S->nseq; b++)fprintf ( fp, "%d", sl1[a][b]);
525 free_sequence (S, -1);
526 free_sequence (S1, -1);
527 free_sequence (S2, -1);
528 myexit (EXIT_SUCCESS);
531 int compare_split (int *s1, int *s2, int l)
533 int n1, n2, score1, score2, a;
534 n1=get_split_size (s1, l);
535 n2=get_split_size (s2, l);
537 for (score1=0,a=0; a< l; a++)
539 score1+=(s1[a]==1 && s2[a]==1)?1:0;
541 score1=(score1*200)/(n1+n2);
543 for ( score2=0, a=0; a<l; a++)
545 score2+=(s1[a]==0 && s2[a]==1)?1:0;
547 score2=(score2*200)/((l-n1)+n2);
548 return MAX(score1, score2);
550 int get_split_size (int *s, int l)
553 for (a=b=0; a<l; a++)b+=s[a];
556 //**********************************************************************
559 // TREEE COMPARISON FUNCTIONS
563 //////////////////////////////////////////////////////////////////////
566 void normalizeScore(float *score, int len)
569 float SCORE_MIN = FLT_MAX;
570 float SCORE_MAX = FLT_MIN;
571 for(i = 0; i < len; i++)
573 if(score[i] < SCORE_MIN)
574 SCORE_MIN = score[i];
575 if(score[i] > SCORE_MAX)
576 SCORE_MAX = score[i];
578 for(i = 0; i < len; i++)
579 score[i] = (9*(score[i]-SCORE_MIN)/(SCORE_MAX-SCORE_MIN));
582 int new_compare_trees ( NT_node T1, NT_node T2, int nseq, Tree_sim *TS);
583 NT_node new_search_split (NT_node T, NT_node B, int nseq);
584 int new_compare_split ( int *b1, int *b2, int n);
585 Tree_sim* tree_scan_pos (Alignment *A, int start, int end, char *ptree, NT_node RT);
586 Tree_sim* tree_scan_pos_woble (Alignment *A, int center, int max, char *ptree, NT_node RT, int *br, int *bl );
587 Tree_sim* tree_scan_pair_pos (Alignment *A, int start, int end, int start2, int end2,char *ptree, NT_node RT);
588 Tree_sim* tree_scan_multiple_pos (int *poslist, int *wlist,int nl, Alignment *A, char *ptree, NT_node RT);
589 NT_node aln2std_tree(Alignment *A, int ipara1, int ipara2, char *mode);
591 NT_node tree_scan (Alignment *A,NT_node RT, char *pscan, char *ptree)
593 int l, a,ax, c, cx, b;
599 char *pcFileName = A->file[0];
600 char prefix[200] ={0};
601 int len = (strrchr(pcFileName,'.')?strrchr(pcFileName,'.')-pcFileName:strlen(pcFileName));
602 strncpy(prefix, pcFileName, len);
605 char out_format[100];
607 char *score_csv_file = vcalloc(200, sizeof (char));
608 char *score_html_file = vcalloc(200, sizeof (char));
609 char *hit_matrix_file = vcalloc(200, sizeof (char));
610 char *hit_html_file = vcalloc(200, sizeof (char));
611 char *tree_file = vcalloc(200, sizeof (char));
613 sprintf(score_csv_file, "%s%s", prefix, ".score_csv");
614 sprintf(score_html_file, "%s%s", prefix, ".ts_html");
615 sprintf(hit_matrix_file, "%s%s", prefix, ".hit_matrix");
616 sprintf(hit_html_file, "%s%s", prefix, ".hit_html");
617 sprintf(tree_file, "%s%s", prefix, ".trees_txt");
619 if ( pscan && strstr ( pscan, "help"))
621 fprintf ( stdout, "\n+tree_scan| _W_ : Window size for the tree computation|STD size in norscan mode");
622 fprintf ( stdout, "\n+tree_scan| _MODE_ : Mode for the number of windows (single, double, list, scan, pairscan, norscan, hit, norhit)");
623 fprintf ( stdout, "\n+tree_scan| _MINW_ : Minimum Window size when using the scan mode (4)");
624 fprintf ( stdout, "\n+tree_scan| _OUTTREE_ : specify the format of outputing tree in every position (default: not ouput)");
625 myexit (EXIT_SUCCESS);
628 strget_param (pscan, "_W_", "5", "%d",&w);
629 strget_param (pscan, "_MODE_", "single", "%s",mode);
630 strget_param (pscan, "_MINW_", "1", "%d",&start);
631 strget_param (pscan, "_POSFILE_", "NO", "%s", posfile);
632 strget_param (pscan, "_OUTTREE_", "", "%s", &out_format);
634 if(strlen(out_format) > 1)
637 l=intlen (A->len_aln);
639 poslist=vcalloc ( A->len_aln, sizeof (int));
641 fascore = vcalloc(A->len_aln, sizeof (float));
643 if ( strm (posfile, "NO"))
646 for ( a=0; a< A->len_aln; a++)poslist[nl++]=a+1;
651 p=file2pos_list (A,posfile);
652 poslist=pos2list (p, A->len_aln, &nl);
653 for (a=0; a<nl; a++)poslist[a]++;
658 NT_node *TreeArray = vcalloc(nl, sizeof (NT_node));
660 if ( strm (mode, "woble"))
662 for (ax=0; ax<nl; ax++)
668 TS=tree_scan_pos_woble (A,a,w,ptree, RT, &left, &right);
669 fprintf ( stdout, "P: %*d I: %*d %*d SIM: %6.2f\n", l,a,l,left,l,right,TS->uw);
673 else if ( strm (mode, "single"))
675 for (b=0,ax=0; ax<nl; ax++)
686 if (pstart<1 || pstart>A->len_aln)continue;
687 if (pend<1 || pend>A->len_aln)continue;
688 TS=tree_scan_pos (A, pstart,pend, ptree, RT);
689 fprintf ( stdout, "P: %*d I: %*d %*d SIM: %6.2f L: %2d\n", l,a,l,pstart,l,pend,TS->uw, (w*2)+1);
693 else if (strm (mode, "scan")||strm (mode, "hit"))
696 fp_ts=vfopen (score_csv_file, "w");
697 fprintf ( fp_ts, "Position,Win_Beg,Win_End,Similarity,Win_Len\n");
698 for ( ax=0; ax<nl; ax++)
701 int best_pos=0, best_w=0, best_start, best_end;
704 best_pos = best_start = best_end = a;
705 for (b=start; b<=w; b++)
712 if (pstart<1 || pstart>A->len_aln)continue;
713 if (pend<1 || pend>A->len_aln)continue;
714 TS=tree_scan_pos (A, pstart,pend, ptree, RT);
715 if (TS->uw>=best_score)
716 {best_score=TS->uw;best_w=b;best_start=pstart; best_end=pend;}
719 fprintf (fp_ts, "%*d,%*d,%*d,%6.2f,%2d\n", l,best_pos, l,best_start, l,best_end, best_score,(best_w*2)+1);
720 fascore[ax]=(float)best_score;
721 if(strlen(out_format) > 1)
722 vfclose (print_tree (aln2std_tree(A, best_start, best_end, mode), out_format, vfopen (tree_file, "a+")));
723 if(strm (mode, "hit"))
724 TreeArray[ax] = aln2std_tree(A, best_start, best_end, mode);
728 //tree scan by using normal distribution window
730 //generate hit matrix
731 else if ( strm (mode, "norscan")||strm (mode, "norhit"))
734 ptree=vcalloc(100, sizeof (char));
735 fp_ts=vfopen (score_csv_file, "w");
736 fprintf ( fp_ts, "Position,Similarity,STD_Len\n");
737 for ( ax=0; ax<nl; ax++)
739 float best_score=DBL_MIN;
740 int best_STD = start;
742 for (b=start; b<=w; b++)
745 sprintf ( ptree, "+aln2tree _COMPARE_nordisaln__STD_%d__CENTER_%d_", b, a); //should be used a or ax
746 TS=tree_scan_pos (A, 1, nl, ptree, RT);
747 if (TS->uw>=best_score)
748 {best_score=TS->uw;best_STD=b;}
751 fascore[ax]=best_score;
752 fprintf ( fp_ts, "%*d,%6.2f,%d\n", l,a, fascore[ax], best_STD);
753 if(strlen(out_format) > 1)
754 vfclose (print_tree (aln2std_tree(A, best_STD, a, mode), out_format, vfopen (tree_file, "a+")));
755 if(strm (mode, "norhit"))
756 TreeArray[ax] = aln2std_tree(A, best_STD, a, mode);
760 //generate hit matrix
761 if (strm (mode, "hit")||strm (mode, "norhit"))
763 //Compute the pair score of tree scan segqtion
764 fprintf (stdout, "[STRAT] Calculate the hit matrix of the tree scan\n");
765 float **ffpHitScoreMatrix;
767 ffpHitScoreMatrix=vcalloc (nl, sizeof (float*));
769 for(i = 0; i < nl; i++)
770 ffpHitScoreMatrix[i]=vcalloc (nl-i, sizeof (float));
772 fprintf (stdout, "Process positions\n", i);
773 for(i = 0; i < nl; i++)
775 fprintf (stdout, "%d, ", i);
776 for(j = i; j < nl; j++)
779 TS=tree_cmp (TreeArray[i], TreeArray[j]);
780 ffpHitScoreMatrix[i][j-i] = TS->uw;
785 fprintf (stdout, "\n");
786 output_hit_matrix(hit_matrix_file, ffpHitScoreMatrix, nl);
787 fprintf (stdout, "[END]Calculate the hit matrix of the tree scan\n");
789 //Output Hit Score into color html
790 output_hit_color_html (A, ffpHitScoreMatrix, nl, hit_html_file);
791 vfree(ffpHitScoreMatrix);
793 else if ( strm (mode, "pairscan"))
797 for ( ax=0; ax<nl; ax++)
800 int best_pos=0, best_w=0, best_w2=0, best_start, best_end, best_pos2=0, best_start2, best_end2;
803 int pstart, pend, p2start, p2end;
805 for ( cx=0; cx<nl; cx++)
809 for (d=start; d<=w; d++)
811 for (b=start; b<=w; b++)
817 if (pstart<1 || pstart>A->len_aln)continue;
818 if (pend<1 || pend>A->len_aln)continue;
819 if (p2start<1 || p2start>A->len_aln)continue;
820 if (p2end<1 || p2end>A->len_aln)continue;
821 if (pstart<=p2start && pend>=p2start) continue;
822 if (pstart<=p2end && pend>=p2end) continue;
823 TS=tree_scan_pair_pos (A, pstart,pend,p2start, p2end, ptree, RT);
825 if (TS->uw>=best_score){best_score=TS->uw; best_pos=a;best_w=b;best_start=pstart; best_end=pend; best_pos2=c, best_w2=d, best_start2=p2start, best_end2=p2end;set=1;}
829 if (set)fprintf ( stdout, "P1: %*d I1: %*d %*d P2: %*d I2: %*d %*d SIM: %6.2f L: %2d\n", l,best_pos, l,best_start, l,best_end, l, best_pos2, l, best_start2, l, best_end2, best_score,(best_w*2)+1 );
835 else if ( strm (mode, "multiplescan"))
837 int n, **wlist, best_pos;
840 wlist=generate_array_int_list (nl*2,start, w,1, &n, NULL);
841 HERE ("Scan %d Possibilities", n);
843 for (best_score=best_pos=0,a=0; a<n; a++)
845 TS=tree_scan_multiple_pos (poslist,wlist[a],nl, A, ptree, RT);
846 if (TS && TS->uw>best_score)
849 fprintf ( stdout, "\n");
852 fprintf ( stdout, "[%3d %3d %3d]", poslist[b], wlist[a][b*2], wlist[a][b*2+1]);
854 fprintf ( stdout, " SCORE: %.2f", best_score);
860 //Output Tree Scan core into color html
861 normalizeScore(fascore, nl);
867 ST=copy_aln (A, NULL);
868 for (a=0; a<ST->nseq; a++)
870 i=name_is_in_list (ST->name[a],S->name, S->nseq, 100);
873 for (b=0; b<ST->len_aln; b++)
877 r1 = (int)fascore[b] + 48;
882 output_color_html ( A, ST, score_html_file);
887 vfree(score_csv_file);
888 vfree(score_html_file);
889 vfree(hit_matrix_file);
890 vfree(hit_html_file);
892 myexit(EXIT_SUCCESS);return NULL;
895 NT_node aln2std_tree(Alignment *A, int ipara1, int ipara2, char *mode)
899 char *cpSet = vcalloc(100, sizeof (char));
901 if(strm (mode, "norhit"))
903 B=extract_aln (A, 1, A->len_aln);
904 sprintf ( cpSet, "+aln2tree _COMPARE_nordisaln__STD_%d__CENTER_%d_", ipara1, ipara2);
907 B=extract_aln (A, ipara1, ipara2);
909 T=compute_std_tree (B, (cpSet)?1:0, (cpSet)?&cpSet:NULL);
914 Tree_sim* tree_scan_multiple_pos (int *poslist, int *wlist,int nl, Alignment *A, char *ptree, NT_node RT)
918 int a, b, n, s, p, left, right;
923 //poslist positions come [1..n]
927 pos=vcalloc ( A->len_aln+1, sizeof (int));
928 B=copy_aln (A, NULL);
935 for (b=p-left; b<=p+right; b++)
937 if (b<1 ||b>A->len_aln) return NULL;
940 if (pos[b]>1) return NULL;
944 for (s=0; s<A->nseq; s++)
946 for (n=0,a=1; a<=A->len_aln; a++)
948 if (pos[a])B->seq_al[s][n++]=A->seq_al[s][a-1];
953 for (s=0; s<A->nseq; s++)B->seq_al[s][B->len_aln]='\0';
955 T=compute_std_tree (B, (ptree)?1:0, (ptree)?&ptree:NULL);
963 Tree_sim* tree_scan_pair_pos (Alignment *A, int start, int end, int start2, int end2,char *ptree, NT_node RT)
966 Alignment *B,*B1, *B2;
971 B=copy_aln (A, NULL);
972 B1=extract_aln (A,start,end);
973 B2=extract_aln (A,start2, end2);
976 for ( a=0; a< B->nseq;a++)
977 sprintf (B->seq_al[a], "%s%s", B1->seq_al[a], B2->seq_al[a]);
978 B->len_aln=strlen (B->seq_al[0]);
980 T=compute_std_tree (B, (ptree)?1:0, (ptree)?&ptree:NULL);
984 free_aln (B);free_aln(B1); free_aln(B2);
989 Tree_sim* tree_scan_pos (Alignment *A, int start, int end, char *ptree, NT_node RT)
995 if ( start<1 || start>A->len_aln) return NULL;
996 if ( end<1 || end>A->len_aln) return NULL;
998 B=extract_aln (A,start,end);
999 T=compute_std_tree (B, (ptree)?1:0, (ptree)?&ptree:NULL);
1000 TS=tree_cmp (T, RT);
1001 free_tree(T);free_aln (B);
1004 Tree_sim* tree_scan_pos_woble (Alignment *A, int center, int max, char *ptree, NT_node RT, int *br, int *bl )
1014 BTS=vcalloc (1, sizeof (Tree_sim));
1016 for (left=0; left<max; left++)
1017 for (right=0; right<max; right++)
1021 TS=tree_scan_pos (A, start, end, ptree, RT);
1023 if (TS && TS->uw >best_score)
1027 br[0]=right; bl[0]=left;
1034 Tree_sim* tree_cmp( NT_node T1, NT_node T2)
1036 Sequence *S1, *S2, *S;
1040 Tree_sim *TS1, *TS2;
1042 if ( tree_contains_duplicates (T1))
1044 display_tree_duplicates (T1);
1045 printf_exit (EXIT_FAILURE, stderr, "\nFirst Tree Contains Duplicated Sequences [main_compare_trees][FATAL:%s]", PROGRAM);
1048 else if ( tree_contains_duplicates (T2))
1050 display_tree_duplicates (T2);
1051 printf_exit (EXIT_FAILURE, stderr, "\nSecond Tree Contains Duplicated Sequences [main_compare_trees]");
1055 //Identify the commom Sequence Set
1056 S1=tree2seq(T1, NULL);
1057 S2=tree2seq(T2, NULL);
1059 S=trim_seq ( S1, S2);
1063 fprintf ( stderr, "\nERROR: Your two do not have enough common leaf to be compared [FATAL:PROGRAM]");
1066 //Prune the trees and recode the subtree list
1067 T1=prune_tree (T1, S);
1068 T1=recode_tree(T1, S);
1070 T2=prune_tree (T2, S);
1071 T2=recode_tree(T2, S);
1073 TS1=vcalloc (1, sizeof (Tree_sim));
1074 TS2=vcalloc (1, sizeof (Tree_sim));
1076 new_compare_trees ( T1, T2, S->nseq, TS1);
1077 new_compare_trees ( T2, T1, S->nseq, TS2);
1080 TS1->n=tree2nnode (T1);
1083 TS2->n=tree2nnode (T2);
1084 /*if (TS1->n !=TS2->n)
1085 printf_exit (EXIT_FAILURE, stderr,"\nERROR: Different number of Nodes in the two provided trees after prunning [FATAL: %s]", PROGRAM);
1088 free_sequence (S, -1);
1089 free_sequence (S1, -1);
1090 free_sequence (S2, -1);
1092 TS1->uw=(TS1->uw+TS2->uw)*100/(TS1->max_uw+TS2->max_uw);
1093 TS1->w=(TS1->w+TS2->w)*100/(TS1->max_w+TS2->max_w);
1094 TS1->d=(TS1->d+TS2->d)*100/(TS1->max_d+TS2->max_d);
1095 TS1->rf=(TS1->rf+TS2->rf)/2;
1099 int print_node_list (NT_node T, Sequence *RS)
1105 S=tree2seq(T, NULL);
1106 L=tree2node_list (T, NULL);
1109 nlseq2=vcalloc ( RS->nseq, sizeof (int));
1113 d=MIN(((L[0])->nseq), (S->nseq-(L[0])->nseq));
1114 fprintf ( stdout, "Bootstrap: %5.2f Depth: %5d Splits: ", (L[0])->bootstrap, d);
1116 for (b=0; b<RS->nseq; b++)nlseq2[b]='-';
1117 for (b=0; b<S->nseq; b++)
1120 p=name_is_in_list (S->name[b], RS->name, RS->nseq, 100);
1121 if (p!=-1)nlseq2[p]=(L[0])->lseq2[b]+'0';
1123 for (b=0; b<RS->nseq; b++) fprintf ( stdout, "%c", nlseq2[b]);
1124 fprintf (stdout, "\n");
1129 NT_node main_compare_trees_list ( NT_node RT, Sequence *S, FILE *fp)
1136 T=vcalloc (1, sizeof (Tree_sim));
1137 RS=tree2seq(RT, NULL);
1139 TL=read_tree_list (S);
1141 reset_boot_tree (RT,0.0001);
1142 for (a=0; a<S->nseq; a++)
1144 TL[a]=prune_tree(TL[a],RS);
1145 TL[a]=recode_tree (TL[a],RS);
1146 new_compare_trees (RT, TL[a], RS->nseq,T);
1152 NT_node main_compare_trees ( NT_node T1, NT_node T2, FILE *fp)
1156 T=tree_cmp (T1, T2);
1157 fprintf ( fp, "\n#tree_cmp|T: %.f W: %.2f L: %.2f RF: %d N: %d S: %d", T->uw, T->w, T->d, T->rf, T->n, T->nseq);
1158 fprintf ( fp, "\n#tree_cmp_def|T: ratio of identical nodes");
1159 fprintf ( fp, "\n#tree_cmp_def|W: ratio of identical nodes weighted with the min Nseq below node");
1160 fprintf ( fp, "\n#tree_cmp_def|L: average branch length similarity");
1161 fprintf ( fp, "\n#tree_cmp_def|RF: Robinson and Foulds");
1162 fprintf ( fp, "\n#tree_cmp_def|N: number of Nodes in T1 [unrooted]");
1163 fprintf ( fp, "\n#tree_cmp_def|S: number of Sequences in T1\n");
1169 int new_compare_trees ( NT_node T1, NT_node T2, int nseq, Tree_sim *TS)
1175 if (!T1 || !T2) return 0;
1177 n+=new_compare_trees (T1->left, T2, nseq,TS);
1178 n+=new_compare_trees (T1->right, T2, nseq,TS);
1180 //Exclude arbitrary splits (dist==0)
1181 if ((T1->dist==0) && !(T1->parent))return n;
1183 N=new_search_split (T1, T2, nseq);
1185 t2=(N)?FABS(N->dist):0;
1186 TS->max_d+=MAX(t1, t2);
1192 w=MIN((nseq-T1->nseq),T1->nseq);
1202 //T1->dist=T1->nseq;
1206 //T1->dist=T1->nseq*-1;
1217 NT_node new_search_split (NT_node T, NT_node B, int nseq)
1220 if (!T || !B) return NULL;
1221 else if ( new_compare_split (T->lseq2, B->lseq2, nseq)==1)return B;
1222 else if ( (N=new_search_split (T, B->right, nseq)))return N;
1223 else return new_search_split (T, B->left, nseq);
1225 int new_compare_split ( int *b1, int *b2, int n)
1229 for (flag=1, a=0; a<n; a++)
1230 if (b1[a]!=b2[a])flag=0;
1231 if (flag) return flag;
1233 for (flag=1, a=0; a<n; a++)
1234 if (b1[a]==b2[a])flag=0;
1238 float compare_trees ( NT_node T1, NT_node T2, int nseq,int mode)
1240 /*search each branch of T1 in T2*/
1244 if ( !T1 || !T2)return 0;
1246 if (getenv4debug("DEBUG_TREE_COMPARE"))display_node (T1, "\nNODE ", nseq);
1248 if (T1->parent && T1->nseq>1)n+=search_node ( T1, T2, nseq, mode);
1250 n+=compare_trees ( T1->left, T2, nseq, mode);
1251 n+=compare_trees ( T1->right, T2, nseq, mode);
1256 float search_node ( NT_node B, NT_node T, int nseq, int mode)
1259 if ( !B || !T) return -1;
1260 if (getenv4debug("DEBUG_TREE_COMPARE"))display_node ( T, "\n\t", nseq);
1262 n=compare_node ( B->lseq2, T->lseq2, nseq );
1266 if (getenv4debug("DEBUG_TREE_COMPARE"))fprintf ( stderr, "[1][%d]", (int)evaluate_node_similarity ( B, T, nseq, mode));
1267 if (mode==RECODE)B->dist=B->leaf;
1268 return evaluate_node_similarity ( B, T, nseq, mode);
1272 if (getenv4debug("DEBUG_TREE_COMPARE"))fprintf ( stderr, "[-1]");
1273 if (mode==RECODE)B->dist=-B->leaf;
1278 if (getenv4debug("DEBUG_TREE_COMPARE"))fprintf ( stderr, "[0]");
1279 n=search_node ( B, T->left, nseq, mode);
1281 n=search_node ( B, T->right, nseq, mode);
1283 n=search_node ( B, T->bot, nseq, mode);
1289 float evaluate_node_similarity ( NT_node B, NT_node T, int nseq, int mode)
1293 if ( mode==TOPOLOGY || mode ==RECODE)
1295 for ( a=0; a< nseq; a++)
1296 if ( B->lseq2[a]!=T->lseq2[a]) return 0;
1299 else if ( mode == WEIGHTED)
1301 for (c=0, a=0; a< nseq; a++)
1303 if ( B->lseq2[a]!=T->lseq2[a]) return 0;
1304 else c+=B->lseq2[a];
1306 return (float)(MIN(c,nseq));
1308 else if ( mode == LENGTH )
1312 for (c=0, a=0; a< nseq; a++)
1314 if ( B->lseq2[a]!=T->lseq2[a]) return 0;
1316 d1=FABS((B->dist-T->dist));
1317 d2=MAX(B->dist, T->dist);
1318 return (d2>0)?(d1*100)/d2:0;
1325 int compare_node ( int *b1, int *b2, int nseq)
1329 n1=compare_node1 ( b1, b2, nseq);
1330 /*fprintf ( stderr, "[%d]", n1);*/
1331 if ( n1==1) return 1;
1333 n2=compare_node2 ( b1, b2, nseq);
1334 /* fprintf ( stderr, "[%d]", n2);*/
1335 if ( n2==1)return 1;
1336 else if ( n2==-1 && n1==-1) return -1;
1339 int compare_node1 ( int *b1, int *b2, int n)
1344 for ( a=0; a< n; a++)
1348 if ( l1==1 && l2==0) return -1;
1353 int compare_node2 ( int *b1, int *b2, int n)
1359 for ( a=0; a< n; a++)
1363 if ( l1==1 && l2==0) return -1;
1368 void display_node (NT_node N, char *string,int nseq)
1371 fprintf ( stderr, "%s", string);
1372 for (a=0; a< nseq; a++)fprintf ( stderr, "%d", N->lseq2[a]);
1376 /*********************************************************************/
1378 /* FJ_tree Computation */
1381 /*********************************************************************/
1384 NT_node tree_compute ( Alignment *A, int n, char ** arg_list)
1386 if (n==0 || strm (arg_list[0], "cw"))
1388 return compute_cw_tree (A);
1390 else if ( strm (arg_list[0], "fj"))
1392 return compute_fj_tree ( NULL, A, (n>=1)?atoi(arg_list[1]):8, (n>=2)?arg_list[2]:NULL);
1395 else if ( ( strm (arg_list[0], "nj")))
1397 return compute_std_tree (A, n, arg_list);
1400 return compute_std_tree (A, n, arg_list);
1403 NT_node compute_std_tree (Alignment *A, int n, char **arg_list)
1405 return compute_std_tree_2 (A, NULL, list2string (arg_list, n));
1407 NT_node compute_std_tree_2 (Alignment *A, int **s, char *cl)
1410 NT_node T, **BT=NULL;
1419 tree_name =vtmpnam (NULL);
1421 if (strstr (cl, "help"))
1423 fprintf ( stdout, "\n+aln2tree| _MATRIX_ : matrix used for the comparison (idmat, sarmat, pam250mt..)\n");
1424 fprintf ( stdout, "\n+aln2tree| _SCORE_ : score mode used for the distance (sim, raw)\n");
1425 fprintf ( stdout, "\n+aln2tree| _COMPARE_: comparison mode (aln, ktup, align, nordisaln)\n");
1426 fprintf ( stdout, "\n+aln2tree| _TMODE_ : tree mode (nj, upgma)\n");
1427 myexit (EXIT_SUCCESS);
1431 //matrix: idmat, ktup,sarmat, sarmat2
1432 strget_param (cl, "_MATRIX_", "idmat", "%s",matrix);
1435 strget_param (cl, "_SCORE_", "sim", "%s",score);
1437 //compare: aln, ktup, align
1438 strget_param (cl, "_COMPARE_", "aln", "%s",compare);
1440 //compare: aln, ktup, align
1441 strget_param (cl, "_TMODE_", "nj", "%s",tmode);
1444 if ( strm (compare, "nordisaln"))
1446 strget_param (cl, "_STD_", "1", "%d", &STD);
1447 strget_param (cl, "_CENTER_", "5", "%d", &CENTER);
1449 //Use external msa2tree methods
1450 if ( strm (tmode, "cw"))
1453 return compute_cw_tree (A);
1456 //compute distance matrix if needed
1460 if ( strm (compare, "ktup"))
1462 ungap_array (A->seq_al, A->nseq);
1463 s=get_sim_aln_array ( A,cl);
1465 else if ( strm ( compare, "aln"))
1467 if (strm (score, "sim"))
1468 s=get_sim_aln_array(A, matrix);
1469 else if ( strm (score, "raw"))
1471 s=get_raw_sim_aln_array (A,matrix);
1474 else if ( strm ( compare, "nordisaln"))
1476 s=get_sim_aln_array_normal_distribution(A, matrix, &STD, &CENTER);
1478 s=sim_array2dist_array(s, 100);
1482 if (strm (tmode, "nj"))
1485 BT=int_dist2nj_tree (s, A->name, A->nseq, tree_name);
1486 T=main_read_tree (tree_name);
1489 else if (strm (tmode, "upgma"))
1491 BT=int_dist2upgma_tree (s,A, A->nseq, tree_name);
1492 T=main_read_tree (tree_name);
1496 if ( strm ( cl, "dpa"))
1498 s=dist_array2sim_array(s, 100);
1499 T=code_dpa_tree (T,s);
1502 if (free_s)free_int (s, -1);
1506 NT_node similarities_file2tree (char *mat)
1515 tree_name =vtmpnam (NULL);
1517 s=input_similarities (mat,NULL, NULL);
1520 A=similarities_file2aln(mat);
1521 s=sim_array2dist_array(s, 100);
1524 int_dist2nj_tree (s, A->name, A->nseq, tree_name);
1525 T=main_read_tree(tree_name);
1530 NT_node compute_cw_tree (Alignment *A)
1532 char *tmp1, *tmp2, tmp3[1000];
1534 tmp1=vtmpnam (NULL);
1535 tmp2=vtmpnam (NULL);
1537 sprintf ( tmp3, "%s.ph", tmp1);
1538 output_clustal_aln (tmp1, A);
1539 printf_system ("clustalw -infile=%s -tree -newtree=%s %s ", tmp1,tmp3, TO_NULL_DEVICE);
1540 printf_system("mv %s %s", tmp3, tmp2);
1542 return main_read_tree(tmp2);
1545 NT_node compute_fj_tree (NT_node T, Alignment *A, int limit, char *mode)
1547 static int in_fj_tree;
1548 if (!in_fj_tree)fprintf ( stderr, "\nComputation of an NJ tree using conserved positions\n");
1551 if (T && T->leaf<=2);
1554 T=aln2fj_tree(T,A,limit, mode);
1555 T->right=compute_fj_tree ( T->right, A, limit, mode);
1556 T->left=compute_fj_tree ( T->left, A, limit, mode);
1563 NT_node aln2fj_tree(NT_node T, Alignment *A, int limit_in, char *mode)
1567 Alignment *subA=NULL;
1572 S=tree2seq (T,NULL);
1578 for ( fraction_gap=100; fraction_gap<=100 && l<1; fraction_gap+=10)
1579 for ( limit=limit_in; limit>0 && l<1; limit--)
1581 fprintf ( stderr, "\n%d %d", limit, fraction_gap);
1583 subA=extract_sub_aln2 (A,S->nseq,S->name);
1584 subA=filter_aln4tree (subA,limit,fraction_gap, mode);
1588 /* while ( subA->len_aln<1)
1590 subA=extract_sub_aln2 (A,S->nseq,S->name);
1591 subA=filter_aln4tree (subA,limit,fraction_gap,mode);
1593 subA=extract_sub_aln2 (A,S->nseq,S->name);
1594 subA=filter_aln4tree (subA,--limit,fraction_gap, mode);
1598 NT=tree2fj_tree (NT);
1600 NT=realloc_tree (NT,A->nseq);
1601 fprintf ( stderr, "Limit:%d Gap: %d Columns: %4d Left: %4d Right %4d BL:%4.2f\n",limit,fraction_gap, subA->len_aln, (NT->right)->leaf,(NT->left)->leaf, (NT->left)->dist+(NT->right)->dist);
1606 NT->parent=T->parent;
1610 free_sequence (S, -1);
1614 Alignment * filter_aln4tree (Alignment *A, int n,int fraction_gap,char *mode)
1617 char *ungaped_aln_file;
1618 char *scored_aln_file;
1619 char *filtered_aln_file;
1621 aln_file=vtmpnam(NULL);
1622 ungaped_aln_file=vtmpnam (NULL);
1623 scored_aln_file=vtmpnam (NULL);
1624 scored_aln_file=vtmpnam(NULL);
1625 filtered_aln_file=vtmpnam(NULL);
1629 output_clustal_aln (aln_file, A);
1630 /* 1: remove columns with too many gaps*/
1631 printf_system ("t_coffee -other_pg seq_reformat -in %s -action +rm_gap %d -output clustalw > %s", aln_file,fraction_gap, ungaped_aln_file);
1633 /* 2: evaluate the alignment*/
1635 printf_system ("t_coffee -other_pg seq_reformat -in %s -action +evaluate %s -output clustalw > %s", ungaped_aln_file,(mode)?mode:"categories", scored_aln_file);
1638 /*3 extract the high scoring columns*/
1639 printf_system("t_coffee -other_pg seq_reformat -in %s -struc_in %s -struc_in_f number_aln -action +use_cons +keep '[%d-8]' +rm_gap -output clustalw > %s", ungaped_aln_file, scored_aln_file,n, filtered_aln_file);
1644 A=main_read_aln ( filtered_aln_file, NULL);
1650 NT_node tree2fj_tree (NT_node T)
1656 L=find_longest_branch (T, NULL);
1657 T=reroot_tree (T, L);
1662 /*********************************************************************/
1664 /* Tree Filters and MAnipulation */
1667 /*********************************************************************/
1668 int tree2star_nodes (NT_node R, int n_max)
1671 else if (!R->left && !R->right)
1673 if (n_max>=1)R->dist=0;
1680 n+=tree2star_nodes (R->right, n_max);
1681 n+=tree2star_nodes (R->left, n_max);
1683 if (n<n_max)R->dist=0;
1688 NT_node aln2tree (Alignment *A)
1693 T=make_nj_tree (A, NULL, 0, 0, A->seq_al, A->name, A->nseq, NULL, NULL);
1694 tree2nleaf (T[3][0]);
1698 NT_node realloc_tree ( NT_node R, int n)
1701 R->right=realloc_tree (R->right,n);
1702 R->left=realloc_tree (R->left,n);
1703 R->bot=realloc_tree (R->bot,n);
1705 R->lseq=vrealloc (R->lseq, n*sizeof (int));
1706 R->lseq2=vrealloc (R->lseq2, n*sizeof (int));
1710 NT_node reset_boot_tree ( NT_node R, int n)
1713 R->right=reset_boot_tree (R->right,n);
1714 R->left=reset_boot_tree (R->left,n);
1715 R->bot=reset_boot_tree (R->bot,n);
1716 R->bootstrap=(float)n;
1720 NT_node tree_dist2normalized_tree_dist ( NT_node R, float max)
1725 tree_dist2normalized_tree_dist ( R->right, max);
1726 tree_dist2normalized_tree_dist ( R->left, max);
1727 R->bootstrap=(int)((R->dist*100)/max);
1731 NT_node reset_dist_tree ( NT_node R, float n)
1734 R->right=reset_dist_tree (R->right,n);
1735 R->left=reset_dist_tree (R->left,n);
1736 R->bot=reset_dist_tree (R->bot,n);
1738 if (R->parent && !(R->parent)->parent && !(R->parent)->bot)R->dist=n/2;
1745 NT_node* free_treelist (NT_node *L)
1748 while (L[n])free_tree (L[n++]);
1752 NT_node free_tree ( NT_node R)
1755 R->right=free_tree (R->right);
1756 R->left=free_tree (R->left);
1757 R->bot=free_tree (R->bot);
1762 NT_node free_tree_node ( NT_node R)
1771 vfree ( R->lseq); vfree ( R->lseq2);
1776 NT_node rename_seq_in_tree ( NT_node R, char ***list)
1778 if ( !R || !list) return R;
1782 R->right=rename_seq_in_tree (R->right, list);
1783 R->left=rename_seq_in_tree (R->left, list);
1784 R->bot=rename_seq_in_tree (R->bot, list);
1789 while ( list[n][0][0])
1791 if ( strm (list[n][0], R->name))sprintf (R->name, "%s",list[n][1]);
1797 Sequence * tree2seq (NT_node R, Sequence *S)
1803 S=declare_sequence (10, 10, tree2nseq (R));
1809 sprintf ( S->name[S->nseq++], "%s", R->name);
1813 S=tree2seq (R->left, S);
1814 S=tree2seq (R->right, S);
1819 NT_node balance_tree (NT_node T)
1825 else if ( T->leaf<=2)return T;
1828 if (!list)list=declare_int (3, 2);
1834 list[0][0]=(T->left)?(T->left)->leaf:0;
1836 list[1][0]=(T->right)?(T->right)->leaf:0;
1838 list[2][0]=(T->bot)?(T->bot)->leaf:0;
1841 sort_int (list,2,0,0,2);
1843 T->left=NL[list[2][1]];
1844 T->right=NL[list[1][1]];
1845 T->bot=NL[list[0][1]];
1847 T->left=balance_tree (T->left);
1848 T->right=balance_tree (T->right);
1849 T->bot=balance_tree (T->bot);
1853 FILE * display_tree (NT_node R, int nseq, FILE *fp)
1861 if ( R->nseq==1)fprintf (stderr,"\n[%s] ", R->name);
1862 else fprintf ( stderr, "\n[%d Node] ",R->nseq);
1863 for ( a=0; a< R->nseq; a++) fprintf ( stderr, "[%d]", R->lseq[a]);
1865 fprintf (fp, "\n %10s N ", R->name);
1866 for ( a=0; a< nseq; a++)fprintf (fp, "%d", R->lseq2[a]);
1867 fprintf (fp, "\n %10s D ", R->name);
1868 for ( a=0; a< nseq; a++)fprintf (fp, "%d", R->idist[a]);
1871 if (R->leaf==1) fprintf (fp, " %s", R->name);
1872 fprintf (fp, " :%.4f", R->dist);
1873 HERE ("\nGo Left");fp=display_tree (R->left, nseq, fp);
1874 HERE ("\nGo Right");fp=display_tree (R->right, nseq, fp);
1875 HERE ("\nGo Bot");fp=display_tree (R->bot, nseq, fp);
1879 int tree2nnode_unresolved (NT_node R, int *l)
1882 else if (R->leaf && R->dist==0){return 1;}
1886 n+=tree2nnode_unresolved (R->right, l);
1887 n+=tree2nnode_unresolved (R->left, l);
1901 int tree2nnode ( NT_node R)
1905 else if ( R->leaf==1){R->node=1;n=1;}
1909 n+=tree2nnode (R->right);
1910 n+=tree2nnode (R->left);
1911 n+=tree2nnode (R->bot);
1916 int tree2nleaf (NT_node R)
1919 else if (R->leaf==1){return 1;}
1920 else if (R->right==NULL && R->left==NULL && R->bot==NULL){R->leaf=1; return 1;}
1924 n+=tree2nleaf (R->right);
1925 n+=tree2nleaf (R->left);
1926 n+=tree2nleaf (R->bot);
1933 int tree2nseq ( NT_node R)
1935 return tree2nleaf(R);
1938 int tree_file2nseq (char *fname)
1944 string=vcalloc (count_n_char_in_file(fname)+1, sizeof (char));
1946 fp=vfopen (fname, "r");
1948 while ( (c=fgetc(fp))!=EOF){if (c=='(' || c==')' || c==',' || c==';') string[n++]=c;}
1949 vfclose (fp);string[n]='\0';
1951 for (n=0, p=1; p<strlen (string); p++)
1956 if ( a=='(' && b==',')n++;
1957 else if ( a==',' && b==')')n++;
1958 else if ( a==',' && b==',')n++;
1960 if ( getenv4debug("DEBUG_TREE"))fprintf (stderr, "\n[DEBUG_TREE:tree_file2nseq]%s", string);
1966 void clear_tree ( NT_node R)
1975 clear_tree ( R->right);
1976 clear_tree ( R->left);
1977 clear_tree ( R->bot);
1980 int display_leaf_below_node (NT_node T, FILE *fp)
1987 fprintf (fp, " %s", T->name);
1992 n+=display_leaf_below_node ( T->right, fp);
1993 n+=display_leaf_below_node ( T->left, fp);
1997 int display_leaf ( NT_node T, FILE *fp)
2001 else if ( T->visited)return 0;
2006 fprintf (fp, " %s", T->name);
2011 n+=display_leaf ( T->right, fp);
2012 n+=display_leaf ( T->left, fp);
2013 n+=display_leaf ( T->bot, fp);
2021 NT_node find_longest_branch ( NT_node T, NT_node L)
2024 if ( !L || T->dist>L->dist)
2030 if ( T->leaf==1)return L;
2033 L=find_longest_branch ( T->right, L);
2034 L=find_longest_branch ( T->left, L);
2038 int node2side (NT_node N);
2039 int test_print (NT_node T);
2040 NT_node straighten_node (NT_node N);
2043 NT_node reroot_tree ( NT_node TREE, NT_node Right)
2045 /*ReRoots the tree between Node R and its parent*/
2049 if (!EMPTY)EMPTY=vcalloc (1, sizeof (NT_node));
2050 if ( !Right->parent)return Right;
2052 TREE=unroot_tree (TREE);
2053 if (Right->parent==NULL && Right->bot)
2056 n1=tree2nleaf (TREE);
2058 NR=declare_tree_node(TREE->maxnseq);
2061 NR->left=Right->parent;
2064 Right->dist=Right->dist/2;
2066 if ((NR->left)->right==Right)(NR->left)->right=EMPTY;
2067 else if ( (NR->left)->left==Right) (NR->left)->left=EMPTY;
2072 NR->left=straighten_node (NR->left);
2076 (NR->left)->parent=NR;
2077 (NR->left)->dist=Right->dist;
2083 if ( n1!=n2){fprintf ( stderr, "\n%d %d", n1, n2);myexit (EXIT_FAILURE);}
2087 NT_node straighten_node ( NT_node N)
2094 if (N->right==EMPTY)N->right=N->parent;
2095 else if ( N->left==EMPTY) N->left=N->parent;
2098 if (Child->right==N)
2102 else if (Child->left==N)
2108 Child=straighten_node (Child);
2110 Child->dist=N->dist;
2113 else if ( N->bot && N->bot!=Previous)
2115 if ( N->right==EMPTY)N->right=N->bot;
2116 else if ( N->left==EMPTY)N->left=N->bot;
2127 int test_print (NT_node T)
2131 fprintf ( stderr, "\nEMPTY");
2133 else if ( !T->left && !T->right)
2135 fprintf ( stderr, "\n%s",T->name);
2139 fprintf ( stderr, "\nGoing Right");
2140 test_print (T->right);
2141 fprintf ( stderr, "\nGoing Left");
2142 test_print (T->left);
2146 int node2side (NT_node C)
2148 if ( !C->parent) return UNKNOWN;
2149 else if ( (C->parent)->left==C)return LEFT;
2150 else if ( (C->parent)->right==C)return RIGHT;
2151 else return UNKNOWN;
2153 NT_node straighten_tree ( NT_node P, NT_node C, float new_dist)
2157 if ( C==NULL)return NULL;
2164 if (C->left && C->right)
2175 else if ( C->left==NULL && C->right==NULL)
2179 else if ( C->right==P)
2184 C=straighten_tree(C, C->right, dist);
2186 else if ( C->left==P)
2190 C=straighten_tree (C, C->left, dist);
2192 else if ( C->parent==NULL)
2201 NT_node unroot_tree ( NT_node T)
2204 if (!T || T->visited) return T;
2207 if (T->parent==NULL)
2210 (T->right)->dist=(T->left)->dist=(T->right)->dist+(T->left)->dist;
2211 (T->right)->parent=T->left;
2212 (T->left)->parent=T->right;
2219 T->parent=unroot_tree (T->parent);
2220 T->right=unroot_tree (T->right);
2221 T->left=unroot_tree (T->left);
2227 FILE * print_tree_list ( NT_node *T, char *format,FILE *fp)
2232 fp=print_tree (T[a], format, fp);
2237 char * tree2string (NT_node T)
2239 if (!T) return NULL;
2245 if (!f)f=vtmpnam (NULL);
2247 print_tree (T, "newick", fp);
2249 return file2string (f);
2252 char * tree2file (NT_node T, char *name, char *mode)
2254 if (!name)name=vtmpnam (NULL);
2255 string2file (name, mode, tree2string(T));
2258 FILE * print_tree ( NT_node T, char *format,FILE *fp)
2263 S=tree2seq(T, NULL);
2267 free_sequence (S, -1);
2268 if ( format && strm (format, "binary"))
2269 fp=display_tree ( T,S->nseq, fp);
2270 else if ( ! format || strm2 (format, "newick_tree","newick"))
2272 /*T=balance_tree (T);*/
2273 fp=rec_print_tree (T, fp);
2274 fprintf ( fp, ";\n");
2278 fprintf ( stderr, "\nERROR: %s is an unknown tree format [FATAL:%s]\n", format, PROGRAM);
2279 myexit (EXIT_FAILURE);
2283 int print_newick_tree ( NT_node T, char *name)
2286 fp=vfopen (name, "w");
2287 fp=rec_print_tree (T,fp);
2288 fprintf (fp, ";\n");
2292 FILE * rec_print_tree ( NT_node T, FILE *fp)
2301 fprintf ( fp, " %s:%.5f",T->name, T->dist);
2305 if (T->left && T->right)
2307 fprintf ( fp, "(");fp=rec_print_tree ( T->left, fp);
2308 fprintf ( fp, ",");fp=rec_print_tree ( T->right, fp);
2310 if (T->parent || T->dist)
2312 if ( T->bootstrap!=0)fprintf (fp, " %d", (int)T->bootstrap);
2313 fprintf (fp, ":%.5f", T->dist);
2316 else if (T->left)fp=rec_print_tree (T->left, fp);
2317 else if (T->right)fp=rec_print_tree(T->right, fp);
2327 /*********************************************************************/
2329 /* Tree Functions */
2332 /*********************************************************************/
2334 int ** make_sub_tree_list ( NT_node **T, int nseq, int n_node)
2338 /*This function produces a list of all the sub trees*/
2351 /* Contains 4 i_nodes */
2352 /* 8 nodes (internal nodes +leaves) */
2363 int **sub_tree_list;
2369 sub_tree_list=declare_int ( (n_node), nseq);
2370 make_all_sub_tree_list (T[3][0],sub_tree_list, &n);
2375 sub_tree_list=declare_int (nseq, nseq);
2376 for ( a=0; a< nseq; a++)sub_tree_list[a][a]=1;
2379 return sub_tree_list;
2382 void make_all_sub_tree_list ( NT_node N, int **list, int *n)
2384 make_one_sub_tree_list (N, list[n[0]++]);
2387 make_all_sub_tree_list (N->left , list, n);
2388 make_all_sub_tree_list (N->right, list, n);
2393 void make_one_sub_tree_list ( NT_node T,int *list)
2402 make_one_sub_tree_list(T->left , list);
2403 make_one_sub_tree_list(T->right, list);
2409 NT_node old_main_read_tree(char *treefile)
2411 /*Reads a tree w/o needing the sequence file*/
2413 T=simple_read_tree (treefile);
2419 NT_node** simple_read_tree(char *treefile)
2423 T=read_tree ( treefile, &tot_node,tree_file2nseq (treefile),NULL);
2427 void free_read_tree ( NT_node **BT)
2434 for (s=0,a=0; a<3; a++)
2438 free_tree (BT[3][0]);
2443 NT_node** read_tree(char *treefile, int *tot_node,int nseq, char **seq_names)
2446 /*The Tree Root is in the TREE[3][0]...*/
2447 /*TREE[0][ntot]--> pointer to each node and leave*/
2453 int nnodes = 0;/*Number of Internal Nodes*/
2454 int ntotal = 0;/*Number of Internal Nodes + Number of Leaves*/
2458 NT_node seq_tree, root,p;
2462 rooted_tree=distance_tree=TRUE;
2464 fp = vfopen(treefile, "r");
2466 ch = (char)getc(fp);
2469 fprintf(stderr, "Error: Wrong format in tree file %s\n", treefile);
2470 myexit (EXIT_FAILURE);
2475 lu_ptr=(NT_node **)vcalloc(4,sizeof(NT_node*));
2476 lu_ptr[0] = (NT_node *)vcalloc(10*nseq,sizeof(NT_node));
2477 lu_ptr[1] = (NT_node *)vcalloc(10*nseq,sizeof(NT_node));
2478 lu_ptr[2] = (NT_node *)vcalloc(10*nseq,sizeof(NT_node));
2479 lu_ptr[3] =(NT_node *) vcalloc(1,sizeof(NT_node));
2481 seq_tree =(NT_node) declare_tree_node(nseq);
2483 set_info(seq_tree, NULL, 0, " ", 0.0, 0);
2486 fp=create_tree(seq_tree,NULL,&nseq_read, &ntotal, &nnodes, lu_ptr, fp);
2490 if (nseq != tot_nseq)
2492 fprintf(stderr," Error: tree not compatible with alignment (%d sequences in alignment and %d in tree\n", nseq,nseq_read);
2493 myexit (EXIT_FAILURE);
2496 if (distance_tree == FALSE)
2498 if (rooted_tree == FALSE)
2500 fprintf(stderr,"Error: input tree is unrooted and has no distances, cannot align sequences\n");
2501 myexit (EXIT_FAILURE);
2505 if (rooted_tree == FALSE)
2507 root = reroot(seq_tree, nseq,ntotal,nnodes, lu_ptr);
2508 lu_ptr[1][nnodes++]=lu_ptr[0][ntotal++]=root;
2521 for ( a=0; a< ntotal; a++)
2523 (lu_ptr[0][a])->isseq=(lu_ptr[0][a])->leaf;
2524 (lu_ptr[0][a])->dp=(lu_ptr[0][a])->dist;
2528 for ( a=0; a< nseq; a++)
2533 (lu_ptr[2][a])->order=(lu_ptr[2][a])->seq=a;
2537 for ( flag=0,b=0; b<nseq; b++)
2539 if ( strncmp ( (lu_ptr[2][a])->name, seq_names[b], MAXNAMES)==0)
2543 (lu_ptr[2][a])->order=(lu_ptr[2][a])->seq=b;
2544 /*vfree ( (lu_ptr[2][a])->name);*/
2545 sprintf ((lu_ptr[2][a])->name, "%s", seq_names[b]);
2550 if ( flag==0 && (lu_ptr[0][a])->leaf==1)
2552 fprintf ( stderr, "\n%s* not in tree",(lu_ptr[2][a])->name);
2553 for ( a=0; a< ntotal; a++)
2555 fprintf ( stderr, "\n%d %s",(lu_ptr[2][a])->leaf, (lu_ptr[2][a])->name);
2567 tnseq=tree_file2nseq(treefile);
2568 tree_names=vcalloc ( tnseq, sizeof (char*));
2569 for (a=0; a<tnseq; a++)
2571 s=(lu_ptr[2][a])->name;
2573 if ( name_is_in_list(s, seq_names, nseq, MAXNAMES+1)==-1)
2575 fprintf (stderr, "\nERROR: Sequence %s in the tree [%s] is not in the alignment[FATAL:%s]\n", s, treefile, PROGRAM);
2579 for (a=0; a<nseq; a++)
2582 if ( name_is_in_list(s, tree_names, nseq, MAXNAMES+1)==-1)
2584 fprintf (stderr, "\nERROR: Sequence %s in the sequences is not in the tree [%s][FATAL:%s]\n", s, treefile, PROGRAM);
2589 if ( fail_flag==1)myexit (EXIT_FAILURE);
2592 for ( a=0; a< nseq; a++)
2599 p->lseq[p->nseq]=c_seq;
2609 FILE * create_linear_tree ( char **name, int n, FILE *fp)
2612 if (!name || n==0 ||!fp) return NULL;
2616 fprintf ( fp, "(%s,%s);",name[0],name[1]);
2618 fprintf ( fp, "((%s,%s),%s);",name[0],name[1], name[2]);
2622 for (a=0; a<n-2; a++)fprintf (fp, "(");
2623 fprintf (fp, "%s, %s),", name[0], name[1]);
2624 for ( a=2; a<n-2; a++)fprintf ( fp, "%s),",name[a]);
2625 fprintf ( fp, "%s,%s);\n", name[n-2], name[n-1]);
2629 FILE * create_tree(NT_node ptree, NT_node parent,int *nseq,int *ntotal,int *nnodes,NT_node **lu, FILE *fp)
2639 name=vcalloc ( MAXNAMES+1, sizeof (char));
2640 sprintf ( name, " ");
2642 ch = (char)getc(fp);
2648 lu[0][ntotal[0]] = lu[1][nnodes[0]] = ptree;
2651 create_tree_node(ptree, parent);
2652 fp=create_tree(ptree->left, ptree, nseq,ntotal,nnodes,lu,fp);
2653 ch = (char)getc(fp);
2656 fp=create_tree(ptree->right, ptree,nseq,ntotal,nnodes,lu,fp);
2657 ch = (char)getc(fp);
2661 ptree = insert_tree_node(ptree);
2662 lu[0][ntotal[0]] = lu[1][nnodes[0]] = ptree;
2665 fp=create_tree(ptree->right, ptree,nseq,ntotal,nnodes,lu,fp);
2666 rooted_tree = FALSE;
2667 if ( getenv4debug ( "DEBUG_TREE")){fprintf ( stderr, "\n[DEBUG_TREE:create_tree] Unrooted Tree");}
2672 ch = (char)getc(fp);
2677 lu[0][ntotal[0]] = lu[2][nseq[0]] = ptree;
2682 ch = (char)getc(fp);
2685 /*This protects names that are between single quotes*/
2688 if (i < MAXNAMES) name[i++] = ch;
2689 ch = (char)getc(fp);
2691 if (i < MAXNAMES) name[i++] = ch;
2692 while ((ch != ':') && (ch != ',') && (ch != ')'))ch = (char)getc(fp);
2696 while ((ch != ':') && (ch != ',') && (ch != ')'))
2698 if (i < MAXNAMES) name[i++] = ch;
2699 ch = (char)getc(fp);
2705 if ( i>=(MAXNAMES+1)){fprintf (stderr, "\nName is too long");myexit (EXIT_FAILURE);}
2706 if (ch != ':' && !isdigit(ch))
2708 /*distance_tree = FALSE*/;
2714 fscanf(fp,"%f",&dist);
2718 /*Tree with Bootstrap information*/
2719 else if (isdigit (ch))
2722 fscanf(fp,"%f",&bootstrap);
2723 if ( fscanf(fp,":%f",&dist)==1);
2733 set_info(ptree, parent, type, name, dist, bootstrap);
2740 NT_node declare_tree_node (int nseq)
2744 p= (NT_node)vcalloc (1, sizeof ( Treenode));
2752 p->name=(char*)vcalloc (MAXNAMES+1,sizeof (char));
2754 p->lseq=(int*)vcalloc ( nseq, sizeof (int));
2759 void set_info(NT_node p, NT_node parent, int pleaf, char *pname, float pdist, float bootstrap)
2764 p->bootstrap=bootstrap;
2768 sprintf (p->name, "%s", pname);
2777 NT_node insert_tree_node(NT_node pptr)
2782 newnode = declare_tree_node( pptr->maxnseq);
2783 create_tree_node(newnode, pptr->parent);
2785 newnode->left = pptr;
2786 pptr->parent = newnode;
2788 set_info(newnode, pptr->parent, 0, "", 0.0, 0);
2793 void create_tree_node(NT_node pptr, NT_node parent)
2795 pptr->parent = parent;
2796 pptr->left =declare_tree_node(pptr->maxnseq) ;
2797 (pptr->left)->parent=pptr;
2799 pptr->right =declare_tree_node(pptr->maxnseq) ;
2800 (pptr->right)->parent=pptr;
2803 FILE * skip_space(FILE *fp)
2812 fprintf ( stderr, "\nEOF");
2813 myexit (EXIT_FAILURE);
2820 NT_node reroot(NT_node ptree, int nseq, int ntotal, int nnodes, NT_node **lu)
2822 NT_node p, rootnode, rootptr;
2823 float diff, mindiff=0, mindepth = 1.0, maxdist;
2831 for (i=0; i<ntotal; i++)
2834 if (p->parent == NULL)
2835 diff = calc_root_mean(p, &maxdist, nseq, lu);
2837 diff = calc_mean(p, &maxdist, nseq, lu);
2839 if ((diff == 0) || ((diff > 0) && (diff < 2 * p->dist)))
2841 if ((maxdist < mindepth) || (first == TRUE))
2851 if (rootptr == ptree)
2853 mindiff = rootptr->left->dist + rootptr->right->dist;
2854 rootptr = rootptr->right;
2857 rootnode = insert_root(rootptr, mindiff);
2858 diff = calc_root_mean(rootnode, &maxdist, nseq, lu);
2863 float calc_root_mean(NT_node root, float *maxdist, int nseq, NT_node **lu)
2865 float dist , lsum = 0.0, rsum = 0.0, lmean,rmean,diff;
2872 dist = (*maxdist) = 0;
2874 for (i=0; i< nseq; i++)
2878 while (p->parent != root)
2883 if (p == root->left) direction = LEFT;
2884 else direction = RIGHT;
2887 if (direction == LEFT)
2898 if (dist > (*maxdist)) *maxdist = dist;
2904 diff = lmean - rmean;
2908 float calc_mean(NT_node nptr, float *maxdist, int nseq,NT_node **lu)
2910 float dist , lsum = 0.0, rsum = 0.0, lmean,rmean,diff;
2911 NT_node p, *path2root;
2913 int depth = 0, i,j , n;
2915 int direction, found;
2918 path2root = (NT_node *)vcalloc(nseq,sizeof(Treenode));
2919 dist2node = (float *)vcalloc(nseq,sizeof(float));
2921 depth = (*maxdist) = dist = 0;
2926 path2root[depth] = p;
2928 dist2node[depth] = dist;
2933 /***************************************************************************
2935 for each leaf, determine whether the leaf is left or right of the node.
2936 (RIGHT = descendant, LEFT = not descendant)
2937 ****************************************************************************/
2938 for (i=0; i< nseq; i++)
2953 while ((found == FALSE) && (p->parent != NULL))
2955 for (j=0; j< depth; j++)
2956 if (p->parent == path2root[j])
2965 if (p == nptr) direction = RIGHT;
2968 if (direction == LEFT)
2971 lsum += dist2node[n-1];
2980 if (dist > (*maxdist)) *maxdist = dist;
2988 if ( nl==0 || nr==0)
2990 myexit (EXIT_FAILURE);
2995 diff = lmean - rmean;
2999 NT_node insert_root(NT_node p, float diff)
3001 NT_node newp, prev, q, t;
3002 float dist, prevdist,td;
3005 newp = declare_tree_node( p->maxnseq);
3015 if (p->dist < 0.0) p->dist = 0.0;
3016 if (p->dist > dist) p->dist = dist;
3018 t->dist = dist - p->dist;
3022 newp->parent = NULL;
3026 if (t->left == p) t->left = t->parent;
3027 else t->right = t->parent;
3036 if (q->left == prev)
3038 q->left = q->parent;
3048 q->right = q->parent;
3059 remove the old root node
3062 if (q->left == NULL)
3067 q->parent = prev->parent;
3068 if (prev->parent->left == prev)
3069 prev->parent->left = q;
3071 prev->parent->right = q;
3079 q->parent = prev->parent;
3080 if (prev->parent->left == prev)
3081 prev->parent->left = q;
3083 prev->parent->right = q;
3093 /*********************************************************************/
3098 /*********************************************************************/
3100 int *aln2seq_chain (Alignment *A, int **sim,int seq1, int seq2, int limit, int max_chain);
3102 Alignment *seq2seq_chain (Alignment *A,Alignment*T, char *arg)
3105 int *buf=NULL, *seq2keep, *list, *tname;
3111 /*Estimate Similarity within the incoming sequences*/
3112 sim=seq2comp_mat (aln2seq(A), "blosum62mt", "sim2");
3114 /*Read and store the list of sequences to keep*/
3115 seq2keep=vcalloc (A->nseq, sizeof (int));
3116 tname=vcalloc (T->nseq, sizeof (int));
3117 for ( a=0; a< T->nseq; a++)
3119 tname[a]=name_is_in_list ( T->name[a], A->name, A->nseq, 100);
3120 if (tname[a]>=0)seq2keep[tname[a]]=1;
3123 /*Consider Every Pair of Sequences within the list of sequences to keep*/
3125 fprintf ( stderr, "\n");
3126 for ( a=0; a< T->nseq-1; a++)
3128 if (tname[a]<0) continue;
3129 for ( b=a+1;b<T->nseq; b++)
3132 if (tname[b]<0) continue;
3134 buf=NULL;sim_limit=90;
3135 while (!buf && sim_limit>min_sim)
3137 buf=aln2seq_chain ( A, sim,tname[a],tname[b],sim_limit, max_chain);
3143 for (c=0; c< A->nseq; c++)seq2keep[c]+=buf[c];
3148 fprintf ( stderr, "\n#Could Not Find any Intermediate sequence [MAx chain %d MinID %d\n", max_chain, min_sim);
3153 list=vcalloc (A->nseq, sizeof (int));
3154 for ( nl=0,a=0; a< A->nseq; a++)
3158 A=extract_sub_aln (A, nl, list);
3164 int max_explore=10000000;/*Limits the number of explorations that tends to increase when id is small*/
3167 int *aln2seq_chain (Alignment *A, int **sim, int seq1, int seq2, int limit, int max_chain)
3171 char output1[10000];
3172 char output2[10000];
3178 output1[0]=output2[0]='\0';
3179 used=vcalloc (A->nseq, sizeof(int));
3182 if (find_seq_chain ( A, sim,used,seq1,seq1, seq2,1,limit, max_chain, &nseq))
3184 list=vcalloc (A->nseq, sizeof (int));
3185 chain=declare_int (A->nseq, 2);
3186 for (n=0, a=0; a< A->nseq; a++)
3190 chain[n][0]=used[a];
3192 list[used[a]-1]=a;n++;
3196 sprintf ( output2, "#%s %s N: %d Lower: %d Sim: %d DELTA: %d\n", A->name[list[0]], A->name[list[n-1]],n, limit,sim[list[0]][list[n-1]],limit-sim[list[0]][list[n-1]]);strcat (output1, output2);
3198 sort_int ( chain, 2, 0, 0, n-1);
3199 sprintf ( output2, "#");strcat(output1, output2);
3201 for ( a=0; a< n-1; a++)
3203 sprintf (output2, "%s -->%d -->", A->name[chain[a][1]],sim[chain[a][1]][chain[a+1][1]]);strcat ( output1, output2);
3205 sprintf ( output2, "%s\n", A->name[chain[n-1][1]]);strcat (output1, output2);
3207 free_int (chain, -1);
3215 /* fprintf ( stdout, "%s", output1);*/
3216 fprintf ( stderr, "%s", output1);
3220 static int ***pw_sim;
3221 int find_seq_chain (Alignment *A, int **sim,int *used,int seq0,int seq1, int seq2,int chain_length, int limit, int max_chain, int *nseq)
3223 int a,b, seq, seq_sim;
3226 if ( n_explore>=max_explore)
3232 pw_sim=declare_arrayN(3, sizeof (int), A->nseq, A->nseq, 3);
3233 for ( a=0; a< A->nseq; a++)
3235 for ( b=0; b<A->nseq; b++)
3238 pw_sim[a][b][1]=sim[a][b];
3239 pw_sim[a][b][2]=sim[b][seq2];
3241 sort_int_inv ( pw_sim[a],3, 1, 0, A->nseq-1);
3245 if ( chain_length>max_chain)return 0;
3246 else if ( sim[seq1][seq2]>=limit)
3248 used[seq2]=chain_length+1;
3255 for ( a=0; a< A->nseq; a++)
3257 seq=pw_sim[seq1][a][0];
3258 seq_sim=pw_sim[seq1][a][1];
3259 delta_seq2=pw_sim[seq1][a][2]-sim[seq1][seq2];
3263 if ( used[seq])continue;
3264 else if ( seq_sim<limit)continue;
3267 used[seq]=chain_length+1;
3269 if ( find_seq_chain( A, sim,used,seq0,seq, seq2, chain_length+1,limit, max_chain, nseq))
3285 /*********************************************************************/
3287 /* New Tree Parsing */
3289 /*********************************************************************/
3290 NT_node new_insert_node (NT_node T);
3291 int scan_name_and_dist ( FILE *fp, char *name, float *dist);
3296 NT_node check_tree (NT_node T);
3297 NT_node main_read_tree (char *treefile)
3307 fp=vfopen (remove_charset_from_file (treefile, " \t\n\r"), "r");
3308 T=new_get_node (NULL,fp);
3311 S=tree2seq(T, NULL);
3313 T=recode_tree(T, S);
3314 free_sequence (S,S->nseq);
3316 T->file=vcalloc ( strlen (treefile)+1, sizeof (char));
3317 sprintf ( T->file, "%s", treefile);
3321 //This function codes the tree into lseq and lseq2
3322 //lseq: list of the N->nseq child sequences of the node
3323 //lsseq2:Array of size Nseq, with lseq[a]=1 if sequence a is child of node N
3324 static int node_index;
3325 NT_node index_tree_node (NT_node T)
3328 if (!T->parent){node_index=tree2nseq (T)+1;}
3330 index_tree_node(T->left);
3331 index_tree_node(T->right);
3333 if (!T->left && !T->right)T->index=T->lseq[0]+1;
3334 else T->index=node_index++;
3340 NT_node simple_recode_tree (NT_node T, int nseq)
3343 //recodes atree wher the leafs are already coded
3361 vfree (T->lseq); T->lseq=vcalloc (nseq, sizeof (int));
3362 vfree (T->lseq2); T->lseq2=vcalloc (nseq, sizeof (int));
3363 vfree (T->idist); T->idist=vcalloc (nseq, sizeof (int));
3364 vfree (T->ldist); T->ldist=vcalloc (nseq, sizeof (int));
3366 R=simple_recode_tree (T->left,nseq);
3368 L=simple_recode_tree (T->right,nseq);
3370 if (R)for (a=0; a<R->nseq; a++)
3372 T->lseq2[R->lseq[a]]=1;
3375 if (L)for (a=0; a<L->nseq; a++)
3377 T->lseq2[L->lseq[a]]=1;
3380 for (a=0; a<nseq; a++)
3382 if (T->lseq2[a])T->lseq[T->nseq++]=a;
3383 if (T->lseq2[a])T->idist[a]=(!R)?0:R->idist[a]+((!L)?0:L->idist[a])+1;
3384 if (T->lseq2[a])T->ldist[a]=(!R)?0:R->ldist[a]+((!L)?0:L->ldist[a])+(int)(T->dist*10000);
3390 NT_node recode_tree (NT_node T, Sequence *S)
3397 vfree (T->lseq); T->lseq=vcalloc (S->nseq, sizeof (int));
3398 vfree (T->lseq2); T->lseq2=vcalloc (S->nseq, sizeof (int));
3399 vfree (T->idist); T->idist=vcalloc (S->nseq, sizeof (int));
3400 vfree (T->ldist); T->ldist=vcalloc (S->nseq, sizeof (int));
3407 i=name_is_in_list (T->name, S->name, S->nseq, -1);
3411 T->lseq[T->nseq++]=i;
3414 T->ldist[i]=(int)(T->dist*10000);;
3418 printf_exit ( EXIT_FAILURE, stderr, "\nERROR: Sequence %s is in the Tree but Not in the Sequence dataset [code_lseq][FATAL:%s]", T->name, PROGRAM);
3427 R=recode_tree (T->left, S);
3429 L=recode_tree (T->right, S);
3432 for (a=0; a<R->nseq; a++)
3434 T->lseq2[R->lseq[a]]=1;
3437 if (L)for (a=0; a<L->nseq; a++)
3439 T->lseq2[L->lseq[a]]=1;
3442 for (a=0; a<S->nseq; a++)
3444 //don't count the root
3447 if ( !(T->parent) || !(T->parent)->parent)d=0;
3448 else if ( T->dist==0)d=0;
3451 if (T->lseq2[a])T->lseq[T->nseq++]=a;
3452 if (T->lseq2[a])T->idist[a]=(!R)?0:(R->idist[a]+((!L)?0:L->idist[a])+d);
3453 if (T->lseq2[a])T->ldist[a]=(!R)?0:R->ldist[a]+((!L)?0:L->ldist[a])+(int)(T->dist*10000);
3459 int tree2split_list (NT_node T, int ns,int **sl, int* n)
3464 tree2split_list (T->right, ns, sl, n);
3465 tree2split_list (T->left , ns, sl, n);
3467 if (!T->right) return 1;
3468 else if (T->parent && !(T->parent)->parent)return 1;
3469 else if ( T->dist==0)return 1;
3472 int t=0,t2=0, c=0, a;
3474 for (a=0; a< ns; a++)
3476 t2+=(a+1)*T->lseq2[a];
3480 if (t2==0) HERE ("0");
3482 sl[n[0]][ns]=t2;//Hash value for quick comparison;
3484 for (a=0; a< ns; a++)sl[n[0]][a]=(c==0)?T->lseq2[a]:(1-T->lseq2[a]);
3491 NT_node display_splits (NT_node T,Sequence *S, FILE *fp)
3496 if (!S)S=tree2seq (T,NULL);
3498 display_splits (T->right,S, fp);
3499 display_splits (T->left, S, fp);
3504 else if (T->parent && !(T->parent)->parent);
3508 for (a=0; a< S->nseq; a++)
3510 fprintf (fp, "%d", T->lseq2[a]);
3514 fprintf ( fp, " %5d \n", MIN(t,((S->nseq)-t)));
3518 NT_node display_leaf_nb (NT_node T, int n, FILE *fp, char * name)
3524 display_leaf_nb (T->right, n, fp, name);
3525 display_leaf_nb (T->left, n, fp, name);
3534 fprintf (fp, "%s ", T->name);
3535 for (a=0; a< n; a++)fprintf (fp, "%d", P->lseq2[a]);
3536 fprintf ( fp," %s\n", name);
3541 NT_node display_code (NT_node T, int n, FILE *fp)
3543 int a, debug=0, t=0;
3551 if (!T->parent && debug) fprintf ( fp, "\nDISPLAY TREE: START");
3552 display_code (T->right, n, fp);
3553 display_code (T->left, n, fp);
3555 fprintf ( fp, "\n");
3556 if (!T->parent) return T;
3557 else if ( !(T->parent)->parent && root4dc==1)return T;
3558 else if ( !(T->parent)->parent && root4dc==0)root4dc=1;
3560 for (a=0; a< n; a++)
3563 for (a=0; a< n; a++)fprintf (fp, "%d", T->lseq2[a]);
3565 for (a=0; a< n; a++)fprintf (fp, "%d", 1-T->lseq2[a]);
3566 if (T->isseq && debug)fprintf (fp, "%s", T->name);
3568 if (!T->parent && debug) fprintf (fp, "\nDISPLAY TREE: FINISHED");
3571 NT_node display_dist (NT_node T, int n, FILE *fp)
3579 display_dist (T->right, n, fp);
3580 display_dist (T->left, n, fp);
3582 fprintf ( stdout, "\n");
3583 for ( a=0; a< n; a++)
3584 fprintf ( stdout, " %2d ", T->idist[a]);
3585 fprintf ( stdout, "\n");
3590 NT_node check_tree (NT_node T)
3592 if (T) HERE("CHECK %s", T->name);
3595 HERE ("ERROR: Empty Group");
3598 else if (T->isseq)return T;
3602 check_tree (T->right);
3604 check_tree (T->left);
3609 NT_node new_reroot_tree( NT_node T)
3614 NT_node new_get_node (NT_node T, FILE *fp)
3621 if (!T)T=declare_tree_node (100);
3627 if (!T->right)T=T->left;
3628 else if (!T->left)T=T->right;
3629 vfree (T->parent);T->parent=NULL;
3635 scan_name_and_dist (fp, T->name, &T->dist);
3636 if (T->name && T->name [0])T->bootstrap=atof (T->name);
3637 return new_get_node (T->parent, fp);
3641 return new_get_node (T, fp);
3645 NN=new_insert_node (T);
3650 return new_get_node (NN, fp);
3655 scan_name_and_dist (fp, NN->name, &NN->dist);
3659 return new_get_node (T, fp);
3663 int scan_name_and_dist ( FILE *fp, char *name, float *dist)
3669 c=fgetc (fp);ungetc (c, fp);
3672 if ( c==';')return 0;
3674 while ((c=fgetc(fp))!=':' && c!=EOF && c!=')' && c!=';' && c!=',')
3687 while (isdigit((c=fgetc(fp))) || c=='.' || c=='-')
3695 dist[0]=atof (number);
3699 NT_node new_insert_node (NT_node T)
3704 NN=new_declare_tree_node ();
3710 else if (T->left==NULL)
3715 else if ( T->right==NULL)
3722 NN2=new_declare_tree_node ();
3724 NN2->right=T->right;
3736 (T->right)->parent=NN;
3740 NN->left=new_declare_tree_node ();
3741 (NN->left)->parent=NN;
3746 This caused a crash when internal undefined nodes, removed 19/02/08
3751 P=NN->parent=T->parent;
3754 if (P && P->right==T)P->right=NN;
3755 else if ( P && P->left==T)P->left=NN;
3757 NN->left=new_declare_tree_node ();
3758 (NN->left)->parent=NN;
3765 NT_node new_declare_tree_node ()
3768 static int node_index;
3769 p= (NT_node)vcalloc (1, sizeof ( Treenode));
3776 p->index=++node_index;
3778 p->name=(char*)vcalloc (MAXNAMES+1,sizeof (char));
3783 int new_display_tree (NT_node T, int n)
3790 if ( T->parent)fprintf (stdout, "\nNode %d: has parents)", in);
3791 else fprintf (stdout, "\nNode %d: NO parents)", in);
3795 fprintf (stdout, "\nNode %d has Right Child", in);
3796 n=new_display_tree (T->right, n+1);
3798 else fprintf ( stdout, "\nNode %d No Right\n", in);
3802 fprintf (stdout, "\nNode %d has Left Child", in);
3803 n=new_display_tree (T->left, n+1);
3805 else fprintf ( stdout, "\nNode %d No Left\n", in);
3809 fprintf (stdout, "\nNode %d has Bot Child", in);
3810 n=new_display_tree (T->bot, n+1);
3812 else fprintf ( stdout, "\nNode %d No Bot\n", in);
3817 fprintf (stdout, "\nNode %d is %s", in, T->name);
3821 int display_tree_duplicates (NT_node T)
3827 free_sequence (S, -1);
3830 S=tree2seq (T, NULL);
3831 dup=vcalloc ( S->nseq, sizeof (int));
3833 for (a=0; a< S->nseq-1; a++)
3834 for ( b=a+1; b<S->nseq; b++)
3836 if ( strm (S->name[a], S->name[b]))
3841 for (a=0; a< S->nseq-1; a++)
3842 for ( b=a+1; b<S->nseq; b++)
3844 if ( strm (S->name[a], S->name[b]) && dup[a])
3846 fprintf ( stderr, "\nSequence %s is duplicated %d Times in the tree", S->name[a], dup[a]);
3852 int tree_contains_duplicates (NT_node T)
3857 free_sequence (S, -1);
3859 S=tree2seq (T, NULL);
3860 for (a=0; a< S->nseq-1; a++)
3861 for ( b=a+1; b<S->nseq; b++)
3863 if ( strm (S->name[a], S->name[b]))return 1;
3868 float display_avg_bootstrap ( NT_node T)
3873 tot=tree2tot_dist (T, BOOTSTRAP);
3874 n=tree2n_branches (T, BOOTSTRAP);
3875 fprintf ( stdout, "\nAVERAGE BOOTSRAP: %.3f on %d Branches\n", (n>0)?tot/n:0, n);
3876 return (n>0)?tot/n:0;
3880 int tree2n_branches(NT_node T, int mode)
3886 else if ((T->isseq && mode !=BOOTSTRAP) || !T->isseq)
3890 n+=tree2n_branches(T->right, mode);
3891 n+=tree2n_branches(T->left, mode);
3896 float tree2tot_dist ( NT_node T, int mode)
3904 else if ((T->isseq && mode !=BOOTSTRAP) || !T->isseq)
3906 if ( mode == BOOTSTRAP && T->bootstrap!=0)t+=T->bootstrap;
3910 t+=tree2tot_dist(T->right, mode);
3911 t+=tree2tot_dist(T->left, mode);
3915 //This function displays all the sequences within the tree sorted by node label
3916 int cmp_tree_array ( const void *vp, const void *vq);
3917 int node_sort ( char *name, NT_node T)
3923 while (T->parent)T=T->parent;
3926 array=declare_int (nseq, 2);
3927 N=tree2node (name, T);
3929 if (N==NULL)printf_exit (EXIT_FAILURE, stderr, "ERROR: %s is not in the tree [FATAL:%s]\n", name, PROGRAM);
3930 array=display_tree_from_node (N,0,0, array);
3931 qsort ( array, nseq, sizeof (int**), cmp_tree_array);
3932 S=tree2seq(T, NULL);
3933 for (a=0; a<nseq; a++)
3934 fprintf ( stdout, ">%s %d %d\n", S->name[array[a][0]], array[a][1], array[a][2]);
3935 myexit (EXIT_SUCCESS);
3938 NT_node tree2root ( NT_node R)
3940 if (R)while (R->parent)R=R->parent;
3944 NT_node tree2node (char *name, NT_node T)
3948 else if (T->leaf && strm (T->name, name)) return T;
3952 T1=tree2node ( name, T->right);
3953 T2=tree2node ( name, T->left);
3954 return (T1>T2)?T1:T2;
3959 NT_node * tree2node_list (NT_node T, NT_node *L)
3963 if (!T) return NULL;
3964 if (!L) {ni=0;L=vcalloc (tree2nnode(T)+1, sizeof (NT_node));}
3965 tree2node_list (T->left, L);
3966 tree2node_list (T->right, L);
3973 int ** display_tree_from_node (NT_node T, int up, int down, int **array)
3976 if (!T || T->visited)return array;
3981 array[T->lseq[0]][0]=T->lseq[0];
3982 array[T->lseq[0]][1]=up;
3983 array[T->lseq[0]][2]=down;
3988 array=display_tree_from_node ( T->left ,up, down+1, array);
3989 array=display_tree_from_node ( T->right,up, down+1, array);
3991 array=display_tree_from_node ( T->parent,up+1, 0, array);
3997 int cmp_tree_array ( const void *p, const void *q)
3999 const int **vp=(const int**)p;
4000 const int **vq=(const int**)q;
4001 if (vp[0][1]>vq[0][1])return 1;
4002 else if ( vp[0][1]<vq[0][1]) return -1;
4003 else if ( vp[0][2]>vq[0][2]) return 1;
4004 else if ( vp[0][2]<vq[0][2]) return -1;
4008 NT_node * read_tree_list (Sequence *S)
4013 T=vcalloc ( S->nseq+1, sizeof (NT_node));
4015 for ( a=0; a<S->nseq; a++)
4018 if (S->seq && S->seq[a] && strlen (S->seq[a])<2)
4021 string2file ((fname=vtmpnam(NULL)), "w", S->seq[a]);
4023 T[a]=main_read_tree (fname);
4024 T[a]->file=vcalloc (strlen (S->name[a])+1, sizeof (char));
4025 sprintf (T[a]->file, "%s", S->name[a]);
4030 int treelist2dmat ( Sequence *S)
4040 T=read_tree_list (S);
4041 TS=tree2seq(T[0], NULL);
4042 fprintf (stdout, "\n%d", S->nseq);
4045 fprintf ( stdout,"\n%-10s ", S->name[a]);
4046 for ( b=0; b<n; b++)
4048 v=100-simple_tree_cmp (T[a], T[b], TS, 1);
4049 fprintf ( stdout, "%.4f ", v);
4054 myexit (EXIT_SUCCESS);
4058 int treelist2leafgroup ( Sequence *S, Sequence *TS, char *taxon)
4061 int n=0,nseq, a, c,s;
4065 char *split_file, *sorted_split_file;
4068 char *name, *fname, *group, *ref_group, *list;
4072 T=read_tree_list (S);
4073 if (!TS)TS=tree2seq(T[0], NULL);
4075 name=vcalloc (1000, sizeof (char));
4076 fname=vcalloc (1000, sizeof (char));
4077 group=vcalloc (TS->nseq*10, sizeof (char));
4078 ref_group=vcalloc (TS->nseq*10, sizeof (char));
4079 list=vcalloc (100*S->nseq, sizeof (char));
4080 split_file=vtmpnam (NULL);
4081 sorted_split_file =vtmpnam (NULL);
4084 used=vcalloc (n, sizeof (int));
4086 T=read_tree_list (S);
4087 if (!TS)TS=tree2seq(T[0], NULL);
4089 fp=vfopen (split_file, "w");
4091 for ( a=0; a< S->nseq; a++)
4094 T[a]=prune_tree (T[a], TS);
4095 T[a]=recode_tree (T[a], TS);
4096 display_leaf_nb (T[a], TS->nseq,fp, S->name[a]);
4101 for (s=0; s< TS->nseq; s++)
4106 if (taxon && !(strm (taxon, TS->name[s]) ))continue;
4108 printf_system ( "cat %s | grep %s| sort > %s::IGNORE_FAILURE::", split_file,TS->name[s], sorted_split_file);
4110 vfopen (sorted_split_file, "r");
4111 ref_group[0]=group[0]='\0';
4113 while ( (c=fgetc (fp))!=EOF)
4117 buf=vfgets (buf, fp);
4118 sscanf (buf, "%s %s %s\n", name, group, fname);
4120 if ( !ref_group[0]|| !strm (group, ref_group))
4124 {fprintf (stdout, "%s %6.2f %s",name, (((float)n*100)/(float)S->nseq), ref_group);
4125 for (i=0,a=0; a<nseq; a++)
4126 if (ref_group[a]=='1' && a!=s)fprintf (stdout, " %s ", TS->name[a]);
4127 fprintf ( stdout, "\n");
4128 fprintf (stdout, "\nLIST: %s\n", list);
4131 sprintf ( ref_group, "%s", group);
4132 list=strcatf (list, " %s", fname);
4138 list=strcatf (list, " %s", fname);
4143 fprintf (stdout, "%s %6.2f %s",name, (((float)n*100)/(float)S->nseq), group);
4144 for (i=0,a=0; a<nseq; a++)
4145 if (group[a]=='1' && a!=s)fprintf (stdout, " %s ", TS->name[a]);
4146 fprintf (stdout, "\nLIST %s\n", list);
4147 fprintf ( stdout, "\n");
4153 int count_tree_groups( Sequence *LIST, char *group_file)
4157 int a, b, c,n, w, wo, ng=0;
4158 int **list, ***rlist, **blist;
4164 T=read_tree_list (LIST);
4165 S=tree2seq(T[0], NULL);
4166 for ( a=0; a< LIST->nseq; a++)
4168 T[a]=prune_tree (T[a], S);
4169 T[a]=recode_tree (T[a], S);
4174 gs=vcalloc (2, sizeof (int));
4175 list=declare_int (LIST->nseq*S->nseq*2, S->nseq+1);
4177 blist=declare_int (2, S->nseq+1);
4178 for ( n=0, a=0; a< LIST->nseq; a++)
4181 tree2split_list (T[a], S->nseq, list+n, &n2);
4185 for (b=0; b<n2; b++)
4187 for ( c=0; c<S->nseq; c++)
4188 list[n+b][c]=1-list[n-n2+b][c];
4195 rlist=declare_arrayN(3, sizeof (int), 2,LIST->nseq*S->nseq, S->nseq+1);
4196 l=file2list (group_file, " ");
4201 if (!strstr (l[ng][1], "group")){ng++;continue;}
4202 g=(strm (l[ng][1], "group2"))?0:1;
4204 for (b=2; b<atoi (l[ng][0]); b++)
4206 if ((i=name_is_in_list(l[ng][b], S->name, S->nseq, 100))!=-1)rlist[g][gs[g]][i]=1;
4214 rlist=vcalloc ( 2, sizeof (int**));
4215 rlist[1]=count_int_strings (list, n, S->nseq);
4216 gs[1]=read_array_size_new (rlist[1]);
4218 rlist[0]=declare_int (S->nseq, S->nseq);
4220 for ( a=0; a<S->nseq; a++)rlist[0][a][a]=1;
4224 for (wo=w=0,a=0; a<gs[0]; a++)
4226 for (c=0; c< gs[1]; c++)
4229 for (b=0; b<S->nseq; b++)
4231 blist[0][b]=blist[1][b]=rlist[1][c][b];
4232 blist[0][b]=(rlist[0][a][b]==1)?1:blist[0][b]; //WITH GROUP 1
4233 blist[1][b]=(rlist[0][a][b]==1)?0:blist[1][b]; //wiTHOUT gROUP 1
4239 x1=(memcmp (blist[0], list[b], sizeof (int)*S->nseq)==0)?1:0;
4241 x2=(memcmp (blist[1], list[b], sizeof (int)*S->nseq)==0)?1:0;
4245 fprintf ( stdout, "\n%d ", MIN(wo, w));
4246 fprintf ( stdout, "(");
4247 for (b=0; b<S->nseq; b++)if (rlist[1][c][b])fprintf ( stdout, "%s ",S->name[b]);
4248 fprintf ( stdout, ") +/- (");
4249 for (b=0; b<S->nseq; b++)if (rlist[0][a][b])fprintf ( stdout, "%s ",S->name[b]);
4251 fprintf (stdout , ") + %d - %d Delta %d", w, wo, FABS((wo-w)));
4256 Split * print_split ( int n, int **list, Sequence *LIST, Sequence *S, char *buf, char *file);
4258 NT_node split2tree ( NT_node RT,Sequence *LIST, char *param)
4262 S=count_splits (RT, LIST, param);
4263 A=seq2aln ((S[0])->S,NULL, KEEP_GAP);
4265 return split2upgma_tree (S,A, A->nseq, "no");
4268 Split** count_splits( NT_node RT,Sequence *LIST, char *param)
4272 int a, b, c, d, n1, n2;
4273 int **list1, **list2;
4277 char *in=NULL,*in2=NULL, *out=NULL, order[100], filter[100];
4279 static char *def_param;
4281 //+count_splits _NB_x_FILTER_<file>
4282 //_<file is a fasta file containing the list of species to keep>
4283 if (!def_param)def_param=vcalloc ( 10, sizeof (char));
4287 if (!param)param=def_param;
4290 strget_param (param, "_NB_", "0", "%d", &nb);
4291 strget_param (param, "_TLIST_", "0", "%d", &tlist);
4292 strget_param (param, "_ORDER_", "NO", "%s", order);
4293 strget_param (param, "_FILTER_", "NO", "%s", filter);
4295 fprintf ( stderr, "\nREAD TREE LIST [%d Trees...", LIST->nseq);
4296 T=read_tree_list (LIST);
4297 fprintf ( stderr, "..]");
4299 if ( !(strm (order, "NO")))
4301 if (is_newick (order))
4303 OrderT=main_read_tree (order);
4307 S=main_read_seq (order);
4312 OrderT=(RT)?RT:T[0];
4314 fprintf ( stderr, "\nTrees Ordered according to: %s", (strm (order, "NO"))?"First Tree":order);
4317 if (!S)S=tree2seq(OrderT, NULL);
4319 for (a=0; a<S->nseq; a++)
4321 fprintf ( stdout, "\n#ORDER %15s : %3d", S->name[a], a+1);
4323 if ( !strm (filter, "NO"))
4328 F=main_read_seq (filter);
4329 cache=vcalloc (S->nseq, sizeof (int));
4330 for ( a=0; a<F->nseq; a++)
4332 if ( (i=name_is_in_list (F->name[a], S->name, S->nseq, 100))!=-1)
4335 free_sequence (F, -1);
4338 main_buf=vcalloc ( S->nseq*(STRING+1), sizeof(int));
4340 list1=declare_int (S->nseq*3, S->nseq+1);
4341 list2=declare_int (S->nseq*3, S->nseq+1);
4343 for ( a=0; a< LIST->nseq; a++)
4345 T[a]=prune_tree (T[a], S);
4346 T[a]=recode_tree (T[a], S);
4356 in=vtmpnam (NULL);in2=vtmpnam(NULL); out=vtmpnam (NULL);
4358 fp=vfopen (in, "w");
4359 fp2=vfopen (in2, "w");
4360 for ( a=0; a< LIST->nseq; a++)
4363 tree2split_list (T[a], S->nseq, list2, &n2);
4364 for ( b=0; b<n2; b++)
4366 for (c=0; c< S->nseq; c++)
4367 {fprintf (fp, "%d", list2[b][c]);}
4369 for (c=0; c< S->nseq; c++)
4370 {fprintf (fp, "%d", 1-list2[b][c]);}
4373 for (c=0; c< S->nseq; c++)
4374 {fprintf (fp2, "%d", list2[b][c]);}
4376 for (c=0; c< S->nseq; c++)
4377 {fprintf (fp2, "%d", 1-list2[b][c]);}
4378 fprintf (fp2, " %s\n",LIST->name[a]);
4384 count_strings_in_file (in, out);
4385 nl=count_n_line_in_file(out);
4386 list1=declare_int (nl+1, S->nseq+2);
4388 fp=vfopen (out, "r");
4390 buf=vcalloc (measure_longest_line_in_file (out)+1, sizeof (char));
4391 while ( fscanf (fp, "%s %d",buf, &i)==2)
4393 for (a=0; a<S->nseq; a++)list1[n1][a]=buf[a]-'0';
4394 list1[n1++][S->nseq+1]=i;
4402 RT=prune_tree (RT, S);
4403 RT=recode_tree (RT, S);
4405 tree2split_list (RT, S->nseq, list1,&n1);
4406 for ( a=0; a< LIST->nseq; a++)
4409 tree2split_list (T[a], S->nseq, list2, &n2);
4410 for (b=0; b<n1; b++)
4412 for ( c=0; c<n2; c++)
4415 for (d=0; d<S->nseq; d++)
4417 if (list1[b][d]!=list2[c][d])di++;
4419 list1[b][S->nseq+1]+=(di==0 || di== S->nseq)?1:0;
4425 SL=vcalloc ( n1+1, sizeof (Split*));
4427 for (a=0; a<n1; a++)
4431 if (nb)fprintf ( stdout, "\nSPLIT: %d and its Neighborhood +/^- %d\n", a+1, nb);
4434 for (b=0; b<S->nseq; b++)if (cache[b]!=list1[a][b])cont=0;
4435 if (!cont) continue;
4437 SL[a]=print_split (a, list1, LIST, S, main_buf, (tlist==1)?in2:NULL);
4438 for (b=0; b<n1; b++)
4444 for (d=s1=s2=0,c=0; c<S->nseq; c++)
4448 d+=(list1[a][c]!=list1[b][c])?1:0;
4452 if (d<=nb &&((s1==s2)|| ((S->nseq-s1)==s2)))print_split (b, list1, LIST, S, main_buf, (tlist==1)?in2:NULL);
4459 Split * declare_split (int nseq, int ntrees);
4460 Split* print_split ( int a, int **list1, Sequence *LIST, Sequence *S, char *buf, char *split_file)
4467 SP=declare_split (S->nseq, LIST->nseq);
4469 fprintf ( stdout, "\n>");
4470 for (t=0,b=0; b<S->nseq; b++){fprintf ( stdout, "%d", list1[a][b]);t+=list1[a][b];SP->split[b]='0'+list1[a][b];}
4471 fprintf ( stdout, " NumberSplit %5d SplitSize %5d Score %5.2f %s ", list1[a][S->nseq+1],t, (float)(list1[a][S->nseq+1]*100)/LIST->nseq, (buf)?buf:"");
4472 SP->n= list1[a][S->nseq+1];
4473 SP->score=(float)(list1[a][S->nseq+1]*100)/LIST->nseq;
4476 for (f1=1,b=0; b< S->nseq; b++)
4481 if (f1==1)fprintf ( stdout, "(");
4482 else fprintf (stdout, ",");
4484 fprintf ( stdout, "%s", S->name [b]);
4487 fprintf ( stdout, ")");
4494 fp=vfopen (split_file, "r");
4495 while ( (c=fgetc(fp))!=EOF)
4499 buf=vfgets (buf, fp);
4500 if ( strstr (buf, SP->split))
4503 list=string2list (buf);
4504 fprintf ( stdout, "\n\t%s %s", SP->split, list[3]);
4505 free_char (list, -1);
4513 Split * declare_split (int nseq, int ntrees)
4516 S=vcalloc (1, sizeof (Split));
4517 S->split=vcalloc ( nseq+1, sizeof (char));
4520 int treelist2splits( Sequence *S, Sequence *TS)
4527 char *split_file, *sorted_split_file;
4528 char *buf=NULL, *ref_buf=NULL;
4531 split_file=vtmpnam (NULL);
4532 sorted_split_file =vtmpnam (NULL);
4535 used=vcalloc (n, sizeof (int));
4537 T=read_tree_list (S);
4538 if (!TS)TS=tree2seq(T[0], NULL);
4540 fp=vfopen (split_file, "w");
4543 for ( a=0; a< S->nseq; a++)
4546 T[a]=prune_tree (T[a], TS);
4547 T[a]=recode_tree (T[a], TS);
4548 display_splits (T[a], TS,fp);
4552 printf_system ("cp %s split_file::IGNORE_FAILURE::", split_file);
4553 printf_system ( "cat %s | grep 1| sort > %s::IGNORE_FAILURE::", split_file, sorted_split_file);
4555 fp=vfopen (sorted_split_file, "r");
4556 fprintf (stdout, "LEGEND: <#occurences> <coded split> <min group size> <(group1,)> <(group2,>\n");
4558 for ( a=0; a<TS->nseq; a++)fprintf ( stdout, "SEQ_INDEX %d %s\n", a+1, TS->name[a]);
4559 while ( (c=fgetc (fp))!=EOF)
4563 buf=vfgets (buf, fp);
4564 buf [strlen(buf)-1]='\0';
4568 ref_buf=vcalloc (strlen (buf)+1, sizeof (char));
4569 sprintf ( ref_buf, "%s", buf);
4572 else if ( !strm (buf, ref_buf))
4575 fprintf ( stdout, "SPLIT_COUNT %3d %s (", n, ref_buf);
4576 for (i=0,a=0; a<nseq; a++)
4577 if (ref_buf[a]=='1')
4579 if (i==1)fprintf(stdout, ",");
4580 fprintf (stdout, "%s", TS->name[a]);
4583 fprintf ( stdout, "),(");
4584 for (i=0,a=0; a<nseq; a++)
4585 if (ref_buf[a]=='0')
4587 if (i==1) fprintf ( stdout, ",");
4588 fprintf (stdout, "%s", TS->name[a]);
4592 fprintf (stdout, ")\n");
4593 sprintf ( ref_buf, "%s", buf);
4607 int treelist2splits_old ( Sequence *S, Sequence *TS)
4614 char *split_file, *sorted_split_file;
4615 char *buf=NULL, *ref_buf=NULL;
4618 split_file=vtmpnam (NULL);
4619 sorted_split_file =vtmpnam (NULL);
4622 used=vcalloc (n, sizeof (int));
4624 T=read_tree_list (S);
4625 if (!TS)TS=tree2seq(T[0], NULL);
4627 fp=vfopen (split_file, "w");
4629 for ( a=0; a< S->nseq; a++)
4632 T[a]=prune_tree (T[a], TS);
4633 T[a]=recode_tree (T[a], TS);
4634 display_leaf_nb (T[a], TS->nseq,fp, S->name[a]);
4637 printf_system ("cp %s split_file::IGNORE_FAILURE::", split_file);myexit (0);
4639 printf_system ( "cat %s | grep 1| sort > %s::IGNORE_FAILURE::", split_file, sorted_split_file);
4641 vfopen (sorted_split_file, "r");
4643 while ( (c=fgetc (fp))!=EOF)
4647 buf=vfgets (buf, fp);
4648 buf [strlen(buf)-1]='\0';
4652 ref_buf=vcalloc (strlen (buf)+1, sizeof (char));
4653 sprintf ( ref_buf, "%s", buf);
4656 else if ( !strm (buf, ref_buf))
4659 fprintf ( stdout, "%3d %s(", n, ref_buf);
4660 for (i=0,a=0; a<nseq; a++)
4661 if (ref_buf[a]=='1')
4663 if (i==1)fprintf(stdout, ",");
4664 fprintf (stdout, "%s", TS->name[a]);
4667 fprintf ( stdout, "),(");
4668 for (i=0,a=0; a<nseq; a++)
4669 if (ref_buf[a]=='0')
4671 if (i==1) fprintf ( stdout, ",");
4672 fprintf (stdout, "%s", TS->name[a]);
4676 fprintf (stdout, ")\n");
4677 sprintf ( ref_buf, "%s", buf);
4691 NT_node *treelist2prune_treelist (Sequence *S, Sequence *TS, FILE *out)
4696 T=read_tree_list (S);
4697 T=vrealloc (T, (S->nseq+1)*sizeof (NT_node));
4698 for (b=0,a=0; a<S->nseq; a++)
4700 T[a]=prune_tree (T[a], TS);
4701 if (tree2nleaf(T[a])<TS->nseq)
4709 T[b]=recode_tree (T[b], TS);
4710 sprintf ( S->name[b], "%s", S->name[a]);
4711 s=tree2string (T[a]);
4712 S->seq[b]=vrealloc (S->seq[b], (strlen (s)+1)*sizeof (char));
4713 sprintf (S->seq[b], "%s",s);
4714 sprintf (S->seq_comment[b], " NSPECIES: %d", TS->nseq);
4727 for (a=0; a<S->nseq; a++)
4729 print_tree (T[a], "newick", out);
4734 int** treelist2lti2 ( Sequence *S, Sequence *TS, int ngb, FILE *out);
4735 int treelist2frame (Sequence *S, Sequence *TS)
4737 int n, a, b, c,d, **r, **order;
4740 temp=duplicate_sequence (S);
4741 order= treelist2lti (temp, TS,0,stdout);
4743 TS=reorder_seq_2 (TS, order, 0, TS->nseq);
4751 temp=duplicate_sequence (S);
4752 r=treelist2groups (temp,TS, NULL, NULL);
4753 fprintf ( stdout, "\n>Tree_%d [%d %%]\n ", a+1,r[0][1]);
4754 tree=main_read_tree (temp->name[r[0][0]]);
4755 tree=prune_tree (tree, TS);
4756 print_tree (tree, "newick",stdout);
4759 free_sequence (temp,-1);
4761 myexit (EXIT_SUCCESS);
4763 int** treelist2lti2 ( Sequence *S, Sequence *TS, int ngb, FILE *out)
4766 int a,b, c, d, ****dist, i;
4767 int **score, **order;
4769 score=declare_int (TS->nseq, 3);
4770 order=declare_int (TS->nseq, 2);
4773 for (a=0; a<50; a++)
4775 Sequence *seq, *trees;
4777 trees=duplicate_sequence (S);
4778 seq=duplicate_sequence (TS);
4779 for (b=0; b<TS->nseq; b++){order[b][0]=b;order[b][1]=rand()%10000;}
4780 sort_int (order, 2, 1, 0, TS->nseq-1);
4781 seq=reorder_seq_2(seq, order, 0,5);
4782 r=treelist2groups (trees,seq, NULL, NULL);
4786 score[order[b][0]][1]+=r[0][1];
4787 score[order[b][0]][2]++;
4789 HERE ("Score=%d", r[0][1]);
4791 free_sequence (seq, -1);
4792 free_sequence (trees, -1);
4796 for ( a=0; a< TS->nseq; a++)
4799 HERE ("%s => %d [%d]",TS->name[a], score[a][1]/score[a][2], score[a][2]);
4800 score[a][1]/=(score[a][2])?score[a][2]:1;
4802 sort_int_inv (score, 3, 1, 0, TS->nseq-1);
4808 int** treelist2lti ( Sequence *S, Sequence *TS, int ngb, FILE *out)
4811 int a,b, c, d, ****dist, i;
4812 float score0=0, score1=0;
4817 T=treelist2prune_treelist (S, TS,NULL);
4819 if (!ngb)ngb=TS->nseq*2;
4820 dist=vcalloc ( S->nseq, sizeof (int****));
4821 result=declare_int (TS->nseq, 2);
4822 for (a=0; a<TS->nseq; a++)
4826 for (b=0; b<TS->nseq;b++)
4830 for (c=0; c<S->nseq; c++)
4832 if (!dist[c])dist[c]=tree2dist(T[c], TS, NULL);
4833 for (d=0; d<S->nseq; d++)
4835 float score, d1, d2;
4837 if (!dist[d])dist[d]=tree2dist(T[d], TS, NULL);
4838 d1=dist[c][0][a][b];
4839 d2=dist[d][0][a][b];
4840 score=FABS((d1-d2));
4841 if (d1>ngb || d2>ngb);
4849 // if (d1 && d2) HERE ("%d %d", (int)d1, (int)d2);
4852 score_pair=(score_pair*100)/(float)n_pair;
4853 if (out)fprintf ( stdout, "\n>%-20s %-20s LTI: %7.3f [Kept %d Trees Out of %d] ", TS->name[a],TS->name[b], score_pair, S->nseq,i);
4856 score_seq=(score_seq*100)/n_seq;
4858 result[a][1]=(int)(100*score_seq);
4859 if (out)fprintf ( stdout, "\n>%-20s %-20s LTI: %7.3f [Kept %d Trees Out of %d] ", TS->name[a],"*", score_seq, S->nseq, i);
4861 sort_int (result,2,1,0, TS->nseq-1);
4866 int ***tree2dist (NT_node T, Sequence *S, int ***d)
4868 int *l0, *r0,*l1, *r1, a, b;
4872 if (!S)S=tree2seq(T, NULL);
4875 d=declare_arrayN (3, sizeof (float),2, S->nseq, S->nseq);
4877 T=recode_tree (T, S);
4880 if (!T->left)return d;
4881 if (!T->right) return d;
4883 l0=(T->left)->idist;
4884 r0=(T->right)->idist;
4886 l1=(T->left)->ldist;
4887 r1=(T->right)->ldist;
4891 for (a=0; a< S->nseq; a++)
4892 for (b=0; b<S->nseq; b++)
4894 if (l0[a]>0 && r0[b]>0)d[0][a][b]=d[0][b][a]=l0[a]+r0[b];
4895 if (l0[a]>0 && r0[b]>0)d[1][a][b]=d[1][b][a]=l1[a]+r1[b];
4898 d=tree2dist (T->left, S, d);
4899 d=tree2dist (T->right, S, d);
4907 int **tree2dist_split ( NT_node T, Sequence *S, int **dist)
4912 char *buf=NULL, **list=NULL, *split_file;
4915 if (!S)S=tree2seq(T, NULL);
4917 T=prune_tree (T, S);
4918 T=recode_tree (T, S);
4920 split_file=vtmpnam (NULL);
4921 fp=vfopen (split_file, "w");
4922 display_code (T, S->nseq,fp);
4925 list=declare_char (2*S->nseq, S->nseq+1);
4926 fp=vfopen (split_file, "r");
4928 while ((buf=vfgets (buf,fp))!=NULL)
4930 if (buf[0]=='1' || buf[0]=='0')sprintf (list[n++], "%s", buf);
4933 dist=declare_int ( S->nseq, S->nseq);
4934 for (a=0; a< S->nseq; a++)
4935 for ( b=0; b<S->nseq; b++)
4937 if (list[c][a]!=list[c][b])dist[a][b]++;
4943 int** treelist2groups (Sequence *S, Sequence *TS, char *star_node, FILE *out)
4956 T=treelist2prune_treelist (S, TS,NULL);
4957 nsn=(star_node)?atoi(star_node):0;
4959 results=declare_int (S->nseq+1, 2);
4963 for (a=0; a< S->nseq; a++)tree2star_nodes(T[a],nsn);
4966 used=vcalloc (S->nseq, sizeof (int));
4967 for (ntop=0,a=0; a<S->nseq; a++)
4973 if (out)fprintf ( out, "\nTree %s:",S->name[a]);
4978 for ( b=0; b<S->nseq; b++)
4982 v=(int)simple_tree_cmp (T[a], T[b], TS, 1);
4987 if (out)fprintf (stdout," %s ", S->name[b]);
4992 if (out)fprintf ( stdout, "__ N=%d\n", tot-1);
4996 for (n=0,a=0; a<S->nseq; a++)
5000 if (out)fprintf ( out, "\n>%-15s %4d %6.2f TOPOLOGY_LIST\n", S->name[a], used[a]-1, (float)(((float)used[a]-1)*100/(float)S->nseq));
5001 if (out)print_tree (T[a], "newick_tree", out);
5003 results[n][1]=((used[a]-1)*100)/i;
5008 for (a=0; a<S->nseq; a++) free_tree(T[a]);
5011 if (out)fprintf ( stdout, "\nTotal Number of different topologies: %d\n", ntop);
5013 sort_int_inv (results,2,1,0, n-1);
5014 for (a=0; a<S->nseq; a++) free_tree(T[a]);
5018 float simple_tree_cmp (NT_node T1, NT_node T2,Sequence *S, int mode)
5020 Tree_sim *TS1, *TS2;
5023 TS1=vcalloc (1, sizeof (Tree_sim));
5024 TS2=vcalloc (1, sizeof (Tree_sim));
5027 T1=recode_tree(T1, S);
5028 T2=recode_tree(T2, S);
5030 n=new_compare_trees ( T1, T2, S->nseq, TS1);
5031 new_compare_trees ( T2, T1, S->nseq, TS2);
5035 t=(TS1->uw+TS2->uw)*100/(TS1->max_uw+TS2->max_uw);
5036 w=(TS1->w+TS2->w)*100/(TS1->max_w+TS2->max_w);
5037 l=(TS1->d+TS2->d)*100/(TS1->max_d+TS2->max_d);
5039 vfree (TS1); vfree (TS2);
5040 if ( mode ==1)return t;
5041 else if (mode ==2) return w;
5044 int treelist2n (NT_node *L)
5050 int **treelist2avg_treecmp (NT_node *L, char *file)
5055 if (file) L=read_tree_list (main_read_seq(file));
5058 score=declare_int (n, 2);
5059 for (a=0; a<n; a++)score[a][0]=a;
5061 for (a=0; a<n-1; a++)
5063 output_completion (stderr,a,n,1, "Tree Cmp");
5064 for (b=a+1; b<n; b++)
5067 ts=tree_cmp (L[a],L[b]);
5068 score[a][1]+=ts->uw;
5069 score[b][1]+=ts->uw;
5073 sort_int_inv (score, 2, 1, 0, n-1);
5074 if (file)free_treelist(L);
5078 int treelist_file2consense (char *tree_file, char *outtree, char *outfile)
5080 static char *command;
5081 static char *tmp_outtree;
5082 static char *tmp_outfile;
5089 command=vtmpnam (NULL);
5090 tmp_outtree=vtmpnam (NULL);
5091 tmp_outfile=vtmpnam (NULL);
5093 if (!check_program_is_installed ("consense",NULL,NULL,"www.phylip.com",NO_REPORT))return 0;
5095 fp=vfopen (command, "w");fprintf ( fp, "%s\nY\n", tree_file);fclose (fp);
5096 if ( check_file_exists ("outtree")){flag1=1;printf_system ("mv outtree %s::IGNORE_FAILURE::", tmp_outtree);}
5097 if ( check_file_exists ("outfile")){flag2=1;printf_system ("mv outfile %s::IGNORE_FAILURE::", tmp_outfile);}
5098 printf_system ("consense <%s > /dev/null 2>/dev/null::IGNORE_FAILURE::", command);
5100 if ( outtree)printf_system ("mv outtree %s::IGNORE_FAILURE::", outtree);
5102 if ( outfile)printf_system ("mv outfile %s::IGNORE_FAILURE::", outfile);
5104 if (flag1)printf_system ("mv %s outtree::IGNORE_FAILURE::", tmp_outtree);
5105 if (flag2)printf_system ("mv %s outfile::IGNORE_FAILURE::", tmp_outfile);
5110 NT_node treelist2filtered_bootstrap ( NT_node *L,char *file, int **score, float t)
5115 if (t==1 || t==0 || !score)return treelist2bootstrap (L, file);
5117 if (file)L=read_tree_list (main_read_seq(file));
5121 if (n==0) return NULL;
5123 L2=vcalloc ( n+1, sizeof (NT_node));
5125 L2[a]=L[score[a][0]];
5127 BT=treelist2bootstrap (L2, NULL);
5130 if (file)free_treelist(L);
5134 NT_node treelist2bootstrap ( NT_node *L, char *file)
5142 file=vtmpnam (NULL);
5143 vfclose (print_tree_list (L,"newick", vfopen (file, "w")));
5146 outfile=vtmpnam (NULL);
5148 printf_system( "msa2bootstrap.pl -i %s -o %s -input tree >/dev/null 2>/dev/null", file, outfile);
5150 T=main_read_tree (outfile);
5151 T=tree_dist2normalized_tree_dist (T,treelist2n(L));
5159 Sequence * treelist2seq (Sequence *S)
5168 name=vcalloc (1, sizeof (char*));
5169 fp=vfopen ((fname=vtmpnam (NULL)), "w");
5171 T=read_tree_list (S);
5172 for (n=0,a=0; a< S->nseq; a++)
5174 TS=tree2seq(T[a], NULL);
5175 for (b=0; b<TS->nseq; b++)
5177 if ( (i=name_is_in_list (TS->name[b], name, n, 100))==-1)
5179 name[n]=vcalloc (100, sizeof (int));
5180 sprintf ( name[n], "%s", TS->name[b]);
5182 name=vrealloc (name, (n+1)*sizeof (char*));
5183 fprintf ( fp, ">%s\n", TS->name[b]);
5186 free_sequence(TS, TS->nseq);
5192 return get_fasta_sequence (fname, NULL);
5196 Sequence * treelist2sub_seq ( Sequence *S, int f)
5199 int a,b,c, s, i, n, maxnseq, tot;
5200 int **count, **grid;
5204 if (!f)return treelist2seq(S);
5207 //keep as many taxons as possible so that f% of the trees are kept
5208 //1: count the frequency of each taxon
5210 FS=treelist2seq (S);
5213 count=declare_int (maxnseq, 3);
5214 grid=declare_int (S->nseq,maxnseq+1);
5215 T=read_tree_list (S);
5219 for (a=0; a<FS->nseq; a++){count[a][0]=a;count[a][2]=1;}
5220 for (n=0,a=0; a< S->nseq; a++)
5222 TS=tree2seq(T[a], NULL);
5223 for (b=0; b<TS->nseq; b++)
5225 i=name_is_in_list (TS->name[b], FS->name, FS->nseq, 100);
5226 if ( i==-1){myexit (EXIT_FAILURE);}
5230 free_sequence(TS, TS->nseq);
5234 sort_int ( count,3,1, 0, maxnseq-1);
5236 for (a=0; a<maxnseq; a++)
5239 for ( b=0; b< S->nseq; b++)grid[b][maxnseq]=1;//prepare to keep everything
5240 for ( tot=S->nseq, b=0; b< S->nseq; b++)
5242 for (c=0; c<maxnseq; c++)
5245 if (count[c][2] && !grid[b][s])
5253 tot=(tot*100)/S->nseq;
5256 if (tot<f)return NULL;
5258 fname=vtmpnam (NULL);
5259 fp=vfopen (fname, "w");
5260 for (a=0; a<maxnseq; a++)
5264 fprintf ( fp, ">%s LIMIT: %d %%\n", FS->name[count[a][0]], f);
5269 free_int (grid, -1); free_int (count, -1);
5270 free_sequence (FS, FS->nseq);
5272 return get_fasta_sequence (fname, NULL);
5274 /******************************COPYRIGHT NOTICE*******************************/
5275 /*© Centro de Regulacio Genomica */
5277 /*Cedric Notredame */
5278 /*Fri Feb 18 08:27:45 CET 2011 - Revision 596. */
5279 /*All rights reserved.*/
5280 /*This file is part of T-COFFEE.*/
5282 /* T-COFFEE is free software; you can redistribute it and/or modify*/
5283 /* it under the terms of the GNU General Public License as published by*/
5284 /* the Free Software Foundation; either version 2 of the License, or*/
5285 /* (at your option) any later version.*/
5287 /* T-COFFEE is distributed in the hope that it will be useful,*/
5288 /* but WITHOUT ANY WARRANTY; without even the implied warranty of*/
5289 /* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the*/
5290 /* GNU General Public License for more details.*/
5292 /* You should have received a copy of the GNU General Public License*/
5293 /* along with Foobar; if not, write to the Free Software*/
5294 /* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA*/
5295 /*............................................... |*/
5296 /* If you need some more information*/
5297 /* cedric.notredame@europe.com*/
5298 /*............................................... |*/
5302 /******************************COPYRIGHT NOTICE*******************************/