7 #include "io_lib_header.h"
8 #include "util_lib_header.h"
9 #include "define_header.h"
10 #include "dp_lib_header.h"
12 int count_threshold_nodes (Alignment *A, NT_node P, int t);
13 int set_node_score (Alignment *A, NT_node P, char *mode);
14 char *split_nodes_nseq (Alignment *A, NT_node P, int max_nseq, char *list);
15 char *split_nodes_idmax (Alignment *A, NT_node P, int max_id, char *list);
16 /******************************************************************/
20 /******************************************************************/
23 Constraint_list *profile2list (Job_TC *job, int nprf)
25 int *seqlist, *cache1, *cache2;
27 static CLIST_TYPE *entry;
28 Alignment *A1, *A2, *A;
30 Constraint_list *SCL, *SCL2;
34 int **score, n, s1, s2, si, r1, r2;
37 int cons, cons_thres, max_n_pairs, tot_n_pairs;
47 if ( !buf1) buf1=vcalloc (1000, sizeof (char));
49 /*initialize the structure*/
52 command=M->executable;
54 seq=(job->param)->seq_c;
56 debug=(getenv("DEBUG_TCOFFEE_profile2list")!=NULL)?1:0;
59 if ( debug)print_mem_usage (stderr, "IN");
60 seqlistb=vcalloc (10, sizeof (char));
62 seqlist=string2num_list (seq)+1;
66 if (!entry)entry=vcalloc (CL->entry_len, sizeof (int));
67 entry[SEQ1]=seqlist[1];
68 entry[SEQ2]=seqlist[2];
71 A1=seq2profile(CL->S, seqlist[1]);
72 A2=seq2profile(CL->S, seqlist[2]);
74 SCL=copy_constraint_list ( CL, SOFT_COPY);
76 SCL->L=L;SCL->max_L_len=max_L_len;
81 SCL2=copy_constraint_list ( CL, SOFT_COPY);
82 SCL2->L=L;SCL2->max_L_len=max_L_len;
88 SCL->S=merge_seq (A1->S, NULL );
89 SCL->S=merge_seq (A2->S, SCL->S);
91 /*1: Compare the two profiles and identify the N master pairs*/
92 n=(A1->nseq*A2->nseq);
93 max=(nprf==0 ||method_uses_structure (M))?n:MIN(nprf,n);
98 A=align_two_aln (A1, A2, "blosum62mt",-4,-1, "myers_miller_pair_wise");
99 score=declare_int ( A1->nseq*A2->nseq,3);
101 for (n=0,a=0; a<A1->nseq; a++)
102 for ( b=0; b<A2->nseq; b++,n++)
106 score[n][2]=(int)get_seq_fsim ( A->seq_al[a], A->seq_al[b+A1->nseq], "-", NOGROUP, NOMATRIX, AVERAGE_POSITIONS);
109 sort_int_inv (score,3,2,0, n-1);
113 score=declare_int (n,3);
114 for (n=0,a=0; a<A1->nseq; a++)
115 for ( b=0; b<A2->nseq; b++,n++)
122 iA1=get_name_index (A1->name,A1->nseq, (SCL->S)->name, (SCL->S)->nseq);
123 iA2=get_name_index (A2->name,A2->nseq, (SCL->S)->name, (SCL->S)->nseq);
125 /*submit the N pairs*/
126 for ( n_pairs=1,a=0; a<max; a++)
128 s1=score[a][0];s2=score[a][1];
129 sprintf ( buf1, "2 %d %d", iA1[s1], iA2[s2]);
131 job=print_lib_job (job, "param->seq_c=%s",buf1);
133 /*2: Compute the pairewise library*/
137 /*Unwind the pointer counter:*/job->np--;
139 if (debug)fprintf ( stderr, "\n\tProfile aln %s %s %s [(%d,%d):%d %%id]", (SCL->S)->name[iA1[s1]],(SCL->S)->name[iA2[s2]], command, score[a][0], score[a][1],score[a][2]);
142 /*3: Update the main library with the pairwise library*/
143 cache1=seq2inv_pos (A1->seq_al[s1]);
144 cache2=seq2inv_pos (A2->seq_al[s2]);
147 if (debug)fprintf ( stderr, " =>%d pairs", SCL->ne);
148 n_pairs+=(SCL->ne>0)?1:0;
149 for (c=0; c< SCL->ne; c++)
151 si=vread_clist (SCL, c, SEQ1);
152 r1=vread_clist(SCL, c, (si==iA1[s1])?R1:R2);
153 r2=vread_clist(SCL, c, (si==iA1[s1])?R2:R1);
155 entry[R1]=cache1[r1];
156 entry[R2]=cache2[r2];
158 entry[WE]=vread_clist(SCL,c,WE);
160 add_entry2list(entry, SCL2);
164 vfree ( cache1);vfree ( cache2);
165 compact_list (SCL2, 0, SCL2->ne, "default");
166 if (debug)fprintf ( stderr, " =>%d pairs", SCL2->ne);
170 free_sequence (SCL->S,-1);
171 vfree (iA1); vfree (iA2);
173 if (debug)fprintf ( stderr, "\nNPairs=%d", n_pairs);
176 compact_list (SCL2, 0, SCL2->ne, "default");
177 /*get the concistency distribution*/
178 cons_table=vcalloc ( 101, sizeof (int));
179 for (c=0; c< SCL2->ne; c++)
181 entry[R1]=vread_clist(SCL2,c,R1);
182 entry[R2]=vread_clist(SCL2,c,R2);
183 entry[WE]=vread_clist(SCL2,c,WE);
184 entry[CONS]=vread_clist(SCL2,c,CONS);
185 cons=(entry[CONS]*100)/n_pairs;
189 /*Identify the threshold*/
190 max_n_pairs=(int)((float)(MIN(A1->len_aln, A2->len_aln)*2));
192 for (cons_thres=0,tot_n_pairs=0,c=100; c>=0; c--)
195 tot_n_pairs+=cons_table[c];
196 if ( tot_n_pairs>=max_n_pairs){cons_thres=c;c=-1;}
200 /*Produce the library*/
201 for (c=0; c< SCL2->ne; c++)
203 entry[R1]=vread_clist(SCL2,c,R1);
204 entry[R2]=vread_clist(SCL2,c,R2);
205 entry[WE]=vread_clist(SCL2,c,WE);
206 entry[CONS]=vread_clist(SCL2,c,CONS);
207 entry[WE]/=entry[CONS];
208 cons=(entry[CONS]*100)/n_pairs;
211 if (cons>=cons_thres)add_entry2list(entry, CL);
214 if ( !seq2R_template_profile (CL->S,seqlist[1]))free_aln (A1);
215 if ( !seq2R_template_profile (CL->S,seqlist[2]))free_aln (A2);
219 vfree (SCL->L);free_constraint_list ( SCL);
222 if (debug)print_mem_usage (stderr, "BEF FREE SCL2");
223 vfree(SCL2->L);free_constraint_list ( SCL2);
228 fprintf ( stderr, "\nCL->ne=%d", CL->ne);
229 print_mem_usage (stderr, "OUT");
236 int method_uses_structure(TC_method *M)
238 if ( strchr (M->seq_type, 'P'))return 1;
241 int method_uses_profile(TC_method *M)
243 if ( strchr (M->seq_type, 'R'))return 1;
248 Constraint_list *seq2list ( Job_TC *job)
252 Constraint_list *PW_CL;
253 Constraint_list *RCL=NULL;
261 static char *s1, *s2;
278 seq=(job->param)->seq_c;
281 STL=(CL)?CL->STRUC_LIST:NULL;
286 seqlist=string2num_list (seq)+1;
288 if (!s1)s1=vcalloc ( MAXNAMES+1, sizeof (char));
289 if (!s2)s2=vcalloc ( MAXNAMES+1, sizeof (char));
292 sprintf (s1, "%s", (CL->S)->name[seqlist[1]]);
293 sprintf (s2, "%s", (CL->S)->name[seqlist[2]]);
298 if ( strncmp (CL->profile_comparison, "full", 4)==0)
301 if ( CL->profile_comparison[4])nprf=atoi ( CL->profile_comparison+4);
310 if ((method_uses_structure (M)) && profile2P_template_file (CL->S, seqlist[1]) && profile2P_template_file (CL->S, seqlist[2]))
312 RCL=profile2list (job, nprf);
314 else if ( strm (mode, "ktup_msa"))
316 RCL=hasch2constraint_list (CL->S, CL);
318 else if ( strm (mode, "test_pair") || strm ( mode,"fast_pair") || strm (mode, "ifast_pair") \
319 || strm ( mode, "diag_fast_pair")|| strm (mode, "idiag_fast_pair")\
320 || strm ( mode, "blast_pair") || strm (mode, "lalign_blast_pair") \
321 || strm ( mode, "viterbi_pair") || strm (mode, "slow_pair") || strm(mode, "glocal_pair") || strm (mode, "biphasic_pair") \
322 || strm ( mode, "islow_pair") || strm (mode, "tm_slow_pair") || strm (mode, "r_slow_pair") \
323 || strm ( mode, "lalign_id_pair")|| strm (mode, "tm_lalign_id_pair") || strm (mode , "lalign_len_pair") \
324 || strm (mode, "prrp_aln") || strm ( mode, "test_pair") \
325 || strm (mode, "cdna_fast_pair") || strm (mode, "diaa_slow_pair") || strm (mode, "monoaa_slow_pair")\
326 || strncmp (mode,"cdna_fast_pair",14)==0 \
331 RCL=aln2constraint_list ((A->A)?A->A:A, CL,weight);
334 else if ( strm ( mode, "subop1_pair") || strm ( mode, "subop2_pair") )
340 else if ( strm ( mode, "proba_pair") )
345 else if ( strm ( mode, "best_pair4prot"))
347 RCL=best_pair4prot (job);
349 else if ( strm ( mode, "best_pair4rna"))
351 RCL=best_pair4rna (job);
353 else if ( strm ( mode, "exon2_pair"))
359 sprintf ( weight2, "%s_subset_objOBJ-",weight);
360 RCL=aln2constraint_list (A, CL,weight2);
362 else if ( strm ( mode, "exon_pair"))
366 RCL=aln2constraint_list (A, CL,weight);
369 else if ( strm ( mode, "exon3_pair"))
375 sprintf ( weight2, "%s_subset_objOBJ-",weight);
376 RCL=aln2constraint_list (A, CL,weight2);
379 /*STRUCTURAL METHODS*/
381 else if ( strm (mode, "seq_msa"))
383 RCL=seq_msa(M, seq, CL);
385 /*STRUCTURAL METHODS*/
386 else if (strm (mode, "profile_pair") || strm (mode, "hh_pair"))
388 RCL=profile_pair (M, seq, CL);
391 else if ( strm (mode, "sap_pair"))
393 RCL=sap_pair (seq, weight, CL);
395 else if ( strm (mode, "thread_pair"))
397 RCL=thread_pair (M,seq, CL);
399 else if ( strm (mode, "pdb_pair"))
401 RCL=pdb_pair (M,seq, CL);
403 else if (strm (mode, "rna_pair"))
405 RCL=rna_pair(M, seq, CL);
407 else if ( strm (mode, "pdbid_pair"))
409 RCL=pdbid_pair (M,seq, CL);
411 else if ( strm (mode, "fugue_pair"))
413 RCL=thread_pair (M,seq, CL);
415 else if ( strm (mode, "lsqman_pair"))
417 RCL=lsqman_pair(seq, CL);
419 else if ( strm ( mode, "align_pdb_pair"))
421 RCL=align_pdb_pair ( seq,"gotoh_pair_wise", CL->align_pdb_hasch_mode,CL->align_pdb_param_file,CL, job);
423 else if ( strm ( mode, "lalign_pdb_pair"))
425 RCL=align_pdb_pair ( seq,"sim_pair_wise_lalign", CL->align_pdb_hasch_mode,CL->align_pdb_param_file,CL, job);
427 else if ( strm ( mode, "align_pdb_pair_2"))
429 RCL=align_pdb_pair_2 ( seq, CL);
433 fprintf ( CL->local_stderr, "\nERROR: THE FUNCTION %s DOES NOT EXIST [FATAL:%s]\n", mode, PROGRAM);crash("");
435 add_method_output2method_log (NULL,NULL, (A&&A->len_aln)?A:NULL,RCL, NULL);
436 RCL=(RCL==NULL)?CL:RCL;
444 Constraint_list *method2pw_cl (TC_method *M, Constraint_list *CL)
447 Constraint_list *PW_CL=NULL;
455 PW_CL=copy_constraint_list ( CL, SOFT_COPY);
456 PW_CL->pw_parameters_set=1;
460 S=(PW_CL)?PW_CL->S:NULL;
463 m=PW_CL->method_matrix;
464 if ( strm ((PW_CL->S)->type, "PROTEIN"))
467 sprintf ( mat, "%s", (strm(m, "default"))?"blosum62mt":m);
468 sprintf (group_mat, "vasiliky");
472 else if ( strm ((PW_CL->S)->type, "DNA") || strm ((PW_CL->S)->type, "RNA") )
475 sprintf(group_mat, "idmat");
476 sprintf ( mat, "%s", (strm(m, "default"))?"dna_idmat":m);
480 if ( M->matrix[0])sprintf ( mat, "%s", M->matrix);
482 PW_CL->M=read_matrice (mat);
485 if ( M->gop!=UNDEFINED) {PW_CL->gop=M->gop;}
488 PW_CL->gop= get_avg_matrix_mm (PW_CL->M, AA_ALPHABET)*10;
491 if ( M->gep!=UNDEFINED)PW_CL->gep=M->gep;
496 if ( strm2 ( mode,"fast_pair", "ifast_pair"))
500 PW_CL->use_fragments=0;
501 if ( !PW_CL->use_fragments)PW_CL->diagonal_threshold=0;
502 else PW_CL->diagonal_threshold=6;
504 sprintf (PW_CL->dp_mode, "fasta_pair_wise");
505 sprintf (PW_CL->matrix_for_aa_group, group_mat);
507 if ( strm ( mode, "fast_pair"))
511 PW_CL->get_dp_cost=slow_get_dp_cost;
512 PW_CL->evaluate_residue_pair=evaluate_matrix_score;
516 else if ( strm2 ( mode,"diag_fast_pair","idiag_fast_pair"))
523 PW_CL->use_fragments=1;
524 PW_CL->diagonal_threshold=3;
526 sprintf (PW_CL->dp_mode, "fasta_pair_wise");
528 sprintf (PW_CL->matrix_for_aa_group, group_mat);
530 PW_CL->get_dp_cost=slow_get_dp_cost;
531 PW_CL->evaluate_residue_pair=evaluate_matrix_score;
535 else if ( strm ( mode,"blast_pair"))
541 PW_CL->use_fragments=0;
543 PW_CL->pair_wise=gotoh_pair_wise;
544 PW_CL->evaluate_residue_pair=evaluate_blast_profile_score;
545 sprintf (PW_CL->matrix_for_aa_group, group_mat);
548 else if ( strm ( mode,"lalign_blast_pair"))
554 PW_CL->use_fragments=0;
555 PW_CL->pair_wise=sim_pair_wise_lalign;
556 PW_CL->evaluate_residue_pair=evaluate_blast_profile_score;
557 PW_CL->lalign_n_top=10;
559 sprintf (PW_CL->matrix_for_aa_group, group_mat);
562 else if ( strm ( mode,"viterbi_pair"))
566 PW_CL->use_fragments=0;
567 sprintf (PW_CL->dp_mode, "viterbi_pair_wise");
569 PW_CL->get_dp_cost=slow_get_dp_cost;
570 PW_CL->evaluate_residue_pair=evaluate_matrix_score;
573 else if ( strm ( mode,"glocal_pair"))
577 PW_CL->use_fragments=0;
578 sprintf (PW_CL->dp_mode, "glocal_pair_wise");
579 sprintf (PW_CL->matrix_for_aa_group, group_mat);
582 PW_CL->get_dp_cost=slow_get_dp_cost;
583 PW_CL->evaluate_residue_pair=evaluate_matrix_score;
586 else if ( strm ( mode,"test_pair"))
590 PW_CL->use_fragments=0;
591 sprintf (PW_CL->dp_mode, "test_pair_wise");
593 else if ( strm ( mode,"sticky_pair"))
597 PW_CL->use_fragments=0;
599 PW_CL->get_dp_cost=cw_profile_get_dp_cost;
600 PW_CL->evaluate_residue_pair=evaluate_matrix_score;
602 sprintf (PW_CL->dp_mode, "gotoh_pair_wise_lgp_sticky");
603 sprintf (PW_CL->matrix_for_aa_group, group_mat);
606 else if ( strm ( mode,"slow_pair")|| strm (mode, "islow_pair" ) )
611 PW_CL->use_fragments=0;
612 sprintf (PW_CL->dp_mode, "myers_miller_pair_wise");
613 sprintf (PW_CL->matrix_for_aa_group, group_mat);
615 if ( strm ( "islow_pair", mode))
617 PW_CL->get_dp_cost=slow_get_dp_cost;
618 PW_CL->evaluate_residue_pair=residue_pair_extended_list;
621 else if ( strm ("slow_pair", mode) )
624 PW_CL->get_dp_cost=cw_profile_get_dp_cost;
625 PW_CL->evaluate_residue_pair=evaluate_matrix_score;
629 else if ( strm (mode, "subop1_pair"))
634 PW_CL->use_fragments=0;
635 sprintf (PW_CL->dp_mode, "subop1_pair_wise");
636 sprintf (PW_CL->matrix_for_aa_group, group_mat);
638 PW_CL->get_dp_cost=slow_get_dp_cost;
639 PW_CL->evaluate_residue_pair=evaluate_matrix_score;
642 else if ( strm (mode, "biphasic_pair"))
646 PW_CL->use_fragments=0;
647 sprintf (PW_CL->dp_mode, "biphasic_pair_wise");
648 sprintf (PW_CL->matrix_for_aa_group, group_mat);
650 PW_CL->get_dp_cost=cw_profile_get_dp_cost;
651 PW_CL->evaluate_residue_pair=evaluate_matrix_score;
654 else if ( strm (mode, "proba_pair"))
659 PW_CL->use_fragments=0;
660 sprintf (PW_CL->dp_mode, "proba_pair_wise");
661 sprintf (PW_CL->matrix_for_aa_group, group_mat);
663 PW_CL->get_dp_cost=slow_get_dp_cost;
664 PW_CL->evaluate_residue_pair=evaluate_matrix_score;
668 else if ( strm (mode, "diaa_slow_pair"))
673 PW_CL->use_fragments=0;
674 sprintf (PW_CL->dp_mode, "gotoh_pair_wise_lgp");
675 sprintf (PW_CL->matrix_for_aa_group, group_mat);
677 PW_CL->get_dp_cost=slow_get_dp_cost;
678 PW_CL->evaluate_residue_pair=evaluate_diaa_matrix_score;
681 else if ( strm (mode, "r_slow_pair"))
685 PW_CL->use_fragments=0;
686 sprintf (PW_CL->dp_mode, "gotoh_pair_wise_lgp");
687 sprintf (PW_CL->matrix_for_aa_group, group_mat);
689 PW_CL->get_dp_cost=slow_get_dp_cost;
690 PW_CL->evaluate_residue_pair=evaluate_matrix_score;
692 PW_CL->reverse_seq=1;
694 else if ( strm (mode, "tm_slow_pair"))
698 PW_CL->use_fragments=0;
699 sprintf (PW_CL->dp_mode, "myers_miller_pair_wise");
700 sprintf (PW_CL->matrix_for_aa_group, group_mat);
702 PW_CL->get_dp_cost=slow_get_dp_cost;
703 PW_CL->evaluate_residue_pair=evaluate_tm_matrix_score;
706 else if ( strm (mode, "monoaa_slow_pair"))
711 PW_CL->use_fragments=0;
712 sprintf (PW_CL->dp_mode, "gotoh_pair_wise_lgp");
713 sprintf (PW_CL->matrix_for_aa_group, group_mat);
715 PW_CL->get_dp_cost=slow_get_dp_cost;
716 PW_CL->evaluate_residue_pair=evaluate_monoaa_matrix_score;
719 else if ( strm (mode, "subop2_pair"))
724 PW_CL->use_fragments=0;
725 sprintf (PW_CL->dp_mode, "subop2_pair_wise");
726 sprintf (PW_CL->matrix_for_aa_group, group_mat);
728 PW_CL->get_dp_cost=slow_get_dp_cost;
729 PW_CL->evaluate_residue_pair=evaluate_matrix_score;
733 else if (strm ( mode, "exon2_pair"))
739 PW_CL->use_fragments=0;
740 sprintf (PW_CL->dp_mode, "myers_miller_pair_wise");
741 sprintf (PW_CL->matrix_for_aa_group, group_mat);
744 for ( a=0; a<60; a++)
746 PW_CL->M['x'-'A'][a]=0;
747 PW_CL->M[a]['x'-'A']=0;
748 PW_CL->M['X'-'A'][a]=0;
749 PW_CL->M[a]['X'-'A']=0;
751 PW_CL->evaluate_residue_pair=evaluate_matrix_score;
754 else if (strm ( mode, "exon3_pair"))
760 PW_CL->use_fragments=0;
761 sprintf (PW_CL->dp_mode, "myers_miller_pair_wise");
762 sprintf (PW_CL->matrix_for_aa_group, group_mat);
765 for ( a=0; a<60; a++)
767 PW_CL->M['x'-'A'][a]=0;
768 PW_CL->M[a]['x'-'A']=0;
769 PW_CL->M['X'-'A'][a]=0;
770 PW_CL->M[a]['X'-'A']=0;
772 PW_CL->evaluate_residue_pair=evaluate_matrix_score;
775 else if (strm ( mode, "exon_pair"))
781 PW_CL->use_fragments=0;
782 sprintf (PW_CL->dp_mode, "myers_miller_pair_wise");
783 sprintf (PW_CL->matrix_for_aa_group, group_mat);
786 for ( a=0; a<60; a++)
788 PW_CL->M['x'-'A'][a]=0;
789 PW_CL->M[a]['x'-'A']=0;
790 PW_CL->M['X'-'A'][a]=0;
791 PW_CL->M[a]['X'-'A']=0;
793 PW_CL->evaluate_residue_pair=evaluate_matrix_score;
796 else if ( strm ( mode , "lalign_len_pair"))
801 PW_CL->use_fragments=0;
802 PW_CL->pair_wise=sim_pair_wise_lalign;
803 PW_CL->evaluate_residue_pair=evaluate_matrix_score;
804 PW_CL->get_dp_cost=slow_get_dp_cost;
805 PW_CL->lalign_n_top=CL->lalign_n_top;
806 sprintf (PW_CL->matrix_for_aa_group, group_mat);
809 else if ( strm ( mode , "lalign_id_pair"))
814 PW_CL->use_fragments=0;
815 PW_CL->pair_wise=sim_pair_wise_lalign;
816 PW_CL->evaluate_residue_pair=evaluate_matrix_score;
817 PW_CL->get_dp_cost=slow_get_dp_cost;
818 PW_CL->lalign_n_top=CL->lalign_n_top;
819 sprintf (PW_CL->matrix_for_aa_group, group_mat);
822 else if ( strm ( mode , "tm_lalign_id_pair"))
827 PW_CL->use_fragments=0;
828 PW_CL->pair_wise=sim_pair_wise_lalign;
829 PW_CL->evaluate_residue_pair=evaluate_tm_matrix_score;
830 PW_CL->get_dp_cost=slow_get_dp_cost;
831 PW_CL->lalign_n_top=CL->lalign_n_top;
832 sprintf (PW_CL->matrix_for_aa_group, group_mat);
836 else if ( strm ( mode, "cdna_cfast_pair"))
842 PW_CL->use_fragments=0;
843 sprintf (PW_CL->dp_mode, "cfasta_cdna_pair_wise");
845 PW_CL->M=read_matrice (strcpy ( mat, "blosum62mt"));
848 PW_CL->f_gop=CL->f_gop;
849 PW_CL->f_gep=CL->f_gep;
850 PW_CL->get_dp_cost=get_dp_cost;
851 PW_CL->evaluate_residue_pair=evaluate_cdna_matrix_score;
854 else if ( strm ( mode, "cdna_fast_pair") || strncmp (mode,"cdna_fast_pair",14)==0)
860 PW_CL->use_fragments=0;
861 sprintf (PW_CL->dp_mode, "fasta_cdna_pair_wise");
869 PW_CL->get_dp_cost=get_dp_cost;
870 PW_CL->evaluate_residue_pair=evaluate_cdna_matrix_score;
875 free_constraint_list (PW_CL);
880 if (!strm (CL->method_evaluate_mode, "default"))
882 choose_extension_mode (CL->method_evaluate_mode, PW_CL);
886 /******************************************************************/
887 /* MULTIPLE ALIGNMENTS */
890 /******************************************************************/
891 Alignment * compute_prrp_aln (Alignment *A, Constraint_list *CL)
898 tmpseq=vtmpnam(NULL);
899 tmpaln=vtmpnam(NULL);
902 A=seq2aln ( CL->S, A, 1);
903 output_gotoh_seq (tmpseq, A);
904 sprintf ( command, "prrp -E/dev/null -o%s -F9 %s >/dev/null", tmpaln, tmpseq);
906 if (!check_file_exists(tmpaln)){return NULL;}
907 S=get_fasta_sequence (tmpaln, NULL);
911 free_sequence (S, S->nseq);
916 Alignment *seq2clustalw_aln (Sequence *S)
918 return aln2clustalw_aln (seq2aln (S, NULL,RM_GAP), NULL);
921 Alignment * aln2clustalw_aln (Alignment *B, Constraint_list *CL)
923 char *seq=NULL,*aln=NULL, command[1000];
925 output_fasta_seq (seq=vtmpnam (NULL), B);
926 sprintf ( command, "clustalw -infile=%s -outorder=input -outfile=%s %s", seq, aln=vtmpnam (NULL), TO_NULL_DEVICE);
929 if (!check_file_exists(aln))
931 else{B->nseq=0;return main_read_aln(aln,B);}
933 Alignment * compute_tcoffee_aln_quick (Alignment *A, Constraint_list *CL)
940 tmpseq=vtmpnam(NULL);
941 tmpaln=vtmpnam(NULL);
943 if ( CL)A=seq2aln ( CL->S, A, 1);
944 output_fasta_seq (tmpseq, A);
946 sprintf ( command, "t_coffee -seq %s -very_fast -outfile %s -quiet ",tmpseq,tmpaln);
949 if (!check_file_exists(tmpaln))return NULL;
951 A=main_read_aln(tmpaln,A);
958 Alignment * compute_clustalw_aln (Alignment *A, Constraint_list *CL)
965 tmpseq=vtmpnam(NULL);
966 tmpaln=vtmpnam(NULL);
968 A=seq2aln ( CL->S, A, 1);
969 output_fasta_seq (tmpseq, A);
971 sprintf ( command, "clustalw %sinfile=%s %soutfile=%s %s",CWF, tmpseq,CWF, tmpaln,TO_NULL_DEVICE);
974 if (!check_file_exists(tmpaln))return NULL;
976 A=main_read_aln(tmpaln,A);
983 Alignment * realign_block ( Alignment *A, int col1, int col2, char *pg)
985 /*Uses pg: (pg -infile=<infile> -outfile=<outfile> to realign the block [col1 col2[
986 Only guaranteed if pg can handle empty sequences
987 set pg to NULL to use the default program
991 Alignment *L, *M, *R;
994 char command[1000], script[1000];
998 seq_name=vtmpnam(NULL);
999 aln_name=vtmpnam (NULL);
1001 L=copy_aln (A, NULL);
1002 M=copy_aln (A, NULL);
1003 R=copy_aln (A, NULL);
1005 L=extract_aln ( L, 0, col1);
1006 M=extract_aln ( M, col1, col2);
1007 R=extract_aln ( R, col2, A->len_aln);
1008 output_fasta_seq (seq_name, M);
1010 sprintf ( script, "%s", (pg==NULL)?"t_coffee":pg);
1012 sprintf ( command, "%s -infile=%s -outfile=%s %s", script,seq_name, aln_name, TO_NULL_DEVICE);
1013 my_system ( command);
1015 M=main_read_aln (aln_name, NULL);
1018 M=reorder_aln (M, L->name,L->nseq);
1022 free_aln (L);free_aln (M); free_aln (R);
1031 /******************************************************************/
1035 /******************************************************************/
1038 /******************************************************************/
1042 /******************************************************************/
1048 Constraint_list * align_pdb_pair_2 (char *seq, Constraint_list *CL)
1050 char *tmp_name=NULL;
1054 static char *command;
1055 static char *program;
1057 tmp_name=vtmpnam ( NULL);
1059 if ( !program)program=vcalloc ( LONG_STRING, sizeof (char));
1060 if ( !command)command=vcalloc ( LONG_STRING, sizeof (char));
1062 #ifndef ALIGN_PDB_4_TCOFFEE
1063 if ( getenv ( "ALIGN_PDB_4_TCOFFEE")==NULL)crash ("ALIGN_PDB_4_TCOFFEE IS NOT DEFINED");
1064 else sprintf ( program, "%s", (getenv ( "ALIGN_PDB_4_TCOFFEE")));
1066 if ( getenv ( "ALIGN_4_TCOFFEE")==NULL)sprintf (program, "%s", ALIGN_PDB_4_TCOFFEE);
1067 else sprintf ( program, "%s", (getenv ( "ALIGN_PDB_4_TCOFFEE")));
1070 atoi(strtok (seq,SEPARATORS));
1071 s1=atoi(strtok (NULL,SEPARATORS));
1072 s2=atoi(strtok (NULL,SEPARATORS));
1074 sprintf ( command , "%s -in P%s P%s -gapopen=-40 -max_delta=2.5 -gapext=0 -scale=0 -hasch_mode=hasch_ca_trace_bubble -maximum_distance=10 -output pdb_constraint_list -outfile stdout> %s%s",program, (CL->S)->file[s1], (CL->S)->file[s2], get_cache_dir(),tmp_name);
1076 my_system ( command);
1077 CL=read_constraint_list_file(CL, tmp_name);
1080 vremove ( tmp_name);
1086 Constraint_list *align_pdb_pair (char *seq_in, char *dp_mode,char *evaluate_mode, char *file, Constraint_list *CL, Job_TC *job)
1094 Constraint_list *PWCL;
1097 sprintf ( seq, "%s",seq_in);
1098 atoi(strtok (seq,SEPARATORS));
1099 s1=atoi(strtok (NULL,SEPARATORS));
1100 s2=atoi(strtok (NULL,SEPARATORS));
1104 sprintf (name1, "%s%s_%s.%s.align_pdb", get_cache_dir(),(CL->S)->name[s1], (CL->S)->name[s2], dp_mode);
1105 sprintf (name2, "%s%s_%s.%s.align_pdb", get_cache_dir(),(CL->S)->name[s2], (CL->S)->name[s1], dp_mode);
1108 if ( check_file_exists (name1) && is_lib(name1))CL=read_constraint_list_file(CL,name1);
1109 else if ( check_file_exists (name2) && is_lib(name2))CL=read_constraint_list_file(CL,name2);
1112 PWCL=set_constraint_list4align_pdb ( CL,s1,dp_mode, evaluate_mode, NULL);
1113 PWCL=set_constraint_list4align_pdb ( CL,s2,dp_mode, evaluate_mode, NULL);
1114 ((job->param)->TCM)->PW_CL=PWCL;
1116 output_constraints (name1, "100", F);
1117 CL=aln2constraint_list (F, CL, "100");
1123 Constraint_list * profile_pair (TC_method *M , char *in_seq, Constraint_list *CL)
1128 char *result,*prf1_file, *prf2_file;
1129 Alignment *F=NULL, *A1, *A2;
1131 char command[10000];
1134 if ( M->executable2[0]=='\0')
1135 fprintf ( stderr, "\nERROR: profile_pair requires a method: thread_pair@EP@executable2@<method> [FATAL:%s]\n", PROGRAM);
1138 sprintf ( seq, "%s", in_seq);
1139 atoi(strtok (seq,SEPARATORS));
1140 s1=atoi(strtok (NULL,SEPARATORS));
1141 s2=atoi(strtok (NULL,SEPARATORS));
1143 A1=seq2R_template_profile(CL->S,s1);
1144 A2=seq2R_template_profile(CL->S,s2);
1147 prf1_file=vtmpnam (NULL);
1148 fp=vfopen (prf1_file, "w");
1151 fprintf (fp, ">%s\n%s\n",(CL->S)->name[s1], aln2cons_seq_mat(A1, "blosum62mt"));
1152 for ( a=0; a< A1->nseq; a++)fprintf (fp, ">prf_seq1_%d\n%s\n", a, A1->seq_al[a]);
1156 fprintf ( fp, ">%s\n%s\n", (CL->S)->name[s1], (CL->S)->seq[s1]);
1160 prf2_file=vtmpnam (NULL);
1161 fp=vfopen (prf2_file, "w");
1164 fprintf (fp, ">%s\n%s\n",(CL->S)->name[s2], aln2cons_seq_mat(A2, "blosum62mt"));
1165 for ( a=0; a< A2->nseq; a++)fprintf (fp, ">prf_seq2_%d\n%s\n", a, A2->seq_al[a]);
1169 fprintf ( fp, ">%s\n%s\n", (CL->S)->name[s2], (CL->S)->seq[s2]);
1173 result=vtmpnam (NULL);
1176 param=vcalloc(strlen (M->param)+1, sizeof (char));
1177 sprintf ( param, "%s", M->param);
1178 param=substitute ( param, " ", "");
1179 param=substitute ( param, "\n", "");
1182 sprintf ( command, "tc_generic_method.pl -mode=profile_pair -method=%s %s%s %s%s %s%s -param=%s -tmpdir=%s", M->executable2,M->in_flag,prf1_file, M->in_flag2,prf2_file,M->out_flag, result, param, get_tmp_4_tcoffee());
1184 my_system ( command);
1188 if ( !check_file_exists (result))
1190 fprintf ( stderr, "\n\tprofile_pair/%s failed:\n\t%s\n",M->executable2, command);
1191 myexit (EXIT_FAILURE);
1193 else if ( is_lib (result))
1195 CL=read_constraint_list_file(CL,result);
1197 else if ( is_aln (result))
1199 F=main_read_aln (result, NULL);
1200 char *name1, *name2;
1201 name1=(CL->S)->name[s1];
1202 name2=(CL->S)->name[s2];
1204 fp=vfopen (result, "w");
1205 for ( a=0; a< F->nseq; a++)
1206 if (strm ( F->name[a], name1) || strm (F->name[a], name2))
1207 fprintf ( fp, ">%s\n%s\n", F->name[a], F->seq_al[a]);
1210 F=main_read_aln (result, NULL);
1211 CL=aln2constraint_list (F, CL, "sim");
1216 Constraint_list * pdbid_pair (TC_method *M , char *in_seq, Constraint_list *CL)
1220 char *alternative_method;
1221 char *current_,method;
1224 char *result, *pdb1,*pdb1_file, *pdb2, *pdb2_file;
1229 if ( M->executable2[0]=='\0')
1231 fprintf ( stderr, "\nERROR: pdbid_pair requires a structural alignment method: pdb_pair@EP@EXECUTABLE2@<method> [FATAL:%s]\n", PROGRAM);
1232 myexit (EXIT_FAILURE);
1234 sprintf ( seq, "%s", in_seq);
1237 atoi(strtok (seq,SEPARATORS));
1238 s1=atoi(strtok (NULL,SEPARATORS));
1239 s2=atoi(strtok (NULL,SEPARATORS));
1241 pdb1=seq2P_pdb_id(CL->S,s1);
1242 pdb2=seq2P_pdb_id(CL->S,s2);
1244 if (!is_pdb_name (pdb1) || !is_pdb_name(pdb2))
1250 result=vtmpnam (NULL);
1251 command = vcalloc ( 1000, sizeof (char));
1253 sprintf ( command, "tc_generic_method.pl -mode=pdbid_pair -method=%s %s%s %s%s %s%s -email=%s -cache=%s -tmpdir=%s", M->executable2,M->in_flag,pdb1, M->in_flag2,pdb2,M->out_flag, result, Email(ENV,SET),get_cache_dir(), get_tmp_4_tcoffee());
1254 my_system ( command);
1256 if (file_is_empty (result))return CL;
1259 F=main_read_aln (result, NULL);
1263 fprintf ( stderr, "\n\tpdb_pair/%s failed:\n\t%s\n",M->executable2, command);
1268 sprintf ( F->name[0],"%s", (CL->S)->name[s1]);
1269 sprintf ( F->name[1],"%s", (CL->S)->name[s2]);
1270 F=fix_aln_seq (F, CL->S);
1271 CL=aln2constraint_list (F, CL, "sim");
1278 Constraint_list * pdb_pair (TC_method *M , char *in_seq, Constraint_list *CL)
1283 char *result, *pdb1,*pdb1_file, *pdb2, *pdb2_file;
1287 char command[10000];
1289 if ( M->executable2[0]=='\0')
1291 fprintf ( stderr, "\nERROR: pdb_pair requires a structural alignment method: pdb_pair@EP@EXECUTABLE2@<method> [FATAL:%s]\n", PROGRAM);
1292 myexit (EXIT_FAILURE);
1295 sprintf ( seq, "%s", in_seq);
1298 atoi(strtok (seq,SEPARATORS));
1299 s1=atoi(strtok (NULL,SEPARATORS));
1300 s2=atoi(strtok (NULL,SEPARATORS));
1302 pdb1=seq2P_template_file(CL->S,s1);
1303 pdb2=seq2P_template_file(CL->S,s2);
1304 if ( !pdb1 || !pdb2) return CL;
1307 pdb1_file=vtmpnam (NULL);
1308 pdb2_file=vtmpnam (NULL);
1312 sprintf ( command, "extract_from_pdb -infile %s -atom ALL -chain FIRST -nodiagnostic > %s", pdb1, pdb1_file);
1313 my_system (command);
1317 sprintf ( command, "extract_from_pdb -infile %s -atom ALL -chain FIRST -nodiagnostic > %s", pdb2, pdb2_file);
1318 my_system (command);
1321 result=vtmpnam (NULL);
1323 sprintf ( command, "tc_generic_method.pl -mode=pdb_pair -method=%s %s%s %s%s %s%s -tmpdir=%s", M->executable2,M->in_flag,pdb1_file, M->in_flag2,pdb2_file,M->out_flag, result, get_tmp_4_tcoffee());
1324 my_system ( command);
1326 F=main_read_aln (result, NULL);
1330 fprintf ( stderr, "\n\tpdb_pair/%s failed:\n\t%s\n",M->executable2, command);
1335 sprintf ( F->name[0],"%s", (CL->S)->name[s1]);
1336 sprintf ( F->name[1],"%s", (CL->S)->name[s2]);
1337 F=fix_aln_seq (F, CL->S);
1338 CL=aln2constraint_list (F, CL, "sim");
1346 Constraint_list * seq_msa (TC_method *M , char *in_seq, Constraint_list *CL)
1349 char *infile, *outfile;
1356 infile=vtmpnam (NULL);
1357 outfile=vtmpnam (NULL);
1359 sprintf ( seq, "%s", in_seq);
1361 n=atoi(strtok (seq,SEPARATORS));
1363 fp=vfopen (infile, "w");
1364 for ( a=0; a<n; a++)
1366 s=atoi(strtok (NULL,SEPARATORS));
1368 fprintf (fp, ">%s\n%s\n", (CL->S)->name[s], (CL->S)->seq[s]);
1372 sprintf ( command, "t_coffee -other_pg tc_generic_method.pl -mode=seq_msa -method=%s %s%s %s%s -tmpdir=%s", M->executable2, M->in_flag, infile, M->out_flag, outfile, get_tmp_4_tcoffee());
1373 my_system (command);
1376 if ( strm (M->out_mode, "aln") || strm (M->out_mode, "A"))
1378 F=main_read_aln (outfile, NULL);
1381 fprintf ( stderr, "\n\tseq_msa/%s failed:\n\t%s\n", M->executable2,command);
1385 CL=aln2constraint_list (F, CL, "sim");
1389 else if ( strm (M->out_mode, "fL")|| strm (M->out_mode, "lib"))
1391 Constraint_list *NCL;
1392 NCL=read_constraint_list_file(CL,outfile);
1395 fprintf ( stderr, "\n\tseq_msa/%s failed:\n\t%s\n", M->executable2,command);
1404 Constraint_list * thread_pair (TC_method *M , char *in_seq, Constraint_list *CL)
1410 if ( M->executable2[0]=='\0')
1412 fprintf ( stderr, "\nERROR: thread_pair requires a threading method: pdb_pair@EP@EXECUTABLE2@<method> [FATAL:%s]\n", PROGRAM);
1413 myexit (EXIT_FAILURE);
1416 sprintf ( seq, "%s", in_seq);
1417 atoi(strtok (seq,SEPARATORS));
1418 s1=atoi(strtok (NULL,SEPARATORS));
1419 s2=atoi(strtok (NULL,SEPARATORS));
1421 CL=thread_pair2(M,s1, s2, CL);
1422 CL=thread_pair2(M,s2, s1, CL);
1429 Constraint_list* thread_pair2 ( TC_method *M, int s1, int s2, Constraint_list *CL)
1431 char *result, *pep, *pdb, *pdb1;
1435 char command[10000];
1437 STL=(CL)?CL->STRUC_LIST:NULL;
1440 if ( !(CL->S) || !((CL->S)->T[s1]) || !((CL->S)->T[s1])->P || !seq2P_template_file(CL->S,s1))return CL;
1441 else pdb1=seq2P_template_file(CL->S,s1);
1445 result=vtmpnam (NULL);
1448 sprintf ( command, "extract_from_pdb -infile %s -atom ALL -chain FIRST -nodiagnostic > %s", pdb1, pdb);
1449 my_system (command);
1453 fp=vfopen (pep, "w");
1454 fprintf ( fp, ">%s\n%s\n",(CL->S)->name[s2],(CL->S)->seq[s2] );
1456 sprintf ( command, "tc_generic_method.pl -mode=thread_pair -method=%s %s%s %s%s %s%s -tmpdir=%s", M->executable2,M->in_flag,pep, M->in_flag2,pdb,M->out_flag, result, get_tmp_4_tcoffee());
1457 my_system ( command);
1458 F=main_read_aln (result, NULL);
1462 fprintf ( stderr, "\n\tthread_pair/%s failed:\n\t%s\n", M->executable2,command);
1466 sprintf ( F->name[0],"%s", (CL->S)->name[s1]);
1467 sprintf ( F->name[1],"%s", (CL->S)->name[s2]);
1468 F=fix_aln_seq (F, CL->S);
1469 CL=aln2constraint_list (F, CL, "sim");
1477 Constraint_list * lsqman_pair ( char *in_seq, Constraint_list *CL)
1480 static CLIST_TYPE *entry;
1481 char command[STRING];
1484 char *seq_file, *lsqman_result, *tmp_name1;
1488 sprintf ( seq, "%s", in_seq);
1491 if ( !entry)entry=vcalloc ( LIST_N_FIELDS, sizeof ( CLIST_TYPE ));
1493 atoi(strtok (seq,SEPARATORS));
1494 s1=atoi(strtok (NULL,SEPARATORS));
1495 s2=atoi(strtok (NULL,SEPARATORS));
1498 tmp_name1=vcalloc (100, sizeof (char));
1499 sprintf ( tmp_name1, "%s_%s.lsqman_aln", (CL->S)->name[s1], (CL->S)->name[s2]);
1500 if ( check_file_exists ( tmp_name1) && (F=main_read_aln(tmp_name1, NULL))!=NULL)
1503 lsqman_result=tmp_name1;
1508 seq_file=vtmpnam (NULL);
1509 lsqman_result=tmp_name1;
1510 fp=vfopen (seq_file, "w");
1511 fprintf ( fp, ">%s\n%s\n",(CL->S)->name[s1],(CL->S)->seq[s2] );
1513 sprintf ( command, "%s -pdb %s -pep %s > %s%s", LSQMAN_4_TCOFFEE, (CL->S)->name[s1], seq_file,get_cache_dir(), lsqman_result);
1517 my_system ( command);
1518 F=main_read_aln (lsqman_result, NULL);
1521 fprintf ( stderr, "\n\tlsqman failed: will be retried");
1522 if ( n_failure==0)fprintf ( stderr, "\n\t%s", command);
1526 fprintf ( stderr, "\nCould not run Fugue: will replace it with slow_pair\n");
1527 vremove (lsqman_result);
1538 F=main_read_aln(lsqman_result, NULL);
1539 /*sprintf ( F->name[0],"%s", (CL->S)->name[s1]);
1540 sprintf ( F->name[1],"%s", (CL->S)->name[s2]);
1542 F=fix_aln_seq (F, CL->S);
1548 CL=aln2constraint_list (F, CL, "100");
1553 Constraint_list * sap_pair (char *seq, char *weight, Constraint_list *CL)
1557 char full_name[FILENAMELEN];
1558 char *tmp_pdb1, *tmp_pdb2;
1559 char *sap_seq1, *sap_seq2;
1560 char *sap_lib, *tmp_name, *tmp_name1, *tmp_name2;
1562 int s1, s2, r1=0, r2=0;
1563 int sim=0, tot=0, score=0;
1565 char program[STRING];
1566 char *string1, *string2, *string3, *string4, *string5;
1567 int max_struc_len=10000;
1568 char *template1, *template2;
1570 /*check_program_is_installed ( "sap" ,SAP_4_TCOFFEE, "SAP_4_TCOFFEE",MAIL, IS_FATAL);*/
1575 atoi(strtok (seq,SEPARATORS));
1576 s1=atoi(strtok (NULL,SEPARATORS));
1577 s2=atoi(strtok (NULL,SEPARATORS));
1579 template1=seq2T_value(CL->S,s1, "template_name", "_P_");
1580 template2=seq2T_value(CL->S,s2, "template_name", "_P_");
1583 if (!template1 || !template2) return CL;
1586 declare_name (string1);
1587 declare_name (string2);
1588 declare_name (string3);
1589 declare_name (string4);
1590 declare_name (string5);
1592 #ifndef SAP_4_TCOFFEE
1593 if ( getenv ( "SAP_4_TCOFFEE")==NULL)crash ("SAP_4_TCOFFEE IS NOT DEFINED");
1594 else sprintf ( program, "%s", (getenv ( "SAP_4_TCOFFEE")));
1596 if ( getenv ( "SAP_4_TCOFFEE")==NULL)sprintf (program, "%s", SAP_4_TCOFFEE);
1597 else sprintf ( program, "%s", (getenv ( "SAP_4_TCOFFEE")));
1602 tmp_name1=vcalloc (100, sizeof (char));
1603 sprintf ( tmp_name1, "%s_%s.sap_results",template1,template2);
1604 tmp_name2=vcalloc (100, sizeof (char));
1605 sprintf ( tmp_name2, "%s_%s.sap_results",template2,template1);
1609 if (is_sap_file (tmp_name1))
1613 else if ( is_sap_file (tmp_name2))
1622 tmp_pdb1=normalize_pdb_file(seq2P_template_file(CL->S,s1),(CL->S)->seq[s1], vtmpnam (NULL));
1623 tmp_pdb2=normalize_pdb_file(seq2P_template_file(CL->S,s2),(CL->S)->seq[s2], vtmpnam (NULL));
1624 sprintf ( full_name, "%s%s", get_cache_dir (), tmp_name);
1625 printf_system ("%s %s %s >%s",program,tmp_pdb1,tmp_pdb2, full_name);
1627 if ( !check_file_exists (full_name) || !is_sap_file(full_name))
1629 add_warning ( stderr, "WARNING: SAP failed to align: %s against %s [%s:WARNING]\n", seq2P_template_file(CL->S,s1),seq2P_template_file(CL->S,s2), PROGRAM);
1630 if ( check_file_exists (full_name))add2file2remove_list (full_name);
1633 if ( flag_file2remove_is_on())add2file2remove_list (full_name);
1634 remove ("super.pdb");
1639 sap_seq1=vcalloc (max_struc_len, sizeof (char));
1640 sap_seq2=vcalloc (max_struc_len, sizeof (char));
1643 fp=find_token_in_file ( tmp_name, NULL, "Percent");
1644 fp=find_token_in_file ( tmp_name, fp , "Percent");
1645 while ( (fgetc (fp))!='\n');
1646 while ((buf=vfgets (buf, fp)))
1649 if ( !strstr (buf, "eighted"))
1651 remove_charset (buf, "!alnum");
1653 r2=buf[strlen(buf)-1];
1657 if ( tot>max_struc_len)
1658 {max_struc_len+=max_struc_len;
1659 sap_seq1=vrealloc ( sap_seq1, sizeof(char)*max_struc_len);
1660 sap_seq2=vrealloc ( sap_seq2, sizeof(char)*max_struc_len);
1669 if ( is_number (weight))score=atoi(weight);
1670 else if ( strstr ( weight, "OW"))
1673 sscanf ( weight, "OW%d", &ow);
1683 sap_seq1[tot]=sap_seq2[tot]='\0';
1686 fp=vfopen ( sap_lib=vtmpnam(NULL), "w");
1687 fprintf (fp, "! TC_LIB_FORMAT_01\n");
1688 fprintf (fp, "2\n");
1689 fprintf (fp, "%s %d %s\n", (CL->S)->name[s2],(int)strlen (sap_seq1), sap_seq1);
1690 fprintf (fp, "%s %d %s\n", (CL->S)->name[s1],(int)strlen (sap_seq2), sap_seq2);
1691 fprintf (fp, "#1 2\n");
1693 for ( a=0; a< tot; a++)
1696 fprintf (fp, "%d %d %d 1 0\n", a+1, a+1, score);
1699 fprintf (fp, "! CPU 0\n");
1700 fprintf (fp, "! SEQ_1_TO_N\n");
1702 CL=read_constraint_list_file(CL,sap_lib);
1705 vfree ( string1);vfree ( string2);vfree ( string3);vfree ( string4);vfree ( string5);
1706 vfree (sap_seq1); vfree(sap_seq2);vfree (tmp_name1); vfree(tmp_name2);
1713 Constraint_list *rna_pair (TC_method *M ,
1715 Constraint_list *CL)
1719 char *result, *pdb1, *pdb2, *pdb1_file, *pdb2_file;
1724 char command[10000];
1726 if ( M->executable2[0]=='\0')
1728 fprintf ( stderr, "\nERROR: rna_pair requires a structural alignment method: pdb_pair@EP@EXECUTABLE2@<method> [FATAL:%s]\n", PROGRAM);
1729 myexit (EXIT_FAILURE);
1732 sprintf ( seq, "%s", in_seq);
1735 atoi(strtok (seq,SEPARATORS));
1736 s1=atoi(strtok (NULL,SEPARATORS));
1737 s2=atoi(strtok (NULL,SEPARATORS));
1739 pdb1=seq2P_template_file(CL->S,s1);
1740 pdb1_file=vtmpnam (NULL);
1741 sprintf ( command, "extract_from_pdb -infile %s -atom ALL -chain FIRST -nodiagnostic > %s", pdb1, pdb1_file);
1742 my_system (command);
1744 pdb2=seq2P_template_file(CL->S,s2);
1745 pdb2_file=vtmpnam (NULL);
1746 sprintf ( command, "extract_from_pdb -infile %s -atom ALL -chain FIRST -nodiagnostic > %s", pdb2, pdb2_file);
1747 my_system (command);
1749 result=vtmpnam (NULL);
1751 sprintf ( command, "tc_generic_method.pl -mode=rna_pair -method=%s %s%s %s%s %s%s -tmpdir=%s", M->executable2,M->in_flag,pdb1_file, M->in_flag2,pdb2_file,M->out_flag, result, get_tmp_4_tcoffee());
1753 my_system ( command);
1755 F=main_read_aln (result, NULL);
1760 fprintf ( stderr, "\n\trna_pair/%s failed:\n\t%s\n",M->executable2, command);
1764 sprintf ( F->name[0],"%s", (CL->S)->name[s1]);
1765 sprintf ( F->name[1],"%s", (CL->S)->name[s2]);
1766 F=fix_aln_seq (F, CL->S);
1767 CL=aln2constraint_list (F, CL, "sim");
1776 /******************************************************************/
1777 /* GENERIC PAIRWISE METHODS */
1780 /******************************************************************/
1784 Constraint_list * best_pair4rna(Job_TC *job)
1791 Constraint_list *PW_CL;
1792 Constraint_list *CL, *RCL;
1795 TC_method *M, *sara_pairM, *proba_pairM;
1801 struct X_template *r1, *r2, *p1, *p2;
1802 static int **blosum;
1804 if (!seq)seq=vcalloc (100, sizeof (char));
1807 M=(job->param)->TCM;
1810 for (a=0; a<S->nseq; a++)ml=MAX(ml, strlen (S->name[a]));
1813 if ( !strm ( retrieve_seq_type(), "RNA") )
1814 printf_exit (EXIT_FAILURE, stderr, "ERROR: RNA Sequences Only with best4rna_pair [FATAL:%s]\n",PROGRAM);
1817 seq_in=(job->param)->seq_c;
1818 sprintf (seq, "%s", seq_in);
1819 seqlist=string2num_list (seq);
1821 if ( n!=2){fprintf ( stderr, "\nERROR: best_pair can only handle two seq at a time [FATAL]\n");myexit (EXIT_FAILURE);}
1832 PW_CL=((job->param)->TCM)->PW_CL;
1835 if (!blosum)blosum=read_matrice ("blosum62mt");
1837 // id=idscore_pairseq (S->seq[s1], S->seq[s2],-10,-1,blosum, "sim");
1840 proba_pairM=method_file2TC_method(method_name2method_file ("proba_pair"));
1841 proba_pairM->PW_CL=method2pw_cl(proba_pairM, CL);
1843 sara_pairM=method_file2TC_method(method_name2method_file ("sara_pair"));
1844 sara_pairM->PW_CL=method2pw_cl(sara_pairM, CL);
1848 //Avoid Structural Tem
1851 fprintf ( stderr, "\n\t%-*s %-*s: Structure Based Alignment\n", ml,S->name[s1], ml,S->name[s2]);
1852 (job->param)->TCM=sara_pairM;
1856 fprintf ( stderr, "\n\t%-*s %-*s: Direct Sequence Alignment\n", ml,S->name[s1], ml,S->name[s2]);
1857 (job->param)->TCM=proba_pairM;
1881 Constraint_list * best_pair4prot (Job_TC *job)
1888 Constraint_list *PW_CL;
1889 Constraint_list *CL, *RCL;
1892 TC_method *M, *sap_pairM, *proba_pairM;
1898 struct X_template *r1, *r2, *p1, *p2;
1899 static int **blosum;
1901 if (!seq)seq=vcalloc (100, sizeof (char));
1904 M=(job->param)->TCM;
1907 for (a=0; a<S->nseq; a++)ml=MAX(ml, strlen (S->name[a]));
1910 if ( strm ( retrieve_seq_type(), "DNA") ||strm ( retrieve_seq_type(), "RNA") )printf_exit (EXIT_FAILURE, stderr, "ERROR: Protein Sequences Only with bestprot_pair [FATAL:%s]\n",PROGRAM);
1913 seq_in=(job->param)->seq_c;
1914 sprintf (seq, "%s", seq_in);
1915 seqlist=string2num_list (seq);
1917 if ( n!=2){fprintf ( stderr, "\nERROR: best_pair can only handle two seq at a time [FATAL]\n");myexit (EXIT_FAILURE);}
1928 PW_CL=((job->param)->TCM)->PW_CL;
1931 if (!blosum)blosum=read_matrice ("blosum62mt");
1933 id=idscore_pairseq (S->seq[s1], S->seq[s2],-10,-1,blosum, "sim");
1936 proba_pairM=method_file2TC_method(method_name2method_file ("proba_pair"));
1937 proba_pairM->PW_CL=method2pw_cl(proba_pairM, CL);
1939 sap_pairM=method_file2TC_method(method_name2method_file ("sap_pair"));
1940 sap_pairM->PW_CL=method2pw_cl(sap_pairM, CL);
1947 fprintf ( stderr, "\n\t%-*s %-*s: Direct Sequence Alignment\n", ml,S->name[s1], ml,S->name[s2]);
1948 (job->param)->TCM=proba_pairM;
1952 //Avoid Structural Tem
1955 fprintf ( stderr, "\n\t%-*s %-*s: Structure Based Alignment\n", ml,S->name[s1], ml,S->name[s2]);
1956 (job->param)->TCM=sap_pairM;
1960 fprintf ( stderr, "\n\tt%-*s %-*s: PSIBLAST Profile Alignment\n", ml,S->name[s1], ml,S->name[s2]);
1961 (job->param)->TCM=proba_pairM;
1965 fprintf ( stderr, "\n\t%-*s %-*s: Direct Sequence Alignment (No Profile)\n", ml,S->name[s1], ml,S->name[s2]);
1966 (job->param)->TCM=proba_pairM;
1977 Alignment * fast_pair (Job_TC *job)
1984 Constraint_list *PW_CL;
1985 Constraint_list *CL;
1994 M=(job->param)->TCM;
1995 PW_CL=((job->param)->TCM)->PW_CL;
1997 seq_in=(job->param)->seq_c;
2000 sprintf (seq, "%s", seq_in);
2001 seqlist=string2num_list (seq);
2003 if ( n!=2){fprintf ( stderr, "\nERROR: fast_pw_aln can only handle two seq at a time [FATAL]\n");myexit (EXIT_FAILURE);}
2007 if (!A) {A=declare_aln (CL->S);}
2010 ns=vcalloc ( 2, sizeof (int));
2011 l_s=declare_int (2,(CL->S)->nseq);
2013 buf=vcalloc ( S->nseq, sizeof (char*));
2015 for ( a=0; a< n; a++)
2018 if ( strm (M->seq_type, "G"))
2021 S->seq[s]=((((S->T[s])->G)->VG)->S)->seq[0];
2027 sprintf ( A->seq_al[a], "%s",S->seq[s]);
2028 sprintf ( A->name[a], "%s", (CL->S)->name[s]);
2040 if (PW_CL->reverse_seq)
2042 invert_string2(A->seq_al[0]);
2043 invert_string2(A->seq_al[1]);
2044 invert_string2 ((CL->S)->seq[A->order[0][0]]);
2045 invert_string2 ((CL->S)->seq[A->order[1][0]]);
2049 pair_wise ( A, ns, l_s, PW_CL);
2051 if (PW_CL->reverse_seq)
2054 invert_string2(A->seq_al[0]);
2055 invert_string2(A->seq_al[1]);
2056 invert_string2 ((CL->S)->seq[A->order[0][0]]);
2057 invert_string2 ((CL->S)->seq[A->order[1][0]]);
2061 for ( a=0; a<S->nseq; a++)
2063 if ( !buf[a] || buf[a]==S->seq[a]);
2064 else S->seq[a]=buf[a];
2066 vfree (buf);vfree (seqlist);
2070 Alignment * align_two_aln ( Alignment *A1, Alignment *A2, char *in_matrix, int gop, int gep, char *in_align_mode)
2073 Constraint_list *CL;
2078 static char *matrix;
2079 static char *align_mode;
2081 if (!matrix)matrix=vcalloc ( 100, sizeof (char));
2082 if (!align_mode)align_mode=vcalloc ( 100, sizeof (char));
2083 sprintf ( matrix, "%s", in_matrix);
2084 sprintf ( align_mode, "%s", in_align_mode);
2086 CL=vcalloc ( 1, sizeof (Constraint_list));
2087 CL->pw_parameters_set=1;
2088 CL->M=read_matrice (matrix);
2089 CL->matrices_list=declare_char (10, 10);
2091 CL->evaluate_residue_pair=evaluate_matrix_score;
2092 CL->get_dp_cost=consensus_get_dp_cost;
2100 sprintf (CL->matrix_for_aa_group, "vasiliky");
2101 CL->use_fragments=0;
2103 if ( !CL->use_fragments)CL->diagonal_threshold=0;
2104 else CL->diagonal_threshold=6;
2106 sprintf (CL->dp_mode, "%s", align_mode);
2109 A=stack_aln (A, A2);
2110 CL->S=fill_sequence_struc(A->nseq, A->seq_al,A->name);
2112 ns=vcalloc ( 2, sizeof(int));
2113 ls=declare_int ( 2,A->nseq);
2116 for ( a=0; a<ns[0]; a++)
2118 for ( a=0; a< ns[1]; a++)
2119 ls[1][a]=a+A1->nseq;
2121 A->score_aln=pair_wise (A, ns, ls,CL);
2125 S=free_constraint_list (CL);
2126 free_sequence (S,-1);
2131 static int align_two_seq_keep_case;
2132 void toggle_case_in_align_two_sequences(int value)
2134 align_two_seq_keep_case=value;
2136 Alignment * align_two_sequences ( char *seq1, char *seq2, char *in_matrix, int gop, int gep, char *in_align_mode)
2138 static Alignment *A;
2139 Constraint_list *CL;
2147 static char *matrix;
2150 static char *align_mode;
2152 if (!matrix)matrix=vcalloc ( 100, sizeof (char));
2153 if (!align_mode)align_mode=vcalloc ( 100, sizeof (char));
2154 sprintf ( align_mode, "%s", in_align_mode);
2156 CL=vcalloc ( 1, sizeof (Constraint_list));
2158 CL->pw_parameters_set=1;
2160 CL->matrices_list=declare_char (10, 10);
2163 if ( !strm (matrix, in_matrix))
2165 sprintf ( matrix,"%s", in_matrix);
2166 M=CL->M=read_matrice (matrix);
2174 if (strstr (in_align_mode, "cdna"))
2175 CL->evaluate_residue_pair=evaluate_cdna_matrix_score;
2177 CL->evaluate_residue_pair=evaluate_matrix_score;
2179 CL->get_dp_cost=get_dp_cost;
2185 sprintf (CL->matrix_for_aa_group, "vasiliky");
2186 CL->use_fragments=0;
2188 if ( !CL->use_fragments)CL->diagonal_threshold=0;
2189 else CL->diagonal_threshold=6;
2191 sprintf (CL->dp_mode, "%s", align_mode);
2193 seq_array=declare_char ( 2, MAX(strlen(seq1), strlen (seq2))+1);
2194 sprintf (seq_array[0], "%s",seq1);
2195 sprintf (seq_array[1],"%s", seq2);
2196 ungap_array(seq_array,2);
2197 if (align_two_seq_keep_case !=KEEP_CASE)string_array_lower(seq_array,2);
2199 name_array=declare_char (2, STRING);
2200 sprintf ( name_array[0], "A");
2201 sprintf ( name_array[1], "B");
2204 ns=vcalloc ( 2, sizeof(int));
2205 l_s=declare_int ( 2, 1);
2212 CL->S=fill_sequence_struc(2, seq_array, name_array);
2214 A=seq2aln(CL->S, NULL, 1);
2216 ungap (A->seq_al[0]);
2217 ungap (A->seq_al[1]);
2221 A->score_aln=pair_wise (A, ns, l_s,CL);
2225 free_char (name_array, -1);free_char ( seq_array,-1);
2228 S=free_constraint_list (CL);
2229 free_sequence (S,-1);
2235 NT_node make_root_tree ( Alignment *A,Constraint_list *CL,int gop, int gep,Sequence *S, char *tree_file,int maximise)
2238 T=make_tree (A, CL, gop, gep,S,tree_file,maximise);
2239 (T[3][0])->nseq=S->nseq;
2242 NT_node ** make_tree ( Alignment *A,Constraint_list *CL,int gop, int gep,Sequence *S, char *tree_file,int maximise)
2248 char **out_seq_name;
2252 if ( !CL || !CL->tree_mode || !CL->tree_mode[0])
2254 fprintf ( stderr, "\nERROR: No CL->tree_mode specified (make_tree::util_dp_drivers.c [FATAL:%s]", PROGRAM);
2255 myexit (EXIT_FAILURE);
2258 fprintf (CL->local_stderr , "\nMAKE GUIDE TREE \n\t[MODE=%s][",CL->tree_mode);
2266 fprintf (CL->local_stderr, "---Two Sequences Only: Make Dummy Pair-Tree ---]");
2268 fp=vfopen (tmp,"w");
2269 fprintf ( fp, "(%s:0.1, %s:0.1):0.1;\n",S->name[0], S->name[1]);
2271 T=read_tree (tmp, &tot_node, (CL->S)->nseq,(CL->S)->name);
2275 else if ( strm (CL->tree_mode, "cwph"))
2277 return seq2cw_tree ( S, tree_file);
2279 else if (strm ( CL->tree_mode, "upgma") || strm ( CL->tree_mode, "nj"))
2282 out_seq_name=S->name;
2285 CL->DM=cl2distance_matrix (CL, NOALN,NULL,NULL,0);
2289 /*Shrink the distance matrix so that it only contains the required sequences*/
2290 distances=declare_int (S->nseq, S->nseq);
2291 for (a=0; a< S->nseq; a++)
2293 ra=name_is_in_list ((S)->name[a],(CL->S)->name, (CL->S)->nseq, 100);
2294 for ( b=0; b< S->nseq; b++)
2296 rb=name_is_in_list ((S)->name[b],(CL->S)->name, (CL->S)->nseq, 100);
2297 distances[a][b]=(CL->DM)->score_similarity_matrix[ra][rb];
2303 distances=duplicate_int ( (CL->DM)->score_similarity_matrix, -1, -1);
2308 distances=sim_array2dist_array (distances, MAXID*SCORE_K);
2309 distances=normalize_array (distances, MAXID*SCORE_K, 100);
2310 if ( strm (CL->tree_mode, "order"))
2312 for ( a=0; a< S->nseq; a++)
2313 for ( b=0; b< S->nseq; b++)
2314 distances[b][a]=100;
2315 T=make_nj_tree (A,distances,gop,gep,out_seq,out_seq_name,out_nseq, tree_file, CL->tree_mode);
2317 else if ( strm (CL->tree_mode, "nj"))
2319 T=make_nj_tree (A,distances,gop,gep,out_seq,out_seq_name,out_nseq, tree_file, CL->tree_mode);
2321 else if ( strm (CL->tree_mode, "upgma"))
2322 T=make_upgma_tree (A,distances,gop,gep,out_seq,out_seq_name,out_nseq, tree_file, CL->tree_mode);
2325 printf_exit (EXIT_FAILURE, stderr, "ERROR: %s is an unknown tree computation mode [FATAL:%s]", CL->tree_mode, PROGRAM);
2327 free_int (distances, out_nseq);
2331 fprintf (CL->local_stderr , "DONE]\n");
2336 Alignment *recompute_local_aln (Alignment *A, Sequence *S,Constraint_list *CL, int scale, int gep)
2342 sort_constraint_list (CL, 0, CL->ne);
2343 coor=declare_int (A->nseq, 3);
2344 for ( a=0; a< A->nseq; a++)
2346 coor[a][0]=A->order[a][0];
2347 coor[a][1]=A->order[a][1]+1;
2348 coor[a][2]=strlen(S->seq[A->order[a][0]])-coor[a][1];
2350 B=stack_progressive_nol_aln_with_seq_coor(CL,0,0,S,coor,A->nseq);
2353 CL=compact_list (CL, 0,CL->ne, "shrink");
2359 Alignment *stack_progressive_nol_aln_with_seq_coor(Constraint_list *CL,int gop, int gep,Sequence *S, int **seq_coor, int nseq)
2362 static int ** local_coor1;
2363 static int ** local_coor2;
2364 if ( local_coor1!=NULL)free_int (local_coor1, -1);
2365 if ( local_coor2!=NULL)free_int (local_coor2, -1);
2367 local_coor1=get_nol_seq ( CL,seq_coor, nseq, S);
2368 local_coor2=minimise_repeat_coor ( local_coor1, nseq, S);
2370 return stack_progressive_aln_with_seq_coor(CL,gop, gep,S, local_coor2,nseq);
2374 Alignment *stack_progressive_aln_with_seq_coor (Constraint_list*CL,int gop, int gep, Sequence *S, int **coor, int nseq)
2378 A=seq_coor2aln (S,NULL, coor, nseq);
2380 return stack_progressive_aln ( A,CL, gop, gep);
2383 Alignment *est_progressive_aln(Alignment *A, Constraint_list *CL, int gop, int gep)
2389 n_groups=vcalloc ( 2, sizeof (int));
2390 group_list=declare_int ( 2, A->nseq);
2400 fprintf ( stderr, "\n");
2401 for ( a=1; a<n; a++)
2403 sprintf ( A->seq_al[1], "%s", A->seq_al[a]);
2404 fprintf ( stderr, "\t[%30s]->[len=%5d]", A->name[a],(int)strlen ( A->seq_al[0]));
2405 pair_wise ( A,n_groups, group_list, CL);
2407 seq=dna_aln2cons_seq(A);
2409 sprintf ( A->seq_al[0], "%s", seq);
2411 fprintf ( stderr, "\n");
2418 void analyse_seq ( Alignment *A, int s)
2430 for ( a=0; a< A->len_aln; a++)
2432 for ( b=0, c=0; b< s; b++)
2433 if ( !is_gap(A->seq_al[b][a])){c=1; break;}
2435 r=!is_gap(A->seq_al[s][a]);
2437 if ( r && c) state=1;
2438 else if ( !r && !c) state=2;
2439 else if ( !r && c) state=3;
2440 else if ( r && !c) state=4;
2442 if ( state !=pstate)
2450 score=score/(float)(((A->S)->len[s]*(A->S)->len[s]));
2451 fprintf ( stderr, "[%.2f]", score);
2456 Alignment *realign_aln ( Alignment*A, Constraint_list *CL)
2460 A=reorder_aln (A, (CL->S)->name,(CL->S)->nseq);
2462 ns=vcalloc (2, sizeof(int));
2463 ls=declare_int ( 2, A->nseq);
2465 for (a=0; a< A->nseq; a++)
2468 for (c=0,b=0; b<A->nseq; b++)if (b!=a)ls[0][c++]=b;
2469 ungap_sub_aln ( A, ns[0], ls[0]);
2473 ungap_sub_aln ( A, ns[1], ls[1]);
2474 A->score_aln=pair_wise (A, ns, ls,CL);
2477 vfree (ns); free_int (ls, -1);
2481 Alignment *realign_aln_random_bipart ( Alignment*A, Constraint_list *CL)
2488 ns=vcalloc (2, sizeof (int));
2489 l_s=declare_int (2,A->nseq);
2491 for ( a=0; a< A->nseq; a++)
2497 fprintf ( stderr, "\n");
2498 ungap_sub_aln ( A, ns[0], l_s[0]);
2499 ungap_sub_aln ( A, ns[1], l_s[1]);
2503 for (b=0; b<ns[a]; b++)
2504 fprintf ( stderr, "[%d-%d]", a, l_s[a][b]);
2506 A->score_aln=pair_wise (A, ns, l_s,CL);
2508 vfree(ns);free_int(l_s, -1);
2511 Alignment *realign_aln_random_bipart_n ( Alignment*A, Constraint_list *CL, int n)
2519 if (n>=A->nseq)n=A->nseq/2;
2520 used=vcalloc (A->nseq, sizeof (int));
2525 if (!used[p]){used[p]=1;c++;}
2527 ns=vcalloc (2, sizeof (int));
2528 ls=declare_int (2,A->nseq);
2531 for (b=0; b<A->nseq; b++)
2532 if (used[b]==a)ls[a][ns[a]++]=b;
2534 ungap_sub_aln ( A, ns[0], ls[0]);
2535 ungap_sub_aln ( A, ns[1], ls[1]);
2538 A->score_aln=pair_wise (A, ns, ls,CL);
2539 vfree(ns);free_int(ls, -1);vfree (used);
2542 int ** seq2ecl_mat (Constraint_list *CL);
2543 int ** seq2ecl_mat (Constraint_list *CL)
2552 ns=vcalloc (2, sizeof (int));
2553 ls=declare_int ((CL->S)->nseq, 2);
2555 A=seq2aln (CL->S,NULL, RM_GAP);
2557 dm=declare_int (n, n);
2558 for (a=0; a<(CL->S)->nseq-1; a++)
2559 for (b=a+1; b<(CL->S)->nseq; b++)
2564 ungap (A->seq_al[a]);
2565 ungap (A->seq_al[b]);
2566 dm[a][b]=dm[b][a]=linked_pair_wise (A, ns, ls, CL);
2571 Alignment *realign_aln_clust ( Alignment*A, Constraint_list *CL)
2577 static int **rm, **dm, **target;
2584 free_int (dm, -1); free_int (rm, -1);free_int (target, -1);
2589 if (!rm)rm=seq2ecl_mat(CL);
2590 if (!dm)dm=declare_int (A->nseq, A->nseq);
2591 if (!target)target=declare_int (A->nseq*A->nseq, 3);
2593 ns=vcalloc (2, sizeof (int));
2594 ls=declare_int (2,A->nseq);
2597 for (a=0; a<A->nseq-1; a++)
2598 for (b=a+1; b<A->nseq; b++)
2603 score=sub_aln2ecl_raw_score (A, CL, ns[0], ls[0]);
2604 dm[a][b]=dm[b][a]=MAX(0,(rm[a][b]-score));
2606 for (n=0,a=0; a<A->nseq; a++)
2608 for (b=a; b<A->nseq; b++, n++)
2613 for ( c=0; c<A->nseq; c++)
2615 if (c!=a && c!=b)target[n][2]+=dm[a][c]+dm[b][c];
2619 sort_int_inv (target,3, 2, 0, n-1);
2621 for (a=0; a<A->nseq; a++)
2623 if (target[a][0]==target[a][1])
2626 ls[0][0]=target[a][0];
2631 ls[0][0]=target[a][0]; ls[0][1]=target[a][1];
2634 for (ns[1]=0,b=0; b<A->nseq; b++)
2636 if (b!=target[a][0] && b!=target[a][1])ls[1][ns[1]++]=b;
2639 ungap_sub_aln (A, ns[0], ls[0]);
2640 ungap_sub_aln (A, ns[1], ls[1]);
2642 A->score_aln=pair_wise (A, ns, ls,CL);
2643 fprintf ( stderr, "\nSEQ: %d %d SCORE=%d\n",target[a][0],target[a][1], aln2ecl_raw_score(A, CL));
2648 int get_best_group ( int **used, Constraint_list *CL);
2649 int seq_aln_thr1(Alignment *A, int **used, int threshold, Constraint_list *CL);
2650 int seq_aln_thr2( Alignment*A, int **used, int threshold, int g, Constraint_list *CL);
2652 int get_best_group ( int **used, Constraint_list *CL)
2654 int a,b,c,d,n, tot,stot, best_tot, best_seq, nseq;
2659 nseq=((CL->S)->nseq);
2661 for (a=0; a<nseq; a++)
2663 if ( used[a][0]==-1)continue;
2664 for ( tot=0,b=0; b< nseq; b++)
2666 if ( a==b) continue;
2667 if ( used[b][0]==-1)continue;
2672 for (stot=0, n=0,c=0; c<ns[0]; c++)
2673 for (d=0; d<ns[1]; d++, n++)
2675 stot+=(CL->DM)->similarity_matrix[ls[0][c]][ls[1][d]];
2690 char ** list_file2dpa_list_file (char **list, int *len,int maxnseq, Sequence *S)
2692 char **nlist, **profile_list;
2693 int nl, l, a, np, has_lib;
2694 char *seq, *profile;
2698 nlist=declare_char (read_array_size_new ((void *)list), read_array_size_new ((void *)list[0]));
2701 profile_list=declare_char (read_array_size_new ((void *)list), read_array_size_new ((void *)list[0]));
2705 for (a=0; a<l; a++)if (list[a][0]!='S')sprintf ( nlist[nl++], "%s", list[a]);
2707 profile=vtmpnam (NULL);
2711 output_fasta_seq (seq, A=seq2aln (S,NULL,RM_GAP));
2712 free_aln (A); A=NULL;
2713 printf_system ( "t_coffee -seq %s -msa_mode groups -outfile=%s -maxnseq=0 -in Xblosum62mt -dp_mode gotoh_pair_wise_lgp -dpa_maxnseq=%d", seq, profile, maxnseq);
2715 if ( !check_file_exists(profile))
2717 char command[10000];
2718 sprintf( command,"t_coffee -seq %s -msa_mode groups -outfile=%s -quiet=xxtest -in Xblosum62mt -distance_matrix_mode ktup -dpa_maxnseq=%d -maxnseq=0", seq, profile, maxnseq);
2719 printf_exit ( EXIT_FAILURE,stderr, "Attempt to switch to DPA failed: Could not run\n%s\n[FATAL:%s]\n", command, PROGRAM);
2721 F=A=input_conc_aln (profile, NULL);
2728 file=vtmpnam (NULL);
2729 fp=vfopen (file, "w");
2730 for (a=0; a<A->nseq; a++)fprintf ( fp, ">%s %s\n%s\n", A->name[a],A->seq_comment[a], A->seq_al[a]);
2733 sprintf (profile_list[np++], "%s", file);
2738 for (has_lib=0,a=0; a<l; a++)
2740 if ( list[a][0]=='A' || list[a][0]=='L')
2745 nlist=reindex_constraint_list ( profile_list, np, nlist, &nl, S);
2747 for (a=0; a<np; a++)
2749 sprintf (nlist[nl++], "R%s", profile_list[a]);
2751 for (a=0; a< nl; a++)
2753 sprintf (list[a], "%s", nlist[a]);
2756 free_char (profile_list, -1);
2757 free_char (nlist, -1);
2761 Alignment * seq2aln_group (Alignment *A, int N, Constraint_list *CL)
2767 char *list, **list2;
2771 fprintf (CL->local_stderr, "\n##### DPA ##### Compute Fast Alignment");
2772 A=iterative_tree_aln (A,1, CL);
2773 fprintf (CL->local_stderr, "\n##### DPA ##### Identify Nodes");
2774 P=make_root_tree (A, CL, CL->gop, CL->gep,CL->S,NULL, 1);
2775 set_node_score (A, P, "idmat_sim");
2776 fprintf (CL->local_stderr, "\n##### DPA ##### Split Nodes");
2777 list=split_nodes_nseq (A,P,N, list=vcalloc (P->nseq*200, sizeof (char)));
2779 list2=string2list (list);
2780 fprintf (CL->local_stderr, "\n##### DPA ##### Save Nodes");
2783 for (a=1; a<atoi (list2[0]); a++)
2785 A->A=main_read_aln(list2[a], NULL);
2788 fprintf (CL->local_stderr, "\n##### DPA ##### Finished");
2789 vfree (list); free_char (list2, -1);
2803 Alignment * seq_aln ( Alignment*A, int n,Constraint_list *CL)
2806 int **used, a, t,n1, nseq;
2809 n1=nseq=(CL->S)->nseq;
2810 used=declare_int (nseq, nseq+3);
2813 for (a=0; a< nseq; a++)
2820 for (t=50; t>=0 && nseq>1; t-=5)
2822 nseq=seq_aln_thr1 (A, used,t, CL);
2829 int seq_aln_thrX(Alignment *A, int **used, int threshold, Constraint_list *CL)
2832 seq_aln_thr1(A,used,threshold,CL);
2833 for ( a=0; a< (CL->S)->nseq; a++)
2834 n+=(used[a][1]>0)?1:0;
2838 int seq_aln_thr1(Alignment *A, int **used, int threshold, Constraint_list *CL)
2840 int a,g, nseq, n_groups;
2843 g=get_best_group(used, CL);
2849 while ( seq_aln_thr2 (A, used, threshold,g, CL)!=0)
2851 g=get_best_group (used, CL);
2855 for (n_groups=0,a=0; a< nseq; a++)
2865 int seq_aln_thr2( Alignment*A, int **used, int threshold, int g, Constraint_list *CL)
2869 int nseq, n_members;
2874 nseq=((CL->S)->nseq);
2879 for ( a=0; a< nseq; a++)
2887 ungap_sub_aln (A, ns[0], ls[0]);
2888 ungap_sub_aln (A, ns[1], ls[1]);
2890 A->score_aln=pair_wise (A, ns, ls,CL);
2892 for (sim=0,b=0; b<ns[0]; b++)
2894 for (c=0; c<ns[1]; c++)
2896 sim+=generic_get_seq_sim (A->seq_al[ls[0][b]], A->seq_al[ls[1][c]], NULL,"idmat_sim2");
2899 sim/=(double)(ns[0]*ns[1]);
2904 for (d=0; d<ns[1]; d++)
2905 ls[0][ns[0]++]=ls[1][d];
2915 if (n_members>0)used[g][0]=-1;
2918 /****************************************************************************/
2921 /* Alignment Methods */
2924 /****************************************************************************/
2926 Alignment * tsp_aln (Alignment *A, Constraint_list *CL, Sequence *S)
2933 A=reorder_aln (A, (CL->S)->name,(CL->S)->nseq);
2934 ns=vcalloc (2, sizeof (int));
2935 ls=declare_int (2, (CL->S)->nseq);
2936 used=declare_int ( A->nseq, 2);
2939 CL->DM=cl2distance_matrix (CL, NOALN,NULL,NULL,0);
2940 distances=declare_int (A->nseq+1, A->nseq+1);
2941 distances=duplicate_int ( (CL->DM)->score_similarity_matrix, -1, -1);
2943 for (a=0; a< A->nseq; a++)
2946 for (b=0; b< A->nseq; b++)
2948 used[a][1]+=distances[a][b];
2952 sort_int_inv (used,2,1,0,(CL->S)->nseq-1);
2954 ls[0][ns[0]++]=used[0][0];
2957 for (a=1; a< S->nseq; a++)
2959 fprintf ( stderr, "\n%s %d", (CL->S)->name[used[a][0]], used[a][1]);
2960 ls[1][0]=used[a][0];
2961 pair_wise ( A,ns,ls, CL);
2962 ls[0][ns[0]++]=used[a][0];
2965 A->nseq=(CL->S)->nseq;
2970 Alignment *stack_progressive_aln(Alignment *A, Constraint_list *CL, int gop, int gep)
2978 sprintf ( dp_mode, "%s", CL->dp_mode);
2979 sprintf (CL->dp_mode, "gotoh_pair_wise");
2981 n_groups=vcalloc ( 2, sizeof (int));
2982 group_list=declare_int ( 2, A->nseq);
2986 for ( a=0; a<n; a++)ungap(A->seq_al[a]);
2987 for ( a=1; a<n; a++)
2991 group_list[0][a-1]=a-1;
2992 group_list[1][0] =a;
2994 pair_wise ( A,n_groups, group_list, CL);
2995 fprintf ( stderr, "\n\t[%d]->[%d]", a,(int)strlen ( A->seq_al[0]));
2997 fprintf (stderr, "\n");
2999 free_int ( group_list, -1);
3000 sprintf (CL->dp_mode, "%s",dp_mode);
3004 Alignment *realign_aln_clust ( Alignment*A, Constraint_list *CL);
3005 Alignment *realign_aln_random_bipart_n ( Alignment*A, Constraint_list *CL, int n);
3006 Alignment *iterate_aln ( Alignment*A, int nit, Constraint_list *CL)
3010 int score, iscore, delta;
3011 fprintf ( CL->local_stderr, "Iterated Refinement: %d cycles START: score= %d\n", nit,iscore=aln2ecl_raw_score (A, CL) );
3014 if ( nit==-1)nit=A->nseq*2;
3015 if ( A->len_aln==0)A=very_fast_aln (A, A->nseq, CL);
3016 A=reorder_aln (A,(CL->S)->name, A->nseq);
3018 for (it=0; it< nit; it++)
3020 //CL->local_stderr=output_completion (CL->local_stderr,it, nit,1, "");
3021 if (mode==0)A=realign_aln (A, CL);
3022 else if (mode ==1)A=realign_aln_random_bipart (A, CL);
3023 else if (mode ==2)A=realign_aln_clust (A, CL);
3024 else if (mode ==3)A=realign_aln_random_bipart_n (A, CL,2);
3027 score=aln2ecl_raw_score (A, CL);
3029 fprintf (CL->local_stderr, "\n\tIteration Cycle: %d Score=%d Improvement= %d", it+1,score, delta);
3031 fprintf ( CL->local_stderr, "\nIterated Refinement: Completed Improvement=%d\n", delta);
3035 int get_next_best (int seq, int nseq, int *used, int **dm);
3036 int get_next_best (int seq, int nseq, int *used, int **dm)
3038 int a,set, d, bd, bseq;
3040 for (set=0,a=0; a< nseq; a++)
3042 if (used[a] || seq==a)continue;
3053 Alignment * full_sorted_aln (Alignment *A, Constraint_list *CL)
3056 A=sorted_aln_seq (0, A, CL);
3058 for (a=1; a<A->nseq; a++)
3060 A=A->A=copy_aln (A, NULL);
3061 for (b=0; b<A->nseq; b++)ungap(A->seq_al[b]);
3062 A=sorted_aln_seq (a, A, CL);
3067 Alignment * sorted_aln (Alignment *A, Constraint_list *CL)
3069 return sorted_aln_seq (-1, A, CL);
3071 Alignment * sorted_aln_seq (int new_seq, Alignment *A, Constraint_list *CL)
3074 int *ns, **ls, **score, *used, **dm;
3077 dm=(CL->DM)->score_similarity_matrix;
3079 score=declare_int (nseq, 3);
3080 used=vcalloc (nseq, sizeof (int));
3081 ls=declare_int (2, nseq);
3082 ns=vcalloc (2, sizeof (int));
3087 for (a=0; a<nseq; a++)
3091 for ( b=0; b<nseq; b++)
3092 score[a][2]+=dm[a][b];
3094 sort_int ( score,3, 2, 0, nseq-1);
3095 old_seq=new_seq=score[nseq-1][0];
3098 for (a=1; a< nseq; a++)
3101 ls[0][ns[0]++]=new_seq;
3103 ls[1][0]=get_next_best(new_seq,nseq, used,dm);
3107 A->score_aln=pair_wise (A, ns, ls,CL);
3113 Alignment * ungap_aln4tree (Alignment *A);
3114 Alignment * ungap_aln4tree (Alignment *A)
3116 int t, n, max_sim, sim;
3126 B=copy_aln (A, NULL);
3127 B=ungap_aln_n(B, n);
3130 sim=aln2sim (B, "idmat");
3131 while (B->len_aln<t && sim>max_sim && n>0)
3135 B=ungap_aln_n(B, n);
3136 sim=aln2sim (B, "idmat");
3138 if ( B->len_aln<t && sim>max_sim)B=copy_aln (A, B);
3144 Alignment * iterative_tree_aln (Alignment *A,int n, Constraint_list *CL)
3149 T=make_tree (A, CL, CL->gop, CL->gep,CL->S,NULL, 1);
3150 tree_aln ((T[3][0])->left,(T[3][0])->right,A,(CL->S)->nseq, CL);
3151 for ( a=0; a< n; a++)
3156 B=copy_aln (A, NULL);
3157 B=ungap_aln_n (B, 20);
3158 sprintf ( CL->distance_matrix_mode, "aln");
3160 CL->DM=cl2distance_matrix ( CL,B,NULL,NULL, 1);
3164 T=make_tree (A, CL, CL->gop, CL->gep,CL->S,NULL, 1);
3166 tree_aln ((T[3][0])->left,(T[3][0])->right,A,(CL->S)->nseq, CL);
3171 Alignment *profile_aln (Alignment *A, Constraint_list *CL)
3177 ls=declare_int (2, nseq2);
3178 ns=vcalloc (2, sizeof (int));
3180 A=realloc_aln2(A,nseq2, A->len_aln);
3181 for (a=0; a< nseq; a++)
3183 for ( a=0; a<nseq;a++)
3185 sprintf (A->seq_al[a+nseq], "%s", (CL->S)->seq[a]);
3186 sprintf (A->name[a+nseq], "%s", (CL->S)->name[a]);
3187 A->order[a+nseq][0]=a;
3191 for (a=0; a<nseq; a++)
3195 A->score_aln=pair_wise (A, ns, ls,CL);
3197 ls[0][ns[0]++]=a+nseq;
3199 for (a=0; a< nseq; a++)
3201 sprintf (A->seq_al[a], "%s", A->seq_al[a+nseq]);
3207 Alignment * iterative_aln ( Alignment*A, int n,Constraint_list *CL)
3209 int *ns,**ls, **score, **dm;
3211 ls=declare_int (2, A->nseq);
3212 ns=vcalloc (2, sizeof (int));
3220 score=declare_int (nseq,2);
3221 dm=(CL->DM)->score_similarity_matrix;
3222 for (a=0; a<nseq; a++)
3225 for ( b=0; b<nseq; b++)
3226 score[a][1]+=dm[a][b];
3229 sort_int ( score,2, 1, 0, nseq-1);
3232 for (a=0; a<max; a++)
3235 for (ns[0]=0,b=0; b<nseq; b++)
3236 if (b!=score[a][0])ls[0][ns[0]++]=b;
3238 fprintf (stderr, "[%s %s %d]",(CL->S)->name[score[a][0]],A->name[score[a][0]], score[a][1]);
3240 ls[1][0]=score[a][0];
3241 ungap_sub_aln ( A, ns[0], ls[0]);
3242 ungap_sub_aln ( A, ns[1], ls[1]);
3243 A->score_aln=pair_wise (A, ns, ls,CL);
3249 Alignment *simple_progressive_aln (Sequence *S, NT_node **T, Constraint_list *CL, char *mat)
3255 A=seq2aln (S, NULL, RM_GAP);
3260 CL=declare_constraint_list (S, NULL, NULL, 0, NULL, NULL);
3261 sprintf ( CL->dp_mode, "myers_miller_pair_wise");
3262 sprintf ( CL->tree_mode, "nj");
3263 sprintf ( CL->distance_matrix_mode, "idscore");
3264 CL=choose_extension_mode ("matrix", CL);
3267 if (mat)CL->M=read_matrice (mat);
3268 CL->pw_parameters_set=1;
3269 CL->local_stderr=stderr;
3272 if ( !T)T=make_tree (A, CL, CL->gop, CL->gep,S, NULL,MAXIMISE);
3273 for ( a=0; a< A->nseq; a++)ungap (A->seq_al[a]);
3275 tree_aln ((T[3][0])->left,(T[3][0])->right,A,(CL->S)->nseq, CL);
3276 A=reorder_aln ( A,A->tree_order,A->nseq);
3281 Alignment *very_fast_aln ( Alignment*A, int nseq, Constraint_list *CL)
3283 char command[10000];
3288 if ( CL && CL->local_stderr)fp=CL->local_stderr;
3291 fprintf (fp, "\n[Computation of an Approximate MSA...");
3292 tmp_seq= vtmpnam (NULL);
3293 tmp_aln= vtmpnam (NULL);
3294 output_fasta_seq ((tmp_seq=vtmpnam (NULL)), A);
3295 sprintf ( command, "t_coffee -infile=%s -special_mode quickaln -outfile=%s %s -outorder=input", tmp_seq, tmp_aln, TO_NULL_DEVICE);
3296 my_system ( command);
3298 A=main_read_aln (tmp_aln,A);
3299 fprintf (fp, "]\n");
3303 static NT_node* SNL;
3304 NT_node* tree_aln ( NT_node LT, NT_node RT, Alignment*A, int nseq, Constraint_list *CL)
3306 if ( strm ((CL->TC)->use_seqan, "NO"))return local_tree_aln (LT, RT, A, nseq, CL);
3307 else return seqan_tree_aln (LT, RT, A, nseq, CL);
3310 NT_node* seqan_tree_aln ( NT_node LT, NT_node RT, Alignment*A, int nseq, Constraint_list *CL)
3317 char *tree, *lib, *seq, *new_aln;
3321 tree=vtmpnam (NULL);
3322 print_newick_tree (LT->parent, tree);
3326 main_output_fasta_seq (seq=vtmpnam (NULL),B=seq2aln (CL->S,NULL,RM_GAP), NO_HEADER);
3330 new_aln=vtmpnam (NULL);
3331 vfclose (save_constraint_list ( CL, 0, CL->ne,lib=vtmpnam(NULL), NULL, "ascii",CL->S));
3333 fprintf (CL->local_stderr, "\n********* USE EXTERNAL ALIGNER: START:\n\tCOMMAND: %s -lib %s -seq %s -usetree %s -outfile %s\n", (CL->TC)->use_seqan,lib, seq, tree, new_aln);
3334 printf_system ( "%s -lib %s -seq %s -usetree %s -outfile %s", (CL->TC)->use_seqan,lib, seq, tree, new_aln);
3335 fprintf (CL->local_stderr, "\n********* USE EXTERNAL ALIGNER: END\n");
3338 main_read_aln (new_aln, A);
3339 return tree2ao (LT,RT, A, A->nseq, CL);
3344 NT_node rec_local_tree_aln ( NT_node P, Alignment*A, Constraint_list *CL, int print);
3345 NT_node* local_tree_aln ( NT_node l, NT_node r, Alignment*A,int nseq, Constraint_list *CL)
3351 if (!r && !l) return NULL;
3356 fprintf ( CL->local_stderr, "\nPROGRESSIVE_ALIGNMENT [Tree Based]\n");
3358 //1: make sure the Alignment and the Sequences are labeled the same way
3359 if (CL->translation)vfree (CL->translation);
3360 CL->translation=vcalloc ( (CL->S)->nseq, sizeof (int));
3361 for ( a=0; a< (CL->S)->nseq; a++)
3362 CL->translation[a]=name_is_in_list ( (CL->S)->name[a], (CL->S)->name, (CL->S)->nseq, MAXNAMES);
3363 A=reorder_aln (A, (CL->S)->name,(CL->S)->nseq);
3364 A->nseq=(CL->S)->nseq;
3366 //2 Make sure the tree is in the same order
3367 recode_tree (P, (CL->S));
3370 if ( get_nproc()>1 && strstr (CL->multi_thread, "msa"))
3373 max_fork=get_nproc()/2;//number of nodes forked, one node =>two jobs
3375 NL=tree2node_list (P, NULL);
3376 min=declare_int (P->node+1,3);
3377 for (a=0; a<=P->node; a++)
3383 else if (N && N->nseq==1)min[a][1]=0;
3386 min[a][1]=MIN(((N->left)->nseq),((N->right)->nseq))*A->nseq+MAX(((N->left)->nseq),((N->right)->nseq));//sort on min and break ties on max
3387 min[a][2]=MIN(((N->left)->nseq),((N->right)->nseq));
3390 sort_int_inv (min,3, 1, 0, P->node);
3391 for (a=0; a<=P->node && a<max_fork; a++)
3393 if (min[a][2]>1)(NL[min[a][0]])->fork=1;
3397 rec_local_tree_aln (P, A,CL, 1);
3398 for (a=0; a<P->nseq; a++)sprintf (A->tree_order[a], "%s", (CL->S)->name[P->lseq[a]]);
3399 A->len_aln=strlen (A->seq_al[0]);
3401 fprintf ( CL->local_stderr, "\n\n");
3406 NT_node rec_local_tree_aln ( NT_node P, Alignment*A, Constraint_list *CL,int print)
3412 if (!P || P->nseq==1) return NULL;
3413 R=P->right;L=P->left;
3419 tmp1=vtmpnam (NULL);
3420 tmp2=vtmpnam (NULL);
3426 if (L->nseq>R->nseq)print=1;
3428 initiate_vtmpnam (NULL);
3429 rec_local_tree_aln (L, A, CL, print);
3430 dump_msa (tmp1,A, L->nseq, L->lseq);
3431 exit (EXIT_SUCCESS);
3439 if (L->nseq>R->nseq)print=0;
3442 initiate_vtmpnam (NULL);
3443 rec_local_tree_aln (R, A, CL, print);
3444 dump_msa (tmp2, A, R->nseq, R->lseq);
3445 exit (EXIT_SUCCESS);
3448 vwaitpid (pid1, &s, 0);
3449 vwaitpid (pid2, &s, 0);
3452 undump_msa (A,tmp1);
3453 undump_msa (A,tmp2);
3457 rec_local_tree_aln (L, A, CL, print);
3458 rec_local_tree_aln (R, A, CL, print);
3461 P->score=A->score_aln=score=profile_pair_wise (A,L->nseq, L->lseq,R->nseq,R->lseq,CL);
3462 A->len_aln=strlen (A->seq_al[P->lseq[0]]);
3463 score=node2sub_aln_score (A, CL, CL->evaluate_mode,P);
3464 if (print)fprintf(CL->local_stderr, "\n\tGroup %4d: [Group %4d (%4d seq)] with [Group %4d (%4d seq)]-->[Score=%4d][Len=%5d][PID:%d]%s",P->index,R->index,R->nseq,L->index,L->nseq,score, A->len_aln,getpid(),(P->fork==1)?"[Forked]":"" );
3471 NT_node* tree2ao ( NT_node LT, NT_node RT, Alignment*A, int nseq, Constraint_list *CL)
3477 static int n_groups_done, do_split=0;
3487 if (n_groups_done==0)
3490 SNL=vcalloc ( (CL->S)->nseq, sizeof (NT_node));
3492 if (CL->translation)vfree(CL->translation);
3493 CL->translation=vcalloc ( (CL->S)->nseq, sizeof (int));
3495 for ( a=0; a< (CL->S)->nseq; a++)
3496 CL->translation[a]=name_is_in_list ( (CL->S)->name[a], (CL->S)->name, (CL->S)->nseq, MAXNAMES);
3498 n_groups_done=(CL->S)->nseq;
3499 A=reorder_aln (A, (CL->S)->name,(CL->S)->nseq);
3503 translation=CL->translation;
3504 n_s=vcalloc (2, sizeof ( int));
3505 l_s=declare_int ( 2, nseq);
3508 if ( RT->parent !=LT->parent)fprintf ( stderr, "Tree Pb [FATAL:%s]", PROGRAM);
3511 if ( LT->leaf==1 && RT->leaf==0)
3512 tree2ao ( RT->left, RT->right,A, nseq,CL);
3514 else if ( RT->leaf==1 && LT->leaf==0)
3515 tree2ao ( LT->left, LT->right,A,nseq,CL);
3517 else if (RT->leaf==0 && LT->leaf==0)
3519 tree2ao ( LT->left, LT->right,A,nseq,CL);
3520 tree2ao ( RT->left, RT->right,A,nseq,CL);
3523 if ( LT->leaf==1 && RT->leaf==1)
3525 /*1 Identify the two groups of sequences to align*/
3527 nseq2align=LT->nseq+RT->nseq;
3529 for ( a=0; a< LT->nseq; a++)l_s[0][a]=translation[LT->lseq[a]];
3530 if ( LT->nseq==1)LT->group=l_s[0][0];
3533 for ( a=0; a< RT->nseq; a++)l_s[1][a]=translation[RT->lseq[a]];
3534 if ( RT->nseq==1)RT->group=l_s[1][0];
3537 P->group=n_groups_done++;
3539 if (nseq2align==nseq)
3541 for (b=0, a=0; a< n_s[0]; a++, b++)sprintf ( A->tree_order[b],"%s", (CL->S)->name[l_s[0][a]]);
3542 for (a=0; a< n_s[1] ; a++, b++)sprintf ( A->tree_order[b], "%s",(CL->S)->name[l_s[1][a]]);
3546 if (P->parent)P->leaf=1;
3547 if ( LT->isseq==0)LT->leaf=0;
3548 if ( RT->isseq==0)RT->leaf=0;
3550 if (RT->isseq){SNL[translation[RT->lseq[0]]]=RT;RT->score=100;}
3551 if (LT->isseq){SNL[translation[LT->lseq[0]]]=LT;LT->score=100;}
3553 do_split=split_condition (nseq2align,A->score_aln,CL);
3554 if (CL->split && do_split)
3557 for (a=0; a< P->nseq; a++)SNL[CL->translation[P->lseq[a]]]=NULL;
3558 SNL[CL->translation[RT->lseq[0]]]=P;
3568 NT_node* tree_realn ( NT_node LT, NT_node RT, Alignment*A, int nseq, Constraint_list *CL)
3574 static int n_groups_done;
3584 if (n_groups_done==0)
3587 SNL=vcalloc ( (CL->S)->nseq, sizeof (NT_node));
3589 if (CL->translation)vfree(CL->translation);
3590 CL->translation=vcalloc ( (CL->S)->nseq, sizeof (int));
3592 for ( a=0; a< (CL->S)->nseq; a++)
3593 CL->translation[a]=name_is_in_list ( (CL->S)->name[a], (CL->S)->name, (CL->S)->nseq, MAXNAMES);
3594 if (nseq>2)fprintf ( CL->local_stderr, "\nPROGRESSIVE_ALIGNMENT [Tree Based]\n");
3595 else fprintf ( CL->local_stderr, "\nPAIRWISE_ALIGNMENT [No Tree]\n");
3596 n_groups_done=(CL->S)->nseq;
3597 A=reorder_aln (A, (CL->S)->name,(CL->S)->nseq);
3601 translation=CL->translation;
3602 n_s=vcalloc (2, sizeof ( int));
3603 l_s=declare_int ( 2, nseq);
3609 l_s[0][0]=name_is_in_list ((CL->S)->name[0],(CL->S)->name, (CL->S)->nseq, MAXNAMES);
3610 l_s[1][0]=name_is_in_list ((CL->S)->name[1],(CL->S)->name, (CL->S)->nseq, MAXNAMES);
3611 A->score_aln=score=pair_wise (A, n_s, l_s,CL);
3619 if ( RT->parent !=LT->parent)fprintf ( stderr, "Tree Pb [FATAL:%s]", PROGRAM);
3622 if ( LT->leaf==1 && RT->leaf==0)
3623 tree_realn ( RT->left, RT->right,A, nseq,CL);
3625 else if ( RT->leaf==1 && LT->leaf==0)
3626 tree_realn ( LT->left, LT->right,A,nseq,CL);
3628 else if (RT->leaf==0 && LT->leaf==0)
3630 tree_realn ( LT->left, LT->right,A,nseq,CL);
3631 tree_realn ( RT->left, RT->right,A,nseq,CL);
3634 if ( LT->leaf==1 && RT->leaf==1 && (RT->nseq+LT->nseq)<nseq)
3636 /*1 Identify the two groups of sequences to align*/
3637 int *list, s, id1, id2;
3638 list=vcalloc (nseq, sizeof (int));
3639 for (a=0; a<LT->nseq; a++)
3641 s=translation[LT->lseq[a]];
3644 for (a=0; a<RT->nseq; a++)
3646 s=translation[RT->lseq[a]];
3649 for (a=0; a<nseq; a++)
3657 id1=sub_aln2sim (A, n_s, l_s, "idmat_sim");
3660 ungap_sub_aln (A, n_s[0],l_s[0]);
3661 ungap_sub_aln (A, n_s[1],l_s[1]);
3662 P->score=A->score_aln=score=pair_wise (A, n_s, l_s,CL);
3663 id2=sub_aln2sim (A, n_s, l_s, "idmat_sim");
3668 if (nseq2align==nseq)
3670 for (b=0, a=0; a< n_s[0]; a++, b++)sprintf ( A->tree_order[b],"%s", (CL->S)->name[l_s[0][a]]);
3671 for (a=0; a< n_s[1] ; a++, b++)sprintf ( A->tree_order[b], "%s",(CL->S)->name[l_s[1][a]]);
3675 if (P->parent)P->leaf=1;
3677 if ( LT->isseq==0)LT->leaf=0;
3678 if ( RT->isseq==0)RT->leaf=0;
3680 if (RT->isseq){SNL[translation[RT->lseq[0]]]=RT;RT->score=100;}
3681 if (LT->isseq){SNL[translation[LT->lseq[0]]]=LT;LT->score=100;}
3693 Alignment* profile_tree_aln ( NT_node P,Alignment*A,Constraint_list *CL, int threshold)
3695 int *ns, **ls, a, sim;
3696 NT_node LT, RT, D, UD;
3699 static int n_groups_done;
3703 //Sequences must be in the same order as the tree sequences
3707 n_groups_done=P->nseq+1;
3713 if (LT->leaf==0)A=delayed_tree_aln1 (LT, A,CL, threshold);
3714 if (RT->leaf==0)A=delayed_tree_aln1 (RT, A,CL, threshold);
3716 ns=vcalloc (2, sizeof (int));
3717 ls=declare_int ( 2,R->nseq);
3721 ls[0][ns[0]++]=LT->lseq[0];
3722 LT->group=ls[0][0]+1;
3725 node2seq_list (LT,&ns[0], ls[0]);
3729 ls[1][ns[1]++]=RT->lseq[0];
3730 RT->group=ls[1][0]+1;
3733 node2seq_list (RT,&ns[1], ls[1]);
3736 P->group=++n_groups_done;
3737 fprintf (CL->local_stderr, "\n\tGroup %4d: [Group %4d (%4d seq)] with [Group %4d (%4d seq)]-->",P->group,RT->group, ns[1],LT->group, ns[0]);
3739 P->score=A->score_aln=pair_wise (A, ns, ls,CL);
3740 sim=sub_aln2sim(A, ns, ls, "idmat_sim1");
3744 UD=(ns[0]<=ns[1])?RT:LT;
3745 D= (ns[0]<=ns[1])?LT:RT;
3750 fprintf (CL->local_stderr, "[Delayed (Sim=%4d). Kept Group %4d]",sim,UD->group);
3753 ungap_sub_aln (A, ns[0],ls[0]);
3754 ungap_sub_aln (A, ns[1],ls[1]);
3755 A->nseq=MAX(ns[0],ns[1]);
3759 F->A=main_read_aln (output_fasta_sub_aln (NULL, A, ns[(D==LT)?0:1], ls[(D==LT)?0:1]), NULL);
3763 F->A=main_read_aln (output_fasta_sub_aln (NULL, A, ns[(D==LT)?1:0], ls[(D==LT)?1:0]), NULL);
3767 printf_exit (EXIT_FAILURE, stderr, "\nError: Empty group");
3772 LT->aligned=1; RT->aligned=1;
3773 fprintf (CL->local_stderr, "[Score=%4d][Len=%5d]",sub_aln2sub_aln_score (A, CL, CL->evaluate_mode,ns, ls), strlen ( A->seq_al[ls[0][0]]));
3774 A->nseq=ns[0]+ns[1];
3779 F->A=main_read_aln (output_fasta_sub_aln2 (NULL, A, ns, ls), NULL);
3783 for (a=0; a<LT->nseq;a++)P->lseq[P->nseq++]=LT->lseq[a];
3784 for (a=0; a<RT->nseq;a++)P->lseq[P->nseq++]=RT->lseq[a];
3791 }////////////////////////////////////////////////////////////////////////////////////////
3795 ////////////////////////////////////////////////////////////////////////////////////////
3797 //Alignment *frame_tree_aln (Alignment *A, Constraint_list *CL)
3801 ////////////////////////////////////////////////////////////////////////////////////////
3805 ////////////////////////////////////////////////////////////////////////////////////////
3806 int delayed_pair_wise (Alignment *A, int *ns, int **ls,Constraint_list *CL);
3807 NT_node* delayed_tree_aln_mode1 ( NT_node LT, NT_node RT, Alignment*A, int nseq, Constraint_list *CL);
3808 NT_node* delayed_tree_aln_mode2 ( NT_node LT, NT_node RT, Alignment*A, int nseq, Constraint_list *CL);
3809 int paint_nodes2aligned ( NT_node P,char **list, int n);
3811 int reset_visited_nodes ( NT_node P);
3812 int reset_visited_nodes2 ( NT_node P);
3813 Alignment * make_delayed_tree_aln (Alignment *A,int n, Constraint_list *CL)
3818 T=make_tree (A, CL, CL->gop, CL->gep,CL->S,NULL, 1);
3819 delayed_tree_aln_mode1 ((T[3][0])->left,(T[3][0])->right,A,(CL->S)->nseq, CL);
3821 for ( a=0; a< n; a++)
3824 sprintf ( CL->distance_matrix_mode, "aln");
3825 CL->DM=cl2distance_matrix ( CL,A,NULL,NULL, 1);
3827 T=make_tree (A, CL, CL->gop, CL->gep,CL->S,NULL, 1);
3828 delayed_tree_aln_mode1 ((T[3][0])->left,(T[3][0])->right,A,(CL->S)->nseq, CL);
3833 NT_node* delayed_tree_aln_mode1 ( NT_node LT, NT_node RT, Alignment*A, int nseq, Constraint_list *CL)
3839 P=LT->parent;P->nseq=nseq;
3840 paint_nodes2aligned (P, NULL, 0);
3842 A=delayed_tree_aln1 (P, A, CL,50);
3843 A=delayed_tree_aln2 (P, A, CL, 0);
3847 NT_node* delayed_tree_aln_mode2 ( NT_node LT, NT_node RT, Alignment*A, int nseq, Constraint_list *CL)
3853 P=LT->parent;P->nseq=nseq;
3855 A=delayed_tree_aln1 (P, A, CL,thr);
3859 A=delayed_tree_aln2 (P, A, CL, thr);
3865 Alignment* delayed_tree_aln1 ( NT_node P,Alignment*A,Constraint_list *CL, int threshold)
3867 int *ns, **ls, a, sim;
3868 NT_node LT, RT, D, UD;
3871 static int n_groups_done;
3875 //Sequences must be in the same order as the tree sequences
3879 n_groups_done=P->nseq+1;
3885 if (LT->leaf==0)A=delayed_tree_aln1 (LT, A,CL, threshold);
3886 if (RT->leaf==0)A=delayed_tree_aln1 (RT, A,CL, threshold);
3888 ns=vcalloc (2, sizeof (int));
3889 ls=declare_int ( 2,R->nseq);
3892 node2seq_list (LT,&ns[0], ls[0]);
3893 if ( LT->nseq==1)LT->group=LT->lseq[0]+1;
3895 node2seq_list (RT,&ns[1], ls[1]);
3896 if ( RT->nseq==1)RT->group=RT->lseq[0]+1;
3899 P->group=++n_groups_done;
3902 if ( ns[0]==0 || ns[1]==0)
3904 fprintf (CL->local_stderr, "\n\tF-Group %4d: [Group %4d (%4d seq)] with [Group %4d (%4d seq)]-->Skipped",P->group,RT->group, ns[1],LT->group, ns[0]);
3906 LT->aligned=(ns[0]==0)?0:1;
3907 RT->aligned=(ns[1]==0)?0:1;
3911 fprintf (CL->local_stderr, "\n\tF-Group %4d: [Group %4d (%4d seq)] with [Group %4d (%4d seq)]-->",P->group,RT->group, ns[1],LT->group, ns[0]);
3912 P->score=A->score_aln=pair_wise (A, ns, ls,CL);
3913 sim=sub_aln2max_sim(A, ns, ls, "idmat_sim1");
3918 UD=(ns[0]<=ns[1])?RT:LT;
3919 D= (ns[0]<=ns[1])?LT:RT;
3924 fprintf (CL->local_stderr, "[Delayed (Sim=%4d). Kept Group %4d]",sim,UD->group);
3926 ungap_sub_aln (A, ns[0],ls[0]);
3927 ungap_sub_aln (A, ns[1],ls[1]);
3928 A->nseq=MAX(ns[0],ns[1]);
3932 LT->aligned=1; RT->aligned=1;
3933 fprintf (CL->local_stderr, "[Score=%4d][Len=%5d]",sub_aln2sub_aln_score (A, CL, CL->evaluate_mode,ns, ls), strlen ( A->seq_al[ls[0][0]]));
3934 A->nseq=ns[0]+ns[1];
3937 for (a=0; a<LT->nseq;a++)P->lseq[P->nseq++]=LT->lseq[a];
3938 for (a=0; a<RT->nseq;a++)P->lseq[P->nseq++]=RT->lseq[a];
3947 Alignment* delayed_tree_aln2 ( NT_node P,Alignment*A,Constraint_list *CL, int thr)
3960 fprintf (CL->local_stderr, "\n");
3962 if (!LT->aligned && !RT->aligned)
3964 printf_exit (EXIT_FAILURE, stderr, "ERROR: Unresolved Node On Groups %d [FATAL:%s]\n", P->group,PROGRAM);
3966 else if (!LT->aligned || !RT->aligned)
3969 ns=vcalloc (2, sizeof (int));
3970 ls=declare_int (2, R->nseq);
3972 node2seq_list (R,&ns[0], ls[0]);
3974 D=(!LT->aligned)?LT:RT;
3976 node2seq_list (D,&ns[1], ls[1]);
3978 fprintf (CL->local_stderr, "\tS-Delayed Group %4d: [Group %4d (%4d seq)] with [Group %4d (%4d seq)]-->",P->group,D->group, ns[1],R->group, ns[0]);
3979 P->score=A->score_aln=pair_wise (A, ns, ls,CL);
3980 sim=sub_aln2max_sim(A, ns, ls, "idmat_sim1");
3983 fprintf (CL->local_stderr, " [Further Delayed]\n");
3984 ungap_sub_aln (A, ns[0],ls[0]);
3985 ungap_sub_aln (A, ns[1],ls[1]);
3990 fprintf (CL->local_stderr, "[Score=%4d][Len=%5d][thr=%d]\n",sub_aln2sub_aln_score (A, CL, CL->evaluate_mode,ns, ls), (int)strlen ( A->seq_al[ls[0][0]]), thr);
3993 vfree (ns);free_int (ls, -1);
4000 if (LT->leaf==0)A=delayed_tree_aln2 (LT, A,CL, thr);
4001 if (RT->leaf==0)A=delayed_tree_aln2 (RT, A,CL, thr);
4006 int delayed_pair_wise (Alignment *A, int *ns, int **ls,Constraint_list *CL)
4012 pair_wise (A, ns, ls, CL);
4014 sim=fast_aln2sim_list (A, "sim3", ns, ls);
4016 sort_int_inv ( sim,3, 2,0, ns[0]*ns[1]-1);
4018 for (a=0; a< 2; a++)
4019 for ( b=0; b< ns[a]; b++)
4020 A->order[ls[a][b]][4]=-1;
4022 for (a=0; a< 10 && sim[a][0]!=-1; a++)
4030 ungap_sub_aln (A, ns[0],ls[0]);
4031 ungap_sub_aln (A, ns[1],ls[1]);
4033 s=pair_wise (A, ns, ls, CL);
4035 for (a=0; a< 2; a++)
4036 for ( b=0; b< ns[a]; b++)
4037 A->order[ls[a][b]][4]=0;
4043 int node2seq_list2 (NT_node P, int *ns, int *ls)
4046 if ( !P || P->visited ) return ns[0];
4051 ls[ns[0]++]=P->lseq[0];
4054 if (P->left && (P->left) ->aligned)node2seq_list2 (P->left, ns,ls);
4055 if (P->right && (P->right)->aligned)node2seq_list2 (P->right,ns,ls);
4056 if (P->aligned && P->parent)node2seq_list2 (P->parent,ns,ls);
4062 int node2seq_list (NT_node P, int *ns, int *ls)
4065 if ( P->isseq && P->aligned)
4067 ls[ns[0]++]=P->lseq[0];
4071 if (P->left && (P->left) ->aligned)node2seq_list (P->left, ns,ls);
4072 if (P->right && (P->right)->aligned)node2seq_list (P->right,ns,ls);
4076 int paint_nodes2aligned ( NT_node P,char **list, int n)
4083 else if ( name_is_in_list ( P->name, list, n, 100)!=-1)
4091 r+=paint_nodes2aligned (P->left, list, n);
4092 r+=paint_nodes2aligned (P->right, list, n);
4097 int reset_visited_nodes ( NT_node P)
4099 while (P->parent)P=P->parent;
4100 return reset_visited_nodes2 (P);
4102 int reset_visited_nodes2 ( NT_node P)
4105 if (P->left)r+=reset_visited_nodes2(P->left);
4106 if (P->right)r+=reset_visited_nodes2(P->right);
4112 ////////////////////////////////////////////////////////////////////////////////////////
4116 ////////////////////////////////////////////////////////////////////////////////////////
4118 Alignment* dpa_msa2 ( NT_node P,Alignment*A,Constraint_list *CL);
4119 Alignment *dpa_align_node (NT_node P,Alignment*A,Constraint_list *CL);
4120 char *node2profile_list (NT_node P,Alignment*A,Constraint_list *CL, char *list);
4121 char * output_node_aln (NT_node P, Alignment *A, char *name);
4122 int node2nleaf ( NT_node P);
4124 Alignment* dpa_aln (Alignment*A,Constraint_list *CL)
4128 A=iterative_tree_aln (A,1, CL);
4129 P=make_root_tree (A, CL, CL->gop, CL->gep,CL->S,NULL, 1);
4132 A=dpa_msa2(P, A, CL);
4136 int node2nleaf ( NT_node P)
4139 if ( P->leaf) return 1;
4142 n+=node2nleaf ( P->left);
4143 n+=node2nleaf ( P->right);
4147 Alignment* dpa_msa2 ( NT_node P,Alignment*A,Constraint_list *CL)
4156 n_l=node2nleaf (P->left);
4157 n_r=node2nleaf (P->right);
4160 return dpa_msa2 (P->left, A, CL);
4164 return dpa_msa2 (P->right, A, CL);
4167 A=dpa_align_node (P, A, CL);
4172 Alignment *dpa_align_node (NT_node P,Alignment*A,Constraint_list *CL)
4175 char *list, *tmp_aln;
4180 list=vcalloc ( P->nseq*100, sizeof (char));
4181 list=node2profile_list (P,A, CL, list);
4183 printf_system ( "t_coffee -profile %s -outfile=%s -dp_mode gotoh_pair_wise_lgp -msa_mode iterative_tree_aln -quiet", list,tmp_aln=vtmpnam (NULL));
4184 B=main_read_aln (tmp_aln, NULL);
4185 A=realloc_aln (A, B->len_aln+1);
4186 for ( a=0; a< B->nseq; a++)
4187 if ( (b=name_is_in_list (B->name[a], A->name, A->nseq, 100))!=-1)
4188 sprintf (A->seq_al[b], "%s", B->seq_al[a]);
4189 A->len_aln=B->len_aln;
4194 char *node2profile_list (NT_node P,Alignment*A,Constraint_list *CL, char *list)
4198 list=node2profile_list (P->left, A, CL, list);
4199 list=node2profile_list (P->right, A, CL, list);
4204 list=strcatf (list," %s", output_node_aln (P, A, NULL));
4205 if ( !P->isseq)P->leaf=0;
4209 char * output_node_aln (NT_node P, Alignment *A, char *name)
4213 if (name==NULL) name=vtmpnam (NULL);
4214 fp=vfopen (name, "w");
4216 for (a=0; a< P->nseq; a++)
4217 fprintf ( fp, ">%s\n%s", A->name[P->lseq[a]], A->seq_al[P->lseq[a]]);
4221 ////////////////////////////////////////////////////////////////////////////////////////
4225 ////////////////////////////////////////////////////////////////////////////////////////
4227 Alignment * new_dpa_aln (Alignment *A,Constraint_list *CL)
4234 A=make_delayed_tree_aln (A,1, CL);
4235 P=make_root_tree (A, CL, CL->gop, CL->gep,CL->S,NULL, 1);
4236 set_node_score (A, P, "idmat_sim");
4239 list=split_nodes_nseq (A,P,15, list=vcalloc (P->nseq*200, sizeof (char)));
4240 printf_system ( "t_coffee -profile %s -outfile=%s -dp_mode gotoh_pair_wise_lgp -msa_mode iterative_tree_aln", list,tmp_aln=vtmpnam (NULL));
4241 return main_read_aln (tmp_aln, NULL);
4244 char *split_nodes_nseq (Alignment *A, NT_node P, int nseq, char *list)
4253 n=count_threshold_nodes (A, P, a);
4256 return split_nodes_idmax (A, P, a,list);
4258 char *split_nodes_idmax (Alignment *A, NT_node P, int t, char *list)
4260 if (P->isseq || P->score>=t)
4262 list=strcatf (list," %s", output_node_aln (P, A, NULL));
4264 else if ( P->score<t)
4266 list=split_nodes_idmax (A, P->left,t, list);
4267 list=split_nodes_idmax (A, P->right,t,list);
4271 int count_threshold_nodes (Alignment *A, NT_node P, int t)
4275 if (P->isseq || P->score>=t)
4279 else if ( P->score<t)
4281 s+=count_threshold_nodes (A, P->left,t);
4282 s+=count_threshold_nodes (A, P->right,t);
4287 int set_node_score (Alignment *A, NT_node P, char *mode)
4292 if (P->isseq) return 0;
4296 N=(a==0)?P->left:P->right;
4300 P->score=sub_aln2max_sim(A, ns, ls,mode);
4301 set_node_score (A,P->left, mode);
4302 set_node_score (A,P->right, mode);
4305 /////////////////////////////////////////////////////////////////////////////////////////
4306 int split_condition (int nseq, int score, Constraint_list *CL)
4308 int cond1=1, cond2=1;
4311 if ( CL->split_nseq_thres)cond1 =(nseq<=CL->split_nseq_thres)?1:0;
4312 if ( CL->split_score_thres)cond2=(score>=CL->split_score_thres)?1:0;
4314 return (cond1 && cond2);
4316 int profile_pair_wise (Alignment *A, int n1, int *l1, int n2, int *l2, Constraint_list *CL)
4323 ns=vcalloc (2, sizeof (int));
4324 ls=vcalloc (2, sizeof (int*));
4330 return pair_wise (A, ns, ls, CL);
4332 int pair_wise (Alignment *A, int*ns, int **l_s,Constraint_list *CL )
4347 /*Make sure evaluation functions update their cache if needed*/
4348 A=update_aln_random_tag (A);
4350 if (! CL->pw_parameters_set)
4352 fprintf ( stderr, "\nERROR pw_parameters_set must be set in pair_wise [FATAL]\n" );crash("");
4356 function=get_pair_wise_function(CL->pair_wise, CL->dp_mode,&glocal);
4357 if ( CL->get_dp_cost==NULL)CL->get_dp_cost=get_dp_cost;
4359 if (strlen ( A->seq_al[l_s[0][0]])==0 || strlen ( A->seq_al[l_s[1][0]])==0)
4360 score=empty_pair_wise ( A, ns, l_s, CL, glocal);
4362 score=function ( A, ns, l_s, CL);
4367 int empty_pair_wise ( Alignment *A, int *ns, int **l_s, Constraint_list *CL, int glocal)
4374 if ( glocal==GLOBAL)
4376 l0=strlen (A->seq_al[l_s[0][0]]);
4377 l1=strlen (A->seq_al[l_s[1][0]]);
4380 if ( len==0)return 0;
4381 else if (l0>l1){n=ns[1];l=l_s[1];}
4382 else if (l0<l1){n=ns[0];l=l_s[0];}
4383 string=generate_null (len);
4384 for ( a=0; a< n; a++)
4385 sprintf ( A->seq_al[l[a]], "%s", string);
4386 A->score=A->score_aln=0;
4391 else if ( glocal==LALIGN)
4393 A->A=declare_aln (A->S);
4395 for ( a=0; a< 2; a++)
4396 for ( b=0; b<ns[a]; b++)
4397 A->seq_al[l_s[a][b]][0]='\0';
4398 (A->A)->score_aln=(A->A)->score=0;
4407 Pwfunc get_pair_wise_function (Pwfunc pw,char *dp_mode, int *glocal)
4409 /*Returns a function and a mode (Glogal, Local...)*/
4419 /*The first time: initialize the list of pairwse functions*/
4422 pwl=vcalloc ( 100, sizeof (Pwfunc));
4423 dpl=declare_char (100, 100);
4424 dps=vcalloc ( 100, sizeof (int));
4426 pwl[npw]=fasta_cdna_pair_wise;
4427 sprintf (dpl[npw], "fasta_cdna_pair_wise");
4431 pwl[npw]=cfasta_cdna_pair_wise;
4432 sprintf (dpl[npw], "cfasta_cdna_pair_wise");
4436 pwl[npw]=idscore_pair_wise;
4437 sprintf (dpl[npw], "idscore_pair_wise");
4441 pwl[npw]=gotoh_pair_wise;
4442 sprintf (dpl[npw], "gotoh_pair_wise");
4446 pwl[npw]=gotoh_pair_wise_lgp;
4447 sprintf (dpl[npw], "gotoh_pair_wise_lgp");
4451 pwl[npw]=gotoh_pair_wise_lgp_sticky;
4452 sprintf (dpl[npw], "gotoh_pair_wise_lgp_sticky");
4456 pwl[npw]=proba_pair_wise;
4457 sprintf (dpl[npw], "proba_pair_wise");
4461 pwl[npw]=biphasic_pair_wise;
4462 sprintf (dpl[npw], "biphasic_pair_wise");
4466 pwl[npw]=subop1_pair_wise;
4467 sprintf (dpl[npw], "subop1_pair_wise");
4471 pwl[npw]=subop2_pair_wise;
4472 sprintf (dpl[npw], "subop2_pair_wise");
4476 pwl[npw]=myers_miller_pair_wise;
4477 sprintf (dpl[npw], "myers_miller_pair_wise");
4481 pwl[npw]=test_pair_wise;
4482 sprintf (dpl[npw], "test_pair_wise");
4486 pwl[npw]=fasta_gotoh_pair_wise;
4487 sprintf (dpl[npw], "fasta_pair_wise");
4490 pwl[npw]=cfasta_gotoh_pair_wise;
4491 sprintf (dpl[npw], "cfasta_pair_wise");
4495 pwl[npw]=very_fast_gotoh_pair_wise;
4496 sprintf (dpl[npw], "very_fast_pair_wise");
4500 pwl[npw]=gotoh_pair_wise_sw;
4501 sprintf (dpl[npw], "gotoh_pair_wise_sw");
4505 pwl[npw]=cfasta_gotoh_pair_wise_sw;
4506 sprintf (dpl[npw], "cfasta_sw_pair_wise");
4510 pwl[npw]=gotoh_pair_wise_lalign;
4511 sprintf (dpl[npw], "gotoh_pair_wise_lalign");
4515 pwl[npw]=sim_pair_wise_lalign;
4516 sprintf (dpl[npw], "sim_pair_wise_lalign");
4520 pwl[npw]=domain_pair_wise;
4521 sprintf (dpl[npw], "domain_pair_wise");
4525 pwl[npw]=gotoh_pair_wise;
4526 sprintf (dpl[npw], "ssec_pair_wise");
4530 pwl[npw]=ktup_pair_wise;
4531 sprintf (dpl[npw], "ktup_pair_wise");
4535 pwl[npw]=precomputed_pair_wise;
4536 sprintf (dpl[npw], "precomputed_pair_wise");
4540 pwl[npw]=myers_miller_pair_wise;
4541 sprintf (dpl[npw], "default");
4545 pwl[npw]=viterbi_pair_wise;
4546 sprintf (dpl[npw], "viterbi_pair_wise");
4550 pwl[npw]=viterbiL_pair_wise;
4551 sprintf (dpl[npw], "viterbiL_pair_wise");
4555 pwl[npw]=viterbiD_pair_wise;
4556 sprintf (dpl[npw], "viterbiD_pair_wise");
4560 pwl[npw]=seq_viterbi_pair_wise;
4561 sprintf (dpl[npw], "seq_viterbi_pair_wise");
4565 pwl[npw]=pavie_pair_wise;
4566 sprintf (dpl[npw], "pavie_pair_wise");
4570 pwl[npw]=glocal_pair_wise;
4571 sprintf (dpl[npw], "glocal_pair_wise");
4575 pwl[npw]=linked_pair_wise;
4576 sprintf (dpl[npw], "linked_pair_wise");
4580 pwl[npw]=clinked_pair_wise;
4581 sprintf (dpl[npw], "clinked_pair_wise");
4586 pwl[npw]=viterbiDGL_pair_wise;
4587 sprintf (dpl[npw], "viterbiDGL_pair_wise");
4593 for ( a=0; a< npw; a++)
4595 if ( (dp_mode && strm (dpl[a], dp_mode)) || pwl[a]==pw)
4598 if (dp_mode)sprintf (dp_mode,"%s", dpl[a]);
4603 fprintf ( stderr, "\n[%s] is an unknown mode for dp_mode[FATAL]\n", dp_mode);
4609 /*******************************************************************************/
4612 /* Util Functions */
4615 /*******************************************************************************/
4617 char *build_consensus ( char *seq1, char *seq2, char *dp_mode)
4626 if ( !mat) mat=vcalloc ( STRING, sizeof (char));
4629 A=align_two_sequences (seq1, seq2, strcpy(mat,"idmat"), 0, 0,dp_mode);
4630 buf=vcalloc ( A->len_aln+1, sizeof (char));
4632 for ( a=0; a< A->len_aln; a++)
4636 if (is_gap(c1) && is_gap(c2))buf[a]='-';
4637 else if (is_gap(c1))buf[a]=c2;
4638 else if (is_gap(c2))buf[a]=c1;
4639 else if (c1!=c2){vfree (buf);buf=NULL;free_aln(A);return NULL;}
4643 free_sequence (free_aln (A), -1);
4651 combine_profile () Comobine two profiles into one, using the edit sequence produce by the DP
4652 edit_sequence () insert the gaps using the
4654 int fastal (int argv, char **arg)
4661 S=get_fasta_sequence (arg[1], NULL);
4665 for (a=0; a<S->nseq-1; a++)
4667 for (b=a+1; b<S->nseq; b++)
4669 mat[b][a]=mat[a][b]addrand()%100;
4673 int_dist2nj_tree (s, S->name, S->nseq, tree_name);
4674 T=main_read_tree (BT);
4675 =fastal_tree_aln (T->L,T->R,S);
4681 NT_node fastal_tree_aln ( NT_node P, Sequence *S)
4686 if (!P || P->nseq==1) return NULL;
4687 R=P->right;L=P->left;
4689 fastal_tree_aln (P->left,S);
4690 fastal_tree_aln (P->right,S);
4691 fastal_pair_wise (P);
4696 NT_node fastal_pair_wise (NT_node P)
4703 tb=fastal_align_profile ((P->right)->prf, (P->left)->prf);
4706 for (a=0; a< l; a++)
4709 if (tb[a]== 1 || tb[a] ==3)pr1=1;
4710 if (tb[a]== 2 || tb[a] ==3)pr2=1;
4712 for (b=0; b<20; b++)
4713 P->prf[a][b]=((pr1==1)?(P->right)->prf[ppr1][b]:0) + ((pr2==1)?(P->left)->prf[ppr2][b]:0);
4717 free_int ((P->left)->prf, -1);
4718 free_int ((P->right)->prf, -1);
4721 /*********************************COPYRIGHT NOTICE**********************************/
4722 /*© Centro de Regulacio Genomica */
4724 /*Cedric Notredame */
4725 /*Tue Oct 27 10:12:26 WEST 2009. */
4726 /*All rights reserved.*/
4727 /*This file is part of T-COFFEE.*/
4729 /* T-COFFEE is free software; you can redistribute it and/or modify*/
4730 /* it under the terms of the GNU General Public License as published by*/
4731 /* the Free Software Foundation; either version 2 of the License, or*/
4732 /* (at your option) any later version.*/
4734 /* T-COFFEE is distributed in the hope that it will be useful,*/
4735 /* but WITHOUT ANY WARRANTY; without even the implied warranty of*/
4736 /* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the*/
4737 /* GNU General Public License for more details.*/
4739 /* You should have received a copy of the GNU General Public License*/
4740 /* along with Foobar; if not, write to the Free Software*/
4741 /* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA*/
4742 /*............................................... |*/
4743 /* If you need some more information*/
4744 /* cedric.notredame@europe.com*/
4745 /*............................................... |*/
4749 /*********************************COPYRIGHT NOTICE**********************************/