7 #include "io_lib_header.h"
8 #include "util_lib_header.h"
9 #include "define_header.h"
10 #include "dp_lib_header.h"
12 int count_threshold_nodes (Alignment *A, NT_node P, int t);
13 int set_node_score (Alignment *A, NT_node P, char *mode);
14 char *split_nodes_nseq (Alignment *A, NT_node P, int max_nseq, char *list);
15 char *split_nodes_idmax (Alignment *A, NT_node P, int max_id, char *list);
16 /******************************************************************/
20 /******************************************************************/
21 int method_uses_structure(TC_method *M)
23 if ( strchr (M->seq_type, 'P'))return 1;
26 Constraint_list *profile2list (Job_TC *job, int nprf)
28 int a,b, s1, s2, r1, r2,w2;
29 Constraint_list *CL, *RCL;
43 if (!entry)entry=vcalloc ( ICHUNK+3, sizeof (int));
59 seqlist=string2num_list ((job->param)->seq_c)+1;
60 A1=seq2profile(CL->S, seqlist[1]);
61 A2=seq2profile(CL->S, seqlist[2]);
63 S=merge_seq (A1->S, NULL );
64 S=merge_seq (A2->S, S);
66 iA1=get_name_index (A1->name,A1->nseq, S->name, S->nseq);
67 iA2=get_name_index (A2->name,A2->nseq, S->name, S->nseq);
68 cache=vcalloc ( S->nseq, sizeof (int*));
69 for (a=0; a<A1->nseq; a++)cache[iA1[a]]=seq2inv_pos (A1->seq_al[a]);
70 for (a=0; a<A2->nseq; a++)cache[iA2[a]]=seq2inv_pos (A2->seq_al[a]);
74 CL->residue_index=declare_residue_index(S);
78 for (a=0; a<A1->nseq; a++)
79 for ( b=0; b<A2->nseq; b++)
82 sprintf ( buf, "2 %d %d",iA1[a], iA2[b]);
83 job=print_lib_job (job, "param->seq_c=%s", buf);
89 NRI=CL->residue_index;
96 entry[SEQ1]=seqlist[1];
97 entry[SEQ2]=seqlist[2];
98 for (a=0; a<A1->nseq; a++)
102 for (r1=1; r1<=S->len[s1];r1++)
104 for (b=1; b<NRI[s1][r1][0]; b+=ICHUNK)
106 int s2=NRI[s1][r1][b+SEQ2];
107 int r2=NRI[s1][r1][b+R2];
108 int w2=NRI[s1][r1][b+WE];
110 entry[R1]=cache[s1][r1];
111 entry[R2]=cache[s2][r2];
115 add_entry2list (entry,CL);
120 free_int (cache, -1);
121 free_arrayN (NRI, 3);
123 free_sequence (S, S->nseq);
124 return (job->io)->CL=CL;
127 Constraint_list *seq2list ( Job_TC *job)
131 Constraint_list *PW_CL;
132 Constraint_list *RCL=NULL;
154 seq=(job->param)->seq_c;
159 STL=(CL)?CL->STRUC_LIST:NULL;
161 seqlist=string2num_list (seq)+1;
168 if ( strncmp (CL->profile_comparison, "full", 4)==0)
171 if ( CL->profile_comparison[4])nprf=atoi ( CL->profile_comparison+4);
180 if ((method_uses_structure (M)) && profile2P_template_file (CL->S, seqlist[1]) && profile2P_template_file (CL->S, seqlist[2]))
182 RCL=profile2list (job, nprf);
184 else if ( strm (mode, "ktup_msa"))
186 RCL=hasch2constraint_list (CL->S, CL);
188 else if ( strm (mode, "test_pair") || strm ( mode,"fast_pair") || strm (mode, "ifast_pair") \
189 || strm ( mode, "diag_fast_pair")|| strm (mode, "idiag_fast_pair")\
190 || strm ( mode, "blast_pair") || strm (mode, "lalign_blast_pair") \
191 || strm ( mode, "viterbi_pair") || strm (mode, "slow_pair") || strm(mode, "glocal_pair") || strm (mode, "biphasic_pair") \
192 || strm ( mode, "islow_pair") || strm (mode, "tm_slow_pair") || strm (mode, "r_slow_pair") \
193 || strm ( mode, "lalign_id_pair")|| strm (mode, "tm_lalign_id_pair") || strm (mode , "lalign_len_pair") \
194 || strm (mode, "prrp_aln") || strm ( mode, "test_pair") \
195 || strm (mode, "cdna_fast_pair") || strm (mode, "diaa_slow_pair") || strm (mode, "monoaa_slow_pair")\
196 || strncmp (mode,"cdna_fast_pair",14)==0 \
201 RCL=aln2constraint_list ((A->A)?A->A:A, CL,weight);
204 else if ( strm ( mode, "subop1_pair") || strm ( mode, "subop2_pair") )
209 else if ( strm ( mode, "proba_pair") )
214 else if ( strm ( mode, "best_pair4prot"))
216 RCL=best_pair4prot (job);
218 else if ( strm ( mode, "best_pair4rna"))
220 RCL=best_pair4rna (job);
222 else if ( strm ( mode, "exon2_pair"))
227 sprintf ( weight2, "%s_subset_objOBJ-",weight);
228 RCL=aln2constraint_list (A, CL,weight2);
230 else if ( strm ( mode, "exon_pair"))
233 RCL=aln2constraint_list (A, CL,weight);
236 else if ( strm ( mode, "exon3_pair"))
241 sprintf ( weight2, "%s_subset_objOBJ-",weight);
242 RCL=aln2constraint_list (A, CL,weight2);
245 /*STRUCTURAL METHODS*/
247 else if ( strm (mode, "seq_msa"))
249 RCL=seq_msa(M, seq, CL);
251 /*STRUCTURAL METHODS*/
252 else if (strm (mode, "profile_pair") || strm (mode, "hh_pair"))
254 RCL=profile_pair (M, seq, CL);
257 else if ( strm (mode, "sap_pair"))
259 RCL=sap_pair (seq, weight, CL);
261 else if ( strm (mode, "thread_pair"))
263 RCL=thread_pair (M,seq, CL);
265 else if ( strm (mode, "pdb_pair"))
267 RCL=pdb_pair (M,seq, CL);
269 else if (strm (mode, "rna_pair"))
271 RCL=rna_pair(M, seq, CL);
273 else if ( strm (mode, "pdbid_pair"))
275 RCL=pdbid_pair (M,seq, CL);
277 else if ( strm (mode, "fugue_pair"))
279 RCL=thread_pair (M,seq, CL);
281 else if ( strm (mode, "lsqman_pair"))
283 RCL=lsqman_pair(seq, CL);
285 else if ( strm ( mode, "align_pdb_pair"))
287 RCL=align_pdb_pair ( seq,"gotoh_pair_wise", CL->align_pdb_hasch_mode,CL->align_pdb_param_file,CL, job);
289 else if ( strm ( mode, "lalign_pdb_pair"))
291 RCL=align_pdb_pair ( seq,"sim_pair_wise_lalign", CL->align_pdb_hasch_mode,CL->align_pdb_param_file,CL, job);
293 else if ( strm ( mode, "align_pdb_pair_2"))
295 RCL=align_pdb_pair_2 ( seq, CL);
299 fprintf ( CL->local_stderr, "\nERROR: THE FUNCTION %s DOES NOT EXIST [FATAL:%s]\n", mode, PROGRAM);crash("");
301 add_method_output2method_log (NULL,NULL, (A&&A->len_aln)?A:NULL,RCL, NULL);
302 RCL=(RCL==NULL)?CL:RCL;
310 Constraint_list *method2pw_cl (TC_method *M, Constraint_list *CL)
313 Constraint_list *PW_CL=NULL;
321 PW_CL=copy_constraint_list ( CL, SOFT_COPY);
322 PW_CL->pw_parameters_set=1;
326 S=(PW_CL)?PW_CL->S:NULL;
329 m=PW_CL->method_matrix;
330 if ( strm ((PW_CL->S)->type, "PROTEIN"))
333 sprintf ( mat, "%s", (strm(m, "default"))?"blosum62mt":m);
334 sprintf (group_mat, "vasiliky");
338 else if ( strm ((PW_CL->S)->type, "DNA") || strm ((PW_CL->S)->type, "RNA") )
341 sprintf(group_mat, "idmat");
342 sprintf ( mat, "%s", (strm(m, "default"))?"dna_idmat":m);
346 if ( M->matrix[0])sprintf ( mat, "%s", M->matrix);
348 PW_CL->M=read_matrice (mat);
351 if ( M->gop!=UNDEFINED) {PW_CL->gop=M->gop;}
354 PW_CL->gop= get_avg_matrix_mm (PW_CL->M, AA_ALPHABET)*10;
357 if ( M->gep!=UNDEFINED)PW_CL->gep=M->gep;
360 if (M->extend_seq ==1) PW_CL->extend_seq=1;
361 if (M->reverse_seq==1)PW_CL->reverse_seq=1;
364 if ( strm2 ( mode,"fast_pair", "ifast_pair"))
368 PW_CL->use_fragments=0;
369 if ( !PW_CL->use_fragments)PW_CL->diagonal_threshold=0;
370 else PW_CL->diagonal_threshold=6;
372 sprintf (PW_CL->dp_mode, "fasta_pair_wise");
373 sprintf (PW_CL->matrix_for_aa_group,"%s", group_mat);
375 if ( strm ( mode, "fast_pair"))
377 PW_CL->residue_index=NULL;
379 PW_CL->get_dp_cost=slow_get_dp_cost;
380 PW_CL->evaluate_residue_pair=evaluate_matrix_score;
384 else if ( strm2 ( mode,"diag_fast_pair","idiag_fast_pair"))
386 PW_CL->residue_index=NULL;
391 PW_CL->use_fragments=1;
392 PW_CL->diagonal_threshold=3;
394 sprintf (PW_CL->dp_mode, "fasta_pair_wise");
396 sprintf (PW_CL->matrix_for_aa_group,"%s", group_mat);
398 PW_CL->get_dp_cost=slow_get_dp_cost;
399 PW_CL->evaluate_residue_pair=evaluate_matrix_score;
403 else if ( strm ( mode,"blast_pair"))
405 PW_CL->residue_index=NULL;
409 PW_CL->use_fragments=0;
411 PW_CL->pair_wise=gotoh_pair_wise;
412 PW_CL->evaluate_residue_pair=evaluate_blast_profile_score;
413 sprintf (PW_CL->matrix_for_aa_group,"%s", group_mat);
416 else if ( strm ( mode,"lalign_blast_pair"))
418 PW_CL->residue_index=NULL;
422 PW_CL->use_fragments=0;
423 PW_CL->pair_wise=sim_pair_wise_lalign;
424 PW_CL->evaluate_residue_pair=evaluate_blast_profile_score;
425 PW_CL->lalign_n_top=10;
427 sprintf (PW_CL->matrix_for_aa_group,"%s", group_mat);
430 else if ( strm ( mode,"viterbi_pair"))
434 PW_CL->use_fragments=0;
435 sprintf (PW_CL->dp_mode, "viterbi_pair_wise");
436 PW_CL->residue_index=NULL;
437 PW_CL->get_dp_cost=slow_get_dp_cost;
438 PW_CL->evaluate_residue_pair=evaluate_matrix_score;
441 else if ( strm ( mode,"glocal_pair"))
445 PW_CL->use_fragments=0;
446 sprintf (PW_CL->dp_mode, "glocal_pair_wise");
447 sprintf (PW_CL->matrix_for_aa_group,"%s", group_mat);
449 PW_CL->residue_index=NULL;
450 PW_CL->get_dp_cost=slow_get_dp_cost;
451 PW_CL->evaluate_residue_pair=evaluate_matrix_score;
454 else if ( strm ( mode,"test_pair"))
458 PW_CL->use_fragments=0;
459 sprintf (PW_CL->dp_mode, "test_pair_wise");
461 else if ( strm ( mode,"sticky_pair"))
465 PW_CL->use_fragments=0;
466 PW_CL->residue_index=NULL;
467 PW_CL->get_dp_cost=cw_profile_get_dp_cost;
468 PW_CL->evaluate_residue_pair=evaluate_matrix_score;
470 sprintf (PW_CL->dp_mode, "gotoh_pair_wise_lgp_sticky");
471 sprintf (PW_CL->matrix_for_aa_group,"%s", group_mat);
474 else if ( strm ( mode,"slow_pair")|| strm (mode, "islow_pair" ) )
479 PW_CL->use_fragments=0;
480 sprintf (PW_CL->dp_mode, "myers_miller_pair_wise");
481 sprintf (PW_CL->matrix_for_aa_group,"%s", group_mat);
483 if ( strm ( "islow_pair", mode))
485 PW_CL->get_dp_cost=slow_get_dp_cost;
486 PW_CL->evaluate_residue_pair=residue_pair_extended_list;
489 else if ( strm ("slow_pair", mode) )
491 PW_CL->residue_index=NULL;
492 PW_CL->get_dp_cost=cw_profile_get_dp_cost;
493 PW_CL->evaluate_residue_pair=evaluate_matrix_score;
497 else if ( strm (mode, "subop1_pair"))
502 PW_CL->use_fragments=0;
503 sprintf (PW_CL->dp_mode, "subop1_pair_wise");
504 sprintf (PW_CL->matrix_for_aa_group,"%s", group_mat);
505 PW_CL->residue_index=NULL;
506 PW_CL->get_dp_cost=slow_get_dp_cost;
507 PW_CL->evaluate_residue_pair=evaluate_matrix_score;
510 else if ( strm (mode, "biphasic_pair"))
514 PW_CL->use_fragments=0;
515 sprintf (PW_CL->dp_mode, "biphasic_pair_wise");
516 sprintf (PW_CL->matrix_for_aa_group,"%s", group_mat);
517 PW_CL->residue_index=NULL;
518 PW_CL->get_dp_cost=cw_profile_get_dp_cost;
519 PW_CL->evaluate_residue_pair=evaluate_matrix_score;
522 else if ( strm (mode, "proba_pair"))
527 PW_CL->use_fragments=0;
528 sprintf (PW_CL->dp_mode, "proba_pair_wise");
529 sprintf (PW_CL->matrix_for_aa_group,"%s", group_mat);
530 PW_CL->residue_index=NULL;
531 PW_CL->get_dp_cost=slow_get_dp_cost;
532 PW_CL->evaluate_residue_pair=evaluate_matrix_score;
536 else if ( strm (mode, "diaa_slow_pair"))
541 PW_CL->use_fragments=0;
542 sprintf (PW_CL->dp_mode, "gotoh_pair_wise_lgp");
543 sprintf (PW_CL->matrix_for_aa_group,"%s", group_mat);
544 PW_CL->residue_index=NULL;
545 PW_CL->get_dp_cost=slow_get_dp_cost;
546 PW_CL->evaluate_residue_pair=evaluate_diaa_matrix_score;
549 else if ( strm (mode, "r_slow_pair"))
553 PW_CL->use_fragments=0;
554 sprintf (PW_CL->dp_mode, "gotoh_pair_wise_lgp");
555 sprintf (PW_CL->matrix_for_aa_group,"%s", group_mat);
556 PW_CL->residue_index=NULL;
557 PW_CL->get_dp_cost=slow_get_dp_cost;
558 PW_CL->evaluate_residue_pair=evaluate_matrix_score;
560 PW_CL->reverse_seq=1;
562 else if ( strm (mode, "tm_slow_pair"))
566 PW_CL->use_fragments=0;
567 sprintf (PW_CL->dp_mode, "myers_miller_pair_wise");
568 sprintf (PW_CL->matrix_for_aa_group,"%s", group_mat);
569 PW_CL->residue_index=NULL;
570 PW_CL->get_dp_cost=slow_get_dp_cost;
571 PW_CL->evaluate_residue_pair=evaluate_tm_matrix_score;
574 else if ( strm (mode, "monoaa_slow_pair"))
579 PW_CL->use_fragments=0;
580 sprintf (PW_CL->dp_mode, "gotoh_pair_wise_lgp");
581 sprintf (PW_CL->matrix_for_aa_group,"%s", group_mat);
582 PW_CL->residue_index=NULL;
583 PW_CL->get_dp_cost=slow_get_dp_cost;
584 PW_CL->evaluate_residue_pair=evaluate_monoaa_matrix_score;
587 else if ( strm (mode, "subop2_pair"))
592 PW_CL->use_fragments=0;
593 sprintf (PW_CL->dp_mode, "subop2_pair_wise");
594 sprintf (PW_CL->matrix_for_aa_group,"%s", group_mat);
595 PW_CL->residue_index=NULL;
596 PW_CL->get_dp_cost=slow_get_dp_cost;
597 PW_CL->evaluate_residue_pair=evaluate_matrix_score;
601 else if (strm ( mode, "exon2_pair"))
607 PW_CL->use_fragments=0;
608 sprintf (PW_CL->dp_mode, "myers_miller_pair_wise");
609 sprintf (PW_CL->matrix_for_aa_group,"%s", group_mat);
610 PW_CL->residue_index=NULL;
612 for ( a=0; a<60; a++)
614 PW_CL->M['x'-'A'][a]=0;
615 PW_CL->M[a]['x'-'A']=0;
616 PW_CL->M['X'-'A'][a]=0;
617 PW_CL->M[a]['X'-'A']=0;
619 PW_CL->evaluate_residue_pair=evaluate_matrix_score;
622 else if (strm ( mode, "exon3_pair"))
628 PW_CL->use_fragments=0;
629 sprintf (PW_CL->dp_mode, "myers_miller_pair_wise");
630 sprintf (PW_CL->matrix_for_aa_group,"%s", group_mat);
631 PW_CL->residue_index=NULL;
633 for ( a=0; a<60; a++)
635 PW_CL->M['x'-'A'][a]=0;
636 PW_CL->M[a]['x'-'A']=0;
637 PW_CL->M['X'-'A'][a]=0;
638 PW_CL->M[a]['X'-'A']=0;
640 PW_CL->evaluate_residue_pair=evaluate_matrix_score;
643 else if (strm ( mode, "exon_pair"))
649 PW_CL->use_fragments=0;
650 sprintf (PW_CL->dp_mode, "myers_miller_pair_wise");
651 sprintf (PW_CL->matrix_for_aa_group, "%s",group_mat);
652 PW_CL->residue_index=NULL;
654 for ( a=0; a<60; a++)
656 PW_CL->M['x'-'A'][a]=0;
657 PW_CL->M[a]['x'-'A']=0;
658 PW_CL->M['X'-'A'][a]=0;
659 PW_CL->M[a]['X'-'A']=0;
661 PW_CL->evaluate_residue_pair=evaluate_matrix_score;
664 else if ( strm ( mode , "lalign_len_pair"))
666 PW_CL->residue_index=NULL;
669 PW_CL->use_fragments=0;
670 PW_CL->pair_wise=sim_pair_wise_lalign;
671 PW_CL->evaluate_residue_pair=evaluate_matrix_score;
672 PW_CL->get_dp_cost=slow_get_dp_cost;
673 PW_CL->lalign_n_top=CL->lalign_n_top;
674 sprintf (PW_CL->matrix_for_aa_group,"%s", group_mat);
677 else if ( strm ( mode , "lalign_id_pair"))
679 PW_CL->residue_index=NULL;
682 PW_CL->use_fragments=0;
683 PW_CL->pair_wise=sim_pair_wise_lalign;
684 PW_CL->evaluate_residue_pair=evaluate_matrix_score;
685 PW_CL->get_dp_cost=slow_get_dp_cost;
686 PW_CL->lalign_n_top=CL->lalign_n_top;
687 sprintf (PW_CL->matrix_for_aa_group,"%s", group_mat);
690 else if ( strm ( mode , "tm_lalign_id_pair"))
692 PW_CL->residue_index=NULL;
695 PW_CL->use_fragments=0;
696 PW_CL->pair_wise=sim_pair_wise_lalign;
697 PW_CL->evaluate_residue_pair=evaluate_tm_matrix_score;
698 PW_CL->get_dp_cost=slow_get_dp_cost;
699 PW_CL->lalign_n_top=CL->lalign_n_top;
700 sprintf (PW_CL->matrix_for_aa_group,"%s", group_mat);
704 else if ( strm ( mode, "cdna_cfast_pair"))
706 PW_CL->residue_index=NULL;
710 PW_CL->use_fragments=0;
711 sprintf (PW_CL->dp_mode, "cfasta_cdna_pair_wise");
713 PW_CL->M=read_matrice (strcpy ( mat, "blosum62mt"));
716 PW_CL->f_gop=CL->f_gop;
717 PW_CL->f_gep=CL->f_gep;
718 PW_CL->get_dp_cost=get_dp_cost;
719 PW_CL->evaluate_residue_pair=evaluate_cdna_matrix_score;
722 else if ( strm ( mode, "cdna_fast_pair") || strncmp (mode,"cdna_fast_pair",14)==0)
725 PW_CL->residue_index=NULL;
728 PW_CL->use_fragments=0;
729 sprintf (PW_CL->dp_mode, "fasta_cdna_pair_wise");
737 PW_CL->get_dp_cost=get_dp_cost;
738 PW_CL->evaluate_residue_pair=evaluate_cdna_matrix_score;
743 free_constraint_list (PW_CL);
748 if (!strm (CL->method_evaluate_mode, "default"))
750 choose_extension_mode (CL->method_evaluate_mode, PW_CL);
754 /******************************************************************/
755 /* MULTIPLE ALIGNMENTS */
758 /******************************************************************/
759 Alignment * compute_prrp_aln (Alignment *A, Constraint_list *CL)
766 tmpseq=vtmpnam(NULL);
767 tmpaln=vtmpnam(NULL);
770 A=seq2aln ( CL->S, A, 1);
771 output_gotoh_seq (tmpseq, A);
772 sprintf ( command, "prrp -E/dev/null -o%s -F9 %s >/dev/null", tmpaln, tmpseq);
774 if (!check_file_exists(tmpaln)){return NULL;}
775 S=get_fasta_sequence (tmpaln, NULL);
779 free_sequence (S, S->nseq);
784 Alignment *seq2clustalw_aln (Sequence *S)
786 return aln2clustalw_aln (seq2aln (S, NULL,RM_GAP), NULL);
789 Alignment * aln2clustalw_aln (Alignment *B, Constraint_list *CL)
791 char *seq=NULL,*aln=NULL, command[1000];
793 output_fasta_seq (seq=vtmpnam (NULL), B);
794 sprintf ( command, "clustalw -infile=%s -outorder=input -outfile=%s %s", seq, aln=vtmpnam (NULL), TO_NULL_DEVICE);
797 if (!check_file_exists(aln))
799 else{B->nseq=0;return main_read_aln(aln,B);}
801 Alignment * compute_tcoffee_aln_quick (Alignment *A, Constraint_list *CL)
808 tmpseq=vtmpnam(NULL);
809 tmpaln=vtmpnam(NULL);
811 if ( CL)A=seq2aln ( CL->S, A, 1);
812 output_fasta_seq (tmpseq, A);
814 sprintf ( command, "t_coffee -seq %s -very_fast -outfile %s -quiet ",tmpseq,tmpaln);
817 if (!check_file_exists(tmpaln))return NULL;
819 A=main_read_aln(tmpaln,A);
826 Alignment * compute_clustalw_aln (Alignment *A, Constraint_list *CL)
833 tmpseq=vtmpnam(NULL);
834 tmpaln=vtmpnam(NULL);
836 A=seq2aln ( CL->S, A, 1);
837 output_fasta_seq (tmpseq, A);
839 sprintf ( command, "clustalw %sinfile=%s %soutfile=%s %s",CWF, tmpseq,CWF, tmpaln,TO_NULL_DEVICE);
842 if (!check_file_exists(tmpaln))return NULL;
844 A=main_read_aln(tmpaln,A);
851 Alignment * realign_block ( Alignment *A, int col1, int col2, char *pg)
853 /*Uses pg: (pg -infile=<infile> -outfile=<outfile> to realign the block [col1 col2[
854 Only guaranteed if pg can handle empty sequences
855 set pg to NULL to use the default program
859 Alignment *L, *M, *R;
862 char command[1000], script[1000];
866 seq_name=vtmpnam(NULL);
867 aln_name=vtmpnam (NULL);
869 L=copy_aln (A, NULL);
870 M=copy_aln (A, NULL);
871 R=copy_aln (A, NULL);
873 L=extract_aln ( L, 0, col1);
874 M=extract_aln ( M, col1, col2);
875 R=extract_aln ( R, col2, A->len_aln);
876 output_fasta_seq (seq_name, M);
878 sprintf ( script, "%s", (pg==NULL)?"t_coffee":pg);
880 sprintf ( command, "%s -infile=%s -outfile=%s %s", script,seq_name, aln_name, TO_NULL_DEVICE);
881 my_system ( command);
883 M=main_read_aln (aln_name, NULL);
886 M=reorder_aln (M, L->name,L->nseq);
890 free_aln (L);free_aln (M); free_aln (R);
899 /******************************************************************/
903 /******************************************************************/
906 /******************************************************************/
910 /******************************************************************/
916 Constraint_list * align_pdb_pair_2 (char *seq, Constraint_list *CL)
922 static char *command;
923 static char *program;
925 tmp_name=vtmpnam ( NULL);
927 if ( !program)program=vcalloc ( LONG_STRING, sizeof (char));
928 if ( !command)command=vcalloc ( LONG_STRING, sizeof (char));
930 #ifndef ALIGN_PDB_4_TCOFFEE
931 if ( getenv ( "ALIGN_PDB_4_TCOFFEE")==NULL)crash ("ALIGN_PDB_4_TCOFFEE IS NOT DEFINED");
932 else sprintf ( program, "%s", (getenv ( "ALIGN_PDB_4_TCOFFEE")));
934 if ( getenv ( "ALIGN_4_TCOFFEE")==NULL)sprintf (program, "%s", ALIGN_PDB_4_TCOFFEE);
935 else sprintf ( program, "%s", (getenv ( "ALIGN_PDB_4_TCOFFEE")));
938 atoi(strtok (seq,SEPARATORS));
939 s1=atoi(strtok (NULL,SEPARATORS));
940 s2=atoi(strtok (NULL,SEPARATORS));
942 sprintf ( command , "%s -in P%s P%s -gapopen=-40 -max_delta=2.5 -gapext=0 -scale=0 -hasch_mode=hasch_ca_trace_bubble -maximum_distance=10 -output pdb_constraint_list -outfile stdout> %s%s",program, (CL->S)->file[s1], (CL->S)->file[s2], get_cache_dir(),tmp_name);
944 my_system ( command);
945 CL=read_constraint_list_file(CL, tmp_name);
954 Constraint_list *align_pdb_pair (char *seq_in, char *dp_mode,char *evaluate_mode, char *file, Constraint_list *CL, Job_TC *job)
962 Constraint_list *PWCL;
965 sprintf ( seq, "%s",seq_in);
966 atoi(strtok (seq,SEPARATORS));
967 s1=atoi(strtok (NULL,SEPARATORS));
968 s2=atoi(strtok (NULL,SEPARATORS));
972 sprintf (name1, "%s%s_%s.%s.align_pdb", get_cache_dir(),(CL->S)->name[s1], (CL->S)->name[s2], dp_mode);
973 sprintf (name2, "%s%s_%s.%s.align_pdb", get_cache_dir(),(CL->S)->name[s2], (CL->S)->name[s1], dp_mode);
976 if ( check_file_exists (name1) && is_lib(name1))CL=read_constraint_list_file(CL,name1);
977 else if ( check_file_exists (name2) && is_lib(name2))CL=read_constraint_list_file(CL,name2);
980 PWCL=set_constraint_list4align_pdb ( CL,s1,dp_mode, evaluate_mode, NULL);
981 PWCL=set_constraint_list4align_pdb ( CL,s2,dp_mode, evaluate_mode, NULL);
982 ((job->param)->TCM)->PW_CL=PWCL;
984 output_constraints (name1, "100", F);
985 CL=aln2constraint_list (F, CL, "100");
991 Constraint_list * profile_pair (TC_method *M , char *in_seq, Constraint_list *CL)
996 char *result,*prf1_file, *prf2_file;
997 Alignment *F=NULL, *A1, *A2;
1002 if ( M->executable2[0]=='\0')
1003 fprintf ( stderr, "\nERROR: profile_pair requires a method: thread_pair@EP@executable2@<method> [FATAL:%s]\n", PROGRAM);
1006 sprintf ( seq, "%s", in_seq);
1007 atoi(strtok (seq,SEPARATORS));
1008 s1=atoi(strtok (NULL,SEPARATORS));
1009 s2=atoi(strtok (NULL,SEPARATORS));
1011 A1=seq2R_template_profile(CL->S,s1);
1012 A2=seq2R_template_profile(CL->S,s2);
1015 prf1_file=vtmpnam (NULL);
1016 fp=vfopen (prf1_file, "w");
1019 fprintf (fp, ">%s\n%s\n",(CL->S)->name[s1], aln2cons_seq_mat(A1, "blosum62mt"));
1020 for ( a=0; a< A1->nseq; a++)fprintf (fp, ">prf_seq1_%d\n%s\n", a, A1->seq_al[a]);
1024 fprintf ( fp, ">%s\n%s\n", (CL->S)->name[s1], (CL->S)->seq[s1]);
1028 prf2_file=vtmpnam (NULL);
1029 fp=vfopen (prf2_file, "w");
1032 fprintf (fp, ">%s\n%s\n",(CL->S)->name[s2], aln2cons_seq_mat(A2, "blosum62mt"));
1033 for ( a=0; a< A2->nseq; a++)fprintf (fp, ">prf_seq2_%d\n%s\n", a, A2->seq_al[a]);
1037 fprintf ( fp, ">%s\n%s\n", (CL->S)->name[s2], (CL->S)->seq[s2]);
1041 result=vtmpnam (NULL);
1044 param=vcalloc(strlen (M->param)+1, sizeof (char));
1045 sprintf ( param, "%s", M->param);
1046 param=substitute ( param, " ", "");
1047 param=substitute ( param, "\n", "");
1050 sprintf ( command, "tc_generic_method.pl -mode=profile_pair -method=%s %s%s %s%s %s%s -param=%s -tmpdir=%s", M->executable2,M->in_flag,prf1_file, M->in_flag2,prf2_file,M->out_flag, result, param, get_tmp_4_tcoffee());
1052 my_system ( command);
1056 if ( !check_file_exists (result))
1058 fprintf ( stderr, "\n\tprofile_pair/%s failed:\n\t%s\n",M->executable2, command);
1059 myexit (EXIT_FAILURE);
1061 else if ( is_lib (result))
1063 CL=read_constraint_list_file(CL,result);
1065 else if ( is_aln (result))
1067 F=main_read_aln (result, NULL);
1068 char *name1, *name2;
1069 name1=(CL->S)->name[s1];
1070 name2=(CL->S)->name[s2];
1072 fp=vfopen (result, "w");
1073 for ( a=0; a< F->nseq; a++)
1074 if (strm ( F->name[a], name1) || strm (F->name[a], name2))
1075 fprintf ( fp, ">%s\n%s\n", F->name[a], F->seq_al[a]);
1078 F=main_read_aln (result, NULL);
1079 CL=aln2constraint_list (F, CL, "sim");
1084 Constraint_list * pdbid_pair (TC_method *M , char *in_seq, Constraint_list *CL)
1090 char *result, *pdb1, *pdb2;
1095 if ( M->executable2[0]=='\0')
1097 fprintf ( stderr, "\nERROR: pdbid_pair requires a structural alignment method: pdb_pair@EP@EXECUTABLE2@<method> [FATAL:%s]\n", PROGRAM);
1098 myexit (EXIT_FAILURE);
1100 sprintf ( seq, "%s", in_seq);
1103 atoi(strtok (seq,SEPARATORS));
1104 s1=atoi(strtok (NULL,SEPARATORS));
1105 s2=atoi(strtok (NULL,SEPARATORS));
1107 pdb1=seq2P_pdb_id(CL->S,s1);
1108 pdb2=seq2P_pdb_id(CL->S,s2);
1110 if (!is_pdb_name (pdb1) || !is_pdb_name(pdb2))
1116 result=vtmpnam (NULL);
1117 command = vcalloc ( 1000, sizeof (char));
1119 sprintf ( command, "tc_generic_method.pl -mode=pdbid_pair -method=%s %s%s %s%s %s%s -email=%s -cache=%s -tmpdir=%s", M->executable2,M->in_flag,pdb1, M->in_flag2,pdb2,M->out_flag, result,getenv ("EMAIL"),get_cache_dir(), get_tmp_4_tcoffee());
1120 my_system ( command);
1122 if (file_is_empty (result))return CL;
1125 F=main_read_aln (result, NULL);
1129 fprintf ( stderr, "\n\tpdb_pair/%s failed:\n\t%s\n",M->executable2, command);
1134 sprintf ( F->name[0],"%s", (CL->S)->name[s1]);
1135 sprintf ( F->name[1],"%s", (CL->S)->name[s2]);
1136 CL=aln2constraint_list (F, CL, "sim");
1143 Constraint_list * pdb_pair (TC_method *M , char *in_seq, Constraint_list *CL)
1148 char *result, *pdb1,*pdb1_file, *pdb2, *pdb2_file;
1152 char command[10000];
1154 if ( M->executable2[0]=='\0')
1156 fprintf ( stderr, "\nERROR: pdb_pair requires a structural alignment method: pdb_pair@EP@EXECUTABLE2@<method> [FATAL:%s]\n", PROGRAM);
1157 myexit (EXIT_FAILURE);
1160 sprintf ( seq, "%s", in_seq);
1163 atoi(strtok (seq,SEPARATORS));
1164 s1=atoi(strtok (NULL,SEPARATORS));
1165 s2=atoi(strtok (NULL,SEPARATORS));
1167 pdb1=seq2P_template_file(CL->S,s1);
1168 pdb2=seq2P_template_file(CL->S,s2);
1169 if ( !pdb1 || !pdb2) return CL;
1172 pdb1_file=vtmpnam (NULL);
1173 pdb2_file=vtmpnam (NULL);
1177 sprintf ( command, "extract_from_pdb -infile %s -atom ALL -chain FIRST -nodiagnostic > %s", pdb1, pdb1_file);
1178 my_system (command);
1182 sprintf ( command, "extract_from_pdb -infile %s -atom ALL -chain FIRST -nodiagnostic > %s", pdb2, pdb2_file);
1183 my_system (command);
1186 result=vtmpnam (NULL);
1188 sprintf ( command, "tc_generic_method.pl -mode=pdb_pair -method=%s %s%s %s%s %s%s -tmpdir=%s", M->executable2,M->in_flag,pdb1_file, M->in_flag2,pdb2_file,M->out_flag, result, get_tmp_4_tcoffee());
1189 my_system ( command);
1191 F=main_read_aln (result, NULL);
1195 fprintf ( stderr, "\n\tpdb_pair/%s failed:\n\t%s\n",M->executable2, command);
1200 sprintf ( F->name[0],"%s", (CL->S)->name[s1]);
1201 sprintf ( F->name[1],"%s", (CL->S)->name[s2]);
1202 CL=aln2constraint_list (F, CL, "sim");
1210 Constraint_list * seq_msa (TC_method *M , char *in_seq, Constraint_list *CL)
1214 char *infile, *outfile;
1219 static char *db_file;
1224 db_file=vtmpnam (NULL);
1225 fp=vfopen (db_file, "w");
1226 for (a=0; a<(CL->S)->nseq; a++)fprintf ( fp, ">%s\n%s\n", (CL->S)->name[a], (CL->S)->seq[a]);
1230 infile=vtmpnam (NULL);
1231 outfile=vtmpnam (NULL);
1233 sprintf ( seq, "%s", in_seq);
1235 n=atoi(strtok (seq,SEPARATORS));
1237 fp=vfopen (infile, "w");
1238 for ( a=0; a<n; a++)
1240 s=atoi(strtok (NULL,SEPARATORS));
1241 fprintf (fp, ">%s\n%s\n", (CL->S)->name[s], (CL->S)->seq[s]);
1245 if (strstr (M->executable2, "blast"))sprintf ( db, " -database=%s", db_file);
1248 sprintf ( command, "t_coffee -other_pg tc_generic_method.pl -mode=%s -method=%s %s %s%s %s %s%s -tmpdir=%s %s %s", M->executable, M->executable2, M->param1,M->in_flag,infile,M->param2,M->out_flag,outfile,get_tmp_4_tcoffee(),db,M->param);
1253 //HERE ("%s", command);exit (0);
1254 //sprintf ( command, "t_coffee -other_pg tc_generic_method.pl -mode=seq_msa -method=%s %s%s %s%s -tmpdir=%s %s", M->executable2, M->in_flag, infile, M->out_flag, outfile, get_tmp_4_tcoffee(), M->param);
1255 my_system (command);
1258 if ( strm (M->out_mode, "aln") || strm (M->out_mode, "A"))
1260 F=main_read_aln (outfile, NULL);
1263 fprintf ( stderr, "\n\tseq_msa/%s failed:\n\t%s\n", M->executable2,command);
1267 CL=aln2constraint_list (F, CL, "sim");
1271 else if ( strm (M->out_mode, "fL")|| strm (M->out_mode, "lib"))
1273 Constraint_list *NCL;
1274 NCL=read_constraint_list_file(CL,outfile);
1277 fprintf ( stderr, "\n\tseq_msa/%s failed:\n\t%s\n", M->executable2,command);
1286 Constraint_list * thread_pair (TC_method *M , char *in_seq, Constraint_list *CL)
1292 if ( M->executable2[0]=='\0')
1294 fprintf ( stderr, "\nERROR: thread_pair requires a threading method: pdb_pair@EP@EXECUTABLE2@<method> [FATAL:%s]\n", PROGRAM);
1295 myexit (EXIT_FAILURE);
1298 sprintf ( seq, "%s", in_seq);
1299 atoi(strtok (seq,SEPARATORS));
1300 s1=atoi(strtok (NULL,SEPARATORS));
1301 s2=atoi(strtok (NULL,SEPARATORS));
1303 CL=thread_pair2(M,s1, s2, CL);
1304 CL=thread_pair2(M,s2, s1, CL);
1311 Constraint_list* thread_pair2 ( TC_method *M, int s1, int s2, Constraint_list *CL)
1313 char *result, *pep, *pdb, *pdb1;
1317 char command[10000];
1319 STL=(CL)?CL->STRUC_LIST:NULL;
1322 if ( !(CL->S) || !((CL->S)->T[s1]) || !((CL->S)->T[s1])->P || !seq2P_template_file(CL->S,s1))return CL;
1323 else pdb1=seq2P_template_file(CL->S,s1);
1327 result=vtmpnam (NULL);
1330 sprintf ( command, "extract_from_pdb -infile %s -atom ALL -chain FIRST -nodiagnostic > %s", pdb1, pdb);
1331 my_system (command);
1335 fp=vfopen (pep, "w");
1336 fprintf ( fp, ">%s\n%s\n",(CL->S)->name[s2],(CL->S)->seq[s2] );
1338 sprintf ( command, "tc_generic_method.pl -mode=thread_pair -method=%s %s%s %s%s %s%s -tmpdir=%s", M->executable2,M->in_flag,pep, M->in_flag2,pdb,M->out_flag, result, get_tmp_4_tcoffee());
1339 my_system ( command);
1340 F=main_read_aln (result, NULL);
1344 fprintf ( stderr, "\n\tthread_pair/%s failed:\n\t%s\n", M->executable2,command);
1348 sprintf ( F->name[0],"%s", (CL->S)->name[s1]);
1349 sprintf ( F->name[1],"%s", (CL->S)->name[s2]);
1350 CL=aln2constraint_list (F, CL, "sim");
1358 Constraint_list * lsqman_pair ( char *in_seq, Constraint_list *CL)
1361 static CLIST_TYPE *entry;
1362 char command[STRING];
1365 char *seq_file, *lsqman_result, *tmp_name1;
1369 sprintf ( seq, "%s", in_seq);
1372 if ( !entry)entry=vcalloc ( LIST_N_FIELDS, sizeof ( CLIST_TYPE ));
1374 atoi(strtok (seq,SEPARATORS));
1375 s1=atoi(strtok (NULL,SEPARATORS));
1376 s2=atoi(strtok (NULL,SEPARATORS));
1379 tmp_name1=vcalloc (100, sizeof (char));
1380 sprintf ( tmp_name1, "%s_%s.lsqman_aln", (CL->S)->name[s1], (CL->S)->name[s2]);
1381 if ( check_file_exists ( tmp_name1) && (F=main_read_aln(tmp_name1, NULL))!=NULL)
1384 lsqman_result=tmp_name1;
1389 seq_file=vtmpnam (NULL);
1390 lsqman_result=tmp_name1;
1391 fp=vfopen (seq_file, "w");
1392 fprintf ( fp, ">%s\n%s\n",(CL->S)->name[s1],(CL->S)->seq[s2] );
1394 sprintf ( command, "%s -pdb %s -pep %s > %s%s", LSQMAN_4_TCOFFEE, (CL->S)->name[s1], seq_file,get_cache_dir(), lsqman_result);
1398 my_system ( command);
1399 F=main_read_aln (lsqman_result, NULL);
1402 fprintf ( stderr, "\n\tlsqman failed: will be retried");
1403 if ( n_failure==0)fprintf ( stderr, "\n\t%s", command);
1407 fprintf ( stderr, "\nCould not run Fugue: will replace it with slow_pair\n");
1408 vremove (lsqman_result);
1419 F=main_read_aln(lsqman_result, NULL);
1420 /*sprintf ( F->name[0],"%s", (CL->S)->name[s1]);
1421 sprintf ( F->name[1],"%s", (CL->S)->name[s2]);
1423 CL=aln2constraint_list (F, CL, "100");
1428 Constraint_list * sap_pair (char *seq, char *weight, Constraint_list *CL)
1432 char full_name[FILENAMELEN];
1433 char *tmp_pdb1, *tmp_pdb2;
1434 char *sap_seq1, *sap_seq2;
1435 char *sap_lib, *tmp_name, *tmp_name1, *tmp_name2;
1437 int s1, s2, r1=0, r2=0;
1438 int sim=0, tot=0, score=0;
1440 char program[STRING];
1441 char *string1, *string2, *string3, *string4, *string5;
1442 int max_struc_len=10000;
1443 char *template1, *template2;
1446 /*check_program_is_installed ( "sap" ,SAP_4_TCOFFEE, "SAP_4_TCOFFEE",MAIL, IS_FATAL);*/
1449 atoi(strtok (seq,SEPARATORS));
1450 s1=atoi(strtok (NULL,SEPARATORS));
1451 s2=atoi(strtok (NULL,SEPARATORS));
1453 template1=seq2T_value(CL->S,s1, "template_name", "_P_");
1454 template2=seq2T_value(CL->S,s2, "template_name", "_P_");
1457 if (!template1 || !template2) return CL;
1460 declare_name (string1);
1461 declare_name (string2);
1462 declare_name (string3);
1463 declare_name (string4);
1464 declare_name (string5);
1466 #ifndef SAP_4_TCOFFEE
1467 if ( getenv ( "SAP_4_TCOFFEE")==NULL)crash ("SAP_4_TCOFFEE IS NOT DEFINED");
1468 else sprintf ( program, "%s", (getenv ( "SAP_4_TCOFFEE")));
1470 if ( getenv ( "SAP_4_TCOFFEE")==NULL)sprintf (program, "%s", SAP_4_TCOFFEE);
1471 else sprintf ( program, "%s", (getenv ( "SAP_4_TCOFFEE")));
1476 tmp_name1=vcalloc (100, sizeof (char));
1477 sprintf ( tmp_name1, "%s_%s.sap_results",template1,template2);
1478 tmp_name2=vcalloc (100, sizeof (char));
1479 sprintf ( tmp_name2, "%s_%s.sap_results",template2,template1);
1483 if (is_sap_file (tmp_name1))
1487 else if ( is_sap_file (tmp_name2))
1500 tmp_pdb1=normalize_pdb_file(seq2P_template_file(CL->S,s1),(CL->S)->seq[s1], vtmpnam (NULL));
1501 tmp_pdb2=normalize_pdb_file(seq2P_template_file(CL->S,s2),(CL->S)->seq[s2], vtmpnam (NULL));
1502 sprintf ( full_name, "%s%s", get_cache_dir (), tmp_name);
1504 //pb: sap crashes when file names are too long
1505 //solution: create shorter names
1506 //To ease server, create files in tmp dir
1508 getcwd (cdir, sizeof(char)*1000);
1509 chdir (get_cache_dir());
1511 sprintf (local1, "%d_1.%d.sap_tmp", getpid(), rand()%10000);
1512 sprintf (local2, "%d_2.%d.sap_tmp", getpid(), rand()%10000);
1513 printf_system ("cp %s %s", tmp_pdb1, local1);
1514 printf_system ("cp %s %s", tmp_pdb2, local2);
1515 printf_system ("%s %s %s >%s 2>/dev/null::IGNORE_FAILURE::",program,local1,local2, full_name);
1516 printf_system ("rm %s", local1);
1517 printf_system ("rm %s", local2);
1520 if ( !check_file_exists (full_name) || !is_sap_file(full_name))
1522 add_warning ( stderr, "SAP failed to align: %s against %s [%s:WARNING]\n", seq2P_template_file(CL->S,s1),seq2P_template_file(CL->S,s2), PROGRAM);
1523 if ( check_file_exists (full_name))add2file2remove_list (full_name);
1526 if ( flag_file2remove_is_on())add2file2remove_list (full_name);
1527 remove ("super.pdb");
1532 sap_seq1=vcalloc (max_struc_len, sizeof (char));
1533 sap_seq2=vcalloc (max_struc_len, sizeof (char));
1535 fp=find_token_in_file ( tmp_name, NULL, "Percent");
1536 fp=find_token_in_file ( tmp_name, fp , "Percent");
1537 while ( (c=fgetc (fp))!='\n' && c!=EOF)fprintf ( fp, "%c", c);
1538 while ((buf=vfgets (buf, fp)))
1541 if ( !strstr (buf, "eighted") && !strstr (buf, "RMSd"))
1543 remove_charset (buf, "!alnum");
1545 r2=buf[strlen(buf)-1];
1551 if ( tot>max_struc_len)
1552 {max_struc_len+=max_struc_len;
1553 sap_seq1=vrealloc ( sap_seq1, sizeof(char)*max_struc_len);
1554 sap_seq2=vrealloc ( sap_seq2, sizeof(char)*max_struc_len);
1562 if ( is_number (weight))score=atoi(weight);
1563 else if ( strstr ( weight, "OW"))
1566 sscanf ( weight, "OW%d", &ow);
1576 sap_seq1[tot]=sap_seq2[tot]='\0';
1579 fp=vfopen ( sap_lib=vtmpnam(NULL), "w");
1580 fprintf (fp, "! TC_LIB_FORMAT_01\n");
1581 fprintf (fp, "2\n");
1582 fprintf (fp, "%s %d %s\n", (CL->S)->name[s2],(int)strlen (sap_seq1), sap_seq1);
1583 fprintf (fp, "%s %d %s\n", (CL->S)->name[s1],(int)strlen (sap_seq2), sap_seq2);
1584 fprintf (fp, "#1 2\n");
1588 //HERE ("\n%s\n%s\n%s\n\n%s\n%s\n%s", (CL->S)->name[s1],(CL->S)->seq[s1], sap_seq1, (CL->S)->name[s2],(CL->S)->seq[s2], sap_seq2);exit (0);
1589 for ( a=0; a< tot; a++)
1591 fprintf (fp, "%d %d %d 1 0\n", a+1, a+1, score);
1594 fprintf (fp, "! CPU 0\n");
1595 fprintf (fp, "! SEQ_1_TO_N\n");
1598 CL=read_constraint_list_file(CL,sap_lib);
1601 vfree ( string1);vfree ( string2);vfree ( string3);vfree ( string4);vfree ( string5);
1602 vfree (sap_seq1); vfree(sap_seq2);vfree (tmp_name1); vfree(tmp_name2);
1609 Constraint_list *rna_pair (TC_method *M ,
1611 Constraint_list *CL)
1615 char *result, *pdb1, *pdb2, *pdb1_file, *pdb2_file;
1620 char command[10000];
1622 if ( M->executable2[0]=='\0')
1624 fprintf ( stderr, "\nERROR: rna_pair requires a structural alignment method: rna_pair@EP@EXECUTABLE2@<method> [FATAL:%s]\n", PROGRAM);
1625 myexit (EXIT_FAILURE);
1628 sprintf ( seq, "%s", in_seq);
1631 atoi(strtok (seq,SEPARATORS));
1632 s1=atoi(strtok (NULL,SEPARATORS));
1633 s2=atoi(strtok (NULL,SEPARATORS));
1635 pdb1=seq2P_template_file(CL->S,s1);
1636 pdb1_file=vtmpnam (NULL);
1637 sprintf ( command, "extract_from_pdb -infile %s -atom ALL -chain FIRST -nodiagnostic > %s", pdb1, pdb1_file);
1638 my_system (command);
1640 pdb2=seq2P_template_file(CL->S,s2);
1641 pdb2_file=vtmpnam (NULL);
1642 sprintf ( command, "extract_from_pdb -infile %s -atom ALL -chain FIRST -nodiagnostic > %s", pdb2, pdb2_file);
1643 my_system (command);
1645 result=vtmpnam (NULL);
1647 sprintf ( command, "tc_generic_method.pl -mode=rna_pair -method=%s %s%s %s%s %s%s -tmpdir=%s", M->executable2,M->in_flag,pdb1_file, M->in_flag2,pdb2_file,M->out_flag, result, get_tmp_4_tcoffee());
1649 my_system ( command);
1651 F=main_read_aln (result, NULL);
1656 fprintf ( stderr, "\n\trna_pair/%s failed:\n\t%s\n",M->executable2, command);
1660 sprintf ( F->name[0],"%s", (CL->S)->name[s1]);
1661 sprintf ( F->name[1],"%s", (CL->S)->name[s2]);
1662 CL=aln2constraint_list (F, CL, "sim");
1671 /******************************************************************/
1672 /* GENERIC PAIRWISE METHODS */
1675 /******************************************************************/
1679 Constraint_list * best_pair4rna(Job_TC *job)
1686 Constraint_list *PW_CL;
1687 Constraint_list *CL, *RCL;
1690 TC_method *M, *sara_pairM, *proba_pairM;
1696 struct X_template *r1, *r2, *p1, *p2;
1697 static int **blosum;
1699 if (!seq)seq=vcalloc (100, sizeof (char));
1702 M=(job->param)->TCM;
1705 for (a=0; a<S->nseq; a++)ml=MAX(ml, strlen (S->name[a]));
1708 if ( !strm ( retrieve_seq_type(), "RNA") && !strm ( retrieve_seq_type(), "DNA"))
1709 printf_exit (EXIT_FAILURE, stderr, "ERROR: RNA Sequences Only with best4rna_pair [FATAL:%s]\n",PROGRAM);
1712 seq_in=(job->param)->seq_c;
1713 sprintf (seq, "%s", seq_in);
1714 seqlist=string2num_list (seq);
1716 if ( n!=2){fprintf ( stderr, "\nERROR: best_pair can only handle two seq at a time [FATAL]\n");myexit (EXIT_FAILURE);}
1727 PW_CL=((job->param)->TCM)->PW_CL;
1730 if (!blosum)blosum=read_matrice ("blosum62mt");
1732 // id=idscore_pairseq (S->seq[s1], S->seq[s2],-10,-1,blosum, "sim");
1735 proba_pairM=method_file2TC_method(method_name2method_file ("proba_pair"));
1736 proba_pairM->PW_CL=method2pw_cl(proba_pairM, CL);
1738 sara_pairM=method_file2TC_method(method_name2method_file ("sara_pair"));
1739 sara_pairM->PW_CL=method2pw_cl(sara_pairM, CL);
1743 //Avoid Structural Tem
1746 fprintf ( stderr, "\n\t%-*s %-*s: Structure Based Alignment\n", ml,S->name[s1], ml,S->name[s2]);
1747 (job->param)->TCM=sara_pairM;
1751 fprintf ( stderr, "\n\t%-*s %-*s: Direct Sequence Alignment\n", ml,S->name[s1], ml,S->name[s2]);
1752 (job->param)->TCM=proba_pairM;
1762 Constraint_list * best_pair4prot (Job_TC *job)
1769 Constraint_list *PW_CL;
1770 Constraint_list *CL, *RCL;
1773 TC_method *M, *sap_pairM, *proba_pairM;
1779 struct X_template *r1, *r2, *p1, *p2;
1780 static int **blosum;
1782 if (!seq)seq=vcalloc (100, sizeof (char));
1785 M=(job->param)->TCM;
1788 for (a=0; a<S->nseq; a++)ml=MAX(ml, strlen (S->name[a]));
1791 if ( strm ( retrieve_seq_type(), "DNA") ||strm ( retrieve_seq_type(), "RNA") )printf_exit (EXIT_FAILURE, stderr, "ERROR: Protein Sequences Only with bestprot_pair [FATAL:%s]\n",PROGRAM);
1794 seq_in=(job->param)->seq_c;
1795 sprintf (seq, "%s", seq_in);
1796 seqlist=string2num_list (seq);
1798 if ( n!=2){fprintf ( stderr, "\nERROR: best_pair can only handle two seq at a time [FATAL]\n");myexit (EXIT_FAILURE);}
1809 PW_CL=((job->param)->TCM)->PW_CL;
1812 if (!blosum)blosum=read_matrice ("blosum62mt");
1814 id=idscore_pairseq (S->seq[s1], S->seq[s2],-10,-1,blosum, "sim");
1817 proba_pairM=method_file2TC_method(method_name2method_file ("proba_pair"));
1818 proba_pairM->PW_CL=method2pw_cl(proba_pairM, CL);
1820 sap_pairM=method_file2TC_method(method_name2method_file ("sap_pair"));
1821 sap_pairM->PW_CL=method2pw_cl(sap_pairM, CL);
1828 fprintf ( stderr, "\n\t%-*s %-*s: Direct Sequence Alignment\n", ml,S->name[s1], ml,S->name[s2]);
1829 (job->param)->TCM=proba_pairM;
1833 //Avoid Structural Tem
1836 fprintf ( stderr, "\n\t%-*s %-*s: Structure Based Alignment\n", ml,S->name[s1], ml,S->name[s2]);
1837 (job->param)->TCM=sap_pairM;
1841 fprintf ( stderr, "\n\tt%-*s %-*s: PSIBLAST Profile Alignment\n", ml,S->name[s1], ml,S->name[s2]);
1842 (job->param)->TCM=proba_pairM;
1846 fprintf ( stderr, "\n\t%-*s %-*s: Direct Sequence Alignment (No Profile)\n", ml,S->name[s1], ml,S->name[s2]);
1847 (job->param)->TCM=proba_pairM;
1858 Alignment * fast_pair (Job_TC *job)
1866 Constraint_list *PW_CL;
1867 Constraint_list *CL;
1876 M=(job->param)->TCM;
1877 PW_CL=((job->param)->TCM)->PW_CL;
1879 seq_in=(job->param)->seq_c;
1882 sprintf (seq, "%s", seq_in);
1883 seqlist=string2num_list (seq);
1885 if ( n!=2){fprintf ( stderr, "\nERROR: fast_pw_aln can only handle two seq at a time [FATAL]\n");myexit (EXIT_FAILURE);}
1889 if (!A) {A=declare_aln (CL->S);}
1892 ns=vcalloc ( 2, sizeof (int));
1893 l_s=declare_int (2,(CL->S)->nseq);
1895 buf=vcalloc ( S->nseq, sizeof (char*));
1897 for ( a=0; a< n; a++)
1900 if ( strm (M->seq_type, "G"))
1903 S->seq[s]=((((S->T[s])->G)->VG)->S)->seq[0];
1909 sprintf ( A->seq_al[a], "%s",S->seq[s]);
1910 sprintf ( A->name[a], "%s", (CL->S)->name[s]);
1922 //Preprocessing of the sequences
1923 if (PW_CL->reverse_seq)
1925 invert_string2(A->seq_al[0]);
1926 invert_string2(A->seq_al[1]);
1927 invert_string2 ((CL->S)->seq[A->order[0][0]]);
1928 invert_string2 ((CL->S)->seq[A->order[1][0]]);
1930 if (PW_CL->extend_seq)//use te alphabet extension for nucleic acids
1933 extend_seqaln (A->S,NULL);
1934 extend_seqaln (NULL,A);
1937 score=pair_wise ( A, ns, l_s, PW_CL);
1938 //PostProcessing of the sequences
1939 if (PW_CL->reverse_seq)
1942 invert_string2(A->seq_al[0]);
1943 invert_string2(A->seq_al[1]);
1944 invert_string2 ((CL->S)->seq[A->order[0][0]]);
1945 invert_string2 ((CL->S)->seq[A->order[1][0]]);
1947 if (PW_CL->extend_seq)
1949 unextend_seqaln (A->S,NULL);
1950 unextend_seqaln (NULL,A);
1954 for ( a=0; a<S->nseq; a++)
1956 if ( !buf[a] || buf[a]==S->seq[a]);
1957 else S->seq[a]=buf[a];
1959 vfree (buf);vfree (seqlist);
1963 Alignment * align_two_aln ( Alignment *A1, Alignment *A2, char *in_matrix, int gop, int gep, char *in_align_mode)
1966 Constraint_list *CL;
1971 static char *matrix;
1972 static char *align_mode;
1974 if (!matrix)matrix=vcalloc ( 100, sizeof (char));
1975 if (!align_mode)align_mode=vcalloc ( 100, sizeof (char));
1979 sprintf ( matrix, "%s", in_matrix);
1980 sprintf ( align_mode, "%s", in_align_mode);
1982 CL=vcalloc ( 1, sizeof (Constraint_list));
1983 CL->pw_parameters_set=1;
1984 CL->M=read_matrice (matrix);
1985 CL->matrices_list=declare_char (10, 10);
1987 CL->evaluate_residue_pair=evaluate_matrix_score;
1988 CL->get_dp_cost=consensus_get_dp_cost;
1996 sprintf (CL->matrix_for_aa_group, "vasiliky");
1997 CL->use_fragments=0;
1999 if ( !CL->use_fragments)CL->diagonal_threshold=0;
2000 else CL->diagonal_threshold=6;
2002 sprintf (CL->dp_mode, "%s", align_mode);
2005 A=stack_aln (A, A2);
2006 CL->S=fill_sequence_struc(A->nseq, A->seq_al,A->name);
2008 ns=vcalloc ( 2, sizeof(int));
2009 ls=declare_int ( 2,A->nseq);
2012 for ( a=0; a<ns[0]; a++)
2014 for ( a=0; a< ns[1]; a++)
2015 ls[1][a]=a+A1->nseq;
2017 A->score_aln=pair_wise (A, ns, ls,CL);
2021 S=free_constraint_list (CL);
2022 free_sequence (S,-1);
2027 static int align_two_seq_keep_case;
2028 void toggle_case_in_align_two_sequences(int value)
2030 align_two_seq_keep_case=value;
2032 Alignment * align_two_sequences ( char *seq1, char *seq2, char *in_matrix, int gop, int gep, char *in_align_mode)
2034 static Alignment *A;
2035 Constraint_list *CL;
2043 static char *matrix;
2046 static char *align_mode;
2048 if (!matrix)matrix=vcalloc ( 100, sizeof (char));
2049 if (!align_mode)align_mode=vcalloc ( 100, sizeof (char));
2050 sprintf ( align_mode, "%s", in_align_mode);
2052 CL=vcalloc ( 1, sizeof (Constraint_list));
2054 CL->pw_parameters_set=1;
2056 CL->matrices_list=declare_char (10, 10);
2059 if ( !strm (matrix, in_matrix))
2061 sprintf ( matrix,"%s", in_matrix);
2062 M=CL->M=read_matrice (matrix);
2070 if (strstr (in_align_mode, "cdna"))
2071 CL->evaluate_residue_pair=evaluate_cdna_matrix_score;
2073 CL->evaluate_residue_pair=evaluate_matrix_score;
2075 CL->get_dp_cost=get_dp_cost;
2081 sprintf (CL->matrix_for_aa_group, "vasiliky");
2082 CL->use_fragments=0;
2084 if ( !CL->use_fragments)CL->diagonal_threshold=0;
2085 else CL->diagonal_threshold=6;
2087 sprintf (CL->dp_mode, "%s", align_mode);
2089 seq_array=declare_char ( 2, MAX(strlen(seq1), strlen (seq2))+1);
2090 sprintf (seq_array[0], "%s",seq1);
2091 sprintf (seq_array[1],"%s", seq2);
2092 ungap_array(seq_array,2);
2093 if (align_two_seq_keep_case !=KEEP_CASE)string_array_lower(seq_array,2);
2095 name_array=declare_char (2, STRING);
2096 sprintf ( name_array[0], "A");
2097 sprintf ( name_array[1], "B");
2100 ns=vcalloc ( 2, sizeof(int));
2101 l_s=declare_int ( 2, 1);
2108 CL->S=fill_sequence_struc(2, seq_array, name_array);
2110 A=seq2aln(CL->S, NULL, 1);
2112 ungap (A->seq_al[0]);
2113 ungap (A->seq_al[1]);
2117 A->score_aln=pair_wise (A, ns, l_s,CL);
2121 free_char (name_array, -1);free_char ( seq_array,-1);
2124 S=free_constraint_list (CL);
2125 free_sequence (S,-1);
2131 NT_node make_root_tree ( Alignment *A,Constraint_list *CL,int gop, int gep,Sequence *S, char *tree_file,int maximise)
2134 T=make_tree (A, CL, gop, gep,S,tree_file,maximise);
2135 (T[3][0])->nseq=S->nseq;
2138 NT_node ** make_tree ( Alignment *A,Constraint_list *CL,int gop, int gep,Sequence *S, char *tree_file,int maximise)
2144 char **out_seq_name;
2148 if ( !CL || !CL->tree_mode || !CL->tree_mode[0])
2150 fprintf ( stderr, "\nERROR: No CL->tree_mode specified (make_tree::util_dp_drivers.c [FATAL:%s]", PROGRAM);
2151 myexit (EXIT_FAILURE);
2154 fprintf (CL->local_stderr , "\nMAKE GUIDE TREE \n\t[MODE=%s][",CL->tree_mode);
2162 fprintf (CL->local_stderr, "---Two Sequences Only: Make Dummy Pair-Tree ---]");
2164 fp=vfopen (tmp,"w");
2165 fprintf ( fp, "(%s:0.1, %s:0.1):0.1;\n",S->name[0], S->name[1]);
2167 T=read_tree (tmp, &tot_node, (CL->S)->nseq,(CL->S)->name);
2171 else if ( strm (CL->tree_mode, "cwph"))
2173 return seq2cw_tree ( S, tree_file);
2176 else if (strm ( CL->tree_mode, "upgma") || strm ( CL->tree_mode, "nj"))
2179 out_seq_name=S->name;
2182 CL->DM=cl2distance_matrix (CL, NOALN,NULL,NULL,0);
2186 /*Shrink the distance matrix so that it only contains the required sequences*/
2187 distances=declare_int (S->nseq, S->nseq);
2188 for (a=0; a< S->nseq; a++)
2190 ra=name_is_in_list ((S)->name[a],(CL->S)->name, (CL->S)->nseq, 100);
2191 for ( b=0; b< S->nseq; b++)
2193 rb=name_is_in_list ((S)->name[b],(CL->S)->name, (CL->S)->nseq, 100);
2194 distances[a][b]=(CL->DM)->score_similarity_matrix[ra][rb];
2200 distances=duplicate_int ( (CL->DM)->score_similarity_matrix, -1, -1);
2205 distances=sim_array2dist_array (distances, MAXID*SCORE_K);
2206 distances=normalize_array (distances, MAXID*SCORE_K, 100);
2207 if ( strm (CL->tree_mode, "order"))
2209 for ( a=0; a< S->nseq; a++)
2210 for ( b=0; b< S->nseq; b++)
2211 distances[b][a]=100;
2212 T=make_nj_tree (A,distances,gop,gep,out_seq,out_seq_name,out_nseq, tree_file, CL->tree_mode);
2214 else if ( strm (CL->tree_mode, "nj"))
2216 T=make_nj_tree (A,distances,gop,gep,out_seq,out_seq_name,out_nseq, tree_file, CL->tree_mode);
2218 else if ( strm (CL->tree_mode, "upgma"))
2219 T=make_upgma_tree (A,distances,gop,gep,out_seq,out_seq_name,out_nseq, tree_file, CL->tree_mode);
2222 printf_exit (EXIT_FAILURE, stderr, "ERROR: %s is an unknown tree computation mode [FATAL:%s]", CL->tree_mode, PROGRAM);
2224 free_int (distances, out_nseq);
2228 fprintf (CL->local_stderr , "DONE]\n");
2233 Alignment *recompute_local_aln (Alignment *A, Sequence *S,Constraint_list *CL, int scale, int gep)
2239 sort_constraint_list (CL, 0, CL->ne);
2240 coor=declare_int (A->nseq, 3);
2241 for ( a=0; a< A->nseq; a++)
2243 coor[a][0]=A->order[a][0];
2244 coor[a][1]=A->order[a][1]+1;
2245 coor[a][2]=strlen(S->seq[A->order[a][0]])-coor[a][1];
2247 B=stack_progressive_nol_aln_with_seq_coor(CL,0,0,S,coor,A->nseq);
2255 Alignment *stack_progressive_nol_aln_with_seq_coor(Constraint_list *CL,int gop, int gep,Sequence *S, int **seq_coor, int nseq)
2258 static int ** local_coor1;
2259 static int ** local_coor2;
2260 if ( local_coor1!=NULL)free_int (local_coor1, -1);
2261 if ( local_coor2!=NULL)free_int (local_coor2, -1);
2263 local_coor1=get_nol_seq ( CL,seq_coor, nseq, S);
2264 local_coor2=minimise_repeat_coor ( local_coor1, nseq, S);
2266 return stack_progressive_aln_with_seq_coor(CL,gop, gep,S, local_coor2,nseq);
2270 Alignment *stack_progressive_aln_with_seq_coor (Constraint_list*CL,int gop, int gep, Sequence *S, int **coor, int nseq)
2274 A=seq_coor2aln (S,NULL, coor, nseq);
2276 return stack_progressive_aln ( A,CL, gop, gep);
2279 Alignment *est_progressive_aln(Alignment *A, Constraint_list *CL, int gop, int gep)
2285 n_groups=vcalloc ( 2, sizeof (int));
2286 group_list=declare_int ( 2, A->nseq);
2296 fprintf ( stderr, "\n");
2297 for ( a=1; a<n; a++)
2299 sprintf ( A->seq_al[1], "%s", A->seq_al[a]);
2300 fprintf ( stderr, "\t[%30s]->[len=%5d]", A->name[a],(int)strlen ( A->seq_al[0]));
2301 pair_wise ( A,n_groups, group_list, CL);
2303 seq=dna_aln2cons_seq(A);
2305 sprintf ( A->seq_al[0], "%s", seq);
2307 fprintf ( stderr, "\n");
2314 void analyse_seq ( Alignment *A, int s)
2326 for ( a=0; a< A->len_aln; a++)
2328 for ( b=0, c=0; b< s; b++)
2329 if ( !is_gap(A->seq_al[b][a])){c=1; break;}
2331 r=!is_gap(A->seq_al[s][a]);
2333 if ( r && c) state=1;
2334 else if ( !r && !c) state=2;
2335 else if ( !r && c) state=3;
2336 else if ( r && !c) state=4;
2338 if ( state !=pstate)
2346 score=score/(float)(((A->S)->len[s]*(A->S)->len[s]));
2347 fprintf ( stderr, "[%.2f]", score);
2352 Alignment *realign_aln ( Alignment*A, Constraint_list *CL)
2356 A=reorder_aln (A, (CL->S)->name,(CL->S)->nseq);
2358 ns=vcalloc (2, sizeof(int));
2359 ls=declare_int ( 2, A->nseq);
2361 for (a=0; a< A->nseq; a++)
2364 for (c=0,b=0; b<A->nseq; b++)if (b!=a)ls[0][c++]=b;
2365 ungap_sub_aln ( A, ns[0], ls[0]);
2369 ungap_sub_aln ( A, ns[1], ls[1]);
2370 A->score_aln=pair_wise (A, ns, ls,CL);
2373 vfree (ns); free_int (ls, -1);
2377 Alignment *realign_aln_random_bipart ( Alignment*A, Constraint_list *CL)
2384 ns=vcalloc (2, sizeof (int));
2385 l_s=declare_int (2,A->nseq);
2387 for ( a=0; a< A->nseq; a++)
2393 fprintf ( stderr, "\n");
2394 ungap_sub_aln ( A, ns[0], l_s[0]);
2395 ungap_sub_aln ( A, ns[1], l_s[1]);
2399 for (b=0; b<ns[a]; b++)
2400 fprintf ( stderr, "[%d-%d]", a, l_s[a][b]);
2402 A->score_aln=pair_wise (A, ns, l_s,CL);
2404 vfree(ns);free_int(l_s, -1);
2407 Alignment *realign_aln_random_bipart_n ( Alignment*A, Constraint_list *CL, int n)
2415 if (n>=A->nseq)n=A->nseq/2;
2416 used=vcalloc (A->nseq, sizeof (int));
2421 if (!used[p]){used[p]=1;c++;}
2423 ns=vcalloc (2, sizeof (int));
2424 ls=declare_int (2,A->nseq);
2427 for (b=0; b<A->nseq; b++)
2428 if (used[b]==a)ls[a][ns[a]++]=b;
2430 ungap_sub_aln ( A, ns[0], ls[0]);
2431 ungap_sub_aln ( A, ns[1], ls[1]);
2434 A->score_aln=pair_wise (A, ns, ls,CL);
2435 vfree(ns);free_int(ls, -1);vfree (used);
2438 int ** seq2ecl_mat (Constraint_list *CL);
2439 int ** seq2ecl_mat (Constraint_list *CL)
2448 ns=vcalloc (2, sizeof (int));
2449 ls=declare_int ((CL->S)->nseq, 2);
2451 A=seq2aln (CL->S,NULL, RM_GAP);
2453 dm=declare_int (n, n);
2454 for (a=0; a<(CL->S)->nseq-1; a++)
2455 for (b=a+1; b<(CL->S)->nseq; b++)
2460 ungap (A->seq_al[a]);
2461 ungap (A->seq_al[b]);
2462 dm[a][b]=dm[b][a]=linked_pair_wise (A, ns, ls, CL);
2467 Alignment *realign_aln_clust ( Alignment*A, Constraint_list *CL)
2473 static int **rm, **dm, **target;
2480 free_int (dm, -1); free_int (rm, -1);free_int (target, -1);
2485 if (!rm)rm=seq2ecl_mat(CL);
2486 if (!dm)dm=declare_int (A->nseq, A->nseq);
2487 if (!target)target=declare_int (A->nseq*A->nseq, 3);
2489 ns=vcalloc (2, sizeof (int));
2490 ls=declare_int (2,A->nseq);
2493 for (a=0; a<A->nseq-1; a++)
2494 for (b=a+1; b<A->nseq; b++)
2499 score=sub_aln2ecl_raw_score (A, CL, ns[0], ls[0]);
2500 dm[a][b]=dm[b][a]=MAX(0,(rm[a][b]-score));
2502 for (n=0,a=0; a<A->nseq; a++)
2504 for (b=a; b<A->nseq; b++, n++)
2509 for ( c=0; c<A->nseq; c++)
2511 if (c!=a && c!=b)target[n][2]+=dm[a][c]+dm[b][c];
2515 sort_int_inv (target,3, 2, 0, n-1);
2517 for (a=0; a<A->nseq; a++)
2519 if (target[a][0]==target[a][1])
2522 ls[0][0]=target[a][0];
2527 ls[0][0]=target[a][0]; ls[0][1]=target[a][1];
2530 for (ns[1]=0,b=0; b<A->nseq; b++)
2532 if (b!=target[a][0] && b!=target[a][1])ls[1][ns[1]++]=b;
2535 ungap_sub_aln (A, ns[0], ls[0]);
2536 ungap_sub_aln (A, ns[1], ls[1]);
2538 A->score_aln=pair_wise (A, ns, ls,CL);
2539 fprintf ( stderr, "\nSEQ: %d %d SCORE=%d\n",target[a][0],target[a][1], aln2ecl_raw_score(A, CL));
2544 int get_best_group ( int **used, Constraint_list *CL);
2545 int seq_aln_thr1(Alignment *A, int **used, int threshold, Constraint_list *CL);
2546 int seq_aln_thr2( Alignment*A, int **used, int threshold, int g, Constraint_list *CL);
2548 int get_best_group ( int **used, Constraint_list *CL)
2550 int a,b,c,d,n, tot,stot, best_tot, best_seq, nseq;
2555 nseq=((CL->S)->nseq);
2557 for (a=0; a<nseq; a++)
2559 if ( used[a][0]==-1)continue;
2560 for ( tot=0,b=0; b< nseq; b++)
2562 if ( a==b) continue;
2563 if ( used[b][0]==-1)continue;
2568 for (stot=0, n=0,c=0; c<ns[0]; c++)
2569 for (d=0; d<ns[1]; d++, n++)
2571 stot+=(CL->DM)->similarity_matrix[ls[0][c]][ls[1][d]];
2587 Alignment * seq2aln_group (Alignment *A, int N, Constraint_list *CL)
2593 char *list, **list2;
2597 fprintf (CL->local_stderr, "\n##### DPA ##### Compute Fast Alignment");
2598 A=iterative_tree_aln (A,1, CL);
2599 fprintf (CL->local_stderr, "\n##### DPA ##### Identify Nodes");
2600 P=make_root_tree (A, CL, CL->gop, CL->gep,CL->S,NULL, 1);
2601 set_node_score (A, P, "idmat_sim");
2602 fprintf (CL->local_stderr, "\n##### DPA ##### Split Nodes");
2603 list=split_nodes_nseq (A,P,N, list=vcalloc (P->nseq*200, sizeof (char)));
2605 list2=string2list (list);
2606 fprintf (CL->local_stderr, "\n##### DPA ##### Save Nodes");
2609 for (a=1; a<atoi (list2[0]); a++)
2611 A->A=main_read_aln(list2[a], NULL);
2614 fprintf (CL->local_stderr, "\n##### DPA ##### Finished");
2615 vfree (list); free_char (list2, -1);
2629 Alignment * seq_aln ( Alignment*A, int n,Constraint_list *CL)
2632 int **used, a, t,n1, nseq;
2635 n1=nseq=(CL->S)->nseq;
2636 used=declare_int (nseq, nseq+3);
2639 for (a=0; a< nseq; a++)
2646 for (t=50; t>=0 && nseq>1; t-=5)
2648 nseq=seq_aln_thr1 (A, used,t, CL);
2655 int seq_aln_thrX(Alignment *A, int **used, int threshold, Constraint_list *CL)
2658 seq_aln_thr1(A,used,threshold,CL);
2659 for ( a=0; a< (CL->S)->nseq; a++)
2660 n+=(used[a][1]>0)?1:0;
2664 int seq_aln_thr1(Alignment *A, int **used, int threshold, Constraint_list *CL)
2666 int a,g, nseq, n_groups;
2669 g=get_best_group(used, CL);
2675 while ( seq_aln_thr2 (A, used, threshold,g, CL)!=0)
2677 g=get_best_group (used, CL);
2681 for (n_groups=0,a=0; a< nseq; a++)
2691 int seq_aln_thr2( Alignment*A, int **used, int threshold, int g, Constraint_list *CL)
2695 int nseq, n_members;
2700 nseq=((CL->S)->nseq);
2705 for ( a=0; a< nseq; a++)
2713 ungap_sub_aln (A, ns[0], ls[0]);
2714 ungap_sub_aln (A, ns[1], ls[1]);
2716 A->score_aln=pair_wise (A, ns, ls,CL);
2718 for (sim=0,b=0; b<ns[0]; b++)
2720 for (c=0; c<ns[1]; c++)
2722 sim+=generic_get_seq_sim (A->seq_al[ls[0][b]], A->seq_al[ls[1][c]], NULL,"idmat_sim2");
2725 sim/=(double)(ns[0]*ns[1]);
2730 for (d=0; d<ns[1]; d++)
2731 ls[0][ns[0]++]=ls[1][d];
2741 if (n_members>0)used[g][0]=-1;
2744 /****************************************************************************/
2747 /* Alignment Methods */
2750 /****************************************************************************/
2752 Alignment * tsp_aln (Alignment *A, Constraint_list *CL, Sequence *S)
2759 A=reorder_aln (A, (CL->S)->name,(CL->S)->nseq);
2760 ns=vcalloc (2, sizeof (int));
2761 ls=declare_int (2, (CL->S)->nseq);
2762 used=declare_int ( A->nseq, 2);
2765 CL->DM=cl2distance_matrix (CL, NOALN,NULL,NULL,0);
2766 distances=declare_int (A->nseq+1, A->nseq+1);
2767 distances=duplicate_int ( (CL->DM)->score_similarity_matrix, -1, -1);
2769 for (a=0; a< A->nseq; a++)
2772 for (b=0; b< A->nseq; b++)
2774 used[a][1]+=distances[a][b];
2778 sort_int_inv (used,2,1,0,(CL->S)->nseq-1);
2780 ls[0][ns[0]++]=used[0][0];
2783 for (a=1; a< S->nseq; a++)
2785 fprintf ( stderr, "\n%s %d", (CL->S)->name[used[a][0]], used[a][1]);
2786 ls[1][0]=used[a][0];
2787 pair_wise ( A,ns,ls, CL);
2788 ls[0][ns[0]++]=used[a][0];
2791 A->nseq=(CL->S)->nseq;
2796 Alignment *stack_progressive_aln(Alignment *A, Constraint_list *CL, int gop, int gep)
2804 sprintf ( dp_mode, "%s", CL->dp_mode);
2805 sprintf (CL->dp_mode, "gotoh_pair_wise");
2807 n_groups=vcalloc ( 2, sizeof (int));
2808 group_list=declare_int ( 2, A->nseq);
2812 for ( a=0; a<n; a++)ungap(A->seq_al[a]);
2813 for ( a=1; a<n; a++)
2817 group_list[0][a-1]=a-1;
2818 group_list[1][0] =a;
2820 pair_wise ( A,n_groups, group_list, CL);
2821 fprintf ( stderr, "\n\t[%d]->[%d]", a,(int)strlen ( A->seq_al[0]));
2823 fprintf (stderr, "\n");
2825 free_int ( group_list, -1);
2826 sprintf (CL->dp_mode, "%s",dp_mode);
2830 Alignment *realign_aln_clust ( Alignment*A, Constraint_list *CL);
2831 Alignment *realign_aln_random_bipart_n ( Alignment*A, Constraint_list *CL, int n);
2832 Alignment *iterate_aln ( Alignment*A, int nit, Constraint_list *CL)
2836 int score, iscore, delta;
2837 fprintf ( CL->local_stderr, "Iterated Refinement: %d cycles START: score= %d\n", nit,iscore=aln2ecl_raw_score (A, CL) );
2840 if ( nit==-1)nit=A->nseq*2;
2841 if ( A->len_aln==0)A=very_fast_aln (A, A->nseq, CL);
2842 A=reorder_aln (A,(CL->S)->name, A->nseq);
2844 for (it=0; it< nit; it++)
2846 //CL->local_stderr=output_completion (CL->local_stderr,it, nit,1, "");
2847 if (mode==0)A=realign_aln (A, CL);
2848 else if (mode ==1)A=realign_aln_random_bipart (A, CL);
2849 else if (mode ==2)A=realign_aln_clust (A, CL);
2850 else if (mode ==3)A=realign_aln_random_bipart_n (A, CL,2);
2853 score=aln2ecl_raw_score (A, CL);
2855 fprintf (CL->local_stderr, "\n\tIteration Cycle: %d Score=%d Improvement= %d", it+1,score, delta);
2857 fprintf ( CL->local_stderr, "\nIterated Refinement: Completed Improvement=%d\n", delta);
2861 int get_next_best (int seq, int nseq, int *used, int **dm);
2862 int get_next_best (int seq, int nseq, int *used, int **dm)
2864 int a,set, d, bd, bseq;
2866 for (set=0,a=0; a< nseq; a++)
2868 if (used[a] || seq==a)continue;
2879 Alignment * full_sorted_aln (Alignment *A, Constraint_list *CL)
2882 A=sorted_aln_seq (0, A, CL);
2884 for (a=1; a<A->nseq; a++)
2886 A=A->A=copy_aln (A, NULL);
2887 for (b=0; b<A->nseq; b++)ungap(A->seq_al[b]);
2888 A=sorted_aln_seq (a, A, CL);
2893 Alignment * sorted_aln (Alignment *A, Constraint_list *CL)
2895 return sorted_aln_seq (-1, A, CL);
2897 Alignment * sorted_aln_seq (int new_seq, Alignment *A, Constraint_list *CL)
2900 int *ns, **ls, **score, *used, **dm;
2903 dm=(CL->DM)->score_similarity_matrix;
2905 score=declare_int (nseq, 3);
2906 used=vcalloc (nseq, sizeof (int));
2907 ls=declare_int (2, nseq);
2908 ns=vcalloc (2, sizeof (int));
2913 for (a=0; a<nseq; a++)
2917 for ( b=0; b<nseq; b++)
2918 score[a][2]+=dm[a][b];
2920 sort_int ( score,3, 2, 0, nseq-1);
2921 old_seq=new_seq=score[nseq-1][0];
2924 for (a=1; a< nseq; a++)
2927 ls[0][ns[0]++]=new_seq;
2929 ls[1][0]=get_next_best(new_seq,nseq, used,dm);
2933 A->score_aln=pair_wise (A, ns, ls,CL);
2939 Alignment * ungap_aln4tree (Alignment *A);
2940 Alignment * ungap_aln4tree (Alignment *A)
2942 int t, n, max_sim, sim;
2952 B=copy_aln (A, NULL);
2953 B=ungap_aln_n(B, n);
2956 sim=aln2sim (B, "idmat");
2957 while (B->len_aln<t && sim>max_sim && n>0)
2961 B=ungap_aln_n(B, n);
2962 sim=aln2sim (B, "idmat");
2964 if ( B->len_aln<t && sim>max_sim)B=copy_aln (A, B);
2970 Alignment * iterative_tree_aln (Alignment *A,int n, Constraint_list *CL)
2975 T=make_tree (A, CL, CL->gop, CL->gep,CL->S,NULL, 1);
2976 tree_aln ((T[3][0])->left,(T[3][0])->right,A,(CL->S)->nseq, CL);
2977 for ( a=0; a< n; a++)
2982 B=copy_aln (A, NULL);
2983 B=ungap_aln_n (B, 20);
2984 sprintf ( CL->distance_matrix_mode, "aln");
2986 CL->DM=cl2distance_matrix ( CL,B,NULL,NULL, 1);
2990 T=make_tree (A, CL, CL->gop, CL->gep,CL->S,NULL, 1);
2992 tree_aln ((T[3][0])->left,(T[3][0])->right,A,(CL->S)->nseq, CL);
2997 Alignment *profile_aln (Alignment *A, Constraint_list *CL)
3003 ls=declare_int (2, nseq2);
3004 ns=vcalloc (2, sizeof (int));
3006 A=realloc_aln2(A,nseq2, A->len_aln);
3007 for (a=0; a< nseq; a++)
3009 for ( a=0; a<nseq;a++)
3011 sprintf (A->seq_al[a+nseq], "%s", (CL->S)->seq[a]);
3012 sprintf (A->name[a+nseq], "%s", (CL->S)->name[a]);
3013 A->order[a+nseq][0]=a;
3017 for (a=0; a<nseq; a++)
3021 A->score_aln=pair_wise (A, ns, ls,CL);
3023 ls[0][ns[0]++]=a+nseq;
3025 for (a=0; a< nseq; a++)
3027 sprintf (A->seq_al[a], "%s", A->seq_al[a+nseq]);
3033 Alignment * iterative_aln ( Alignment*A, int n,Constraint_list *CL)
3035 int *ns,**ls, **score, **dm;
3037 ls=declare_int (2, A->nseq);
3038 ns=vcalloc (2, sizeof (int));
3046 score=declare_int (nseq,2);
3047 dm=(CL->DM)->score_similarity_matrix;
3048 for (a=0; a<nseq; a++)
3051 for ( b=0; b<nseq; b++)
3052 score[a][1]+=dm[a][b];
3055 sort_int ( score,2, 1, 0, nseq-1);
3058 for (a=0; a<max; a++)
3061 for (ns[0]=0,b=0; b<nseq; b++)
3062 if (b!=score[a][0])ls[0][ns[0]++]=b;
3064 fprintf (stderr, "[%s %s %d]",(CL->S)->name[score[a][0]],A->name[score[a][0]], score[a][1]);
3066 ls[1][0]=score[a][0];
3067 ungap_sub_aln ( A, ns[0], ls[0]);
3068 ungap_sub_aln ( A, ns[1], ls[1]);
3069 A->score_aln=pair_wise (A, ns, ls,CL);
3075 Alignment *simple_progressive_aln (Sequence *S, NT_node **T, Constraint_list *CL, char *mat)
3081 A=seq2aln (S, NULL, RM_GAP);
3086 CL=declare_constraint_list (S, NULL, NULL, 0, NULL, NULL);
3087 sprintf ( CL->dp_mode, "myers_miller_pair_wise");
3088 sprintf ( CL->tree_mode, "nj");
3089 sprintf ( CL->distance_matrix_mode, "idscore");
3090 CL=choose_extension_mode ("matrix", CL);
3093 if (mat)CL->M=read_matrice (mat);
3094 CL->pw_parameters_set=1;
3095 CL->local_stderr=stderr;
3098 if ( !T)T=make_tree (A, CL, CL->gop, CL->gep,S, NULL,MAXIMISE);
3099 for ( a=0; a< A->nseq; a++)ungap (A->seq_al[a]);
3101 tree_aln ((T[3][0])->left,(T[3][0])->right,A,(CL->S)->nseq, CL);
3102 A=reorder_aln ( A,A->tree_order,A->nseq);
3107 Alignment *very_fast_aln ( Alignment*A, int nseq, Constraint_list *CL)
3109 char command[10000];
3114 if ( CL && CL->local_stderr)fp=CL->local_stderr;
3117 fprintf (fp, "\n[Computation of an Approximate MSA...");
3118 tmp_seq= vtmpnam (NULL);
3119 tmp_aln= vtmpnam (NULL);
3120 output_fasta_seq ((tmp_seq=vtmpnam (NULL)), A);
3121 sprintf ( command, "t_coffee -infile=%s -special_mode quickaln -outfile=%s %s -outorder=input", tmp_seq, tmp_aln, TO_NULL_DEVICE);
3122 my_system ( command);
3124 A=main_read_aln (tmp_aln,A);
3125 fprintf (fp, "]\n");
3129 static NT_node* SNL;
3130 NT_node* tree_aln ( NT_node LT, NT_node RT, Alignment*A, int nseq, Constraint_list *CL)
3137 if ( strm ((CL->TC)->use_seqan, "NO"))
3141 if (!tmp)tmp=vtmpnam(NULL);
3142 if ( CL && CL->dp_mode && strstr (CL->dp_mode, "collapse"))dump_constraint_list (CL, tmp, "w");
3143 T=local_tree_aln (LT, RT, A, nseq, CL);
3145 if ( CL && CL->dp_mode && strstr (CL->dp_mode, "collapse"))
3147 empty_constraint_list (CL);
3148 undump_constraint_list (CL, tmp);
3153 else return seqan_tree_aln (LT, RT, A, nseq, CL);
3157 NT_node* seqan_tree_aln ( NT_node LT, NT_node RT, Alignment*A, int nseq, Constraint_list *CL)
3164 char *tree, *lib, *seq, *new_aln;
3168 tree=vtmpnam (NULL);
3169 print_newick_tree (LT->parent, tree);
3173 main_output_fasta_seq (seq=vtmpnam (NULL),B=seq2aln (CL->S,NULL,RM_GAP), NO_HEADER);
3177 new_aln=vtmpnam (NULL);
3178 vfclose (save_constraint_list ( CL, 0, CL->ne,lib=vtmpnam(NULL), NULL, "ascii",CL->S));
3180 fprintf (CL->local_stderr, "\n********* USE EXTERNAL ALIGNER: START:\n\tCOMMAND: %s -lib %s -seq %s -usetree %s -outfile %s\n", (CL->TC)->use_seqan,lib, seq, tree, new_aln);
3181 printf_system ( "%s -lib %s -seq %s -usetree %s -outfile %s", (CL->TC)->use_seqan,lib, seq, tree, new_aln);
3182 fprintf (CL->local_stderr, "\n********* USE EXTERNAL ALIGNER: END\n");
3185 main_read_aln (new_aln, A);
3186 return tree2ao (LT,RT, A, A->nseq, CL);
3191 NT_node rec_local_tree_aln ( NT_node P, Alignment*A, Constraint_list *CL, int print);
3192 NT_node* local_tree_aln ( NT_node l, NT_node r, Alignment*A,int nseq, Constraint_list *CL)
3198 if (!r && !l) return NULL;
3203 fprintf ( CL->local_stderr, "\nPROGRESSIVE_ALIGNMENT [Tree Based]\n");
3204 for ( a=0; a<nseq; a++)fprintf (CL->local_stderr,"Group %4d: %s\n",a+1, A->name[a]);
3205 //1: make sure the Alignment and the Sequences are labeled the same way
3206 if (CL->translation)vfree (CL->translation);
3207 CL->translation=vcalloc ( (CL->S)->nseq, sizeof (int));
3208 for ( a=0; a< (CL->S)->nseq; a++)
3209 CL->translation[a]=name_is_in_list ( (CL->S)->name[a], (CL->S)->name, (CL->S)->nseq, MAXNAMES);
3210 A=reorder_aln (A, (CL->S)->name,(CL->S)->nseq);
3211 A->nseq=(CL->S)->nseq;
3213 //2 Make sure the tree is in the same order
3214 recode_tree (P, (CL->S));
3216 initialize_scoring_scheme (CL);
3218 if ( get_nproc()>1 && strstr (CL->multi_thread, "msa"))
3222 max_fork=get_nproc()/2;//number of nodes forked, one node =>two jobs
3224 NL=tree2node_list (P, NULL);
3225 min=declare_int (P->node+1,3);
3226 for (a=0; a<=P->node; a++)
3232 else if (N && N->nseq==1)min[a][1]=0;
3235 min[a][1]=MIN(((N->left)->nseq),((N->right)->nseq))*A->nseq+MAX(((N->left)->nseq),((N->right)->nseq));//sort on min and break ties on max
3236 min[a][2]=MIN(((N->left)->nseq),((N->right)->nseq));
3239 sort_int_inv (min,3, 1, 0, P->node);
3240 for (a=0; a<=P->node && a<max_fork; a++)
3242 if (min[a][2]>1)(NL[min[a][0]])->fork=1;
3246 rec_local_tree_aln (P, A,CL, 1);
3247 for (a=0; a<P->nseq; a++)sprintf (A->tree_order[a], "%s", (CL->S)->name[P->lseq[a]]);
3248 A->len_aln=strlen (A->seq_al[0]);
3250 fprintf ( CL->local_stderr, "\n\n");
3255 NT_node rec_local_tree_aln ( NT_node P, Alignment*A, Constraint_list *CL,int print)
3261 if (!P || P->nseq==1) return NULL;
3262 R=P->right;L=P->left;
3268 tmp1=vtmpnam (NULL);
3269 tmp2=vtmpnam (NULL);
3275 if (L->nseq>R->nseq)print=1;
3277 initiate_vtmpnam (NULL);
3278 rec_local_tree_aln (L, A, CL, print);
3279 dump_msa (tmp1,A, L->nseq, L->lseq);
3280 myexit (EXIT_SUCCESS);
3288 if (L->nseq>R->nseq)print=0;
3291 initiate_vtmpnam (NULL);
3292 rec_local_tree_aln (R, A, CL, print);
3293 dump_msa (tmp2, A, R->nseq, R->lseq);
3294 myexit (EXIT_SUCCESS);
3297 vwaitpid (pid1, &s, 0);
3298 vwaitpid (pid2, &s, 0);
3301 undump_msa (A,tmp1);
3302 undump_msa (A,tmp2);
3306 rec_local_tree_aln (L, A, CL, print);
3307 rec_local_tree_aln (R, A, CL, print);
3310 P->score=A->score_aln=score=profile_pair_wise (A,L->nseq, L->lseq,R->nseq,R->lseq,CL);
3311 score=node2sub_aln_score (A, CL, CL->evaluate_mode,P);
3312 A->len_aln=strlen (A->seq_al[P->lseq[0]]);
3314 if (print)fprintf(CL->local_stderr, "\n\tGroup %4d: [Group %4d (%4d seq)] with [Group %4d (%4d seq)]-->[Len=%5d][PID:%d]%s",P->index,R->index,R->nseq,L->index,L->nseq, A->len_aln,getpid(),(P->fork==1)?"[Forked]":"" );
3321 NT_node* tree2ao ( NT_node LT, NT_node RT, Alignment*A, int nseq, Constraint_list *CL)
3327 static int n_groups_done, do_split=0;
3337 if (n_groups_done==0)
3340 SNL=vcalloc ( (CL->S)->nseq, sizeof (NT_node));
3342 if (CL->translation)vfree(CL->translation);
3343 CL->translation=vcalloc ( (CL->S)->nseq, sizeof (int));
3345 for ( a=0; a< (CL->S)->nseq; a++)
3346 CL->translation[a]=name_is_in_list ( (CL->S)->name[a], (CL->S)->name, (CL->S)->nseq, MAXNAMES);
3348 n_groups_done=(CL->S)->nseq;
3349 A=reorder_aln (A, (CL->S)->name,(CL->S)->nseq);
3353 translation=CL->translation;
3354 n_s=vcalloc (2, sizeof ( int));
3355 l_s=declare_int ( 2, nseq);
3358 if ( RT->parent !=LT->parent)fprintf ( stderr, "Tree Pb [FATAL:%s]", PROGRAM);
3361 if ( LT->leaf==1 && RT->leaf==0)
3362 tree2ao ( RT->left, RT->right,A, nseq,CL);
3364 else if ( RT->leaf==1 && LT->leaf==0)
3365 tree2ao ( LT->left, LT->right,A,nseq,CL);
3367 else if (RT->leaf==0 && LT->leaf==0)
3369 tree2ao ( LT->left, LT->right,A,nseq,CL);
3370 tree2ao ( RT->left, RT->right,A,nseq,CL);
3373 if ( LT->leaf==1 && RT->leaf==1)
3375 /*1 Identify the two groups of sequences to align*/
3377 nseq2align=LT->nseq+RT->nseq;
3379 for ( a=0; a< LT->nseq; a++)l_s[0][a]=translation[LT->lseq[a]];
3380 if ( LT->nseq==1)LT->group=l_s[0][0];
3383 for ( a=0; a< RT->nseq; a++)l_s[1][a]=translation[RT->lseq[a]];
3384 if ( RT->nseq==1)RT->group=l_s[1][0];
3387 P->group=n_groups_done++;
3389 if (nseq2align==nseq)
3391 for (b=0, a=0; a< n_s[0]; a++, b++)sprintf ( A->tree_order[b],"%s", (CL->S)->name[l_s[0][a]]);
3392 for (a=0; a< n_s[1] ; a++, b++)sprintf ( A->tree_order[b], "%s",(CL->S)->name[l_s[1][a]]);
3396 if (P->parent)P->leaf=1;
3397 if ( LT->isseq==0)LT->leaf=0;
3398 if ( RT->isseq==0)RT->leaf=0;
3400 if (RT->isseq){SNL[translation[RT->lseq[0]]]=RT;RT->score=100;}
3401 if (LT->isseq){SNL[translation[LT->lseq[0]]]=LT;LT->score=100;}
3403 do_split=split_condition (nseq2align,A->score_aln,CL);
3404 if (CL->split && do_split)
3407 for (a=0; a< P->nseq; a++)SNL[CL->translation[P->lseq[a]]]=NULL;
3408 SNL[CL->translation[RT->lseq[0]]]=P;
3418 NT_node* tree_realn ( NT_node LT, NT_node RT, Alignment*A, int nseq, Constraint_list *CL)
3424 static int n_groups_done;
3434 if (n_groups_done==0)
3437 SNL=vcalloc ( (CL->S)->nseq, sizeof (NT_node));
3439 if (CL->translation)vfree(CL->translation);
3440 CL->translation=vcalloc ( (CL->S)->nseq, sizeof (int));
3442 for ( a=0; a< (CL->S)->nseq; a++)
3443 CL->translation[a]=name_is_in_list ( (CL->S)->name[a], (CL->S)->name, (CL->S)->nseq, MAXNAMES);
3444 if (nseq>2)fprintf ( CL->local_stderr, "\nPROGRESSIVE_ALIGNMENT [Tree Based]\n");
3445 else fprintf ( CL->local_stderr, "\nPAIRWISE_ALIGNMENT [No Tree]\n");
3446 n_groups_done=(CL->S)->nseq;
3447 A=reorder_aln (A, (CL->S)->name,(CL->S)->nseq);
3451 translation=CL->translation;
3452 n_s=vcalloc (2, sizeof ( int));
3453 l_s=declare_int ( 2, nseq);
3459 l_s[0][0]=name_is_in_list ((CL->S)->name[0],(CL->S)->name, (CL->S)->nseq, MAXNAMES);
3460 l_s[1][0]=name_is_in_list ((CL->S)->name[1],(CL->S)->name, (CL->S)->nseq, MAXNAMES);
3461 A->score_aln=score=pair_wise (A, n_s, l_s,CL);
3469 if ( RT->parent !=LT->parent)fprintf ( stderr, "Tree Pb [FATAL:%s]", PROGRAM);
3472 if ( LT->leaf==1 && RT->leaf==0)
3473 tree_realn ( RT->left, RT->right,A, nseq,CL);
3475 else if ( RT->leaf==1 && LT->leaf==0)
3476 tree_realn ( LT->left, LT->right,A,nseq,CL);
3478 else if (RT->leaf==0 && LT->leaf==0)
3480 tree_realn ( LT->left, LT->right,A,nseq,CL);
3481 tree_realn ( RT->left, RT->right,A,nseq,CL);
3484 if ( LT->leaf==1 && RT->leaf==1 && (RT->nseq+LT->nseq)<nseq)
3486 /*1 Identify the two groups of sequences to align*/
3487 int *list, s, id1, id2;
3488 list=vcalloc (nseq, sizeof (int));
3489 for (a=0; a<LT->nseq; a++)
3491 s=translation[LT->lseq[a]];
3494 for (a=0; a<RT->nseq; a++)
3496 s=translation[RT->lseq[a]];
3499 for (a=0; a<nseq; a++)
3507 id1=sub_aln2sim (A, n_s, l_s, "idmat_sim");
3510 ungap_sub_aln (A, n_s[0],l_s[0]);
3511 ungap_sub_aln (A, n_s[1],l_s[1]);
3512 P->score=A->score_aln=score=pair_wise (A, n_s, l_s,CL);
3513 id2=sub_aln2sim (A, n_s, l_s, "idmat_sim");
3518 if (nseq2align==nseq)
3520 for (b=0, a=0; a< n_s[0]; a++, b++)sprintf ( A->tree_order[b],"%s", (CL->S)->name[l_s[0][a]]);
3521 for (a=0; a< n_s[1] ; a++, b++)sprintf ( A->tree_order[b], "%s",(CL->S)->name[l_s[1][a]]);
3525 if (P->parent)P->leaf=1;
3527 if ( LT->isseq==0)LT->leaf=0;
3528 if ( RT->isseq==0)RT->leaf=0;
3530 if (RT->isseq){SNL[translation[RT->lseq[0]]]=RT;RT->score=100;}
3531 if (LT->isseq){SNL[translation[LT->lseq[0]]]=LT;LT->score=100;}
3543 Alignment* profile_tree_aln ( NT_node P,Alignment*A,Constraint_list *CL, int threshold)
3545 int *ns, **ls, a, sim;
3546 NT_node LT, RT, D, UD;
3549 static int n_groups_done;
3553 //Sequences must be in the same order as the tree sequences
3557 n_groups_done=P->nseq+1;
3563 if (LT->leaf==0)A=delayed_tree_aln1 (LT, A,CL, threshold);
3564 if (RT->leaf==0)A=delayed_tree_aln1 (RT, A,CL, threshold);
3566 ns=vcalloc (2, sizeof (int));
3567 ls=declare_int ( 2,R->nseq);
3571 ls[0][ns[0]++]=LT->lseq[0];
3572 LT->group=ls[0][0]+1;
3575 node2seq_list (LT,&ns[0], ls[0]);
3579 ls[1][ns[1]++]=RT->lseq[0];
3580 RT->group=ls[1][0]+1;
3583 node2seq_list (RT,&ns[1], ls[1]);
3586 P->group=++n_groups_done;
3587 fprintf (CL->local_stderr, "\n\tGroup %4d: [Group %4d (%4d seq)] with [Group %4d (%4d seq)]-->",P->group,RT->group, ns[1],LT->group, ns[0]);
3589 P->score=A->score_aln=pair_wise (A, ns, ls,CL);
3590 sim=sub_aln2sim(A, ns, ls, "idmat_sim1");
3594 UD=(ns[0]<=ns[1])?RT:LT;
3595 D= (ns[0]<=ns[1])?LT:RT;
3600 fprintf (CL->local_stderr, "[Delayed (Sim=%4d). Kept Group %4d]",sim,UD->group);
3603 ungap_sub_aln (A, ns[0],ls[0]);
3604 ungap_sub_aln (A, ns[1],ls[1]);
3605 A->nseq=MAX(ns[0],ns[1]);
3609 F->A=main_read_aln (output_fasta_sub_aln (NULL, A, ns[(D==LT)?0:1], ls[(D==LT)?0:1]), NULL);
3613 F->A=main_read_aln (output_fasta_sub_aln (NULL, A, ns[(D==LT)?1:0], ls[(D==LT)?1:0]), NULL);
3617 printf_exit (EXIT_FAILURE, stderr, "\nError: Empty group");
3622 LT->aligned=1; RT->aligned=1;
3623 fprintf (CL->local_stderr, "[Score=%4d][Len=%5d]",sub_aln2sub_aln_score (A, CL, CL->evaluate_mode,ns, ls), (int)strlen ( A->seq_al[ls[0][0]]));
3624 A->nseq=ns[0]+ns[1];
3629 F->A=main_read_aln (output_fasta_sub_aln2 (NULL, A, ns, ls), NULL);
3633 for (a=0; a<LT->nseq;a++)P->lseq[P->nseq++]=LT->lseq[a];
3634 for (a=0; a<RT->nseq;a++)P->lseq[P->nseq++]=RT->lseq[a];
3641 }////////////////////////////////////////////////////////////////////////////////////////
3645 ////////////////////////////////////////////////////////////////////////////////////////
3647 //Alignment *frame_tree_aln (Alignment *A, Constraint_list *CL)
3651 ////////////////////////////////////////////////////////////////////////////////////////
3655 ////////////////////////////////////////////////////////////////////////////////////////
3656 int delayed_pair_wise (Alignment *A, int *ns, int **ls,Constraint_list *CL);
3657 NT_node* delayed_tree_aln_mode1 ( NT_node LT, NT_node RT, Alignment*A, int nseq, Constraint_list *CL);
3658 NT_node* delayed_tree_aln_mode2 ( NT_node LT, NT_node RT, Alignment*A, int nseq, Constraint_list *CL);
3659 int paint_nodes2aligned ( NT_node P,char **list, int n);
3661 int reset_visited_nodes ( NT_node P);
3662 int reset_visited_nodes2 ( NT_node P);
3663 Alignment * make_delayed_tree_aln (Alignment *A,int n, Constraint_list *CL)
3668 T=make_tree (A, CL, CL->gop, CL->gep,CL->S,NULL, 1);
3669 delayed_tree_aln_mode1 ((T[3][0])->left,(T[3][0])->right,A,(CL->S)->nseq, CL);
3671 for ( a=0; a< n; a++)
3674 sprintf ( CL->distance_matrix_mode, "aln");
3675 CL->DM=cl2distance_matrix ( CL,A,NULL,NULL, 1);
3677 T=make_tree (A, CL, CL->gop, CL->gep,CL->S,NULL, 1);
3678 delayed_tree_aln_mode1 ((T[3][0])->left,(T[3][0])->right,A,(CL->S)->nseq, CL);
3683 NT_node* delayed_tree_aln_mode1 ( NT_node LT, NT_node RT, Alignment*A, int nseq, Constraint_list *CL)
3689 P=LT->parent;P->nseq=nseq;
3690 paint_nodes2aligned (P, NULL, 0);
3692 A=delayed_tree_aln1 (P, A, CL,50);
3693 A=delayed_tree_aln2 (P, A, CL, 0);
3697 NT_node* delayed_tree_aln_mode2 ( NT_node LT, NT_node RT, Alignment*A, int nseq, Constraint_list *CL)
3703 P=LT->parent;P->nseq=nseq;
3705 A=delayed_tree_aln1 (P, A, CL,thr);
3709 A=delayed_tree_aln2 (P, A, CL, thr);
3715 Alignment* delayed_tree_aln1 ( NT_node P,Alignment*A,Constraint_list *CL, int threshold)
3717 int *ns, **ls, a, sim;
3718 NT_node LT, RT, D, UD;
3721 static int n_groups_done;
3725 //Sequences must be in the same order as the tree sequences
3729 n_groups_done=P->nseq+1;
3735 if (LT->leaf==0)A=delayed_tree_aln1 (LT, A,CL, threshold);
3736 if (RT->leaf==0)A=delayed_tree_aln1 (RT, A,CL, threshold);
3738 ns=vcalloc (2, sizeof (int));
3739 ls=declare_int ( 2,R->nseq);
3742 node2seq_list (LT,&ns[0], ls[0]);
3743 if ( LT->nseq==1)LT->group=LT->lseq[0]+1;
3745 node2seq_list (RT,&ns[1], ls[1]);
3746 if ( RT->nseq==1)RT->group=RT->lseq[0]+1;
3749 P->group=++n_groups_done;
3752 if ( ns[0]==0 || ns[1]==0)
3754 fprintf (CL->local_stderr, "\n\tF-Group %4d: [Group %4d (%4d seq)] with [Group %4d (%4d seq)]-->Skipped",P->group,RT->group, ns[1],LT->group, ns[0]);
3756 LT->aligned=(ns[0]==0)?0:1;
3757 RT->aligned=(ns[1]==0)?0:1;
3761 fprintf (CL->local_stderr, "\n\tF-Group %4d: [Group %4d (%4d seq)] with [Group %4d (%4d seq)]-->",P->group,RT->group, ns[1],LT->group, ns[0]);
3762 P->score=A->score_aln=pair_wise (A, ns, ls,CL);
3763 sim=sub_aln2max_sim(A, ns, ls, "idmat_sim1");
3768 UD=(ns[0]<=ns[1])?RT:LT;
3769 D= (ns[0]<=ns[1])?LT:RT;
3774 fprintf (CL->local_stderr, "[Delayed (Sim=%4d). Kept Group %4d]",sim,UD->group);
3776 ungap_sub_aln (A, ns[0],ls[0]);
3777 ungap_sub_aln (A, ns[1],ls[1]);
3778 A->nseq=MAX(ns[0],ns[1]);
3782 LT->aligned=1; RT->aligned=1;
3783 fprintf (CL->local_stderr, "[Score=%4d][Len=%5d]",sub_aln2sub_aln_score (A, CL, CL->evaluate_mode,ns, ls), (int)strlen ( A->seq_al[ls[0][0]]));
3784 A->nseq=ns[0]+ns[1];
3787 for (a=0; a<LT->nseq;a++)P->lseq[P->nseq++]=LT->lseq[a];
3788 for (a=0; a<RT->nseq;a++)P->lseq[P->nseq++]=RT->lseq[a];
3797 Alignment* delayed_tree_aln2 ( NT_node P,Alignment*A,Constraint_list *CL, int thr)
3810 fprintf (CL->local_stderr, "\n");
3812 if (!LT->aligned && !RT->aligned)
3814 printf_exit (EXIT_FAILURE, stderr, "ERROR: Unresolved Node On Groups %d [FATAL:%s]\n", P->group,PROGRAM);
3816 else if (!LT->aligned || !RT->aligned)
3819 ns=vcalloc (2, sizeof (int));
3820 ls=declare_int (2, R->nseq);
3822 node2seq_list (R,&ns[0], ls[0]);
3824 D=(!LT->aligned)?LT:RT;
3826 node2seq_list (D,&ns[1], ls[1]);
3828 fprintf (CL->local_stderr, "\tS-Delayed Group %4d: [Group %4d (%4d seq)] with [Group %4d (%4d seq)]-->",P->group,D->group, ns[1],R->group, ns[0]);
3829 P->score=A->score_aln=pair_wise (A, ns, ls,CL);
3830 sim=sub_aln2max_sim(A, ns, ls, "idmat_sim1");
3833 fprintf (CL->local_stderr, " [Further Delayed]\n");
3834 ungap_sub_aln (A, ns[0],ls[0]);
3835 ungap_sub_aln (A, ns[1],ls[1]);
3840 fprintf (CL->local_stderr, "[Score=%4d][Len=%5d][thr=%d]\n",sub_aln2sub_aln_score (A, CL, CL->evaluate_mode,ns, ls), (int)strlen ( A->seq_al[ls[0][0]]), thr);
3843 vfree (ns);free_int (ls, -1);
3850 if (LT->leaf==0)A=delayed_tree_aln2 (LT, A,CL, thr);
3851 if (RT->leaf==0)A=delayed_tree_aln2 (RT, A,CL, thr);
3856 int delayed_pair_wise (Alignment *A, int *ns, int **ls,Constraint_list *CL)
3862 pair_wise (A, ns, ls, CL);
3864 sim=fast_aln2sim_list (A, "sim3", ns, ls);
3866 sort_int_inv ( sim,3, 2,0, ns[0]*ns[1]-1);
3868 for (a=0; a< 2; a++)
3869 for ( b=0; b< ns[a]; b++)
3870 A->order[ls[a][b]][4]=-1;
3872 for (a=0; a< 10 && sim[a][0]!=-1; a++)
3880 ungap_sub_aln (A, ns[0],ls[0]);
3881 ungap_sub_aln (A, ns[1],ls[1]);
3883 s=pair_wise (A, ns, ls, CL);
3885 for (a=0; a< 2; a++)
3886 for ( b=0; b< ns[a]; b++)
3887 A->order[ls[a][b]][4]=0;
3893 int node2seq_list2 (NT_node P, int *ns, int *ls)
3896 if ( !P || P->visited ) return ns[0];
3901 ls[ns[0]++]=P->lseq[0];
3904 if (P->left && (P->left) ->aligned)node2seq_list2 (P->left, ns,ls);
3905 if (P->right && (P->right)->aligned)node2seq_list2 (P->right,ns,ls);
3906 if (P->aligned && P->parent)node2seq_list2 (P->parent,ns,ls);
3912 int node2seq_list (NT_node P, int *ns, int *ls)
3915 if ( P->isseq && P->aligned)
3917 ls[ns[0]++]=P->lseq[0];
3921 if (P->left && (P->left) ->aligned)node2seq_list (P->left, ns,ls);
3922 if (P->right && (P->right)->aligned)node2seq_list (P->right,ns,ls);
3926 int paint_nodes2aligned ( NT_node P,char **list, int n)
3933 else if ( name_is_in_list ( P->name, list, n, 100)!=-1)
3941 r+=paint_nodes2aligned (P->left, list, n);
3942 r+=paint_nodes2aligned (P->right, list, n);
3947 int reset_visited_nodes ( NT_node P)
3949 while (P->parent)P=P->parent;
3950 return reset_visited_nodes2 (P);
3952 int reset_visited_nodes2 ( NT_node P)
3955 if (P->left)r+=reset_visited_nodes2(P->left);
3956 if (P->right)r+=reset_visited_nodes2(P->right);
3962 ////////////////////////////////////////////////////////////////////////////////////////
3966 ////////////////////////////////////////////////////////////////////////////////////////
3968 Alignment* dpa_msa2 ( NT_node P,Alignment*A,Constraint_list *CL);
3969 Alignment *dpa_align_node (NT_node P,Alignment*A,Constraint_list *CL);
3970 char *node2profile_list (NT_node P,Alignment*A,Constraint_list *CL, char *list);
3971 char * output_node_aln (NT_node P, Alignment *A, char *name);
3972 int node2nleaf ( NT_node P);
3974 Alignment* dpa_aln (Alignment*A,Constraint_list *CL)
3978 A=iterative_tree_aln (A,1, CL);
3979 P=make_root_tree (A, CL, CL->gop, CL->gep,CL->S,NULL, 1);
3982 A=dpa_msa2(P, A, CL);
3986 int node2nleaf ( NT_node P)
3989 if ( P->leaf) return 1;
3992 n+=node2nleaf ( P->left);
3993 n+=node2nleaf ( P->right);
3997 Alignment* dpa_msa2 ( NT_node P,Alignment*A,Constraint_list *CL)
4006 n_l=node2nleaf (P->left);
4007 n_r=node2nleaf (P->right);
4010 return dpa_msa2 (P->left, A, CL);
4014 return dpa_msa2 (P->right, A, CL);
4017 A=dpa_align_node (P, A, CL);
4022 Alignment *dpa_align_node (NT_node P,Alignment*A,Constraint_list *CL)
4025 char *list, *tmp_aln;
4030 list=vcalloc ( P->nseq*100, sizeof (char));
4031 list=node2profile_list (P,A, CL, list);
4033 printf_system ( "t_coffee -profile %s -outfile=%s -dp_mode gotoh_pair_wise_lgp -msa_mode iterative_tree_aln -quiet", list,tmp_aln=vtmpnam (NULL));
4034 B=main_read_aln (tmp_aln, NULL);
4035 A=realloc_aln (A, B->len_aln+1);
4036 for ( a=0; a< B->nseq; a++)
4037 if ( (b=name_is_in_list (B->name[a], A->name, A->nseq, 100))!=-1)
4038 sprintf (A->seq_al[b], "%s", B->seq_al[a]);
4039 A->len_aln=B->len_aln;
4044 char *node2profile_list (NT_node P,Alignment*A,Constraint_list *CL, char *list)
4048 list=node2profile_list (P->left, A, CL, list);
4049 list=node2profile_list (P->right, A, CL, list);
4054 list=strcatf (list," %s", output_node_aln (P, A, NULL));
4055 if ( !P->isseq)P->leaf=0;
4059 char * output_node_aln (NT_node P, Alignment *A, char *name)
4063 if (name==NULL) name=vtmpnam (NULL);
4064 fp=vfopen (name, "w");
4066 for (a=0; a< P->nseq; a++)
4067 fprintf ( fp, ">%s\n%s", A->name[P->lseq[a]], A->seq_al[P->lseq[a]]);
4071 ////////////////////////////////////////////////////////////////////////////////////////
4075 ////////////////////////////////////////////////////////////////////////////////////////
4077 Alignment * new_dpa_aln (Alignment *A,Constraint_list *CL)
4084 A=make_delayed_tree_aln (A,1, CL);
4085 P=make_root_tree (A, CL, CL->gop, CL->gep,CL->S,NULL, 1);
4086 set_node_score (A, P, "idmat_sim");
4089 list=split_nodes_nseq (A,P,15, list=vcalloc (P->nseq*200, sizeof (char)));
4090 printf_system ( "t_coffee -profile %s -outfile=%s -dp_mode gotoh_pair_wise_lgp -msa_mode iterative_tree_aln", list,tmp_aln=vtmpnam (NULL));
4091 return main_read_aln (tmp_aln, NULL);
4094 char *split_nodes_nseq (Alignment *A, NT_node P, int nseq, char *list)
4103 n=count_threshold_nodes (A, P, a);
4106 return split_nodes_idmax (A, P, a,list);
4108 char *split_nodes_idmax (Alignment *A, NT_node P, int t, char *list)
4110 if (P->isseq || P->score>=t)
4112 list=strcatf (list," %s", output_node_aln (P, A, NULL));
4114 else if ( P->score<t)
4116 list=split_nodes_idmax (A, P->left,t, list);
4117 list=split_nodes_idmax (A, P->right,t,list);
4121 int count_threshold_nodes (Alignment *A, NT_node P, int t)
4125 if (P->isseq || P->score>=t)
4129 else if ( P->score<t)
4131 s+=count_threshold_nodes (A, P->left,t);
4132 s+=count_threshold_nodes (A, P->right,t);
4137 int set_node_score (Alignment *A, NT_node P, char *mode)
4142 if (P->isseq) return 0;
4146 N=(a==0)?P->left:P->right;
4150 P->score=sub_aln2max_sim(A, ns, ls,mode);
4151 set_node_score (A,P->left, mode);
4152 set_node_score (A,P->right, mode);
4155 /////////////////////////////////////////////////////////////////////////////////////////
4156 int split_condition (int nseq, int score, Constraint_list *CL)
4158 int cond1=1, cond2=1;
4161 if ( CL->split_nseq_thres)cond1 =(nseq<=CL->split_nseq_thres)?1:0;
4162 if ( CL->split_score_thres)cond2=(score>=CL->split_score_thres)?1:0;
4164 return (cond1 && cond2);
4166 int profile_pair_wise (Alignment *A, int n1, int *l1, int n2, int *l2, Constraint_list *CL)
4173 ns=vcalloc (2, sizeof (int));
4174 ls=vcalloc (2, sizeof (int*));
4180 return pair_wise (A, ns, ls, CL);
4182 int pair_wise (Alignment *A, int*ns, int **l_s,Constraint_list *CL )
4197 /*Make sure evaluation functions update their cache if needed*/
4198 A=update_aln_random_tag (A);
4200 if (! CL->pw_parameters_set)
4202 fprintf ( stderr, "\nERROR pw_parameters_set must be set in pair_wise [FATAL]\n" );crash("");
4206 function=get_pair_wise_function(CL->pair_wise, CL->dp_mode,&glocal);
4207 if ( CL->get_dp_cost==NULL)CL->get_dp_cost=get_dp_cost;
4209 if (strlen ( A->seq_al[l_s[0][0]])==0 || strlen ( A->seq_al[l_s[1][0]])==0)
4210 score=empty_pair_wise ( A, ns, l_s, CL, glocal);
4212 score=function ( A, ns, l_s, CL);
4217 int empty_pair_wise ( Alignment *A, int *ns, int **l_s, Constraint_list *CL, int glocal)
4224 if ( glocal==GLOBAL)
4226 l0=strlen (A->seq_al[l_s[0][0]]);
4227 l1=strlen (A->seq_al[l_s[1][0]]);
4230 if ( len==0)return 0;
4231 else if (l0>l1){n=ns[1];l=l_s[1];}
4232 else if (l0<l1){n=ns[0];l=l_s[0];}
4233 string=generate_null (len);
4234 for ( a=0; a< n; a++)
4235 sprintf ( A->seq_al[l[a]], "%s", string);
4236 A->score=A->score_aln=0;
4241 else if ( glocal==LALIGN)
4243 A->A=declare_aln (A->S);
4245 for ( a=0; a< 2; a++)
4246 for ( b=0; b<ns[a]; b++)
4247 A->seq_al[l_s[a][b]][0]='\0';
4248 (A->A)->score_aln=(A->A)->score=0;
4257 Pwfunc get_pair_wise_function (Pwfunc pw,char *dp_mode, int *glocal)
4259 /*Returns a function and a mode (Glogal, Local...)*/
4269 /*The first time: initialize the list of pairwse functions*/
4272 pwl=vcalloc ( 100, sizeof (Pwfunc));
4273 dpl=declare_char (100, 100);
4274 dps=vcalloc ( 100, sizeof (int));
4276 pwl[npw]=fasta_cdna_pair_wise;
4277 sprintf (dpl[npw], "fasta_cdna_pair_wise");
4281 pwl[npw]=cfasta_cdna_pair_wise;
4282 sprintf (dpl[npw], "cfasta_cdna_pair_wise");
4286 pwl[npw]=idscore_pair_wise;
4287 sprintf (dpl[npw], "idscore_pair_wise");
4291 pwl[npw]=gotoh_pair_wise;
4292 sprintf (dpl[npw], "gotoh_pair_wise");
4296 pwl[npw]=gotoh_pair_wise_lgp;
4297 sprintf (dpl[npw], "gotoh_pair_wise_lgp");
4302 pwl[npw]=proba_pair_wise;
4303 sprintf (dpl[npw], "proba_pair_wise");
4307 pwl[npw]=biphasic_pair_wise;
4308 sprintf (dpl[npw], "biphasic_pair_wise");
4312 pwl[npw]=subop1_pair_wise;
4313 sprintf (dpl[npw], "subop1_pair_wise");
4317 pwl[npw]=subop2_pair_wise;
4318 sprintf (dpl[npw], "subop2_pair_wise");
4322 pwl[npw]=myers_miller_pair_wise;
4323 sprintf (dpl[npw], "myers_miller_pair_wise");
4327 pwl[npw]=test_pair_wise;
4328 sprintf (dpl[npw], "test_pair_wise");
4332 pwl[npw]=fasta_gotoh_pair_wise;
4333 sprintf (dpl[npw], "fasta_pair_wise");
4336 pwl[npw]=cfasta_gotoh_pair_wise;
4337 sprintf (dpl[npw], "cfasta_pair_wise");
4341 pwl[npw]=very_fast_gotoh_pair_wise;
4342 sprintf (dpl[npw], "very_fast_pair_wise");
4346 pwl[npw]=gotoh_pair_wise_sw;
4347 sprintf (dpl[npw], "gotoh_pair_wise_sw");
4351 pwl[npw]=cfasta_gotoh_pair_wise_sw;
4352 sprintf (dpl[npw], "cfasta_sw_pair_wise");
4356 pwl[npw]=gotoh_pair_wise_lalign;
4357 sprintf (dpl[npw], "gotoh_pair_wise_lalign");
4361 pwl[npw]=sim_pair_wise_lalign;
4362 sprintf (dpl[npw], "sim_pair_wise_lalign");
4366 pwl[npw]=domain_pair_wise;
4367 sprintf (dpl[npw], "domain_pair_wise");
4371 pwl[npw]=gotoh_pair_wise;
4372 sprintf (dpl[npw], "ssec_pair_wise");
4376 pwl[npw]=ktup_pair_wise;
4377 sprintf (dpl[npw], "ktup_pair_wise");
4381 pwl[npw]=precomputed_pair_wise;
4382 sprintf (dpl[npw], "precomputed_pair_wise");
4386 pwl[npw]=myers_miller_pair_wise;
4387 sprintf (dpl[npw], "default");
4391 pwl[npw]=viterbi_pair_wise;
4392 sprintf (dpl[npw], "viterbi_pair_wise");
4396 pwl[npw]=viterbiL_pair_wise;
4397 sprintf (dpl[npw], "viterbiL_pair_wise");
4401 pwl[npw]=viterbiD_pair_wise;
4402 sprintf (dpl[npw], "viterbiD_pair_wise");
4406 pwl[npw]=seq_viterbi_pair_wise;
4407 sprintf (dpl[npw], "seq_viterbi_pair_wise");
4411 pwl[npw]=pavie_pair_wise;
4412 sprintf (dpl[npw], "pavie_pair_wise");
4416 pwl[npw]=glocal_pair_wise;
4417 sprintf (dpl[npw], "glocal_pair_wise");
4421 pwl[npw]=linked_pair_wise;
4422 sprintf (dpl[npw], "linked_pair_wise");
4426 pwl[npw]=linked_pair_wise_collapse;
4427 sprintf (dpl[npw], "linked_pair_wise_collapse");
4433 pwl[npw]=viterbiDGL_pair_wise;
4434 sprintf (dpl[npw], "viterbiDGL_pair_wise");
4440 for ( a=0; a< npw; a++)
4442 if ( (dp_mode && strm (dpl[a], dp_mode)) || pwl[a]==pw)
4445 if (dp_mode)sprintf (dp_mode,"%s", dpl[a]);
4450 fprintf ( stderr, "\n[%s] is an unknown mode for dp_mode[FATAL]\n", dp_mode);
4456 /*******************************************************************************/
4459 /* Util Functions */
4462 /*******************************************************************************/
4464 char *build_consensus ( char *seq1, char *seq2, char *dp_mode)
4473 if ( !mat) mat=vcalloc ( STRING, sizeof (char));
4476 A=align_two_sequences (seq1, seq2, strcpy(mat,"idmat"), 0, 0,dp_mode);
4477 buf=vcalloc ( A->len_aln+1, sizeof (char));
4479 for ( a=0; a< A->len_aln; a++)
4483 if (is_gap(c1) && is_gap(c2))buf[a]='-';
4484 else if (is_gap(c1))buf[a]=c2;
4485 else if (is_gap(c2))buf[a]=c1;
4486 else if (c1!=c2){vfree (buf);buf=NULL;free_aln(A);return NULL;}
4490 free_sequence (free_aln (A), -1);
4498 combine_profile () Comobine two profiles into one, using the edit sequence produce by the DP
4499 edit_sequence () insert the gaps using the
4501 int fastal (int argv, char **arg)
4508 S=get_fasta_sequence (arg[1], NULL);
4512 for (a=0; a<S->nseq-1; a++)
4514 for (b=a+1; b<S->nseq; b++)
4516 mat[b][a]=mat[a][b]addrand()%100;
4520 int_dist2nj_tree (s, S->name, S->nseq, tree_name);
4521 T=main_read_tree (BT);
4522 =fastal_tree_aln (T->L,T->R,S);
4528 NT_node fastal_tree_aln ( NT_node P, Sequence *S)
4533 if (!P || P->nseq==1) return NULL;
4534 R=P->right;L=P->left;
4536 fastal_tree_aln (P->left,S);
4537 fastal_tree_aln (P->right,S);
4538 fastal_pair_wise (P);
4543 NT_node fastal_pair_wise (NT_node P)
4550 tb=fastal_align_profile ((P->right)->prf, (P->left)->prf);
4553 for (a=0; a< l; a++)
4556 if (tb[a]== 1 || tb[a] ==3)pr1=1;
4557 if (tb[a]== 2 || tb[a] ==3)pr2=1;
4559 for (b=0; b<20; b++)
4560 P->prf[a][b]=((pr1==1)?(P->right)->prf[ppr1][b]:0) + ((pr2==1)?(P->left)->prf[ppr2][b]:0);
4564 free_int ((P->left)->prf, -1);
4565 free_int ((P->right)->prf, -1);
4568 /******************************COPYRIGHT NOTICE*******************************/
4569 /*© Centro de Regulacio Genomica */
4571 /*Cedric Notredame */
4572 /*Fri Feb 18 08:27:45 CET 2011 - Revision 596. */
4573 /*All rights reserved.*/
4574 /*This file is part of T-COFFEE.*/
4576 /* T-COFFEE is free software; you can redistribute it and/or modify*/
4577 /* it under the terms of the GNU General Public License as published by*/
4578 /* the Free Software Foundation; either version 2 of the License, or*/
4579 /* (at your option) any later version.*/
4581 /* T-COFFEE is distributed in the hope that it will be useful,*/
4582 /* but WITHOUT ANY WARRANTY; without even the implied warranty of*/
4583 /* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the*/
4584 /* GNU General Public License for more details.*/
4586 /* You should have received a copy of the GNU General Public License*/
4587 /* along with Foobar; if not, write to the Free Software*/
4588 /* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA*/
4589 /*............................................... |*/
4590 /* If you need some more information*/
4591 /* cedric.notredame@europe.com*/
4592 /*............................................... |*/
4596 /******************************COPYRIGHT NOTICE*******************************/