#include #include #include #include #include #include "io_lib_header.h" #include "util_lib_header.h" #include "define_header.h" #include "dp_lib_header.h" int count_threshold_nodes (Alignment *A, NT_node P, int t); int set_node_score (Alignment *A, NT_node P, char *mode); char *split_nodes_nseq (Alignment *A, NT_node P, int max_nseq, char *list); char *split_nodes_idmax (Alignment *A, NT_node P, int max_id, char *list); /******************************************************************/ /* MAIN DRIVER */ /* */ /* */ /******************************************************************/ Constraint_list *profile2list (Job_TC *job, int nprf) { int *seqlist, *cache1, *cache2; static CLIST_TYPE *entry; Alignment *A1, *A2, *A; int a, b, c; Constraint_list *SCL, *SCL2; static int *L; static int max_L_len; int max, n_pairs; int **score, n, s1, s2, si, r1, r2; char *seqlistb; int debug=0; int cons, cons_thres, max_n_pairs, tot_n_pairs; int *cons_table; Constraint_list *CL; char *command; char *seq; char *weight; TC_method *M; int *iA1, *iA2; static char *buf1; if ( !buf1) buf1=vcalloc (1000, sizeof (char)); /*initialize the structure*/ CL=(job->io)->CL; M=(job->param)->TCM; command=M->executable; weight=M->weight; seq=(job->param)->seq_c; debug=(getenv("DEBUG_TCOFFEE_profile2list")!=NULL)?1:0; if ( debug)print_mem_usage (stderr, "IN"); seqlistb=vcalloc (10, sizeof (char)); seqlist=string2num_list (seq)+1; if (!entry)entry=vcalloc (CL->entry_len, sizeof (int)); entry[SEQ1]=seqlist[1]; entry[SEQ2]=seqlist[2]; A1=seq2profile(CL->S, seqlist[1]); A2=seq2profile(CL->S, seqlist[2]); SCL=copy_constraint_list ( CL, SOFT_COPY); SCL->L=L;SCL->max_L_len=max_L_len; SCL->fp=NULL; SCL->M=NULL; SCL->ne=0; SCL2=copy_constraint_list ( CL, SOFT_COPY); SCL2->L=L;SCL2->max_L_len=max_L_len; SCL2->fp=NULL; SCL2->M=NULL; SCL2->ne=0; /*Merge Sequences*/ SCL->S=merge_seq (A1->S, NULL ); SCL->S=merge_seq (A2->S, SCL->S); /*1: Compare the two profiles and identify the N master pairs*/ n=(A1->nseq*A2->nseq); max=(nprf==0 ||method_uses_structure (M))?n:MIN(nprf,n); if ( maxnseq*A2->nseq,3); for (n=0,a=0; anseq; a++) for ( b=0; bnseq; b++,n++) { score[n][0]=a; score[n][1]=b; score[n][2]=(int)get_seq_fsim ( A->seq_al[a], A->seq_al[b+A1->nseq], "-", NOGROUP, NOMATRIX, AVERAGE_POSITIONS); } free_aln (A); sort_int_inv (score,3,2,0, n-1); } else { score=declare_int (n,3); for (n=0,a=0; anseq; a++) for ( b=0; bnseq; b++,n++) { score[n][0]=a; score[n][1]=b; } } iA1=get_name_index (A1->name,A1->nseq, (SCL->S)->name, (SCL->S)->nseq); iA2=get_name_index (A2->name,A2->nseq, (SCL->S)->name, (SCL->S)->nseq); /*submit the N pairs*/ for ( n_pairs=1,a=0; aseq_c=%s",buf1); /*2: Compute the pairewise library*/ (job->io)->CL=SCL; SCL=seq2list (job); /*Unwind the pointer counter:*/job->np--; if (debug)fprintf ( stderr, "\n\tProfile aln %s %s %s [(%d,%d):%d %%id]", (SCL->S)->name[iA1[s1]],(SCL->S)->name[iA2[s2]], command, score[a][0], score[a][1],score[a][2]); /*3: Update the main library with the pairwise library*/ cache1=seq2inv_pos (A1->seq_al[s1]); cache2=seq2inv_pos (A2->seq_al[s2]); if (debug)fprintf ( stderr, " =>%d pairs", SCL->ne); n_pairs+=(SCL->ne>0)?1:0; for (c=0; c< SCL->ne; c++) { si=vread_clist (SCL, c, SEQ1); r1=vread_clist(SCL, c, (si==iA1[s1])?R1:R2); r2=vread_clist(SCL, c, (si==iA1[s1])?R2:R1); entry[R1]=cache1[r1]; entry[R2]=cache2[r2]; entry[WE]=vread_clist(SCL,c,WE); entry[CONS]=1; add_entry2list(entry, SCL2); } SCL->ne=0; vfree ( cache1);vfree ( cache2); compact_list (SCL2, 0, SCL2->ne, "default"); if (debug)fprintf ( stderr, " =>%d pairs", SCL2->ne); } free_sequence (SCL->S,-1); vfree (iA1); vfree (iA2); if (debug)fprintf ( stderr, "\nNPairs=%d", n_pairs); compact_list (SCL2, 0, SCL2->ne, "default"); /*get the concistency distribution*/ cons_table=vcalloc ( 101, sizeof (int)); for (c=0; c< SCL2->ne; c++) { entry[R1]=vread_clist(SCL2,c,R1); entry[R2]=vread_clist(SCL2,c,R2); entry[WE]=vread_clist(SCL2,c,WE); entry[CONS]=vread_clist(SCL2,c,CONS); cons=(entry[CONS]*100)/n_pairs; cons_table[cons]++; } /*Identify the threshold*/ max_n_pairs=(int)((float)(MIN(A1->len_aln, A2->len_aln)*2)); for (cons_thres=0,tot_n_pairs=0,c=100; c>=0; c--) { tot_n_pairs+=cons_table[c]; if ( tot_n_pairs>=max_n_pairs){cons_thres=c;c=-1;} } vfree (cons_table); /*Produce the library*/ for (c=0; c< SCL2->ne; c++) { entry[R1]=vread_clist(SCL2,c,R1); entry[R2]=vread_clist(SCL2,c,R2); entry[WE]=vread_clist(SCL2,c,WE); entry[CONS]=vread_clist(SCL2,c,CONS); entry[WE]/=entry[CONS]; cons=(entry[CONS]*100)/n_pairs; if (cons>=cons_thres)add_entry2list(entry, CL); } if ( !seq2R_template_profile (CL->S,seqlist[1]))free_aln (A1); if ( !seq2R_template_profile (CL->S,seqlist[2]))free_aln (A2); vfree(seqlistb); vfree(seqlist-1) ; vfree (SCL->L);free_constraint_list ( SCL); if (debug)print_mem_usage (stderr, "BEF FREE SCL2"); vfree(SCL2->L);free_constraint_list ( SCL2); if (debug) { fprintf ( stderr, "\nCL->ne=%d", CL->ne); print_mem_usage (stderr, "OUT"); } (job->io)->CL=CL; return CL; } int method_uses_structure(TC_method *M) { if ( strchr (M->seq_type, 'P'))return 1; else return 0; } int method_uses_profile(TC_method *M) { if ( strchr (M->seq_type, 'R'))return 1; else return 0; } Constraint_list *seq2list ( Job_TC *job) { char *mode; Alignment *A=NULL; Constraint_list *PW_CL; Constraint_list *RCL=NULL; int full_prf, nprf; int *seqlist; static char *s1, *s2; Sequence *S, *STL; Constraint_list *CL; char *seq; char *weight; TC_method *M; M=(job->param)->TCM; mode=M->executable; weight=M->weight; PW_CL=M->PW_CL; CL=(job->io)->CL; seq=(job->param)->seq_c; S=(CL)?CL->S:NULL; STL=(CL)?CL->STRUC_LIST:NULL; seqlist=string2num_list (seq)+1; if (!s1)s1=vcalloc ( MAXNAMES+1, sizeof (char)); if (!s2)s2=vcalloc ( MAXNAMES+1, sizeof (char)); sprintf (s1, "%s", (CL->S)->name[seqlist[1]]); sprintf (s2, "%s", (CL->S)->name[seqlist[2]]); /*Proteins*/ if ( strncmp (CL->profile_comparison, "full", 4)==0) { full_prf=1; if ( CL->profile_comparison[4])nprf=atoi ( CL->profile_comparison+4); else nprf=0; } else { full_prf=0; } if ((method_uses_structure (M)) && profile2P_template_file (CL->S, seqlist[1]) && profile2P_template_file (CL->S, seqlist[2])) { RCL=profile2list (job, nprf); } else if ( strm (mode, "ktup_msa")) { RCL=hasch2constraint_list (CL->S, CL); } else if ( strm (mode, "test_pair") || strm ( mode,"fast_pair") || strm (mode, "ifast_pair") \ || strm ( mode, "diag_fast_pair")|| strm (mode, "idiag_fast_pair")\ || strm ( mode, "blast_pair") || strm (mode, "lalign_blast_pair") \ || strm ( mode, "viterbi_pair") || strm (mode, "slow_pair") || strm(mode, "glocal_pair") || strm (mode, "biphasic_pair") \ || strm ( mode, "islow_pair") || strm (mode, "tm_slow_pair") || strm (mode, "r_slow_pair") \ || strm ( mode, "lalign_id_pair")|| strm (mode, "tm_lalign_id_pair") || strm (mode , "lalign_len_pair") \ || strm (mode, "prrp_aln") || strm ( mode, "test_pair") \ || strm (mode, "cdna_fast_pair") || strm (mode, "diaa_slow_pair") || strm (mode, "monoaa_slow_pair")\ || strncmp (mode,"cdna_fast_pair",14)==0 \ ) { A=fast_pair (job); RCL=aln2constraint_list ((A->A)?A->A:A, CL,weight); } else if ( strm ( mode, "subop1_pair") || strm ( mode, "subop2_pair") ) { A=fast_pair (job); RCL=A->CL; } else if ( strm ( mode, "proba_pair") ) { A=fast_pair (job); RCL=A->CL; } else if ( strm ( mode, "best_pair4prot")) { RCL=best_pair4prot (job); } else if ( strm ( mode, "best_pair4rna")) { RCL=best_pair4rna (job); } else if ( strm ( mode, "exon2_pair")) { char weight2[1000]; A=fast_pair (job); sprintf ( weight2, "%s_subset_objOBJ-",weight); RCL=aln2constraint_list (A, CL,weight2); } else if ( strm ( mode, "exon_pair")) { A=fast_pair (job); RCL=aln2constraint_list (A, CL,weight); } else if ( strm ( mode, "exon3_pair")) { char weight2[1000]; A=fast_pair (job); sprintf ( weight2, "%s_subset_objOBJ-",weight); RCL=aln2constraint_list (A, CL,weight2); } /*STRUCTURAL METHODS*/ else if ( strm (mode, "seq_msa")) { RCL=seq_msa(M, seq, CL); } /*STRUCTURAL METHODS*/ else if (strm (mode, "profile_pair") || strm (mode, "hh_pair")) { RCL=profile_pair (M, seq, CL); } else if ( strm (mode, "sap_pair")) { RCL=sap_pair (seq, weight, CL); } else if ( strm (mode, "thread_pair")) { RCL=thread_pair (M,seq, CL); } else if ( strm (mode, "pdb_pair")) { RCL=pdb_pair (M,seq, CL); } else if (strm (mode, "rna_pair")) { RCL=rna_pair(M, seq, CL); } else if ( strm (mode, "pdbid_pair")) { RCL=pdbid_pair (M,seq, CL); } else if ( strm (mode, "fugue_pair")) { RCL=thread_pair (M,seq, CL); } else if ( strm (mode, "lsqman_pair")) { RCL=lsqman_pair(seq, CL); } else if ( strm ( mode, "align_pdb_pair")) { RCL=align_pdb_pair ( seq,"gotoh_pair_wise", CL->align_pdb_hasch_mode,CL->align_pdb_param_file,CL, job); } else if ( strm ( mode, "lalign_pdb_pair")) { RCL=align_pdb_pair ( seq,"sim_pair_wise_lalign", CL->align_pdb_hasch_mode,CL->align_pdb_param_file,CL, job); } else if ( strm ( mode, "align_pdb_pair_2")) { RCL=align_pdb_pair_2 ( seq, CL); } else { fprintf ( CL->local_stderr, "\nERROR: THE FUNCTION %s DOES NOT EXIST [FATAL:%s]\n", mode, PROGRAM);crash(""); } add_method_output2method_log (NULL,NULL, (A&&A->len_aln)?A:NULL,RCL, NULL); RCL=(RCL==NULL)?CL:RCL; vfree ( seqlist-1); free_aln (A); return RCL; } Constraint_list *method2pw_cl (TC_method *M, Constraint_list *CL) { char *mode; Constraint_list *PW_CL=NULL; Sequence *S; char mat[100], *m; char group_mat[100]; mode=M->executable; PW_CL=copy_constraint_list ( CL, SOFT_COPY); PW_CL->pw_parameters_set=1; S=(PW_CL)?PW_CL->S:NULL; /*DNA or Protein*/ m=PW_CL->method_matrix; if ( strm ((PW_CL->S)->type, "PROTEIN")) { sprintf ( mat, "%s", (strm(m, "default"))?"blosum62mt":m); sprintf (group_mat, "vasiliky"); PW_CL->ktup=2; } else if ( strm ((PW_CL->S)->type, "DNA") || strm ((PW_CL->S)->type, "RNA") ) { sprintf(group_mat, "idmat"); sprintf ( mat, "%s", (strm(m, "default"))?"dna_idmat":m); PW_CL->ktup=5; } if ( M->matrix[0])sprintf ( mat, "%s", M->matrix); PW_CL->M=read_matrice (mat); if ( M->gop!=UNDEFINED) {PW_CL->gop=M->gop;} else { PW_CL->gop= get_avg_matrix_mm (PW_CL->M, AA_ALPHABET)*10; } if ( M->gep!=UNDEFINED)PW_CL->gep=M->gep; else PW_CL->gep=-1; if ( strm2 ( mode,"fast_pair", "ifast_pair")) { PW_CL->maximise=1; PW_CL->TG_MODE=1; PW_CL->use_fragments=0; if ( !PW_CL->use_fragments)PW_CL->diagonal_threshold=0; else PW_CL->diagonal_threshold=6; sprintf (PW_CL->dp_mode, "fasta_pair_wise"); sprintf (PW_CL->matrix_for_aa_group, group_mat); if ( strm ( mode, "fast_pair")) { PW_CL->L=NULL; PW_CL->get_dp_cost=slow_get_dp_cost; PW_CL->evaluate_residue_pair=evaluate_matrix_score; PW_CL->extend_jit=0; } } else if ( strm2 ( mode,"diag_fast_pair","idiag_fast_pair")) { PW_CL->L=NULL; PW_CL->maximise=1; PW_CL->TG_MODE=1; PW_CL->S=CL->S; PW_CL->use_fragments=1; PW_CL->diagonal_threshold=3; sprintf (PW_CL->dp_mode, "fasta_pair_wise"); PW_CL->ktup=1; sprintf (PW_CL->matrix_for_aa_group, group_mat); PW_CL->get_dp_cost=slow_get_dp_cost; PW_CL->evaluate_residue_pair=evaluate_matrix_score; PW_CL->extend_jit=0; } else if ( strm ( mode,"blast_pair")) { PW_CL->L=NULL; PW_CL->maximise=1; PW_CL->TG_MODE=1; PW_CL->use_fragments=0; PW_CL->pair_wise=gotoh_pair_wise; PW_CL->evaluate_residue_pair=evaluate_blast_profile_score; sprintf (PW_CL->matrix_for_aa_group, group_mat); PW_CL->extend_jit=0; } else if ( strm ( mode,"lalign_blast_pair")) { PW_CL->L=NULL; PW_CL->maximise=1; PW_CL->TG_MODE=1; PW_CL->use_fragments=0; PW_CL->pair_wise=sim_pair_wise_lalign; PW_CL->evaluate_residue_pair=evaluate_blast_profile_score; PW_CL->lalign_n_top=10; sprintf (PW_CL->matrix_for_aa_group, group_mat); PW_CL->extend_jit=0; } else if ( strm ( mode,"viterbi_pair")) { PW_CL->maximise=1; PW_CL->TG_MODE=1; PW_CL->use_fragments=0; sprintf (PW_CL->dp_mode, "viterbi_pair_wise"); PW_CL->L=NULL; PW_CL->get_dp_cost=slow_get_dp_cost; PW_CL->evaluate_residue_pair=evaluate_matrix_score; PW_CL->extend_jit=0; } else if ( strm ( mode,"glocal_pair")) { PW_CL->maximise=1; PW_CL->TG_MODE=1; PW_CL->use_fragments=0; sprintf (PW_CL->dp_mode, "glocal_pair_wise"); sprintf (PW_CL->matrix_for_aa_group, group_mat); PW_CL->L=NULL; PW_CL->get_dp_cost=slow_get_dp_cost; PW_CL->evaluate_residue_pair=evaluate_matrix_score; PW_CL->extend_jit=0; } else if ( strm ( mode,"test_pair")) { PW_CL->maximise=1; PW_CL->TG_MODE=1; PW_CL->use_fragments=0; sprintf (PW_CL->dp_mode, "test_pair_wise"); } else if ( strm ( mode,"sticky_pair")) { PW_CL->maximise=1; PW_CL->TG_MODE=1; PW_CL->use_fragments=0; PW_CL->L=NULL; PW_CL->get_dp_cost=cw_profile_get_dp_cost; PW_CL->evaluate_residue_pair=evaluate_matrix_score; PW_CL->extend_jit=0; sprintf (PW_CL->dp_mode, "gotoh_pair_wise_lgp_sticky"); sprintf (PW_CL->matrix_for_aa_group, group_mat); } else if ( strm ( mode,"slow_pair")|| strm (mode, "islow_pair" ) ) { PW_CL->maximise=1; PW_CL->TG_MODE=1; PW_CL->use_fragments=0; sprintf (PW_CL->dp_mode, "myers_miller_pair_wise"); sprintf (PW_CL->matrix_for_aa_group, group_mat); if ( strm ( "islow_pair", mode)) { PW_CL->get_dp_cost=slow_get_dp_cost; PW_CL->evaluate_residue_pair=residue_pair_extended_list; PW_CL->extend_jit=1; } else if ( strm ("slow_pair", mode) ) { PW_CL->L=NULL; PW_CL->get_dp_cost=cw_profile_get_dp_cost; PW_CL->evaluate_residue_pair=evaluate_matrix_score; PW_CL->extend_jit=0; } } else if ( strm (mode, "subop1_pair")) { PW_CL->maximise=1; PW_CL->TG_MODE=1; PW_CL->use_fragments=0; sprintf (PW_CL->dp_mode, "subop1_pair_wise"); sprintf (PW_CL->matrix_for_aa_group, group_mat); PW_CL->L=NULL; PW_CL->get_dp_cost=slow_get_dp_cost; PW_CL->evaluate_residue_pair=evaluate_matrix_score; PW_CL->extend_jit=0; } else if ( strm (mode, "biphasic_pair")) { PW_CL->maximise=1; PW_CL->TG_MODE=1; PW_CL->use_fragments=0; sprintf (PW_CL->dp_mode, "biphasic_pair_wise"); sprintf (PW_CL->matrix_for_aa_group, group_mat); PW_CL->L=NULL; PW_CL->get_dp_cost=cw_profile_get_dp_cost; PW_CL->evaluate_residue_pair=evaluate_matrix_score; PW_CL->extend_jit=0; } else if ( strm (mode, "proba_pair")) { PW_CL->maximise=1; PW_CL->TG_MODE=1; PW_CL->use_fragments=0; sprintf (PW_CL->dp_mode, "proba_pair_wise"); sprintf (PW_CL->matrix_for_aa_group, group_mat); PW_CL->L=NULL; PW_CL->get_dp_cost=slow_get_dp_cost; PW_CL->evaluate_residue_pair=evaluate_matrix_score; PW_CL->extend_jit=0; } else if ( strm (mode, "diaa_slow_pair")) { PW_CL->maximise=1; PW_CL->TG_MODE=1; PW_CL->use_fragments=0; sprintf (PW_CL->dp_mode, "gotoh_pair_wise_lgp"); sprintf (PW_CL->matrix_for_aa_group, group_mat); PW_CL->L=NULL; PW_CL->get_dp_cost=slow_get_dp_cost; PW_CL->evaluate_residue_pair=evaluate_diaa_matrix_score; PW_CL->extend_jit=0; } else if ( strm (mode, "r_slow_pair")) { PW_CL->maximise=1; PW_CL->TG_MODE=1; PW_CL->use_fragments=0; sprintf (PW_CL->dp_mode, "gotoh_pair_wise_lgp"); sprintf (PW_CL->matrix_for_aa_group, group_mat); PW_CL->L=NULL; PW_CL->get_dp_cost=slow_get_dp_cost; PW_CL->evaluate_residue_pair=evaluate_matrix_score; PW_CL->extend_jit=0; PW_CL->reverse_seq=1; } else if ( strm (mode, "tm_slow_pair")) { PW_CL->maximise=1; PW_CL->TG_MODE=1; PW_CL->use_fragments=0; sprintf (PW_CL->dp_mode, "myers_miller_pair_wise"); sprintf (PW_CL->matrix_for_aa_group, group_mat); PW_CL->L=NULL; PW_CL->get_dp_cost=slow_get_dp_cost; PW_CL->evaluate_residue_pair=evaluate_tm_matrix_score; PW_CL->extend_jit=0; } else if ( strm (mode, "monoaa_slow_pair")) { PW_CL->maximise=1; PW_CL->TG_MODE=1; PW_CL->use_fragments=0; sprintf (PW_CL->dp_mode, "gotoh_pair_wise_lgp"); sprintf (PW_CL->matrix_for_aa_group, group_mat); PW_CL->L=NULL; PW_CL->get_dp_cost=slow_get_dp_cost; PW_CL->evaluate_residue_pair=evaluate_monoaa_matrix_score; PW_CL->extend_jit=0; } else if ( strm (mode, "subop2_pair")) { PW_CL->maximise=1; PW_CL->TG_MODE=1; PW_CL->use_fragments=0; sprintf (PW_CL->dp_mode, "subop2_pair_wise"); sprintf (PW_CL->matrix_for_aa_group, group_mat); PW_CL->L=NULL; PW_CL->get_dp_cost=slow_get_dp_cost; PW_CL->evaluate_residue_pair=evaluate_matrix_score; PW_CL->extend_jit=0; } else if (strm ( mode, "exon2_pair")) { int a; PW_CL->maximise=1; PW_CL->TG_MODE=1; PW_CL->use_fragments=0; sprintf (PW_CL->dp_mode, "myers_miller_pair_wise"); sprintf (PW_CL->matrix_for_aa_group, group_mat); PW_CL->L=NULL; for ( a=0; a<60; a++) { PW_CL->M['x'-'A'][a]=0; PW_CL->M[a]['x'-'A']=0; PW_CL->M['X'-'A'][a]=0; PW_CL->M[a]['X'-'A']=0; } PW_CL->evaluate_residue_pair=evaluate_matrix_score; PW_CL->extend_jit=0; } else if (strm ( mode, "exon3_pair")) { int a; PW_CL->maximise=1; PW_CL->TG_MODE=1; PW_CL->use_fragments=0; sprintf (PW_CL->dp_mode, "myers_miller_pair_wise"); sprintf (PW_CL->matrix_for_aa_group, group_mat); PW_CL->L=NULL; for ( a=0; a<60; a++) { PW_CL->M['x'-'A'][a]=0; PW_CL->M[a]['x'-'A']=0; PW_CL->M['X'-'A'][a]=0; PW_CL->M[a]['X'-'A']=0; } PW_CL->evaluate_residue_pair=evaluate_matrix_score; PW_CL->extend_jit=0; } else if (strm ( mode, "exon_pair")) { int a; PW_CL->maximise=1; PW_CL->TG_MODE=1; PW_CL->use_fragments=0; sprintf (PW_CL->dp_mode, "myers_miller_pair_wise"); sprintf (PW_CL->matrix_for_aa_group, group_mat); PW_CL->L=NULL; for ( a=0; a<60; a++) { PW_CL->M['x'-'A'][a]=0; PW_CL->M[a]['x'-'A']=0; PW_CL->M['X'-'A'][a]=0; PW_CL->M[a]['X'-'A']=0; } PW_CL->evaluate_residue_pair=evaluate_matrix_score; PW_CL->extend_jit=0; } else if ( strm ( mode , "lalign_len_pair")) { PW_CL->L=NULL; PW_CL->maximise=1; PW_CL->TG_MODE=1; PW_CL->use_fragments=0; PW_CL->pair_wise=sim_pair_wise_lalign; PW_CL->evaluate_residue_pair=evaluate_matrix_score; PW_CL->get_dp_cost=slow_get_dp_cost; PW_CL->lalign_n_top=CL->lalign_n_top; sprintf (PW_CL->matrix_for_aa_group, group_mat); PW_CL->extend_jit=0; } else if ( strm ( mode , "lalign_id_pair")) { PW_CL->L=NULL; PW_CL->maximise=1; PW_CL->TG_MODE=1; PW_CL->use_fragments=0; PW_CL->pair_wise=sim_pair_wise_lalign; PW_CL->evaluate_residue_pair=evaluate_matrix_score; PW_CL->get_dp_cost=slow_get_dp_cost; PW_CL->lalign_n_top=CL->lalign_n_top; sprintf (PW_CL->matrix_for_aa_group, group_mat); PW_CL->extend_jit=0; } else if ( strm ( mode , "tm_lalign_id_pair")) { PW_CL->L=NULL; PW_CL->maximise=1; PW_CL->TG_MODE=1; PW_CL->use_fragments=0; PW_CL->pair_wise=sim_pair_wise_lalign; PW_CL->evaluate_residue_pair=evaluate_tm_matrix_score; PW_CL->get_dp_cost=slow_get_dp_cost; PW_CL->lalign_n_top=CL->lalign_n_top; sprintf (PW_CL->matrix_for_aa_group, group_mat); PW_CL->extend_jit=0; } /*CDNA*/ else if ( strm ( mode, "cdna_cfast_pair")) { PW_CL->L=NULL; PW_CL->maximise=1; PW_CL->TG_MODE=1; PW_CL->S=CL->S; PW_CL->use_fragments=0; sprintf (PW_CL->dp_mode, "cfasta_cdna_pair_wise"); PW_CL->M=read_matrice (strcpy ( mat, "blosum62mt")); PW_CL->extend_jit=0; PW_CL->f_gop=CL->f_gop; PW_CL->f_gep=CL->f_gep; PW_CL->get_dp_cost=get_dp_cost; PW_CL->evaluate_residue_pair=evaluate_cdna_matrix_score; PW_CL->ktup=1; } else if ( strm ( mode, "cdna_fast_pair") || strncmp (mode,"cdna_fast_pair",14)==0) { PW_CL->L=NULL; PW_CL->maximise=1; PW_CL->TG_MODE=1; PW_CL->use_fragments=0; sprintf (PW_CL->dp_mode, "fasta_cdna_pair_wise"); PW_CL->extend_jit=0; PW_CL->gop=-5; PW_CL->gep=-1; PW_CL->f_gop=-15; PW_CL->f_gep=0; PW_CL->get_dp_cost=get_dp_cost; PW_CL->evaluate_residue_pair=evaluate_cdna_matrix_score; PW_CL->ktup=1; } else { free_constraint_list (PW_CL); PW_CL=NULL; } if (!strm (CL->method_evaluate_mode, "default")) { choose_extension_mode (CL->method_evaluate_mode, PW_CL); } return PW_CL; } /******************************************************************/ /* MULTIPLE ALIGNMENTS */ /* */ /* */ /******************************************************************/ Alignment * compute_prrp_aln (Alignment *A, Constraint_list *CL) { char *tmpseq=NULL; char *tmpaln=NULL; char command[10000]; Sequence *S; tmpseq=vtmpnam(NULL); tmpaln=vtmpnam(NULL); A=seq2aln ( CL->S, A, 1); output_gotoh_seq (tmpseq, A); sprintf ( command, "prrp -E/dev/null -o%s -F9 %s >/dev/null", tmpaln, tmpseq); my_system (command); if (!check_file_exists(tmpaln)){return NULL;} S=get_fasta_sequence (tmpaln, NULL); S->contains_gap=0; A=seq2aln(S, A,0); free_sequence (S, S->nseq); return A; } Alignment *seq2clustalw_aln (Sequence *S) { return aln2clustalw_aln (seq2aln (S, NULL,RM_GAP), NULL); } Alignment * aln2clustalw_aln (Alignment *B, Constraint_list *CL) { char *seq=NULL,*aln=NULL, command[1000]; output_fasta_seq (seq=vtmpnam (NULL), B); sprintf ( command, "clustalw -infile=%s -outorder=input -outfile=%s %s", seq, aln=vtmpnam (NULL), TO_NULL_DEVICE); my_system (command); if (!check_file_exists(aln)) return NULL; else{B->nseq=0;return main_read_aln(aln,B);} } Alignment * compute_tcoffee_aln_quick (Alignment *A, Constraint_list *CL) { char *tmpseq=NULL; char *tmpaln=NULL; char command[10000]; tmpseq=vtmpnam(NULL); tmpaln=vtmpnam(NULL); if ( CL)A=seq2aln ( CL->S, A, 1); output_fasta_seq (tmpseq, A); sprintf ( command, "t_coffee -seq %s -very_fast -outfile %s -quiet ",tmpseq,tmpaln); my_system (command); if (!check_file_exists(tmpaln))return NULL; A->nseq=0; A=main_read_aln(tmpaln,A); vremove( tmpseq); vremove (tmpaln); return A; } Alignment * compute_clustalw_aln (Alignment *A, Constraint_list *CL) { char *tmpseq=NULL; char *tmpaln=NULL; char command[10000]; tmpseq=vtmpnam(NULL); tmpaln=vtmpnam(NULL); A=seq2aln ( CL->S, A, 1); output_fasta_seq (tmpseq, A); sprintf ( command, "clustalw %sinfile=%s %soutfile=%s %s",CWF, tmpseq,CWF, tmpaln,TO_NULL_DEVICE); my_system (command); if (!check_file_exists(tmpaln))return NULL; A->nseq=0; A=main_read_aln(tmpaln,A); vremove( tmpseq); vremove (tmpaln); return A; } Alignment * realign_block ( Alignment *A, int col1, int col2, char *pg) { /*Uses pg: (pg -infile= -outfile= to realign the block [col1 col2[ Only guaranteed if pg can handle empty sequences set pg to NULL to use the default program */ Alignment *L, *M, *R; char *seq_name; char *aln_name; char command[1000], script[1000]; seq_name=vtmpnam(NULL); aln_name=vtmpnam (NULL); L=copy_aln (A, NULL); M=copy_aln (A, NULL); R=copy_aln (A, NULL); L=extract_aln ( L, 0, col1); M=extract_aln ( M, col1, col2); R=extract_aln ( R, col2, A->len_aln); output_fasta_seq (seq_name, M); sprintf ( script, "%s", (pg==NULL)?"t_coffee":pg); sprintf ( command, "%s -infile=%s -outfile=%s %s", script,seq_name, aln_name, TO_NULL_DEVICE); my_system ( command); free_aln (M); M=main_read_aln (aln_name, NULL); M=reorder_aln (M, L->name,L->nseq); L=aln_cat (L, M); L=aln_cat (L, R); A=copy_aln (L, A); free_aln (L);free_aln (M); free_aln (R); return A; } /******************************************************************/ /* DNA */ /* */ /* */ /******************************************************************/ /******************************************************************/ /* STRUCTURES */ /* */ /* */ /******************************************************************/ Constraint_list * align_pdb_pair_2 (char *seq, Constraint_list *CL) { char *tmp_name=NULL; int s1, s2; static char *command; static char *program; tmp_name=vtmpnam ( NULL); if ( !program)program=vcalloc ( LONG_STRING, sizeof (char)); if ( !command)command=vcalloc ( LONG_STRING, sizeof (char)); #ifndef ALIGN_PDB_4_TCOFFEE if ( getenv ( "ALIGN_PDB_4_TCOFFEE")==NULL)crash ("ALIGN_PDB_4_TCOFFEE IS NOT DEFINED"); else sprintf ( program, "%s", (getenv ( "ALIGN_PDB_4_TCOFFEE"))); #else if ( getenv ( "ALIGN_4_TCOFFEE")==NULL)sprintf (program, "%s", ALIGN_PDB_4_TCOFFEE); else sprintf ( program, "%s", (getenv ( "ALIGN_PDB_4_TCOFFEE"))); #endif atoi(strtok (seq,SEPARATORS)); s1=atoi(strtok (NULL,SEPARATORS)); s2=atoi(strtok (NULL,SEPARATORS)); sprintf ( command , "%s -in P%s P%s -gapopen=-40 -max_delta=2.5 -gapext=0 -scale=0 -hasch_mode=hasch_ca_trace_bubble -maximum_distance=10 -output pdb_constraint_list -outfile stdout> %s%s",program, (CL->S)->file[s1], (CL->S)->file[s2], get_cache_dir(),tmp_name); my_system ( command); CL=read_constraint_list_file(CL, tmp_name); vremove ( tmp_name); return CL; } Constraint_list *align_pdb_pair (char *seq_in, char *dp_mode,char *evaluate_mode, char *file, Constraint_list *CL, Job_TC *job) { int s1, s2; char seq[1000]; char name1[1000]; char name2[1000]; Constraint_list *PWCL; Alignment *F; sprintf ( seq, "%s",seq_in); atoi(strtok (seq,SEPARATORS)); s1=atoi(strtok (NULL,SEPARATORS)); s2=atoi(strtok (NULL,SEPARATORS)); sprintf (name1, "%s%s_%s.%s.align_pdb", get_cache_dir(),(CL->S)->name[s1], (CL->S)->name[s2], dp_mode); sprintf (name2, "%s%s_%s.%s.align_pdb", get_cache_dir(),(CL->S)->name[s2], (CL->S)->name[s1], dp_mode); if ( check_file_exists (name1) && is_lib(name1))CL=read_constraint_list_file(CL,name1); else if ( check_file_exists (name2) && is_lib(name2))CL=read_constraint_list_file(CL,name2); else { PWCL=set_constraint_list4align_pdb ( CL,s1,dp_mode, evaluate_mode, NULL); PWCL=set_constraint_list4align_pdb ( CL,s2,dp_mode, evaluate_mode, NULL); ((job->param)->TCM)->PW_CL=PWCL; F=fast_pair (job); output_constraints (name1, "100", F); CL=aln2constraint_list (F, CL, "100"); free_aln (F); } return CL; } Constraint_list * profile_pair (TC_method *M , char *in_seq, Constraint_list *CL) { char seq[1000]; int a, s1, s2; char *result,*prf1_file, *prf2_file; Alignment *F=NULL, *A1, *A2; FILE *fp; char command[10000]; char *param; if ( M->executable2[0]=='\0') fprintf ( stderr, "\nERROR: profile_pair requires a method: thread_pair@EP@executable2@ [FATAL:%s]\n", PROGRAM); sprintf ( seq, "%s", in_seq); atoi(strtok (seq,SEPARATORS)); s1=atoi(strtok (NULL,SEPARATORS)); s2=atoi(strtok (NULL,SEPARATORS)); A1=seq2R_template_profile(CL->S,s1); A2=seq2R_template_profile(CL->S,s2); prf1_file=vtmpnam (NULL); fp=vfopen (prf1_file, "w"); if ( A1) { fprintf (fp, ">%s\n%s\n",(CL->S)->name[s1], aln2cons_seq_mat(A1, "blosum62mt")); for ( a=0; a< A1->nseq; a++)fprintf (fp, ">prf_seq1_%d\n%s\n", a, A1->seq_al[a]); } else { fprintf ( fp, ">%s\n%s\n", (CL->S)->name[s1], (CL->S)->seq[s1]); } vfclose (fp); prf2_file=vtmpnam (NULL); fp=vfopen (prf2_file, "w"); if (A2) { fprintf (fp, ">%s\n%s\n",(CL->S)->name[s2], aln2cons_seq_mat(A2, "blosum62mt")); for ( a=0; a< A2->nseq; a++)fprintf (fp, ">prf_seq2_%d\n%s\n", a, A2->seq_al[a]); } else { fprintf ( fp, ">%s\n%s\n", (CL->S)->name[s2], (CL->S)->seq[s2]); } vfclose (fp); result=vtmpnam (NULL); if ( M->param) { param=vcalloc(strlen (M->param)+1, sizeof (char)); sprintf ( param, "%s", M->param); param=substitute ( param, " ", ""); param=substitute ( param, "\n", ""); } sprintf ( command, "tc_generic_method.pl -mode=profile_pair -method=%s %s%s %s%s %s%s -param=%s -tmpdir=%s", M->executable2,M->in_flag,prf1_file, M->in_flag2,prf2_file,M->out_flag, result, param, get_tmp_4_tcoffee()); my_system ( command); if ( !check_file_exists (result)) { fprintf ( stderr, "\n\tprofile_pair/%s failed:\n\t%s\n",M->executable2, command); myexit (EXIT_FAILURE); } else if ( is_lib (result)) { CL=read_constraint_list_file(CL,result); } else if ( is_aln (result)) { F=main_read_aln (result, NULL); char *name1, *name2; name1=(CL->S)->name[s1]; name2=(CL->S)->name[s2]; fp=vfopen (result, "w"); for ( a=0; a< F->nseq; a++) if (strm ( F->name[a], name1) || strm (F->name[a], name2)) fprintf ( fp, ">%s\n%s\n", F->name[a], F->seq_al[a]); vfclose (fp); free_aln (F); F=main_read_aln (result, NULL); CL=aln2constraint_list (F, CL, "sim"); free_aln (F); } return CL; } Constraint_list * pdbid_pair (TC_method *M , char *in_seq, Constraint_list *CL) { char seq[1000]; char *alternative_method; char *current_,method; int s1, s2; char *result, *pdb1,*pdb1_file, *pdb2, *pdb2_file; Alignment *F=NULL; char *command; if ( M->executable2[0]=='\0') { fprintf ( stderr, "\nERROR: pdbid_pair requires a structural alignment method: pdb_pair@EP@EXECUTABLE2@ [FATAL:%s]\n", PROGRAM); myexit (EXIT_FAILURE); } sprintf ( seq, "%s", in_seq); atoi(strtok (seq,SEPARATORS)); s1=atoi(strtok (NULL,SEPARATORS)); s2=atoi(strtok (NULL,SEPARATORS)); pdb1=seq2P_pdb_id(CL->S,s1); pdb2=seq2P_pdb_id(CL->S,s2); if (!is_pdb_name (pdb1) || !is_pdb_name(pdb2)) { return CL; } result=vtmpnam (NULL); command = vcalloc ( 1000, sizeof (char)); sprintf ( command, "tc_generic_method.pl -mode=pdbid_pair -method=%s %s%s %s%s %s%s -email=%s -cache=%s -tmpdir=%s", M->executable2,M->in_flag,pdb1, M->in_flag2,pdb2,M->out_flag, result, Email(ENV,SET),get_cache_dir(), get_tmp_4_tcoffee()); my_system ( command); vfree (command); if (file_is_empty (result))return CL; else { F=main_read_aln (result, NULL); if ( !F) { fprintf ( stderr, "\n\tpdb_pair/%s failed:\n\t%s\n",M->executable2, command); } else { sprintf ( F->name[0],"%s", (CL->S)->name[s1]); sprintf ( F->name[1],"%s", (CL->S)->name[s2]); F=fix_aln_seq (F, CL->S); CL=aln2constraint_list (F, CL, "sim"); } free_aln (F); } return CL; } Constraint_list * pdb_pair (TC_method *M , char *in_seq, Constraint_list *CL) { char seq[1000]; int s1, s2; char *result, *pdb1,*pdb1_file, *pdb2, *pdb2_file; Alignment *F=NULL; char command[10000]; if ( M->executable2[0]=='\0') { fprintf ( stderr, "\nERROR: pdb_pair requires a structural alignment method: pdb_pair@EP@EXECUTABLE2@ [FATAL:%s]\n", PROGRAM); myexit (EXIT_FAILURE); } sprintf ( seq, "%s", in_seq); atoi(strtok (seq,SEPARATORS)); s1=atoi(strtok (NULL,SEPARATORS)); s2=atoi(strtok (NULL,SEPARATORS)); pdb1=seq2P_template_file(CL->S,s1); pdb2=seq2P_template_file(CL->S,s2); if ( !pdb1 || !pdb2) return CL; pdb1_file=vtmpnam (NULL); pdb2_file=vtmpnam (NULL); sprintf ( command, "extract_from_pdb -infile %s -atom ALL -chain FIRST -nodiagnostic > %s", pdb1, pdb1_file); my_system (command); sprintf ( command, "extract_from_pdb -infile %s -atom ALL -chain FIRST -nodiagnostic > %s", pdb2, pdb2_file); my_system (command); result=vtmpnam (NULL); sprintf ( command, "tc_generic_method.pl -mode=pdb_pair -method=%s %s%s %s%s %s%s -tmpdir=%s", M->executable2,M->in_flag,pdb1_file, M->in_flag2,pdb2_file,M->out_flag, result, get_tmp_4_tcoffee()); my_system ( command); F=main_read_aln (result, NULL); if ( !F) { fprintf ( stderr, "\n\tpdb_pair/%s failed:\n\t%s\n",M->executable2, command); } else { sprintf ( F->name[0],"%s", (CL->S)->name[s1]); sprintf ( F->name[1],"%s", (CL->S)->name[s2]); F=fix_aln_seq (F, CL->S); CL=aln2constraint_list (F, CL, "sim"); } free_aln (F); return CL; } Constraint_list * seq_msa (TC_method *M , char *in_seq, Constraint_list *CL) { char seq[1000]; char *infile, *outfile; int a, n, s; Alignment *F=NULL; FILE *fp; char command[1000]; infile=vtmpnam (NULL); outfile=vtmpnam (NULL); sprintf ( seq, "%s", in_seq); n=atoi(strtok (seq,SEPARATORS)); fp=vfopen (infile, "w"); for ( a=0; a%s\n%s\n", (CL->S)->name[s], (CL->S)->seq[s]); } vfclose (fp); sprintf ( command, "t_coffee -other_pg tc_generic_method.pl -mode=seq_msa -method=%s %s%s %s%s -tmpdir=%s", M->executable2, M->in_flag, infile, M->out_flag, outfile, get_tmp_4_tcoffee()); my_system (command); if ( strm (M->out_mode, "aln") || strm (M->out_mode, "A")) { F=main_read_aln (outfile, NULL); if ( !F) { fprintf ( stderr, "\n\tseq_msa/%s failed:\n\t%s\n", M->executable2,command); } else { CL=aln2constraint_list (F, CL, "sim"); } free_aln (F); } else if ( strm (M->out_mode, "fL")|| strm (M->out_mode, "lib")) { Constraint_list *NCL; NCL=read_constraint_list_file(CL,outfile); if ( !NCL) { fprintf ( stderr, "\n\tseq_msa/%s failed:\n\t%s\n", M->executable2,command); } else { CL=NCL; } } return CL; } Constraint_list * thread_pair (TC_method *M , char *in_seq, Constraint_list *CL) { char seq[1000]; int s1, s2; if ( M->executable2[0]=='\0') { fprintf ( stderr, "\nERROR: thread_pair requires a threading method: pdb_pair@EP@EXECUTABLE2@ [FATAL:%s]\n", PROGRAM); myexit (EXIT_FAILURE); } sprintf ( seq, "%s", in_seq); atoi(strtok (seq,SEPARATORS)); s1=atoi(strtok (NULL,SEPARATORS)); s2=atoi(strtok (NULL,SEPARATORS)); CL=thread_pair2(M,s1, s2, CL); CL=thread_pair2(M,s2, s1, CL); return CL; } Constraint_list* thread_pair2 ( TC_method *M, int s1, int s2, Constraint_list *CL) { char *result, *pep, *pdb, *pdb1; Alignment *F=NULL; Sequence *STL; FILE *fp; char command[10000]; STL=(CL)?CL->STRUC_LIST:NULL; if ( !(CL->S) || !((CL->S)->T[s1]) || !((CL->S)->T[s1])->P || !seq2P_template_file(CL->S,s1))return CL; else pdb1=seq2P_template_file(CL->S,s1); pdb=vtmpnam (NULL); result=vtmpnam (NULL); pep=vtmpnam (NULL); sprintf ( command, "extract_from_pdb -infile %s -atom ALL -chain FIRST -nodiagnostic > %s", pdb1, pdb); my_system (command); fp=vfopen (pep, "w"); fprintf ( fp, ">%s\n%s\n",(CL->S)->name[s2],(CL->S)->seq[s2] ); vfclose (fp); sprintf ( command, "tc_generic_method.pl -mode=thread_pair -method=%s %s%s %s%s %s%s -tmpdir=%s", M->executable2,M->in_flag,pep, M->in_flag2,pdb,M->out_flag, result, get_tmp_4_tcoffee()); my_system ( command); F=main_read_aln (result, NULL); if ( !F) { fprintf ( stderr, "\n\tthread_pair/%s failed:\n\t%s\n", M->executable2,command); } else { sprintf ( F->name[0],"%s", (CL->S)->name[s1]); sprintf ( F->name[1],"%s", (CL->S)->name[s2]); F=fix_aln_seq (F, CL->S); CL=aln2constraint_list (F, CL, "sim"); } free_aln (F); return CL; } Constraint_list * lsqman_pair ( char *in_seq, Constraint_list *CL) { FILE *fp; static CLIST_TYPE *entry; char command[STRING]; char seq[1000]; int s1, s2; char *seq_file, *lsqman_result, *tmp_name1; Alignment *F=NULL; int n_failure=0; sprintf ( seq, "%s", in_seq); if ( !entry)entry=vcalloc ( LIST_N_FIELDS, sizeof ( CLIST_TYPE )); atoi(strtok (seq,SEPARATORS)); s1=atoi(strtok (NULL,SEPARATORS)); s2=atoi(strtok (NULL,SEPARATORS)); tmp_name1=vcalloc (100, sizeof (char)); sprintf ( tmp_name1, "%s_%s.lsqman_aln", (CL->S)->name[s1], (CL->S)->name[s2]); if ( check_file_exists ( tmp_name1) && (F=main_read_aln(tmp_name1, NULL))!=NULL) { free_aln(F); lsqman_result=tmp_name1; } else { seq_file=vtmpnam (NULL); lsqman_result=tmp_name1; fp=vfopen (seq_file, "w"); fprintf ( fp, ">%s\n%s\n",(CL->S)->name[s1],(CL->S)->seq[s2] ); vfclose (fp); sprintf ( command, "%s -pdb %s -pep %s > %s%s", LSQMAN_4_TCOFFEE, (CL->S)->name[s1], seq_file,get_cache_dir(), lsqman_result); while (!F) { my_system ( command); F=main_read_aln (lsqman_result, NULL); if ( !F) { fprintf ( stderr, "\n\tlsqman failed: will be retried"); if ( n_failure==0)fprintf ( stderr, "\n\t%s", command); n_failure++; if ( n_failure==10) { fprintf ( stderr, "\nCould not run Fugue: will replace it with slow_pair\n"); vremove (lsqman_result); return NULL; } } free_aln (F); } vremove( seq_file); } F=main_read_aln(lsqman_result, NULL); /*sprintf ( F->name[0],"%s", (CL->S)->name[s1]); sprintf ( F->name[1],"%s", (CL->S)->name[s2]); */ F=fix_aln_seq (F, CL->S); CL=aln2constraint_list (F, CL, "100"); free_aln (F); return CL; } Constraint_list * sap_pair (char *seq, char *weight, Constraint_list *CL) { register int a; FILE *fp; char full_name[FILENAMELEN]; char *tmp_pdb1, *tmp_pdb2; char *sap_seq1, *sap_seq2; char *sap_lib, *tmp_name, *tmp_name1, *tmp_name2; char *buf=NULL; int s1, s2, r1=0, r2=0; int sim=0, tot=0, score=0; char program[STRING]; char *string1, *string2, *string3, *string4, *string5; int max_struc_len=10000; char *template1, *template2; /*check_program_is_installed ( "sap" ,SAP_4_TCOFFEE, "SAP_4_TCOFFEE",MAIL, IS_FATAL);*/ atoi(strtok (seq,SEPARATORS)); s1=atoi(strtok (NULL,SEPARATORS)); s2=atoi(strtok (NULL,SEPARATORS)); template1=seq2T_value(CL->S,s1, "template_name", "_P_"); template2=seq2T_value(CL->S,s2, "template_name", "_P_"); if (!template1 || !template2) return CL; declare_name (string1); declare_name (string2); declare_name (string3); declare_name (string4); declare_name (string5); #ifndef SAP_4_TCOFFEE if ( getenv ( "SAP_4_TCOFFEE")==NULL)crash ("SAP_4_TCOFFEE IS NOT DEFINED"); else sprintf ( program, "%s", (getenv ( "SAP_4_TCOFFEE"))); #else if ( getenv ( "SAP_4_TCOFFEE")==NULL)sprintf (program, "%s", SAP_4_TCOFFEE); else sprintf ( program, "%s", (getenv ( "SAP_4_TCOFFEE"))); #endif tmp_name1=vcalloc (100, sizeof (char)); sprintf ( tmp_name1, "%s_%s.sap_results",template1,template2); tmp_name2=vcalloc (100, sizeof (char)); sprintf ( tmp_name2, "%s_%s.sap_results",template2,template1); if (is_sap_file (tmp_name1)) { tmp_name=tmp_name1; } else if ( is_sap_file (tmp_name2)) { tmp_name=tmp_name2; SWAP (s1, s2); } else { tmp_name=tmp_name1; tmp_pdb1=normalize_pdb_file(seq2P_template_file(CL->S,s1),(CL->S)->seq[s1], vtmpnam (NULL)); tmp_pdb2=normalize_pdb_file(seq2P_template_file(CL->S,s2),(CL->S)->seq[s2], vtmpnam (NULL)); sprintf ( full_name, "%s%s", get_cache_dir (), tmp_name); printf_system ("%s %s %s >%s",program,tmp_pdb1,tmp_pdb2, full_name); if ( !check_file_exists (full_name) || !is_sap_file(full_name)) { add_warning ( stderr, "WARNING: SAP failed to align: %s against %s [%s:WARNING]\n", seq2P_template_file(CL->S,s1),seq2P_template_file(CL->S,s2), PROGRAM); if ( check_file_exists (full_name))add2file2remove_list (full_name); return CL; } if ( flag_file2remove_is_on())add2file2remove_list (full_name); remove ("super.pdb"); } sap_seq1=vcalloc (max_struc_len, sizeof (char)); sap_seq2=vcalloc (max_struc_len, sizeof (char)); fp=find_token_in_file ( tmp_name, NULL, "Percent"); fp=find_token_in_file ( tmp_name, fp , "Percent"); while ( (fgetc (fp))!='\n'); while ((buf=vfgets (buf, fp))) { if ( !strstr (buf, "eighted")) { remove_charset (buf, "!alnum"); r1=buf[0]; r2=buf[strlen(buf)-1]; } sim+=(r1==r2)?1:0; if ( tot>max_struc_len) {max_struc_len+=max_struc_len; sap_seq1=vrealloc ( sap_seq1, sizeof(char)*max_struc_len); sap_seq2=vrealloc ( sap_seq2, sizeof(char)*max_struc_len); } sap_seq1[tot]=r1; sap_seq2[tot]=r2; tot++; } sim=(sim*100)/tot; if ( is_number (weight))score=atoi(weight); else if ( strstr ( weight, "OW")) { int ow; sscanf ( weight, "OW%d", &ow); score=sim*ow; } else { score=sim; } vfclose (fp); sap_seq1[tot]=sap_seq2[tot]='\0'; fp=vfopen ( sap_lib=vtmpnam(NULL), "w"); fprintf (fp, "! TC_LIB_FORMAT_01\n"); fprintf (fp, "2\n"); fprintf (fp, "%s %d %s\n", (CL->S)->name[s2],(int)strlen (sap_seq1), sap_seq1); fprintf (fp, "%s %d %s\n", (CL->S)->name[s1],(int)strlen (sap_seq2), sap_seq2); fprintf (fp, "#1 2\n"); for ( a=0; a< tot; a++) { fprintf (fp, "%d %d %d 1 0\n", a+1, a+1, score); } fprintf (fp, "! CPU 0\n"); fprintf (fp, "! SEQ_1_TO_N\n"); vfclose (fp); CL=read_constraint_list_file(CL,sap_lib); vremove (sap_lib); vfree ( string1);vfree ( string2);vfree ( string3);vfree ( string4);vfree ( string5); vfree (sap_seq1); vfree(sap_seq2);vfree (tmp_name1); vfree(tmp_name2); vfree (buf); return CL; } Constraint_list *rna_pair (TC_method *M , char *in_seq, Constraint_list *CL) { char seq[1000]; int s1, s2; char *result, *pdb1, *pdb2, *pdb1_file, *pdb2_file; Alignment *F=NULL; char command[10000]; if ( M->executable2[0]=='\0') { fprintf ( stderr, "\nERROR: rna_pair requires a structural alignment method: pdb_pair@EP@EXECUTABLE2@ [FATAL:%s]\n", PROGRAM); myexit (EXIT_FAILURE); } sprintf ( seq, "%s", in_seq); atoi(strtok (seq,SEPARATORS)); s1=atoi(strtok (NULL,SEPARATORS)); s2=atoi(strtok (NULL,SEPARATORS)); pdb1=seq2P_template_file(CL->S,s1); pdb1_file=vtmpnam (NULL); sprintf ( command, "extract_from_pdb -infile %s -atom ALL -chain FIRST -nodiagnostic > %s", pdb1, pdb1_file); my_system (command); // pdb2=seq2P_template_file(CL->S,s2); pdb2_file=vtmpnam (NULL); sprintf ( command, "extract_from_pdb -infile %s -atom ALL -chain FIRST -nodiagnostic > %s", pdb2, pdb2_file); my_system (command); result=vtmpnam (NULL); sprintf ( command, "tc_generic_method.pl -mode=rna_pair -method=%s %s%s %s%s %s%s -tmpdir=%s", M->executable2,M->in_flag,pdb1_file, M->in_flag2,pdb2_file,M->out_flag, result, get_tmp_4_tcoffee()); my_system ( command); F=main_read_aln (result, NULL); if ( !F) { fprintf ( stderr, "\n\trna_pair/%s failed:\n\t%s\n",M->executable2, command); } else { sprintf ( F->name[0],"%s", (CL->S)->name[s1]); sprintf ( F->name[1],"%s", (CL->S)->name[s2]); F=fix_aln_seq (F, CL->S); CL=aln2constraint_list (F, CL, "sim"); } free_aln (F); return CL; } /******************************************************************/ /* GENERIC PAIRWISE METHODS */ /* */ /* */ /******************************************************************/ Constraint_list * best_pair4rna(Job_TC *job) { int n,a; static char *seq; Alignment *A; Constraint_list *PW_CL; Constraint_list *CL, *RCL; char *seq_in; Sequence *S; TC_method *M, *sara_pairM, *proba_pairM; int*seqlist; int id, s1, s2; Template *T1, *T2; int ml=0; struct X_template *r1, *r2, *p1, *p2; static int **blosum; if (!seq)seq=vcalloc (100, sizeof (char)); A=(job->io)->A; M=(job->param)->TCM; CL=(job->io)->CL; S=CL->S; for (a=0; anseq; a++)ml=MAX(ml, strlen (S->name[a])); if ( !strm ( retrieve_seq_type(), "RNA") ) printf_exit (EXIT_FAILURE, stderr, "ERROR: RNA Sequences Only with best4rna_pair [FATAL:%s]\n",PROGRAM); seq_in=(job->param)->seq_c; sprintf (seq, "%s", seq_in); seqlist=string2num_list (seq); n=seqlist[1]; if ( n!=2){fprintf ( stderr, "\nERROR: best_pair can only handle two seq at a time [FATAL]\n");myexit (EXIT_FAILURE);} s1=seqlist[2]; s2=seqlist[3]; T1=S->T[s1]; T2=S->T[s2]; r1=T1->R; r2=T2->R; p1=T1->P; p2=T2->P; PW_CL=((job->param)->TCM)->PW_CL; CL=(job->io)->CL; if (!blosum)blosum=read_matrice ("blosum62mt"); // id=idscore_pairseq (S->seq[s1], S->seq[s2],-10,-1,blosum, "sim"); proba_pairM=method_file2TC_method(method_name2method_file ("proba_pair")); proba_pairM->PW_CL=method2pw_cl(proba_pairM, CL); sara_pairM=method_file2TC_method(method_name2method_file ("sara_pair")); sara_pairM->PW_CL=method2pw_cl(sara_pairM, CL); if ( p1 && p2) { //Avoid Structural Tem T1->R=NULL; T2->R=NULL; fprintf ( stderr, "\n\t%-*s %-*s: Structure Based Alignment\n", ml,S->name[s1], ml,S->name[s2]); (job->param)->TCM=sara_pairM; } else { fprintf ( stderr, "\n\t%-*s %-*s: Direct Sequence Alignment\n", ml,S->name[s1], ml,S->name[s2]); (job->param)->TCM=proba_pairM; } RCL=seq2list (job); T1->R=r1; T2->R=r2; return RCL; } Constraint_list * best_pair4prot (Job_TC *job) { int n,a; static char *seq; Alignment *A; Constraint_list *PW_CL; Constraint_list *CL, *RCL; char *seq_in; Sequence *S; TC_method *M, *sap_pairM, *proba_pairM; int*seqlist; int id, s1, s2; Template *T1, *T2; int ml=0; struct X_template *r1, *r2, *p1, *p2; static int **blosum; if (!seq)seq=vcalloc (100, sizeof (char)); A=(job->io)->A; M=(job->param)->TCM; CL=(job->io)->CL; S=CL->S; for (a=0; anseq; a++)ml=MAX(ml, strlen (S->name[a])); if ( strm ( retrieve_seq_type(), "DNA") ||strm ( retrieve_seq_type(), "RNA") )printf_exit (EXIT_FAILURE, stderr, "ERROR: Protein Sequences Only with bestprot_pair [FATAL:%s]\n",PROGRAM); seq_in=(job->param)->seq_c; sprintf (seq, "%s", seq_in); seqlist=string2num_list (seq); n=seqlist[1]; if ( n!=2){fprintf ( stderr, "\nERROR: best_pair can only handle two seq at a time [FATAL]\n");myexit (EXIT_FAILURE);} s1=seqlist[2]; s2=seqlist[3]; T1=S->T[s1]; T2=S->T[s2]; r1=T1->R; r2=T2->R; p1=T1->P; p2=T2->P; PW_CL=((job->param)->TCM)->PW_CL; CL=(job->io)->CL; if (!blosum)blosum=read_matrice ("blosum62mt"); id=idscore_pairseq (S->seq[s1], S->seq[s2],-10,-1,blosum, "sim"); proba_pairM=method_file2TC_method(method_name2method_file ("proba_pair")); proba_pairM->PW_CL=method2pw_cl(proba_pairM, CL); sap_pairM=method_file2TC_method(method_name2method_file ("sap_pair")); sap_pairM->PW_CL=method2pw_cl(sap_pairM, CL); if ( id>80) { //Hide The Template T1->R=NULL; T2->R=NULL; fprintf ( stderr, "\n\t%-*s %-*s: Direct Sequence Alignment\n", ml,S->name[s1], ml,S->name[s2]); (job->param)->TCM=proba_pairM; } else if ( p1 && p2) { //Avoid Structural Tem T1->R=NULL; T2->R=NULL; fprintf ( stderr, "\n\t%-*s %-*s: Structure Based Alignment\n", ml,S->name[s1], ml,S->name[s2]); (job->param)->TCM=sap_pairM; } else if ( r1 || r2) { fprintf ( stderr, "\n\tt%-*s %-*s: PSIBLAST Profile Alignment\n", ml,S->name[s1], ml,S->name[s2]); (job->param)->TCM=proba_pairM; } else { fprintf ( stderr, "\n\t%-*s %-*s: Direct Sequence Alignment (No Profile)\n", ml,S->name[s1], ml,S->name[s2]); (job->param)->TCM=proba_pairM; } RCL=seq2list (job); T1->R=r1; T2->R=r2; return RCL; } Alignment * fast_pair (Job_TC *job) { int s, n,a; static int **l_s; static int *ns; char seq[1000]; Alignment *A; Constraint_list *PW_CL; Constraint_list *CL; char *seq_in; Sequence *S; TC_method *M; int*seqlist; char **buf; A=(job->io)->A; M=(job->param)->TCM; PW_CL=((job->param)->TCM)->PW_CL; CL=(job->io)->CL; seq_in=(job->param)->seq_c; sprintf (seq, "%s", seq_in); seqlist=string2num_list (seq); n=seqlist[1]; if ( n!=2){fprintf ( stderr, "\nERROR: fast_pw_aln can only handle two seq at a time [FATAL]\n");myexit (EXIT_FAILURE);} S=(CL)->S; if (!A) {A=declare_aln (CL->S);} if ( !ns) { ns=vcalloc ( 2, sizeof (int)); l_s=declare_int (2,(CL->S)->nseq); } buf=vcalloc ( S->nseq, sizeof (char*)); for ( a=0; a< n; a++) { s=seqlist[a+2]; if ( strm (M->seq_type, "G")) { buf[s]=S->seq[s]; S->seq[s]=((((S->T[s])->G)->VG)->S)->seq[0]; } else buf[s]=S->seq[s]; sprintf ( A->seq_al[a], "%s",S->seq[s]); sprintf ( A->name[a], "%s", (CL->S)->name[s]); A->order[a][0]=s; } A->S=CL->S; PW_CL->S=CL->S; A->CL=CL; ns[0]=ns[1]=1; l_s[0][0]=0; l_s[1][0]=1; if (PW_CL->reverse_seq) { invert_string2(A->seq_al[0]); invert_string2(A->seq_al[1]); invert_string2 ((CL->S)->seq[A->order[0][0]]); invert_string2 ((CL->S)->seq[A->order[1][0]]); } pair_wise ( A, ns, l_s, PW_CL); if (PW_CL->reverse_seq) { invert_string2(A->seq_al[0]); invert_string2(A->seq_al[1]); invert_string2 ((CL->S)->seq[A->order[0][0]]); invert_string2 ((CL->S)->seq[A->order[1][0]]); } A->nseq=n; for ( a=0; anseq; a++) { if ( !buf[a] || buf[a]==S->seq[a]); else S->seq[a]=buf[a]; } vfree (buf);vfree (seqlist); return A; } Alignment * align_two_aln ( Alignment *A1, Alignment *A2, char *in_matrix, int gop, int gep, char *in_align_mode) { Alignment *A=NULL; Constraint_list *CL; Sequence *S; int a; int *ns; int **ls; static char *matrix; static char *align_mode; if (!matrix)matrix=vcalloc ( 100, sizeof (char)); if (!align_mode)align_mode=vcalloc ( 100, sizeof (char)); sprintf ( matrix, "%s", in_matrix); sprintf ( align_mode, "%s", in_align_mode); CL=vcalloc ( 1, sizeof (Constraint_list)); CL->pw_parameters_set=1; CL->M=read_matrice (matrix); CL->matrices_list=declare_char (10, 10); CL->evaluate_residue_pair=evaluate_matrix_score; CL->get_dp_cost=consensus_get_dp_cost; CL->normalise=1; CL->extend_jit=0; CL->maximise=1; CL->gop=gop; CL->gep=gep; CL->TG_MODE=2; sprintf (CL->matrix_for_aa_group, "vasiliky"); CL->use_fragments=0; CL->ktup=5; if ( !CL->use_fragments)CL->diagonal_threshold=0; else CL->diagonal_threshold=6; sprintf (CL->dp_mode, "%s", align_mode); A=copy_aln (A1, A); A=stack_aln (A, A2); CL->S=fill_sequence_struc(A->nseq, A->seq_al,A->name); ns=vcalloc ( 2, sizeof(int)); ls=declare_int ( 2,A->nseq); ns[0]=A1->nseq; ns[1]=A2->nseq; for ( a=0; anseq; A->score_aln=pair_wise (A, ns, ls,CL); vfree (ns); free_int (ls, -1); S=free_constraint_list (CL); free_sequence (S,-1); A->S=NULL; return A; } static int align_two_seq_keep_case; void toggle_case_in_align_two_sequences(int value) { align_two_seq_keep_case=value; } Alignment * align_two_sequences ( char *seq1, char *seq2, char *in_matrix, int gop, int gep, char *in_align_mode) { static Alignment *A; Constraint_list *CL; Sequence *S; int *ns; int **l_s; char **seq_array; char **name_array; static char *matrix; static int **M; static char *align_mode; if (!matrix)matrix=vcalloc ( 100, sizeof (char)); if (!align_mode)align_mode=vcalloc ( 100, sizeof (char)); sprintf ( align_mode, "%s", in_align_mode); CL=vcalloc ( 1, sizeof (Constraint_list)); CL->pw_parameters_set=1; CL->matrices_list=declare_char (10, 10); if ( !strm (matrix, in_matrix)) { sprintf ( matrix,"%s", in_matrix); M=CL->M=read_matrice (matrix); } else { CL->M=M; } if (strstr (in_align_mode, "cdna")) CL->evaluate_residue_pair=evaluate_cdna_matrix_score; else CL->evaluate_residue_pair=evaluate_matrix_score; CL->get_dp_cost=get_dp_cost; CL->extend_jit=0; CL->maximise=1; CL->gop=gop; CL->gep=gep; CL->TG_MODE=2; sprintf (CL->matrix_for_aa_group, "vasiliky"); CL->use_fragments=0; CL->ktup=3; if ( !CL->use_fragments)CL->diagonal_threshold=0; else CL->diagonal_threshold=6; sprintf (CL->dp_mode, "%s", align_mode); seq_array=declare_char ( 2, MAX(strlen(seq1), strlen (seq2))+1); sprintf (seq_array[0], "%s",seq1); sprintf (seq_array[1],"%s", seq2); ungap_array(seq_array,2); if (align_two_seq_keep_case !=KEEP_CASE)string_array_lower(seq_array,2); name_array=declare_char (2, STRING); sprintf ( name_array[0], "A"); sprintf ( name_array[1], "B"); ns=vcalloc ( 2, sizeof(int)); l_s=declare_int ( 2, 1); ns[0]=ns[1]=1; l_s[0][0]=0; l_s[1][0]=1; CL->S=fill_sequence_struc(2, seq_array, name_array); A=seq2aln(CL->S, NULL, 1); ungap (A->seq_al[0]); ungap (A->seq_al[1]); A->score_aln=pair_wise (A, ns, l_s,CL); vfree (ns); free_int (l_s, -1); free_char (name_array, -1);free_char ( seq_array,-1); CL->M=NULL; S=free_constraint_list (CL); free_sequence (S,-1); A->S=NULL; return A; } NT_node make_root_tree ( Alignment *A,Constraint_list *CL,int gop, int gep,Sequence *S, char *tree_file,int maximise) { NT_node **T=NULL; T=make_tree (A, CL, gop, gep,S,tree_file,maximise); (T[3][0])->nseq=S->nseq; return T[3][0]; } NT_node ** make_tree ( Alignment *A,Constraint_list *CL,int gop, int gep,Sequence *S, char *tree_file,int maximise) { int a, b, ra, rb; NT_node **T=NULL; int **distances; int out_nseq; char **out_seq_name; char **out_seq; if ( !CL || !CL->tree_mode || !CL->tree_mode[0]) { fprintf ( stderr, "\nERROR: No CL->tree_mode specified (make_tree::util_dp_drivers.c [FATAL:%s]", PROGRAM); myexit (EXIT_FAILURE); } else fprintf (CL->local_stderr , "\nMAKE GUIDE TREE \n\t[MODE=%s][",CL->tree_mode); if ( A->nseq==2) { int tot_node; char *tmp; FILE *fp; fprintf (CL->local_stderr, "---Two Sequences Only: Make Dummy Pair-Tree ---]"); tmp=vtmpnam (NULL); fp=vfopen (tmp,"w"); fprintf ( fp, "(%s:0.1, %s:0.1):0.1;\n",S->name[0], S->name[1]); vfclose (fp); T=read_tree (tmp, &tot_node, (CL->S)->nseq,(CL->S)->name); return T; } else if ( strm (CL->tree_mode, "cwph")) { return seq2cw_tree ( S, tree_file); } else if (strm ( CL->tree_mode, "upgma") || strm ( CL->tree_mode, "nj")) { out_nseq=S->nseq; out_seq_name=S->name; out_seq=S->seq; CL->DM=cl2distance_matrix (CL, NOALN,NULL,NULL,0); if ( CL->S!=S) { /*Shrink the distance matrix so that it only contains the required sequences*/ distances=declare_int (S->nseq, S->nseq); for (a=0; a< S->nseq; a++) { ra=name_is_in_list ((S)->name[a],(CL->S)->name, (CL->S)->nseq, 100); for ( b=0; b< S->nseq; b++) { rb=name_is_in_list ((S)->name[b],(CL->S)->name, (CL->S)->nseq, 100); distances[a][b]=(CL->DM)->score_similarity_matrix[ra][rb]; } } } else { distances=duplicate_int ( (CL->DM)->score_similarity_matrix, -1, -1); } distances=sim_array2dist_array (distances, MAXID*SCORE_K); distances=normalize_array (distances, MAXID*SCORE_K, 100); if ( strm (CL->tree_mode, "order")) { for ( a=0; a< S->nseq; a++) for ( b=0; b< S->nseq; b++) distances[b][a]=100; T=make_nj_tree (A,distances,gop,gep,out_seq,out_seq_name,out_nseq, tree_file, CL->tree_mode); } else if ( strm (CL->tree_mode, "nj")) { T=make_nj_tree (A,distances,gop,gep,out_seq,out_seq_name,out_nseq, tree_file, CL->tree_mode); } else if ( strm (CL->tree_mode, "upgma")) T=make_upgma_tree (A,distances,gop,gep,out_seq,out_seq_name,out_nseq, tree_file, CL->tree_mode); else { printf_exit (EXIT_FAILURE, stderr, "ERROR: %s is an unknown tree computation mode [FATAL:%s]", CL->tree_mode, PROGRAM); } free_int (distances, out_nseq); } fprintf (CL->local_stderr , "DONE]\n"); return T; } Alignment *recompute_local_aln (Alignment *A, Sequence *S,Constraint_list *CL, int scale, int gep) { int **coor; int a; Alignment *B; sort_constraint_list (CL, 0, CL->ne); coor=declare_int (A->nseq, 3); for ( a=0; a< A->nseq; a++) { coor[a][0]=A->order[a][0]; coor[a][1]=A->order[a][1]+1; coor[a][2]=strlen(S->seq[A->order[a][0]])-coor[a][1]; } B=stack_progressive_nol_aln_with_seq_coor(CL,0,0,S,coor,A->nseq); A=copy_aln ( B, A); CL=compact_list (CL, 0,CL->ne, "shrink"); free_Alignment(B); return A; } Alignment *stack_progressive_nol_aln_with_seq_coor(Constraint_list *CL,int gop, int gep,Sequence *S, int **seq_coor, int nseq) { static int ** local_coor1; static int ** local_coor2; if ( local_coor1!=NULL)free_int (local_coor1, -1); if ( local_coor2!=NULL)free_int (local_coor2, -1); local_coor1=get_nol_seq ( CL,seq_coor, nseq, S); local_coor2=minimise_repeat_coor ( local_coor1, nseq, S); return stack_progressive_aln_with_seq_coor(CL,gop, gep,S, local_coor2,nseq); } Alignment *stack_progressive_aln_with_seq_coor (Constraint_list*CL,int gop, int gep, Sequence *S, int **coor, int nseq) { Alignment *A=NULL; A=seq_coor2aln (S,NULL, coor, nseq); return stack_progressive_aln ( A,CL, gop, gep); } Alignment *est_progressive_aln(Alignment *A, Constraint_list *CL, int gop, int gep) { int a,n; int**group_list; int *n_groups; char *seq; n_groups=vcalloc ( 2, sizeof (int)); group_list=declare_int ( 2, A->nseq); n=A->nseq; n_groups[0]=1; n_groups[1]=1; group_list[0][0]=0; group_list[0][1]=1; group_list[1][0]=1; fprintf ( stderr, "\n"); for ( a=1; aseq_al[1], "%s", A->seq_al[a]); fprintf ( stderr, "\t[%30s]->[len=%5d]", A->name[a],(int)strlen ( A->seq_al[0])); pair_wise ( A,n_groups, group_list, CL); seq=dna_aln2cons_seq(A); sprintf ( A->seq_al[0], "%s", seq); vfree (seq); fprintf ( stderr, "\n"); } A->nseq=1; return A; } void analyse_seq ( Alignment *A, int s) { int a, b, c; int r; int len=0; int state=0; int pstate=-1; float score=0; for ( a=0; a< A->len_aln; a++) { for ( b=0, c=0; b< s; b++) if ( !is_gap(A->seq_al[b][a])){c=1; break;} r=!is_gap(A->seq_al[s][a]); if ( r && c) state=1; else if ( !r && !c) state=2; else if ( !r && c) state=3; else if ( r && !c) state=4; if ( state !=pstate) { score+=len*len; len=0; } len+=r; pstate=state; } score=score/(float)(((A->S)->len[s]*(A->S)->len[s])); fprintf ( stderr, "[%.2f]", score); return; } Alignment *realign_aln ( Alignment*A, Constraint_list *CL) { int a, b, c; int *ns, **ls; A=reorder_aln (A, (CL->S)->name,(CL->S)->nseq); ns=vcalloc (2, sizeof(int)); ls=declare_int ( 2, A->nseq); for (a=0; a< A->nseq; a++) { ns[0]=A->nseq-1; for (c=0,b=0; bnseq; b++)if (b!=a)ls[0][c++]=b; ungap_sub_aln ( A, ns[0], ls[0]); ns[1]=1; ls[1][0]=a; ungap_sub_aln ( A, ns[1], ls[1]); A->score_aln=pair_wise (A, ns, ls,CL); } vfree (ns); free_int (ls, -1); return A; } Alignment *realign_aln_random_bipart ( Alignment*A, Constraint_list *CL) { int *ns; int **l_s; int a,g; ns=vcalloc (2, sizeof (int)); l_s=declare_int (2,A->nseq); for ( a=0; a< A->nseq; a++) { g=rand()%2; l_s[g][ns[g]++]=a; } fprintf ( stderr, "\n"); ungap_sub_aln ( A, ns[0], l_s[0]); ungap_sub_aln ( A, ns[1], l_s[1]); /* //Display Groups for (a=0;a<2; a++) for (b=0; bscore_aln=pair_wise (A, ns, l_s,CL); vfree(ns);free_int(l_s, -1); return A; } Alignment *realign_aln_random_bipart_n ( Alignment*A, Constraint_list *CL, int n) { int *ns; int **ls; int *used; int a,b,c, p; if (n>=A->nseq)n=A->nseq/2; used=vcalloc (A->nseq, sizeof (int)); c=0; while (cnseq; if (!used[p]){used[p]=1;c++;} } ns=vcalloc (2, sizeof (int)); ls=declare_int (2,A->nseq); for (a=0; a<2; a++) { for (b=0; bnseq; b++) if (used[b]==a)ls[a][ns[a]++]=b; } ungap_sub_aln ( A, ns[0], ls[0]); ungap_sub_aln ( A, ns[1], ls[1]); A->score_aln=pair_wise (A, ns, ls,CL); vfree(ns);free_int(ls, -1);vfree (used); return A; } int ** seq2ecl_mat (Constraint_list *CL); int ** seq2ecl_mat (Constraint_list *CL) { int a, b, n; Alignment *A; int *ns, **ls; int **dm; ns=vcalloc (2, sizeof (int)); ls=declare_int ((CL->S)->nseq, 2); A=seq2aln (CL->S,NULL, RM_GAP); n=(CL->S)->nseq; dm=declare_int (n, n); for (a=0; a<(CL->S)->nseq-1; a++) for (b=a+1; b<(CL->S)->nseq; b++) { ns[0]=ns[1]=1; ls[0][0]=a; ls[1][0]=b; ungap (A->seq_al[a]); ungap (A->seq_al[b]); dm[a][b]=dm[b][a]=linked_pair_wise (A, ns, ls, CL); } return dm; } Alignment *realign_aln_clust ( Alignment*A, Constraint_list *CL) { int *ns; int **ls; int a, b, c,n; static int **rm, **dm, **target; int score; if (!A) { free_int (dm, -1); free_int (rm, -1);free_int (target, -1); dm=rm=target=NULL; } if (!rm)rm=seq2ecl_mat(CL); if (!dm)dm=declare_int (A->nseq, A->nseq); if (!target)target=declare_int (A->nseq*A->nseq, 3); ns=vcalloc (2, sizeof (int)); ls=declare_int (2,A->nseq); for (a=0; anseq-1; a++) for (b=a+1; bnseq; b++) { ns[0]=2; ls[0][0]=a; ls[0][1]=b; score=sub_aln2ecl_raw_score (A, CL, ns[0], ls[0]); dm[a][b]=dm[b][a]=MAX(0,(rm[a][b]-score)); } for (n=0,a=0; anseq; a++) { for (b=a; bnseq; b++, n++) { target[n][0]=a; target[n][1]=b; for ( c=0; cnseq; c++) { if (c!=a && c!=b)target[n][2]+=dm[a][c]+dm[b][c]; } } } sort_int_inv (target,3, 2, 0, n-1); for (a=0; anseq; a++) { if (target[a][0]==target[a][1]) { ns[0]=1; ls[0][0]=target[a][0]; } else { ns[0]=2; ls[0][0]=target[a][0]; ls[0][1]=target[a][1]; } for (ns[1]=0,b=0; bnseq; b++) { if (b!=target[a][0] && b!=target[a][1])ls[1][ns[1]++]=b; } ungap_sub_aln (A, ns[0], ls[0]); ungap_sub_aln (A, ns[1], ls[1]); A->score_aln=pair_wise (A, ns, ls,CL); fprintf ( stderr, "\nSEQ: %d %d SCORE=%d\n",target[a][0],target[a][1], aln2ecl_raw_score(A, CL)); } return A; } int get_best_group ( int **used, Constraint_list *CL); int seq_aln_thr1(Alignment *A, int **used, int threshold, Constraint_list *CL); int seq_aln_thr2( Alignment*A, int **used, int threshold, int g, Constraint_list *CL); int get_best_group ( int **used, Constraint_list *CL) { int a,b,c,d,n, tot,stot, best_tot, best_seq, nseq; int ns[2]; int *ls[2]; best_seq=0; nseq=((CL->S)->nseq); tot=best_tot=0; for (a=0; aDM)->similarity_matrix[ls[0][c]][ls[1][d]]; } if (n>0)stot/=n; tot+=stot; } if (tot>best_tot) { best_tot=tot; best_seq=a; } } return best_seq; } char ** list_file2dpa_list_file (char **list, int *len,int maxnseq, Sequence *S) { char **nlist, **profile_list; int nl, l, a, np, has_lib; char *seq, *profile; Alignment *A, *F; nlist=declare_char (read_array_size_new ((void *)list), read_array_size_new ((void *)list[0])); nl=0; profile_list=declare_char (read_array_size_new ((void *)list), read_array_size_new ((void *)list[0])); np=0; l=len[0]; for (a=0; anseq; a++)fprintf ( fp, ">%s %s\n%s\n", A->name[a],A->seq_comment[a], A->seq_al[a]); vfclose (fp); sprintf (profile_list[np++], "%s", file); A=A->A; } free_aln (F); for (has_lib=0,a=0; alocal_stderr, "\n##### DPA ##### Compute Fast Alignment"); A=iterative_tree_aln (A,1, CL); fprintf (CL->local_stderr, "\n##### DPA ##### Identify Nodes"); P=make_root_tree (A, CL, CL->gop, CL->gep,CL->S,NULL, 1); set_node_score (A, P, "idmat_sim"); fprintf (CL->local_stderr, "\n##### DPA ##### Split Nodes"); list=split_nodes_nseq (A,P,N, list=vcalloc (P->nseq*200, sizeof (char))); list2=string2list (list); fprintf (CL->local_stderr, "\n##### DPA ##### Save Nodes"); F=A; for (a=1; aA=main_read_aln(list2[a], NULL); A=A->A; } fprintf (CL->local_stderr, "\n##### DPA ##### Finished"); vfree (list); free_char (list2, -1); A=F; while (A) { A=A->A; } return F; } Alignment * seq_aln ( Alignment*A, int n,Constraint_list *CL) { int **used, a, t,n1, nseq; n1=nseq=(CL->S)->nseq; used=declare_int (nseq, nseq+3); for (a=0; a< nseq; a++) { used[a][1]=1; used[a][2]=a; } for (t=50; t>=0 && nseq>1; t-=5) { nseq=seq_aln_thr1 (A, used,t, CL); } vfree (used); return A; } int seq_aln_thrX(Alignment *A, int **used, int threshold, Constraint_list *CL) { int n=0,a; seq_aln_thr1(A,used,threshold,CL); for ( a=0; a< (CL->S)->nseq; a++) n+=(used[a][1]>0)?1:0; return n; } int seq_aln_thr1(Alignment *A, int **used, int threshold, Constraint_list *CL) { int a,g, nseq, n_groups; nseq=(CL->S)->nseq; g=get_best_group(used, CL); used[g][0]=1; while ( seq_aln_thr2 (A, used, threshold,g, CL)!=0) { g=get_best_group (used, CL); used[g][0]=1; } for (n_groups=0,a=0; a< nseq; a++) if ( used[a][1]!=0) { n_groups++; used[a][0]=0; } return n_groups; } int seq_aln_thr2( Alignment*A, int **used, int threshold, int g, Constraint_list *CL) { int a, b,c,d; int ns[2], *ls[2]; int nseq, n_members; double sim; n_members=0; nseq=((CL->S)->nseq); used[g][0]=1; ns[0]=used[g][1]; ls[0]=used[g]+2; for ( a=0; a< nseq; a++) { if (used[a][0]!=0); else { ns[1]=used[a][1]; ls[1]=used[a]+2; ungap_sub_aln (A, ns[0], ls[0]); ungap_sub_aln (A, ns[1], ls[1]); A->score_aln=pair_wise (A, ns, ls,CL); for (sim=0,b=0; bseq_al[ls[0][b]], A->seq_al[ls[1][c]], NULL,"idmat_sim2"); } } sim/=(double)(ns[0]*ns[1]); if (sim>=threshold) { used[g][1]+=ns[1]; for (d=0; d0)used[g][0]=-1; return n_members; } /****************************************************************************/ /* */ /* */ /* Alignment Methods */ /* */ /* */ /****************************************************************************/ Alignment * tsp_aln (Alignment *A, Constraint_list *CL, Sequence *S) { int a, b ; int ** distances; int *ns, **ls; int **used; A=reorder_aln (A, (CL->S)->name,(CL->S)->nseq); ns=vcalloc (2, sizeof (int)); ls=declare_int (2, (CL->S)->nseq); used=declare_int ( A->nseq, 2); CL->DM=cl2distance_matrix (CL, NOALN,NULL,NULL,0); distances=declare_int (A->nseq+1, A->nseq+1); distances=duplicate_int ( (CL->DM)->score_similarity_matrix, -1, -1); for (a=0; a< A->nseq; a++) { used[a][0]=a; for (b=0; b< A->nseq; b++) { used[a][1]+=distances[a][b]; } } sort_int_inv (used,2,1,0,(CL->S)->nseq-1); ls[0][ns[0]++]=used[0][0]; ns[1]=1; for (a=1; a< S->nseq; a++) { fprintf ( stderr, "\n%s %d", (CL->S)->name[used[a][0]], used[a][1]); ls[1][0]=used[a][0]; pair_wise ( A,ns,ls, CL); ls[0][ns[0]++]=used[a][0]; } A->nseq=(CL->S)->nseq; return A; } Alignment *stack_progressive_aln(Alignment *A, Constraint_list *CL, int gop, int gep) { int a,n; int**group_list; int *n_groups; char dp_mode[100]; sprintf ( dp_mode, "%s", CL->dp_mode); sprintf (CL->dp_mode, "gotoh_pair_wise"); n_groups=vcalloc ( 2, sizeof (int)); group_list=declare_int ( 2, A->nseq); n=A->nseq; for ( a=0; aseq_al[a]); for ( a=1; a[%d]", a,(int)strlen ( A->seq_al[0])); } fprintf (stderr, "\n"); vfree(n_groups); free_int ( group_list, -1); sprintf (CL->dp_mode, "%s",dp_mode); return A; } Alignment *realign_aln_clust ( Alignment*A, Constraint_list *CL); Alignment *realign_aln_random_bipart_n ( Alignment*A, Constraint_list *CL, int n); Alignment *iterate_aln ( Alignment*A, int nit, Constraint_list *CL) { int it; int mode=1; int score, iscore, delta; fprintf ( CL->local_stderr, "Iterated Refinement: %d cycles START: score= %d\n", nit,iscore=aln2ecl_raw_score (A, CL) ); if ( nit==-1)nit=A->nseq*2; if ( A->len_aln==0)A=very_fast_aln (A, A->nseq, CL); A=reorder_aln (A,(CL->S)->name, A->nseq); for (it=0; it< nit; it++) { //CL->local_stderr=output_completion (CL->local_stderr,it, nit,1, ""); if (mode==0)A=realign_aln (A, CL); else if (mode ==1)A=realign_aln_random_bipart (A, CL); else if (mode ==2)A=realign_aln_clust (A, CL); else if (mode ==3)A=realign_aln_random_bipart_n (A, CL,2); score=aln2ecl_raw_score (A, CL); delta=iscore-score; fprintf (CL->local_stderr, "\n\tIteration Cycle: %d Score=%d Improvement= %d", it+1,score, delta); } fprintf ( CL->local_stderr, "\nIterated Refinement: Completed Improvement=%d\n", delta); return A; } int get_next_best (int seq, int nseq, int *used, int **dm); int get_next_best (int seq, int nseq, int *used, int **dm) { int a,set, d, bd, bseq; for (set=0,a=0; a< nseq; a++) { if (used[a] || seq==a)continue; d=dm[seq][a]; if (set==0 || d>bd) { bseq=a; bd=d; set=1; } } return bseq; } Alignment * full_sorted_aln (Alignment *A, Constraint_list *CL) { int a,b; A=sorted_aln_seq (0, A, CL); print_aln(A); for (a=1; anseq; a++) { A=A->A=copy_aln (A, NULL); for (b=0; bnseq; b++)ungap(A->seq_al[b]); A=sorted_aln_seq (a, A, CL); print_aln(A); } return A; } Alignment * sorted_aln (Alignment *A, Constraint_list *CL) { return sorted_aln_seq (-1, A, CL); } Alignment * sorted_aln_seq (int new_seq, Alignment *A, Constraint_list *CL) { int a, b=0, nseq; int *ns, **ls, **score, *used, **dm; int old_seq; dm=(CL->DM)->score_similarity_matrix; nseq=(CL->S)->nseq; score=declare_int (nseq, 3); used=vcalloc (nseq, sizeof (int)); ls=declare_int (2, nseq); ns=vcalloc (2, sizeof (int)); if ( new_seq==-1) { for (a=0; ascore_aln=pair_wise (A, ns, ls,CL); } return A; } Alignment * ungap_aln4tree (Alignment *A); Alignment * ungap_aln4tree (Alignment *A) { int t, n, max_sim, sim; Alignment *B; n=35; max_sim=60; t=A->len_aln/10; B=copy_aln (A, NULL); B=ungap_aln_n(B, n); return B; sim=aln2sim (B, "idmat"); while (B->len_alnmax_sim && n>0) { n-=10; B=copy_aln (A, B); B=ungap_aln_n(B, n); sim=aln2sim (B, "idmat"); } if ( B->len_alnmax_sim)B=copy_aln (A, B); return B; } Alignment * iterative_tree_aln (Alignment *A,int n, Constraint_list *CL) { NT_node **T=NULL; int a; T=make_tree (A, CL, CL->gop, CL->gep,CL->S,NULL, 1); tree_aln ((T[3][0])->left,(T[3][0])->right,A,(CL->S)->nseq, CL); for ( a=0; a< n; a++) { Alignment *B; B=copy_aln (A, NULL); B=ungap_aln_n (B, 20); sprintf ( CL->distance_matrix_mode, "aln"); CL->DM=cl2distance_matrix ( CL,B,NULL,NULL, 1); free_aln (B); degap_aln (A); T=make_tree (A, CL, CL->gop, CL->gep,CL->S,NULL, 1); tree_aln ((T[3][0])->left,(T[3][0])->right,A,(CL->S)->nseq, CL); } return A; } Alignment *profile_aln (Alignment *A, Constraint_list *CL) { int a,nseq,nseq2; int **ls, *ns; nseq=A->nseq; nseq2=2*nseq; ls=declare_int (2, nseq2); ns=vcalloc (2, sizeof (int)); A=realloc_aln2(A,nseq2, A->len_aln); for (a=0; a< nseq; a++) ls[0][ns[0]++]=a; for ( a=0; aseq_al[a+nseq], "%s", (CL->S)->seq[a]); sprintf (A->name[a+nseq], "%s", (CL->S)->name[a]); A->order[a+nseq][0]=a; } ns[1]=1; for (a=0; ascore_aln=pair_wise (A, ns, ls,CL); ls[0][ns[0]++]=a+nseq; } for (a=0; a< nseq; a++) { sprintf (A->seq_al[a], "%s", A->seq_al[a+nseq]); } A->nseq=nseq; return A; } Alignment * iterative_aln ( Alignment*A, int n,Constraint_list *CL) { int *ns,**ls, **score, **dm; int a,b, nseq, max; ls=declare_int (2, A->nseq); ns=vcalloc (2, sizeof (int)); ls[0][ns[0]++]=0; nseq=(CL->S)->nseq; score=declare_int (nseq,2); dm=(CL->DM)->score_similarity_matrix; for (a=0; aS)->name[score[a][0]],A->name[score[a][0]], score[a][1]); ns[1]=1; ls[1][0]=score[a][0]; ungap_sub_aln ( A, ns[0], ls[0]); ungap_sub_aln ( A, ns[1], ls[1]); A->score_aln=pair_wise (A, ns, ls,CL); ls[0][ns[0]++]=a; } return A; } Alignment *simple_progressive_aln (Sequence *S, NT_node **T, Constraint_list *CL, char *mat) { int a; Alignment *A; A=seq2aln (S, NULL, RM_GAP); if ( !CL) { CL=declare_constraint_list (S, NULL, NULL, 0, NULL, NULL); sprintf ( CL->dp_mode, "myers_miller_pair_wise"); sprintf ( CL->tree_mode, "nj"); sprintf ( CL->distance_matrix_mode, "idscore"); CL=choose_extension_mode ("matrix", CL); CL->gop=-10; CL->gep=-1; if (mat)CL->M=read_matrice (mat); CL->pw_parameters_set=1; CL->local_stderr=stderr; } if ( !T)T=make_tree (A, CL, CL->gop, CL->gep,S, NULL,MAXIMISE); for ( a=0; a< A->nseq; a++)ungap (A->seq_al[a]); tree_aln ((T[3][0])->left,(T[3][0])->right,A,(CL->S)->nseq, CL); A=reorder_aln ( A,A->tree_order,A->nseq); return A; } Alignment *very_fast_aln ( Alignment*A, int nseq, Constraint_list *CL) { char command[10000]; char *tmp_seq; char *tmp_aln; FILE *fp; if ( CL && CL->local_stderr)fp=CL->local_stderr; else fp=stderr; fprintf (fp, "\n[Computation of an Approximate MSA..."); tmp_seq= vtmpnam (NULL); tmp_aln= vtmpnam (NULL); output_fasta_seq ((tmp_seq=vtmpnam (NULL)), A); sprintf ( command, "t_coffee -infile=%s -special_mode quickaln -outfile=%s %s -outorder=input", tmp_seq, tmp_aln, TO_NULL_DEVICE); my_system ( command); A->nseq=0; A=main_read_aln (tmp_aln,A); fprintf (fp, "]\n"); return A; } static NT_node* SNL; NT_node* tree_aln ( NT_node LT, NT_node RT, Alignment*A, int nseq, Constraint_list *CL) { if ( strm ((CL->TC)->use_seqan, "NO"))return local_tree_aln (LT, RT, A, nseq, CL); else return seqan_tree_aln (LT, RT, A, nseq, CL); } NT_node* seqan_tree_aln ( NT_node LT, NT_node RT, Alignment*A, int nseq, Constraint_list *CL) { Alignment *B; char *tree, *lib, *seq, *new_aln; //Output tree tree=vtmpnam (NULL); print_newick_tree (LT->parent, tree); //Output seq main_output_fasta_seq (seq=vtmpnam (NULL),B=seq2aln (CL->S,NULL,RM_GAP), NO_HEADER); free_aln (B); //Output lib new_aln=vtmpnam (NULL); vfclose (save_constraint_list ( CL, 0, CL->ne,lib=vtmpnam(NULL), NULL, "ascii",CL->S)); fprintf (CL->local_stderr, "\n********* USE EXTERNAL ALIGNER: START:\n\tCOMMAND: %s -lib %s -seq %s -usetree %s -outfile %s\n", (CL->TC)->use_seqan,lib, seq, tree, new_aln); printf_system ( "%s -lib %s -seq %s -usetree %s -outfile %s", (CL->TC)->use_seqan,lib, seq, tree, new_aln); fprintf (CL->local_stderr, "\n********* USE EXTERNAL ALIGNER: END\n"); main_read_aln (new_aln, A); return tree2ao (LT,RT, A, A->nseq, CL); } NT_node rec_local_tree_aln ( NT_node P, Alignment*A, Constraint_list *CL, int print); NT_node* local_tree_aln ( NT_node l, NT_node r, Alignment*A,int nseq, Constraint_list *CL) { int a; NT_node P, *NL; int **min=NULL; if (!r && !l) return NULL; else if (!r)P=l; else if (!l)P=r; else P=r->parent; fprintf ( CL->local_stderr, "\nPROGRESSIVE_ALIGNMENT [Tree Based]\n"); //1: make sure the Alignment and the Sequences are labeled the same way if (CL->translation)vfree (CL->translation); CL->translation=vcalloc ( (CL->S)->nseq, sizeof (int)); for ( a=0; a< (CL->S)->nseq; a++) CL->translation[a]=name_is_in_list ( (CL->S)->name[a], (CL->S)->name, (CL->S)->nseq, MAXNAMES); A=reorder_aln (A, (CL->S)->name,(CL->S)->nseq); A->nseq=(CL->S)->nseq; //2 Make sure the tree is in the same order recode_tree (P, (CL->S)); index_tree_node(P); if ( get_nproc()>1 && strstr (CL->multi_thread, "msa")) { int max_fork; max_fork=get_nproc()/2;//number of nodes forked, one node =>two jobs tree2nnode (P); NL=tree2node_list (P, NULL); min=declare_int (P->node+1,3); for (a=0; a<=P->node; a++) { NT_node N; N=NL[a]; min[a][0]=a; if (!N); else if (N && N->nseq==1)min[a][1]=0; else { min[a][1]=MIN(((N->left)->nseq),((N->right)->nseq))*A->nseq+MAX(((N->left)->nseq),((N->right)->nseq));//sort on min and break ties on max min[a][2]=MIN(((N->left)->nseq),((N->right)->nseq)); } } sort_int_inv (min,3, 1, 0, P->node); for (a=0; a<=P->node && a1)(NL[min[a][0]])->fork=1; } } free_int (min, -1); rec_local_tree_aln (P, A,CL, 1); for (a=0; anseq; a++)sprintf (A->tree_order[a], "%s", (CL->S)->name[P->lseq[a]]); A->len_aln=strlen (A->seq_al[0]); fprintf ( CL->local_stderr, "\n\n"); return NULL; } NT_node rec_local_tree_aln ( NT_node P, Alignment*A, Constraint_list *CL,int print) { NT_node R,L; int score; if (!P || P->nseq==1) return NULL; R=P->right;L=P->left; if (P->fork ) { int s, pid1, pid2; char *tmp1, *tmp2; tmp1=vtmpnam (NULL); tmp2=vtmpnam (NULL); pid1=vfork(); if (pid1==0) { if (print==1) if (L->nseq>R->nseq)print=1; initiate_vtmpnam (NULL); rec_local_tree_aln (L, A, CL, print); dump_msa (tmp1,A, L->nseq, L->lseq); exit (EXIT_SUCCESS); } else { pid2=vfork(); if (pid2==0) { if (print==1) if (L->nseq>R->nseq)print=0; initiate_vtmpnam (NULL); rec_local_tree_aln (R, A, CL, print); dump_msa (tmp2, A, R->nseq, R->lseq); exit (EXIT_SUCCESS); } } vwaitpid (pid1, &s, 0); vwaitpid (pid2, &s, 0); undump_msa (A,tmp1); undump_msa (A,tmp2); } else { rec_local_tree_aln (L, A, CL, print); rec_local_tree_aln (R, A, CL, print); } P->score=A->score_aln=score=profile_pair_wise (A,L->nseq, L->lseq,R->nseq,R->lseq,CL); A->len_aln=strlen (A->seq_al[P->lseq[0]]); score=node2sub_aln_score (A, CL, CL->evaluate_mode,P); if (print)fprintf(CL->local_stderr, "\n\tGroup %4d: [Group %4d (%4d seq)] with [Group %4d (%4d seq)]-->[Score=%4d][Len=%5d][PID:%d]%s",P->index,R->index,R->nseq,L->index,L->nseq,score, A->len_aln,getpid(),(P->fork==1)?"[Forked]":"" ); return P; } NT_node* tree2ao ( NT_node LT, NT_node RT, Alignment*A, int nseq, Constraint_list *CL) { int *n_s; int ** l_s; int a, b; static int n_groups_done, do_split=0; int nseq2align=0; int *translation; NT_node P=NULL; if (n_groups_done==0) { if (SNL)vfree(SNL); SNL=vcalloc ( (CL->S)->nseq, sizeof (NT_node)); if (CL->translation)vfree(CL->translation); CL->translation=vcalloc ( (CL->S)->nseq, sizeof (int)); for ( a=0; a< (CL->S)->nseq; a++) CL->translation[a]=name_is_in_list ( (CL->S)->name[a], (CL->S)->name, (CL->S)->nseq, MAXNAMES); n_groups_done=(CL->S)->nseq; A=reorder_aln (A, (CL->S)->name,(CL->S)->nseq); A->nseq=nseq; } translation=CL->translation; n_s=vcalloc (2, sizeof ( int)); l_s=declare_int ( 2, nseq); if ( RT->parent !=LT->parent)fprintf ( stderr, "Tree Pb [FATAL:%s]", PROGRAM); else P=RT->parent; if ( LT->leaf==1 && RT->leaf==0) tree2ao ( RT->left, RT->right,A, nseq,CL); else if ( RT->leaf==1 && LT->leaf==0) tree2ao ( LT->left, LT->right,A,nseq,CL); else if (RT->leaf==0 && LT->leaf==0) { tree2ao ( LT->left, LT->right,A,nseq,CL); tree2ao ( RT->left, RT->right,A,nseq,CL); } if ( LT->leaf==1 && RT->leaf==1) { /*1 Identify the two groups of sequences to align*/ nseq2align=LT->nseq+RT->nseq; n_s[0]=LT->nseq; for ( a=0; a< LT->nseq; a++)l_s[0][a]=translation[LT->lseq[a]]; if ( LT->nseq==1)LT->group=l_s[0][0]; n_s[1]=RT->nseq; for ( a=0; a< RT->nseq; a++)l_s[1][a]=translation[RT->lseq[a]]; if ( RT->nseq==1)RT->group=l_s[1][0]; P->group=n_groups_done++; if (nseq2align==nseq) { for (b=0, a=0; a< n_s[0]; a++, b++)sprintf ( A->tree_order[b],"%s", (CL->S)->name[l_s[0][a]]); for (a=0; a< n_s[1] ; a++, b++)sprintf ( A->tree_order[b], "%s",(CL->S)->name[l_s[1][a]]); n_groups_done=0; } } if (P->parent)P->leaf=1; if ( LT->isseq==0)LT->leaf=0; if ( RT->isseq==0)RT->leaf=0; if (RT->isseq){SNL[translation[RT->lseq[0]]]=RT;RT->score=100;} if (LT->isseq){SNL[translation[LT->lseq[0]]]=LT;LT->score=100;} do_split=split_condition (nseq2align,A->score_aln,CL); if (CL->split && do_split) { for (a=0; a< P->nseq; a++)SNL[CL->translation[P->lseq[a]]]=NULL; SNL[CL->translation[RT->lseq[0]]]=P; } vfree ( n_s); free_int ( l_s, 2); return SNL; } NT_node* tree_realn ( NT_node LT, NT_node RT, Alignment*A, int nseq, Constraint_list *CL) { int *n_s; int ** l_s; int a, b; int score; static int n_groups_done; int nseq2align=0; int *translation; NT_node P=NULL; if (n_groups_done==0) { if (SNL)vfree(SNL); SNL=vcalloc ( (CL->S)->nseq, sizeof (NT_node)); if (CL->translation)vfree(CL->translation); CL->translation=vcalloc ( (CL->S)->nseq, sizeof (int)); for ( a=0; a< (CL->S)->nseq; a++) CL->translation[a]=name_is_in_list ( (CL->S)->name[a], (CL->S)->name, (CL->S)->nseq, MAXNAMES); if (nseq>2)fprintf ( CL->local_stderr, "\nPROGRESSIVE_ALIGNMENT [Tree Based]\n"); else fprintf ( CL->local_stderr, "\nPAIRWISE_ALIGNMENT [No Tree]\n"); n_groups_done=(CL->S)->nseq; A=reorder_aln (A, (CL->S)->name,(CL->S)->nseq); A->nseq=nseq; } translation=CL->translation; n_s=vcalloc (2, sizeof ( int)); l_s=declare_int ( 2, nseq); if ( nseq==2) { n_s[0]=n_s[1]=1; l_s[0][0]=name_is_in_list ((CL->S)->name[0],(CL->S)->name, (CL->S)->nseq, MAXNAMES); l_s[1][0]=name_is_in_list ((CL->S)->name[1],(CL->S)->name, (CL->S)->nseq, MAXNAMES); A->score_aln=score=pair_wise (A, n_s, l_s,CL); vfree ( n_s); free_int ( l_s, 2); return SNL; } else { if ( RT->parent !=LT->parent)fprintf ( stderr, "Tree Pb [FATAL:%s]", PROGRAM); else P=RT->parent; if ( LT->leaf==1 && RT->leaf==0) tree_realn ( RT->left, RT->right,A, nseq,CL); else if ( RT->leaf==1 && LT->leaf==0) tree_realn ( LT->left, LT->right,A,nseq,CL); else if (RT->leaf==0 && LT->leaf==0) { tree_realn ( LT->left, LT->right,A,nseq,CL); tree_realn ( RT->left, RT->right,A,nseq,CL); } if ( LT->leaf==1 && RT->leaf==1 && (RT->nseq+LT->nseq)nseq; a++) { s=translation[LT->lseq[a]]; list[s]=1; } for (a=0; anseq; a++) { s=translation[RT->lseq[a]]; list[s]=1; } for (a=0; ascore=A->score_aln=score=pair_wise (A, n_s, l_s,CL); id2=sub_aln2sim (A, n_s, l_s, "idmat_sim"); if (nseq2align==nseq) { for (b=0, a=0; a< n_s[0]; a++, b++)sprintf ( A->tree_order[b],"%s", (CL->S)->name[l_s[0][a]]); for (a=0; a< n_s[1] ; a++, b++)sprintf ( A->tree_order[b], "%s",(CL->S)->name[l_s[1][a]]); n_groups_done=0; } } if (P->parent)P->leaf=1; //Recycle the tree if ( LT->isseq==0)LT->leaf=0; if ( RT->isseq==0)RT->leaf=0; if (RT->isseq){SNL[translation[RT->lseq[0]]]=RT;RT->score=100;} if (LT->isseq){SNL[translation[LT->lseq[0]]]=LT;LT->score=100;} vfree ( n_s); free_int ( l_s, 2); return SNL; } } Alignment* profile_tree_aln ( NT_node P,Alignment*A,Constraint_list *CL, int threshold) { int *ns, **ls, a, sim; NT_node LT, RT, D, UD; Alignment *F; static NT_node R; static int n_groups_done; //first pass //Sequences must be in the same order as the tree sequences if (!P->parent) { R=P; n_groups_done=P->nseq+1; } LT=P->left; RT=P->right; if (LT->leaf==0)A=delayed_tree_aln1 (LT, A,CL, threshold); if (RT->leaf==0)A=delayed_tree_aln1 (RT, A,CL, threshold); ns=vcalloc (2, sizeof (int)); ls=declare_int ( 2,R->nseq); if ( LT->nseq==1) { ls[0][ns[0]++]=LT->lseq[0]; LT->group=ls[0][0]+1; } else node2seq_list (LT,&ns[0], ls[0]); if ( RT->nseq==1) { ls[1][ns[1]++]=RT->lseq[0]; RT->group=ls[1][0]+1; } else node2seq_list (RT,&ns[1], ls[1]); P->group=++n_groups_done; fprintf (CL->local_stderr, "\n\tGroup %4d: [Group %4d (%4d seq)] with [Group %4d (%4d seq)]-->",P->group,RT->group, ns[1],LT->group, ns[0]); P->score=A->score_aln=pair_wise (A, ns, ls,CL); sim=sub_aln2sim(A, ns, ls, "idmat_sim1"); if ( simaligned=1; D->aligned=0; fprintf (CL->local_stderr, "[Delayed (Sim=%4d). Kept Group %4d]",sim,UD->group); ungap_sub_aln (A, ns[0],ls[0]); ungap_sub_aln (A, ns[1],ls[1]); A->nseq=MAX(ns[0],ns[1]); F=A; while (F->A)F=F->A; F->A=main_read_aln (output_fasta_sub_aln (NULL, A, ns[(D==LT)?0:1], ls[(D==LT)?0:1]), NULL); if ( P==R) { F=F->A; F->A=main_read_aln (output_fasta_sub_aln (NULL, A, ns[(D==LT)?1:0], ls[(D==LT)?1:0]), NULL); } if (F->A==NULL) { printf_exit (EXIT_FAILURE, stderr, "\nError: Empty group"); } } else { LT->aligned=1; RT->aligned=1; fprintf (CL->local_stderr, "[Score=%4d][Len=%5d]",sub_aln2sub_aln_score (A, CL, CL->evaluate_mode,ns, ls), strlen ( A->seq_al[ls[0][0]])); A->nseq=ns[0]+ns[1]; if (P==R) { F=A; while (F->A)F=F->A; F->A=main_read_aln (output_fasta_sub_aln2 (NULL, A, ns, ls), NULL); } } P->nseq=0; for (a=0; anseq;a++)P->lseq[P->nseq++]=LT->lseq[a]; for (a=0; anseq;a++)P->lseq[P->nseq++]=RT->lseq[a]; P->aligned=1; vfree ( ns); free_int ( ls,-1); return A; }//////////////////////////////////////////////////////////////////////////////////////// // // Frame Tree Aln // //////////////////////////////////////////////////////////////////////////////////////// //Alignment *frame_tree_aln (Alignment *A, Constraint_list *CL) //{ //////////////////////////////////////////////////////////////////////////////////////// // // Delayed Tree Aln // //////////////////////////////////////////////////////////////////////////////////////// int delayed_pair_wise (Alignment *A, int *ns, int **ls,Constraint_list *CL); NT_node* delayed_tree_aln_mode1 ( NT_node LT, NT_node RT, Alignment*A, int nseq, Constraint_list *CL); NT_node* delayed_tree_aln_mode2 ( NT_node LT, NT_node RT, Alignment*A, int nseq, Constraint_list *CL); int paint_nodes2aligned ( NT_node P,char **list, int n); int reset_visited_nodes ( NT_node P); int reset_visited_nodes2 ( NT_node P); Alignment * make_delayed_tree_aln (Alignment *A,int n, Constraint_list *CL) { NT_node **T=NULL; int a; T=make_tree (A, CL, CL->gop, CL->gep,CL->S,NULL, 1); delayed_tree_aln_mode1 ((T[3][0])->left,(T[3][0])->right,A,(CL->S)->nseq, CL); for ( a=0; a< n; a++) { sprintf ( CL->distance_matrix_mode, "aln"); CL->DM=cl2distance_matrix ( CL,A,NULL,NULL, 1); degap_aln (A); T=make_tree (A, CL, CL->gop, CL->gep,CL->S,NULL, 1); delayed_tree_aln_mode1 ((T[3][0])->left,(T[3][0])->right,A,(CL->S)->nseq, CL); } return A; } NT_node* delayed_tree_aln_mode1 ( NT_node LT, NT_node RT, Alignment*A, int nseq, Constraint_list *CL) { NT_node P; P=LT->parent;P->nseq=nseq; paint_nodes2aligned (P, NULL, 0); A=delayed_tree_aln1 (P, A, CL,50); A=delayed_tree_aln2 (P, A, CL, 0); return NULL; } NT_node* delayed_tree_aln_mode2 ( NT_node LT, NT_node RT, Alignment*A, int nseq, Constraint_list *CL) { NT_node P; int thr=50; P=LT->parent;P->nseq=nseq; A=delayed_tree_aln1 (P, A, CL,thr); thr-=10; while (thr>=0) { A=delayed_tree_aln2 (P, A, CL, thr); thr-=10; } return NULL; } Alignment* delayed_tree_aln1 ( NT_node P,Alignment*A,Constraint_list *CL, int threshold) { int *ns, **ls, a, sim; NT_node LT, RT, D, UD; static NT_node R; static int n_groups_done; //first pass //Sequences must be in the same order as the tree sequences if (!P->parent) { R=P; n_groups_done=P->nseq+1; } LT=P->left; RT=P->right; if (LT->leaf==0)A=delayed_tree_aln1 (LT, A,CL, threshold); if (RT->leaf==0)A=delayed_tree_aln1 (RT, A,CL, threshold); ns=vcalloc (2, sizeof (int)); ls=declare_int ( 2,R->nseq); node2seq_list (LT,&ns[0], ls[0]); if ( LT->nseq==1)LT->group=LT->lseq[0]+1; node2seq_list (RT,&ns[1], ls[1]); if ( RT->nseq==1)RT->group=RT->lseq[0]+1; P->group=++n_groups_done; if ( ns[0]==0 || ns[1]==0) { fprintf (CL->local_stderr, "\n\tF-Group %4d: [Group %4d (%4d seq)] with [Group %4d (%4d seq)]-->Skipped",P->group,RT->group, ns[1],LT->group, ns[0]); LT->aligned=(ns[0]==0)?0:1; RT->aligned=(ns[1]==0)?0:1; } else { fprintf (CL->local_stderr, "\n\tF-Group %4d: [Group %4d (%4d seq)] with [Group %4d (%4d seq)]-->",P->group,RT->group, ns[1],LT->group, ns[0]); P->score=A->score_aln=pair_wise (A, ns, ls,CL); sim=sub_aln2max_sim(A, ns, ls, "idmat_sim1"); if ( simaligned=1; D->aligned=0; fprintf (CL->local_stderr, "[Delayed (Sim=%4d). Kept Group %4d]",sim,UD->group); ungap_sub_aln (A, ns[0],ls[0]); ungap_sub_aln (A, ns[1],ls[1]); A->nseq=MAX(ns[0],ns[1]); } else { LT->aligned=1; RT->aligned=1; fprintf (CL->local_stderr, "[Score=%4d][Len=%5d]",sub_aln2sub_aln_score (A, CL, CL->evaluate_mode,ns, ls), strlen ( A->seq_al[ls[0][0]])); A->nseq=ns[0]+ns[1]; } P->nseq=0; for (a=0; anseq;a++)P->lseq[P->nseq++]=LT->lseq[a]; for (a=0; anseq;a++)P->lseq[P->nseq++]=RT->lseq[a]; P->aligned=1; } vfree ( ns); free_int ( ls,-1); return A; } Alignment* delayed_tree_aln2 ( NT_node P,Alignment*A,Constraint_list *CL, int thr) { NT_node LT, RT, D; static NT_node R; LT=P->left; RT=P->right; if (!P->parent) { R=P; fprintf (CL->local_stderr, "\n"); } if (!LT->aligned && !RT->aligned) { printf_exit (EXIT_FAILURE, stderr, "ERROR: Unresolved Node On Groups %d [FATAL:%s]\n", P->group,PROGRAM); } else if (!LT->aligned || !RT->aligned) { int *ns, **ls, sim; ns=vcalloc (2, sizeof (int)); ls=declare_int (2, R->nseq); node2seq_list (R,&ns[0], ls[0]); D=(!LT->aligned)?LT:RT; D->aligned=1; node2seq_list (D,&ns[1], ls[1]); fprintf (CL->local_stderr, "\tS-Delayed Group %4d: [Group %4d (%4d seq)] with [Group %4d (%4d seq)]-->",P->group,D->group, ns[1],R->group, ns[0]); P->score=A->score_aln=pair_wise (A, ns, ls,CL); sim=sub_aln2max_sim(A, ns, ls, "idmat_sim1"); if (simlocal_stderr, " [Further Delayed]\n"); ungap_sub_aln (A, ns[0],ls[0]); ungap_sub_aln (A, ns[1],ls[1]); D->aligned=0; } else { fprintf (CL->local_stderr, "[Score=%4d][Len=%5d][thr=%d]\n",sub_aln2sub_aln_score (A, CL, CL->evaluate_mode,ns, ls), (int)strlen ( A->seq_al[ls[0][0]]), thr); D->aligned=1; } vfree (ns);free_int (ls, -1); } else { ; } if (LT->leaf==0)A=delayed_tree_aln2 (LT, A,CL, thr); if (RT->leaf==0)A=delayed_tree_aln2 (RT, A,CL, thr); return A; } int delayed_pair_wise (Alignment *A, int *ns, int **ls,Constraint_list *CL) { int s,s1, s2, a, b; int **sim; pair_wise (A, ns, ls, CL); sim=fast_aln2sim_list (A, "sim3", ns, ls); sort_int_inv ( sim,3, 2,0, ns[0]*ns[1]-1); for (a=0; a< 2; a++) for ( b=0; b< ns[a]; b++) A->order[ls[a][b]][4]=-1; for (a=0; a< 10 && sim[a][0]!=-1; a++) { s1=sim[a][0]; s2=sim[a][1]; A->order[s1][4]=0; A->order[s2][4]=0; } ungap_sub_aln (A, ns[0],ls[0]); ungap_sub_aln (A, ns[1],ls[1]); s=pair_wise (A, ns, ls, CL); for (a=0; a< 2; a++) for ( b=0; b< ns[a]; b++) A->order[ls[a][b]][4]=0; free_int (sim, -1); return s; } int node2seq_list2 (NT_node P, int *ns, int *ls) { if ( !P || P->visited ) return ns[0]; else P->visited=1; if ( P->isseq) { ls[ns[0]++]=P->lseq[0]; } if (P->left && (P->left) ->aligned)node2seq_list2 (P->left, ns,ls); if (P->right && (P->right)->aligned)node2seq_list2 (P->right,ns,ls); if (P->aligned && P->parent)node2seq_list2 (P->parent,ns,ls); return ns[0]; } int node2seq_list (NT_node P, int *ns, int *ls) { if ( P->isseq && P->aligned) { ls[ns[0]++]=P->lseq[0]; } else { if (P->left && (P->left) ->aligned)node2seq_list (P->left, ns,ls); if (P->right && (P->right)->aligned)node2seq_list (P->right,ns,ls); } return ns[0]; } int paint_nodes2aligned ( NT_node P,char **list, int n) { int r=0; if ( P->leaf) { if ( list==NULL) P->aligned=1; else if ( name_is_in_list ( P->name, list, n, 100)!=-1) P->aligned=1; else P->aligned=0; return P->aligned; } else { r+=paint_nodes2aligned (P->left, list, n); r+=paint_nodes2aligned (P->right, list, n); } return r; } int reset_visited_nodes ( NT_node P) { while (P->parent)P=P->parent; return reset_visited_nodes2 (P); } int reset_visited_nodes2 ( NT_node P) { int r=0; if (P->left)r+=reset_visited_nodes2(P->left); if (P->right)r+=reset_visited_nodes2(P->right); r+=P->visited; P->visited=0; return r; } //////////////////////////////////////////////////////////////////////////////////////// // // DPA_MSA // //////////////////////////////////////////////////////////////////////////////////////// Alignment* dpa_msa2 ( NT_node P,Alignment*A,Constraint_list *CL); Alignment *dpa_align_node (NT_node P,Alignment*A,Constraint_list *CL); char *node2profile_list (NT_node P,Alignment*A,Constraint_list *CL, char *list); char * output_node_aln (NT_node P, Alignment *A, char *name); int node2nleaf ( NT_node P); Alignment* dpa_aln (Alignment*A,Constraint_list *CL) { NT_node P; A=iterative_tree_aln (A,1, CL); P=make_root_tree (A, CL, CL->gop, CL->gep,CL->S,NULL, 1); degap_aln (A); while (!P->leaf) A=dpa_msa2(P, A, CL); return A; } int node2nleaf ( NT_node P) { int n=0; if ( P->leaf) return 1; else { n+=node2nleaf ( P->left); n+=node2nleaf ( P->right); } return n; } Alignment* dpa_msa2 ( NT_node P,Alignment*A,Constraint_list *CL) { int maxnseq=20; int n, n_l, n_r; n=node2nleaf (P); if ( n>maxnseq) { n_l=node2nleaf (P->left); n_r=node2nleaf (P->right); if (n_l>n_r) { return dpa_msa2 (P->left, A, CL); } else { return dpa_msa2 (P->right, A, CL); } } A=dpa_align_node (P, A, CL); P->leaf=1; return A; } Alignment *dpa_align_node (NT_node P,Alignment*A,Constraint_list *CL) { char *list, *tmp_aln; int a, b; Alignment *B; list=vcalloc ( P->nseq*100, sizeof (char)); list=node2profile_list (P,A, CL, list); printf_system ( "t_coffee -profile %s -outfile=%s -dp_mode gotoh_pair_wise_lgp -msa_mode iterative_tree_aln -quiet", list,tmp_aln=vtmpnam (NULL)); B=main_read_aln (tmp_aln, NULL); A=realloc_aln (A, B->len_aln+1); for ( a=0; a< B->nseq; a++) if ( (b=name_is_in_list (B->name[a], A->name, A->nseq, 100))!=-1) sprintf (A->seq_al[b], "%s", B->seq_al[a]); A->len_aln=B->len_aln; free_aln (B); vfree (list); return A; } char *node2profile_list (NT_node P,Alignment*A,Constraint_list *CL, char *list) { if (!P->leaf) { list=node2profile_list (P->left, A, CL, list); list=node2profile_list (P->right, A, CL, list); } else { list=strcatf (list," %s", output_node_aln (P, A, NULL)); if ( !P->isseq)P->leaf=0; } return list; } char * output_node_aln (NT_node P, Alignment *A, char *name) { FILE *fp; int a; if (name==NULL) name=vtmpnam (NULL); fp=vfopen (name, "w"); for (a=0; a< P->nseq; a++) fprintf ( fp, ">%s\n%s", A->name[P->lseq[a]], A->seq_al[P->lseq[a]]); vfclose (fp); return name; } //////////////////////////////////////////////////////////////////////////////////////// // // NEW_DPA_MSA // //////////////////////////////////////////////////////////////////////////////////////// Alignment * new_dpa_aln (Alignment *A,Constraint_list *CL) { NT_node P; char *tmp_aln; char *list; A=make_delayed_tree_aln (A,1, CL); P=make_root_tree (A, CL, CL->gop, CL->gep,CL->S,NULL, 1); set_node_score (A, P, "idmat_sim"); list=split_nodes_nseq (A,P,15, list=vcalloc (P->nseq*200, sizeof (char))); printf_system ( "t_coffee -profile %s -outfile=%s -dp_mode gotoh_pair_wise_lgp -msa_mode iterative_tree_aln", list,tmp_aln=vtmpnam (NULL)); return main_read_aln (tmp_aln, NULL); } char *split_nodes_nseq (Alignment *A, NT_node P, int nseq, char *list) { int a,n; n=P->nseq; a=100; while ( n>=nseq) { a--; n=count_threshold_nodes (A, P, a); } return split_nodes_idmax (A, P, a,list); } char *split_nodes_idmax (Alignment *A, NT_node P, int t, char *list) { if (P->isseq || P->score>=t) { list=strcatf (list," %s", output_node_aln (P, A, NULL)); } else if ( P->scoreleft,t, list); list=split_nodes_idmax (A, P->right,t,list); } return list; } int count_threshold_nodes (Alignment *A, NT_node P, int t) { int s=0; if (P->isseq || P->score>=t) { s=1; } else if ( P->scoreleft,t); s+=count_threshold_nodes (A, P->right,t); } return s; } int set_node_score (Alignment *A, NT_node P, char *mode) { int a; int ns[2], *ls[2]; if (P->isseq) return 0; for (a=0; a<2; a++) { NT_node N; N=(a==0)?P->left:P->right; ns[a]=N->nseq; ls[a]=N->lseq; } P->score=sub_aln2max_sim(A, ns, ls,mode); set_node_score (A,P->left, mode); set_node_score (A,P->right, mode); return 1; } ///////////////////////////////////////////////////////////////////////////////////////// int split_condition (int nseq, int score, Constraint_list *CL) { int cond1=1, cond2=1; if ( CL->split_nseq_thres)cond1 =(nseq<=CL->split_nseq_thres)?1:0; if ( CL->split_score_thres)cond2=(score>=CL->split_score_thres)?1:0; return (cond1 && cond2); } int profile_pair_wise (Alignment *A, int n1, int *l1, int n2, int *l2, Constraint_list *CL) { static int *ns; static int **ls; if (!ns) { ns=vcalloc (2, sizeof (int)); ls=vcalloc (2, sizeof (int*)); } ns[0]=n1; ls[0]=l1; ns[1]=n2; ls[1]=l2; return pair_wise (A, ns, ls, CL); } int pair_wise (Alignment *A, int*ns, int **l_s,Constraint_list *CL ) { /* CL->maximise CL->gop; CL->gep CL->TG_MODE; */ int score; int glocal; Pwfunc function; /*Make sure evaluation functions update their cache if needed*/ A=update_aln_random_tag (A); if (! CL->pw_parameters_set) { fprintf ( stderr, "\nERROR pw_parameters_set must be set in pair_wise [FATAL]\n" );crash(""); } function=get_pair_wise_function(CL->pair_wise, CL->dp_mode,&glocal); if ( CL->get_dp_cost==NULL)CL->get_dp_cost=get_dp_cost; if (strlen ( A->seq_al[l_s[0][0]])==0 || strlen ( A->seq_al[l_s[1][0]])==0) score=empty_pair_wise ( A, ns, l_s, CL, glocal); else score=function ( A, ns, l_s, CL); return score; } int empty_pair_wise ( Alignment *A, int *ns, int **l_s, Constraint_list *CL, int glocal) { int n=0, a, b; int *l=NULL; char *string; int l0, l1, len; if ( glocal==GLOBAL) { l0=strlen (A->seq_al[l_s[0][0]]); l1=strlen (A->seq_al[l_s[1][0]]); len=MAX(l1,l0); if ( len==0)return 0; else if (l0>l1){n=ns[1];l=l_s[1];} else if (l0seq_al[l[a]], "%s", string); A->score=A->score_aln=0; A->len_aln=len; vfree ( string); return 0; } else if ( glocal==LALIGN) { A->A=declare_aln (A->S); (A->A)->len_aln=0; for ( a=0; a< 2; a++) for ( b=0; bseq_al[l_s[a][b]][0]='\0'; (A->A)->score_aln=(A->A)->score=0; return 0; } else return 0; } Pwfunc get_pair_wise_function (Pwfunc pw,char *dp_mode, int *glocal) { /*Returns a function and a mode (Glogal, Local...)*/ int a; static int npw; static Pwfunc *pwl; static char **dpl; static int *dps; /*The first time: initialize the list of pairwse functions*/ if ( npw==0) { pwl=vcalloc ( 100, sizeof (Pwfunc)); dpl=declare_char (100, 100); dps=vcalloc ( 100, sizeof (int)); pwl[npw]=fasta_cdna_pair_wise; sprintf (dpl[npw], "fasta_cdna_pair_wise"); dps[npw]=GLOBAL; npw++; pwl[npw]=cfasta_cdna_pair_wise; sprintf (dpl[npw], "cfasta_cdna_pair_wise"); dps[npw]=GLOBAL; npw++; pwl[npw]=idscore_pair_wise; sprintf (dpl[npw], "idscore_pair_wise"); dps[npw]=GLOBAL; npw++; pwl[npw]=gotoh_pair_wise; sprintf (dpl[npw], "gotoh_pair_wise"); dps[npw]=GLOBAL; npw++; pwl[npw]=gotoh_pair_wise_lgp; sprintf (dpl[npw], "gotoh_pair_wise_lgp"); dps[npw]=GLOBAL; npw++; pwl[npw]=gotoh_pair_wise_lgp_sticky; sprintf (dpl[npw], "gotoh_pair_wise_lgp_sticky"); dps[npw]=GLOBAL; npw++; pwl[npw]=proba_pair_wise; sprintf (dpl[npw], "proba_pair_wise"); dps[npw]=GLOBAL; npw++; pwl[npw]=biphasic_pair_wise; sprintf (dpl[npw], "biphasic_pair_wise"); dps[npw]=GLOBAL; npw++; pwl[npw]=subop1_pair_wise; sprintf (dpl[npw], "subop1_pair_wise"); dps[npw]=GLOBAL; npw++; pwl[npw]=subop2_pair_wise; sprintf (dpl[npw], "subop2_pair_wise"); dps[npw]=GLOBAL; npw++; pwl[npw]=myers_miller_pair_wise; sprintf (dpl[npw], "myers_miller_pair_wise"); dps[npw]=GLOBAL; npw++; pwl[npw]=test_pair_wise; sprintf (dpl[npw], "test_pair_wise"); dps[npw]=GLOBAL; npw++; pwl[npw]=fasta_gotoh_pair_wise; sprintf (dpl[npw], "fasta_pair_wise"); dps[npw]=GLOBAL; npw++; pwl[npw]=cfasta_gotoh_pair_wise; sprintf (dpl[npw], "cfasta_pair_wise"); dps[npw]=GLOBAL; npw++; pwl[npw]=very_fast_gotoh_pair_wise; sprintf (dpl[npw], "very_fast_pair_wise"); dps[npw]=GLOBAL; npw++; pwl[npw]=gotoh_pair_wise_sw; sprintf (dpl[npw], "gotoh_pair_wise_sw"); dps[npw]=LOCAL; npw++; pwl[npw]=cfasta_gotoh_pair_wise_sw; sprintf (dpl[npw], "cfasta_sw_pair_wise"); dps[npw]=LOCAL; npw++; pwl[npw]=gotoh_pair_wise_lalign; sprintf (dpl[npw], "gotoh_pair_wise_lalign"); dps[npw]=LALIGN; npw++; pwl[npw]=sim_pair_wise_lalign; sprintf (dpl[npw], "sim_pair_wise_lalign"); dps[npw]=LALIGN; npw++; pwl[npw]=domain_pair_wise; sprintf (dpl[npw], "domain_pair_wise"); dps[npw]=MOCCA; npw++; pwl[npw]=gotoh_pair_wise; sprintf (dpl[npw], "ssec_pair_wise"); dps[npw]=GLOBAL; npw++; pwl[npw]=ktup_pair_wise; sprintf (dpl[npw], "ktup_pair_wise"); dps[npw]=LOCAL; npw++; pwl[npw]=precomputed_pair_wise; sprintf (dpl[npw], "precomputed_pair_wise"); dps[npw]=GLOBAL; npw++; pwl[npw]=myers_miller_pair_wise; sprintf (dpl[npw], "default"); dps[npw]=GLOBAL; npw++; pwl[npw]=viterbi_pair_wise; sprintf (dpl[npw], "viterbi_pair_wise"); dps[npw]=GLOBAL; npw++; pwl[npw]=viterbiL_pair_wise; sprintf (dpl[npw], "viterbiL_pair_wise"); dps[npw]=GLOBAL; npw++; pwl[npw]=viterbiD_pair_wise; sprintf (dpl[npw], "viterbiD_pair_wise"); dps[npw]=GLOBAL; npw++; pwl[npw]=seq_viterbi_pair_wise; sprintf (dpl[npw], "seq_viterbi_pair_wise"); dps[npw]=GLOBAL; npw++; pwl[npw]=pavie_pair_wise; sprintf (dpl[npw], "pavie_pair_wise"); dps[npw]=GLOBAL; npw++; pwl[npw]=glocal_pair_wise; sprintf (dpl[npw], "glocal_pair_wise"); dps[npw]=GLOBAL; npw++; pwl[npw]=linked_pair_wise; sprintf (dpl[npw], "linked_pair_wise"); dps[npw]=GLOBAL; npw++; pwl[npw]=clinked_pair_wise; sprintf (dpl[npw], "clinked_pair_wise"); dps[npw]=GLOBAL; npw++; /* pwl[npw]=viterbiDGL_pair_wise; sprintf (dpl[npw], "viterbiDGL_pair_wise"); dps[npw]=GLOBAL; npw++; */ } for ( a=0; a< npw; a++) { if ( (dp_mode && strm (dpl[a], dp_mode)) || pwl[a]==pw) { pw=pwl[a]; if (dp_mode)sprintf (dp_mode,"%s", dpl[a]); glocal[0]=dps[a]; return pw; } } fprintf ( stderr, "\n[%s] is an unknown mode for dp_mode[FATAL]\n", dp_mode); crash ( "\n"); return NULL; } /*******************************************************************************/ /* */ /* */ /* Util Functions */ /* */ /* */ /*******************************************************************************/ char *build_consensus ( char *seq1, char *seq2, char *dp_mode) { Alignment *A; char *buf; int a; char c1, c2; static char *mat; if ( !mat) mat=vcalloc ( STRING, sizeof (char)); A=align_two_sequences (seq1, seq2, strcpy(mat,"idmat"), 0, 0,dp_mode); buf=vcalloc ( A->len_aln+1, sizeof (char)); for ( a=0; a< A->len_aln; a++) { c1=A->seq_al[0][a]; c2=A->seq_al[1][a]; if (is_gap(c1) && is_gap(c2))buf[a]='-'; else if (is_gap(c1))buf[a]=c2; else if (is_gap(c2))buf[a]=c1; else if (c1!=c2){vfree (buf);buf=NULL;free_aln(A);return NULL;} else buf[a]=c1; } buf[a]='\0'; free_sequence (free_aln (A), -1); return buf; } #ifdef FASTAL combine_profile () Comobine two profiles into one, using the edit sequence produce by the DP edit_sequence () insert the gaps using the int fastal (int argv, char **arg) { Sequence *S; int a, b; SeqHasch *H=NULL; int ktup=2; S=get_fasta_sequence (arg[1], NULL); for (a=0; anseq-1; a++) { for (b=a+1; bnseq; b++) { mat[b][a]=mat[a][b]addrand()%100; } } int_dist2nj_tree (s, S->name, S->nseq, tree_name); T=main_read_tree (BT); =fastal_tree_aln (T->L,T->R,S); } NT_node fastal_tree_aln ( NT_node P, Sequence *S) { int score; if (!P || P->nseq==1) return NULL; R=P->right;L=P->left; fastal_tree_aln (P->left,S); fastal_tree_aln (P->right,S); fastal_pair_wise (P); return P; } NT_node fastal_pair_wise (NT_node P) { //X- 1 //-X 2 //XX 3 //-- 4 tb=fastal_align_profile ((P->right)->prf, (P->left)->prf); l=strlen (tb); for (a=0; a< l; a++) { pr1=pr2=0; if (tb[a]== 1 || tb[a] ==3)pr1=1; if (tb[a]== 2 || tb[a] ==3)pr2=1; for (b=0; b<20; b++) P->prf[a][b]=((pr1==1)?(P->right)->prf[ppr1][b]:0) + ((pr2==1)?(P->left)->prf[ppr2][b]:0); ppr1+=pr1; ppr2+=pr2; } free_int ((P->left)->prf, -1); free_int ((P->right)->prf, -1); } #endif /*********************************COPYRIGHT NOTICE**********************************/ /*© Centro de Regulacio Genomica */ /*and */ /*Cedric Notredame */ /*Tue Oct 27 10:12:26 WEST 2009. */ /*All rights reserved.*/ /*This file is part of T-COFFEE.*/ /**/ /* T-COFFEE is free software; you can redistribute it and/or modify*/ /* it under the terms of the GNU General Public License as published by*/ /* the Free Software Foundation; either version 2 of the License, or*/ /* (at your option) any later version.*/ /**/ /* T-COFFEE is distributed in the hope that it will be useful,*/ /* but WITHOUT ANY WARRANTY; without even the implied warranty of*/ /* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the*/ /* GNU General Public License for more details.*/ /**/ /* You should have received a copy of the GNU General Public License*/ /* along with Foobar; if not, write to the Free Software*/ /* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA*/ /*............................................... |*/ /* If you need some more information*/ /* cedric.notredame@europe.com*/ /*............................................... |*/ /**/ /**/ /* */ /*********************************COPYRIGHT NOTICE**********************************/