1 /******************************COPYRIGHT NOTICE*******************************/
2 /* (c) Centro de Regulacio Genomica */
5 /* 12 Aug 2014 - 22:07. */
6 /*All rights reserved. */
7 /*This file is part of T-COFFEE. */
9 /* T-COFFEE is free software; you can redistribute it and/or modify */
10 /* it under the terms of the GNU General Public License as published by */
11 /* the Free Software Foundation; either version 2 of the License, or */
12 /* (at your option) any later version. */
14 /* T-COFFEE is distributed in the hope that it will be useful, */
15 /* but WITHOUT ANY WARRANTY; without even the implied warranty of */
16 /* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the */
17 /* GNU General Public License for more details. */
19 /* You should have received a copy of the GNU General Public License */
/* along with T-COFFEE; if not, write to the Free Software */
21 /* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
22 /*............................................... */
23 /* If you need some more information */
24 /* cedric.notredame@europe.com */
25 /*............................................... */
26 /******************************COPYRIGHT NOTICE*******************************/
/*
 * Control of alignment sizes.
 * Both routines take and return plain ints; what the values mean is fixed
 * by the definitions, which are not visible from this header.
 */
int set_landscape_msa(int len);
int get_msa_line_length(int line, int aln_len);
60 int seq_reformat (int argc, char **argv);
62 Sequence_data_struc *read_data_structure ( char *in_format, char *in_file,Action_data_struc *RAD);
63 Alignment * read_fasta_aln_noceck ( char *name, Alignment *A);
64 Alignment * main_read_aln ( char *name, Alignment *A);
65 Sequence * read_sequences ( char *name);
66 Sequence * read_alifold ( char *name);
67 Alignment *alifold2aln ( char *name);
68 Sequence * main_read_seq ( char *mname);
69 int output_format_aln ( char *format, Alignment *A, Alignment *EA,char *name);
70 int main_output ( Sequence_data_struc *D1, Sequence_data_struc *D2, Sequence_data_struc *DST, char *out_format, char *out_file);
/*
 * Format identification.
 * Note that identify_format returns a single char and receives a char **,
 * whereas the other three routines return strings or string lists.
 */
char  *identify_seq_format(char *file);
char  *name2type_name(char *name);
char   identify_format(char **fname);
char **identify_list_format(char **list, int n);
/*
 * Per-format tests. Every routine here returns int.
 * Presumably non-zero means "matches" -- confirm against the definitions.
 *
 * format_is_fasta_aln used to be declared with a single `char *file`
 * argument; only the two-argument form below is declared now.
 */
int type_is_exon_boundaries(char **seq, int n);

int format_is_oligo(char *file);
int format_is_msf(char *file);

int format_is_fasta(char *file);
int format_is_fasta_aln(char *file, int i_know_that_it_not_seq);
int format_is_fasta_seq(char *file);

int is_pir_name(char *name);
int format_is_pir(char *file);
int format_is_pir_aln(char *file);
int format_is_pir_seq(char *file);
int pir_name(char *name);

int format_is_conc_aln(char *file);
int format_is_saga(char *file);
int format_is_swissprot(char *name);
94 int is_seq ( char *name);
95 int is_aln ( char *name);
96 int has_pdb (char *name);
97 int is_stockhom_aln ( char *name);
98 int is_blast_file (char *name);
99 int is_sap_file (char *name);
100 int is_pdb_file ( char *name);
101 int is_simple_pdb_file ( char *name);
102 char *fix_pdb_file (char *name);
104 int is_pdb_name ( char *name);
105 char* get_pdb_id(char *name);
106 char* get_pdb_struc(char *name, int start, int end);
107 char* seq_is_pdb_struc ( Sequence *S, int i);
108 char* is_pdb_struc ( char *name); /*Returns NULL if not a PDB structure Or a the name of a file containing a PDB structure*/
109 int is_matrix (char *name);
/*
 * Further int-returning tests, each taking a single file or entry name:
 * libraries, weight files, tree formats and method files.
 */
int is_lib(char *name);
int is_lib_01(char *name);
int is_lib_02(char *name);
int is_lib_list(char *name);

int is_single_seq_weight_file(char *fname);

int is_treelist(char *name);
int is_newick(char *name);
int is_nexus(char *file);

int is_method(char *file);
/*
 * Format-name handling: one name-to-name mapping and four int-returning
 * lookups keyed by a format name.
 */
char *format_name2aln_format_name(char *name);

int is_in_format_list(char *name);
int is_out_format_list(char *name);
int is_struc_in_format_list(char *name);
int is_struc_out_format_list(char *name);
127 /*******************************************************************************************/
132 /***************************************************************************************** */
134 char *** read_rename_file ( char *fname, int mode);
135 void get_barton_list_tc_seq ( char *in_file);
136 int process_barton_entry (char *buf, char *name);
138 Structure *read_rna_struc_number ( Alignment *A, char *fname);
139 char ** read_lib_list (char *name, int *n);
140 /*******************************************************************************************/
145 /***************************************************************************************** */
146 Weights* get_amps_sd_scores ( char *fname);
147 Weights *read_seq_weight (char **name, int nseq, char* seq_weight);
148 /*******************************************************************************************/
151 /* INPUT SEQUENCES */
153 /***************************************************************************************** */
154 char ***read_group ( char *file);
155 Sequence* get_pdb_sequence ( char *fname);
156 Sequence* get_struc_gor ( char *fname);
157 Sequence* get_dialign_sequence ( char *fname);
158 Sequence* get_pima_sequence ( char *fname);
159 Sequence* get_sequence_dali ( char *fname);
160 Sequence* get_pir_sequence ( char *fname, char *comment_name);
161 Sequence* perl_reformat2fasta ( char *perl_script, char *file);
163 Sequence* get_fasta_sequence ( char *fname, char *comment_name);
164 Sequence* get_fasta_sequence_num ( char *fname, char *comment_name);
165 Sequence* get_fasta_sequence_raw ( char *fname, char *comment_name);
166 Sequence *get_file_list ( char *fname);
167 Sequence *get_tree_file_list ( char *fname);
169 Sequence* get_gor_sequence ( char *fname, char *comment_name);
170 Sequence* get_swissprot_sequence ( char *fname, char *comment_name);
171 int fscanf_seq_name ( FILE *fp, char *sname);
173 void read_check ( Alignment *A, char *check_file);
174 void read_stockholm_aln ( char *fname, Alignment *A);
175 void read_aln ( char *fname, Alignment *A);
176 void read_number_aln ( char *fname, Alignment *A);
177 Alignment *read_blast_aln ( char *fname, Alignment *A);
178 void read_msf_aln ( char *fname, Alignment *A);
179 void read_amps_aln ( char *in_file, Alignment *A);
180 int get_amps_seq_name ( char **name, char* fname);
181 Alignment *read_gotoh_aln ( char *fname, Alignment *A);
183 void undump_msa ( Alignment *A, char *tmp);
184 void dump_msa ( char *file,Alignment *A, int nseq, int *lseq);
185 /*******************************************************************************************/
188 /* OUTPUT MATRICES */
190 /***************************************************************************************** */
191 int output_freq_mat ( char *outfile, Alignment *A);
192 /*******************************************************************************************/
195 /* OUTPUT P-Values */
197 /***************************************************************************************** */
198 float output_maln_pval ( char *outfile, Alignment *A);
199 /*******************************************************************************************/
204 /***************************************************************************************** */
205 void output_similarities (char *file, Alignment *A, char *mode);
206 void output_similarities_pw (char *file, Alignment *A, Alignment *B, char *mode);
207 Alignment * similarities_file2aln ( char *file);
208 int** input_similarities (char *file, Alignment *A, char *mode);
210 void output_statistics (char *file, Alignment *A, char *mode);
211 void output_pw_weights4saga ( Weights *W, float **w_list, char *wfile);
212 int output_seq_weights ( Weights *W, char *wfile);
213 FILE * display_weights (Weights *W, FILE *fp);
214 /*******************************************************************************************/
219 /***************************************************************************************** */
/* Name clean-up: a list variant over n names and a single-name variant,
 * both taking an int `mode`. */
char **clean_seq_names(char **names, int n, int mode);
char  *clean_seq_name(char *name, int mode);
224 void output_pir_seq1 (char *fname, Alignment*A );
225 void output_pir_seq (char *fname, Alignment*A );
226 void output_gor_seq (char *fname, Alignment*A );
227 void output_mult_fasta_seq (char *fname, Alignment*A, int n );
229 void main_output_fasta_seq ( char *fname, Alignment *A, int header);
230 void output_fasta_simple ( char *name, Sequence *S);
231 void output_fasta_seqS (char *fname, Sequence *S );
232 void output_fasta_seq1 (char *fname, Alignment*A );
233 void output_fasta_seq2 (char *fname, Alignment*A );
234 char *output_fasta_seqX (char *name, char *mode, Sequence *S, Alignment *A, int i);
236 void output_pir_check (char *fname,int nseq, char **A );
237 void output_fasta_seq (char *fname, Alignment*A );
238 void output_gotoh_seq (char *fname, Alignment*A );
239 void output_est_prf (char *fname, Alignment *A);
240 void output_gor_seq (char *fname, Alignment*A );
241 /*******************************************************************************************/
246 /***************************************************************************************** */
247 void output_pir_aln ( char *fname,Alignment*A);
248 void output_model_aln ( char *fname,Alignment*A );
249 char * output_fasta_sub_aln (char *fname, Alignment*A, int ns, int *ls );
250 char * output_fasta_sub_aln2 (char *fname, Alignment*A, int *ns, int **ls );
252 void ouput_suchard_aln ( char *fname,Alignment*A);
253 void output_fasta_aln ( char *fname, Alignment *A);
254 void output_mfasta_aln ( char *fname, Alignment *A);
256 void output_xmfa_aln ( char *fname, Alignment *A);
257 void output_msf_aln ( char *fname,Alignment*B);
258 FILE * output_generic_interleaved_aln (FILE *fp, Alignment *B, int line, char gap, char *mode);
259 void output_stockholm_aln (char *file, Alignment *A, Alignment *ST);
260 void output_clustal_aln( char *name, Alignment*B);
261 void output_strict_clustal_aln( char *name, Alignment*B);
262 void output_generic_clustal_aln( char *name, Alignment*B, char *format);
263 void output_saga_aln ( char *name, Alignment*B);
264 void output_rphylip_aln ( char *name, Alignment*B);
265 void output_phylip_aln ( char *name, Alignment*B);
266 void output_mocca_aln ( char *name, Alignment*B,Alignment*S);
267 void output_rnalign (char *out_file, Alignment*A,Sequence *STRUC);
268 void output_pw_lib_saga_aln (char *lib_name, Alignment *A );
269 void output_lib (char *lib_name, Alignment *A );
270 void output_compact_aln( char *name, Alignment *B);
272 void print_sub_aln ( Alignment *B, int *ns, int **ls);
273 void print_aln ( Alignment *B);
274 FILE * output_aln( Alignment *B, FILE *fp);
277 FILE * output_aln_score ( Alignment *B, FILE *fp);
278 FILE * output_aln_with_res_number ( Alignment *B, FILE *fp);
281 FILE* output_Alignment ( Alignment *B, FILE *fp);
282 FILE* output_Alignment_without_header ( Alignment *B, FILE *fp);
283 FILE * output_Alignment_score ( Alignment *B, FILE *fp);
284 FILE * output_Alignment_with_res_number ( Alignment *B, FILE *fp);
285 void output_constraints ( char *fname, char *mode, Alignment *A);
287 Alignment *input_conc_aln ( char *name, Alignment *A);
288 void output_conc_aln ( char *name, Alignment *B);
289 void output_glalign ( char *name, Alignment *B, Alignment *S);
290 void output_lalign_header( char *name, Alignment *B);
291 void output_lalign ( char *name, Alignment *B);
292 void output_lalign_aln ( char *name, Alignment *B);
294 /**************************************************************************************************/
297 /* INPUT/OUTPUT MATRICES */
299 /**************************************************************************************************/
/* Matrix-file tests; each takes a file name and returns int. */
int is_blast_matrix(char *fname);
int is_pavie_matrix(char *fname);
int is_clustalw_matrix(char *fname);

int is_distance_matrix_file(char *name);
int is_similarity_matrix_file(char *name);
307 void aln2proba_mat (Sequence *S);
308 void aln2mat (Sequence *S);
309 void aln2mat_diaa (Sequence *S);
310 int **seq2latmat ( Sequence *S, char *fname);
311 int output_mat (int **mat, char *fname, char *alp, int offset);
312 int ** read_blast_matrix ( char *mat_name);
313 int output_blast_mat (int **mat, char *fname);
314 double* mat2cmp (int **mat1, int **mat2);
316 void output_pavie_mat (int **mat, char *fname, double gep, char *alp);
317 int ** read_pavie_matrix ( char *mat_name);
319 /****************************************************************************************************/
320 /*************************** *************************************/
321 /*************************** PROCESSING *************************************/
322 /*************************** *************************************/
323 /*******************************************************************************************/
327 /***************************************************************************************** */
332 Structure * declare_rna_structure_num (Sequence *SA);
334 char *thread_aa_seq_on_dna_seq( char *s);
335 void thread_seq_struc2aln ( Alignment *A, Sequence *ST);
336 Alignment *thread_dnaseq_on_prot_aln (Sequence *S, Alignment *A);
337 void cache_id ( Alignment *A);
341 int process_est_sequence ( Sequence *S, int *cluster_list);
342 char * invert_seq ( char *seq);
343 int get_best_match ( char *seq1, char *seq2);
344 int** extract_m_diag_streches ( int ** m, int l1, int l2,char *seq1, char *seq2, int *n_mdiag);
345 int is_strech ( char *AA, char *seq1, char *seq2, int len, int x, int y);
347 int search_for_cluster ( int seq, int cluster_number, int *cluster_list, int T, int nseq, int **S);
348 int * SHC ( int nseq, int **NST, int **ST);
349 int mutate_sol (int *sol, int nseq);
350 int evaluate_sol ( int*sol, int nseq, int **ST, int **NST);
354 char **make_symbols ( char *name, int *n);
355 Alignment *code_dna_aln (Alignment *A);
356 char* back_translate_dna_codon ( char aa, int deterministic);
357 int translate_dna_codon ( char *seq, char stop);
358 char* mutate_amino_acid ( char aa, char *mode);
359 Alignment * mutate_aln ( Alignment *A, char *r);
361 int extend_seqaln (Sequence *S, Alignment *A);
362 int unextend_seqaln (Sequence *S, Alignment *A);
363 char *extend_seq (char *seq);
364 char *unextend_seq (char *seq);
366 Sequence * transform_sequence ( Sequence *S, char *mode);
367 Alignment *translate_splice_dna_aln (Alignment *A,Alignment *ST );
368 Alignment * mutate_cdna_aln ( Alignment *A);
370 char *test_dna2gene (char *dna, int *w);
371 Sequence *dnaseq2geneseq (Sequence *S, int **w);
373 int ** shift_res_weights ( Sequence *R, int **w, int shift);
374 int res_weights2min(Sequence *R, int **w);
375 int res_weights2max(Sequence *R, int **w);
376 int res_weights2avg(Sequence *R, int **w);
377 int output_wexons (char *name, Alignment *A);
378 int scan_res_weights4ac (Sequence *R, int **w, int start, int end, int step);
379 float *res_weights2accuracy_counts ( Sequence *R, int **w,int T, float *result);
380 float* genepred_seq2accuracy_counts (Sequence *R, Sequence *T,float *result);
381 void genepred_seq2accuracy_counts4all (Sequence *R, Sequence *Ts); //JM
382 float* genepred2accuracy_counts (char *ref, char *target , float *result);
384 char *dna2gene (char *dna, int *w);
385 char * translate_dna_seq_on3frame ( char *dna_seq, char stop, char *prot);
387 char * translate_dna_seq ( char *dna_seq, int frame, char stop, char *prot);
388 int is_stop (char r1, char r2, char r3);
389 int seq2tblastx_db (char *file,Sequence *S, int strand);
390 int seq2blastdb (char *out, Sequence *S);
391 char * back_translate_dna_seq ( char *in_seq,char *out_seq, int mode);
392 Alignment *back_translate_dna_aln (Alignment *A);
393 Sequence *translate_dna_seqS (Sequence *S, int frame, int stop);
394 Alignment *translate_dna_aln (Alignment *A, int frame);
395 char *dna_seq2pep_seq (char *seq, int frame);
397 Alignment *clean_gdna_aln (Alignment *A);
398 Alignment *clean_cdna_aln (Alignment *A);
399 Alignment *clean_est (Alignment *A);
400 /**************************************************************************************************/
401 /******************************** ********************************************/
402 /******************************** PROCESSING ********************************************/
403 /*************** **************** ********************************************/
404 void modify_data (Sequence_data_struc *D1, Sequence_data_struc *D2, Sequence_data_struc *DST, char **action_list,int n_actions, Action_data_struc *RAD);
410 Alignment *clean_aln (Alignment *A);
411 Sequence *clean_sequence ( Sequence *S);
412 char ** translate_names (int n, char **name);
413 char * translate_name ( char *name);
414 char *decode_name (char *name, int mode);
415 FILE * display_sequences_names (Sequence *S, FILE *fp, int check_pdb_status, int print_templates);
416 Sequence *add_file2file_list (char *name, Sequence *S);