JWS-112 Bumping version of T-Coffee to version 11.00.8cbe486.
[jabaws.git] / binaries / src / tcoffee / t_coffee_source / util_lib / reformat.h
1 /******************************COPYRIGHT NOTICE*******************************/
2 /*  (c) Centro de Regulacio Genomica                                                        */
3 /*  and                                                                                     */
4 /*  Cedric Notredame                                                                        */
5 /*  12 Aug 2014 - 22:07.                                                                    */
6 /*All rights reserved.                                                                      */
7 /*This file is part of T-COFFEE.                                                            */
8 /*                                                                                          */
9 /*    T-COFFEE is free software; you can redistribute it and/or modify                      */
10 /*    it under the terms of the GNU General Public License as published by                  */
11 /*    the Free Software Foundation; either version 2 of the License, or                     */
12 /*    (at your option) any later version.                                                   */
13 /*                                                                                          */
14 /*    T-COFFEE is distributed in the hope that it will be useful,                           */
15 /*    but WITHOUT ANY WARRANTY; without even the implied warranty of                        */
16 /*    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the                         */
17 /*    GNU General Public License for more details.                                          */
18 /*                                                                                          */
19 /*    You should have received a copy of the GNU General Public License                     */
20 /*    along with T-COFFEE; if not, write to the Free Software                               */
21 /*    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA             */
22 /*...............................................                                           */
23 /*  If you need some more information                                                       */
24 /*  cedric.notredame@europe.com                                                             */
25 /*...............................................                                           */
26 /******************************COPYRIGHT NOTICE*******************************/
27 #ifndef __REFORMAT_H
28 #define __REFORMAT_H
29
/*
 * Sequence_data_struc: bundle of the data objects handled by the reformatting
 * entry points declared in this header.  read_data_structure() returns a
 * pointer to one; main_output() and modify_data() each take three (D1, D2, DST).
 * NOTE(review): which members are populated for a given input format is not
 * visible from this header -- confirm against read_data_structure().
 */
30 typedef struct
31     {
32       Alignment *A;            /* alignment object */
33       Weights *W;              /* weights object */
34       Sequence *S;             /* sequence object */
35       int **M;                 /* int matrix; NOTE(review): contents/dimensions not documented here -- confirm */
36       Structure *RNA_ST;       /* Structure object; name suggests RNA structure data -- confirm */
37       NT_node T;               /* NT_node handle (type declared elsewhere) */
38       Constraint_list *CL;     /* constraint list object */
39       char format[100];        /* fixed 100-byte buffer; presumably the format name -- confirm. Copies into it must be bounded */
40       char file[100];          /* fixed 100-byte buffer; presumably the file name -- confirm. Copies into it must be bounded */
41       int rm_gap;              /* presumably a flag requesting gap removal -- confirm */
42
43 }Sequence_data_struc;
44
/*
 * Action_data_struc: option block passed (as RAD) to read_data_structure()
 * and modify_data().
 * NOTE(review): the meaning of each member below is inferred from its name
 * only -- confirm against the implementation before relying on it.
 */
45 typedef struct
46     {
47         char **symbol_list;    /* array of strings; presumably n_symbol entries -- confirm */
48         int n_symbol;          /* presumably the number of entries in symbol_list -- confirm */
49         char *coor_file;       /* presumably a coordinate file name -- confirm */
50         int rm_gap;            /* presumably a flag: remove gaps -- confirm */
51         int keep_case;         /* presumably a flag: preserve residue case -- confirm */
52         int keep_name;         /* presumably a flag: preserve sequence names -- confirm */
53         int use_consensus;     /* presumably a flag: use a consensus -- confirm */
54 }Action_data_struc;
55
56 /*Control of alignment sizes*/
57 int  set_landscape_msa (int len);
58 int get_msa_line_length (int line, int aln_len);
59
60 int seq_reformat (int argc, char **argv);
61
/*
 * Top-level readers and writers.  read_data_structure() builds a
 * Sequence_data_struc from a format name, a file name and an
 * Action_data_struc; main_output() takes three such structures plus an
 * output format and output file name.
 * NOTE(review): ownership of the returned pointers and the meaning of the
 * int return values are not visible from this header -- confirm.
 */
62 Sequence_data_struc *read_data_structure ( char *in_format, char *in_file,Action_data_struc *RAD);
63 Alignment * read_fasta_aln_noceck ( char *name, Alignment *A); /* NOTE(review): "noceck" (sic), presumably "nocheck"; the spelling must match the definition and callers, so it is not renamed here */
64 Alignment * main_read_aln ( char *name, Alignment *A);
65 Sequence  * read_sequences ( char *name);
66 Sequence  * read_alifold   ( char *name);
67 Alignment *alifold2aln     ( char *name);
68 Sequence  * main_read_seq ( char *mname);
69 int output_format_aln ( char *format, Alignment *A, Alignment *EA,char *name);
70 int main_output   ( Sequence_data_struc *D1, Sequence_data_struc *D2, Sequence_data_struc *DST, char *out_format, char *out_file);
71
72 char * identify_seq_format ( char *file);
73 char * name2type_name ( char *name);
74 char identify_format (char **fname);
75 char **identify_list_format ( char **list, int n);
76
77 int type_is_exon_boundaries(char **seq, int n);
78
/*
 * Format probes: each takes a file name (or a sequence name) and returns int.
 * NOTE(review): presumably non-zero means "matches" -- confirm against the
 * definitions.
 */
79 int format_is_oligo  ( char *file);
80 int format_is_msf  ( char *file);
81 int format_is_fasta( char *file);
82 // Disabled one-argument prototype: int format_is_fasta_aln( char *file); -- the two-argument declaration on the next line is the live one.
83 int format_is_fasta_aln ( char *file, int i_know_that_it_not_seq);
84 int format_is_fasta_seq( char *file);
85 int is_pir_name (char *name);
86 int format_is_pir  ( char *file);
87 int format_is_pir_aln( char *file);
88 int format_is_pir_seq( char *file);
89 int pir_name (char *name);
90 int format_is_conc_aln (char *file);
91 int format_is_saga  ( char *file);
92 int format_is_swissprot (char *name);
93
94 int is_seq ( char *name);
95 int is_aln ( char *name);
96 int has_pdb (char *name);
97 int is_stockhom_aln ( char *name);
98 int is_blast_file (char *name);
99 int is_sap_file (char *name);
100 int is_pdb_file ( char *name);
101 int is_simple_pdb_file ( char *name);
102 char *fix_pdb_file (char *name);
103
/*
 * PDB name/structure helpers.
 * NOTE(review): who owns (and must free) the returned char* values is not
 * visible from this header -- confirm against the definitions.
 */
104 int is_pdb_name ( char *name);
105 char* get_pdb_id(char *name);
106 char* get_pdb_struc(char *name, int start, int end);
107 char*  seq_is_pdb_struc ( Sequence *S, int i);
108 char* is_pdb_struc ( char *name); /* Returns NULL if name is not a PDB structure, otherwise the name of a file containing the PDB structure */
109 int is_matrix (char *name);
110
111 int is_lib (char *name);
112 int is_lib_01 (char *name);
113 int is_lib_02 (char *name);
114 int is_lib_list ( char *name);
115 int is_single_seq_weight_file (char *fname);
116 int is_treelist(char *name);
117 int is_newick  (char *name);
118 int is_nexus (char *file);
119
120 int is_method ( char *file);
121
122 char *format_name2aln_format_name (char *name);
123 int is_in_format_list ( char *name);
124 int is_out_format_list ( char *name);
125 int is_struc_in_format_list ( char *name);
126 int is_struc_out_format_list ( char *name);
127 /*******************************************************************************************/
128 /*                                                                                         */
129 /*                                                                                         */
130 /*                               INPUT MISC                                               */
131 /*                                                                                         */
132 /***************************************************************************************** */
133
134 char *** read_rename_file ( char *fname, int mode);
135 void get_barton_list_tc_seq ( char *in_file);
136 int process_barton_entry (char *buf, char *name);
137
138 Structure *read_rna_struc_number ( Alignment *A, char *fname);
139 char ** read_lib_list (char *name, int *n);
140 /*******************************************************************************************/
141 /*                                                                                         */
142 /*                                                                                         */
143 /*                               INPUT WEIGHTS                                             */
144 /*                                                                                         */
145 /***************************************************************************************** */
146 Weights* get_amps_sd_scores ( char *fname);
147 Weights *read_seq_weight (char **name, int nseq, char* seq_weight);
148 /*******************************************************************************************/
149 /*                                                                                         */
150 /*                                                                                         */
151 /*                               INPUT SEQUENCES                                            */
152 /*                                                                                         */
153 /***************************************************************************************** */
154 char ***read_group ( char *file);
155 Sequence* get_pdb_sequence           ( char *fname);
156 Sequence* get_struc_gor              ( char *fname);
157 Sequence* get_dialign_sequence       ( char *fname);
158 Sequence* get_pima_sequence          ( char *fname);
159 Sequence* get_sequence_dali          ( char *fname);
160 Sequence* get_pir_sequence           ( char *fname, char *comment_name);
161 Sequence* perl_reformat2fasta        ( char *perl_script, char *file);
162
163 Sequence* get_fasta_sequence         ( char *fname, char *comment_name);
164 Sequence* get_fasta_sequence_num     ( char *fname, char *comment_name);
165 Sequence* get_fasta_sequence_raw     ( char *fname, char *comment_name);
166 Sequence *get_file_list ( char *fname);
167 Sequence *get_tree_file_list ( char *fname);
168
169 Sequence* get_gor_sequence           ( char *fname, char *comment_name);
170 Sequence* get_swissprot_sequence     ( char *fname, char *comment_name);
171 int  fscanf_seq_name ( FILE *fp, char *sname);
172
173 void read_check ( Alignment *A, char *check_file);
174 void read_stockholm_aln ( char *fname, Alignment *A);
175 void read_aln ( char *fname, Alignment *A);
176 void read_number_aln ( char *fname, Alignment *A);
177 Alignment *read_blast_aln  ( char *fname, Alignment *A);
178 void read_msf_aln ( char *fname, Alignment *A);
179 void read_amps_aln ( char *in_file, Alignment *A);
180 int get_amps_seq_name ( char **name, char* fname);
181 Alignment *read_gotoh_aln ( char *fname, Alignment *A);
182
183 void undump_msa ( Alignment *A, char *tmp);
184 void dump_msa ( char *file,Alignment *A, int nseq, int *lseq);
185 /*******************************************************************************************/
186 /*                                                                                         */
187 /*                                                                                         */
188 /*                               OUTPUT MATRICES                                           */
189 /*                                                                                         */
190 /***************************************************************************************** */
191 int output_freq_mat ( char *outfile, Alignment *A);
192 /*******************************************************************************************/
193 /*                                                                                         */
194 /*                                                                                         */
195 /*                               OUTPUT P-Values                                           */
196 /*                                                                                         */
197 /***************************************************************************************** */
198 float output_maln_pval ( char *outfile, Alignment *A);
199 /*******************************************************************************************/
200 /*                                                                                         */
201 /*                                                                                         */
202 /*                               OUTPUT WEIGHTS                                            */
203 /*                                                                                         */
204 /***************************************************************************************** */
205 void  output_similarities (char *file, Alignment *A, char *mode);
206 void  output_similarities_pw (char *file, Alignment *A, Alignment *B, char *mode);
207 Alignment * similarities_file2aln ( char *file);
208 int** input_similarities (char *file, Alignment *A, char *mode);
209
210 void output_statistics (char *file, Alignment *A, char *mode);
211 void output_pw_weights4saga ( Weights *W, float **w_list, char *wfile);
212 int  output_seq_weights ( Weights *W, char *wfile);
213 FILE * display_weights (Weights *W, FILE *fp);
214 /*******************************************************************************************/
215 /*                                                                                         */
216 /*                                                                                         */
217 /*                               OUTPUT SEQ                                                */
218 /*                                                                                         */
219 /***************************************************************************************** */
220 char** clean_seq_names (char **names, int n, int mode);
221 char *clean_seq_name (char *name, int mode);
222
223
224 void output_pir_seq1 (char *fname, Alignment*A );
225 void output_pir_seq (char *fname, Alignment*A );
226 void output_gor_seq (char *fname, Alignment*A );
227 void output_mult_fasta_seq (char *fname, Alignment*A, int n );
228
229 void main_output_fasta_seq ( char *fname, Alignment *A, int header);
230 void output_fasta_simple   ( char *name, Sequence *S);
231 void output_fasta_seqS (char *fname, Sequence *S );
232 void output_fasta_seq1 (char *fname, Alignment*A );
233 void output_fasta_seq2 (char *fname, Alignment*A );
234 char *output_fasta_seqX (char *name, char *mode, Sequence *S, Alignment *A, int i);
235
/*
 * Remaining per-format sequence writers: each takes an output file name and
 * the data to write, and returns nothing.
 * output_gor_seq() is intentionally not repeated here: it is already declared
 * earlier in this OUTPUT SEQ section, and a second identical prototype only
 * invites the two copies drifting apart.
 */
236 void output_pir_check (char *fname,int nseq, char **A );
237 void output_fasta_seq (char *fname, Alignment*A );
238 void output_gotoh_seq (char *fname, Alignment*A );
239 void output_est_prf   (char *fname, Alignment *A);
241 /*******************************************************************************************/
242 /*                                                                                         */
243 /*                                                                                         */
244 /*                               OUTPUT ALN                                                */
245 /*                                                                                         */
246 /***************************************************************************************** */
247 void output_pir_aln    ( char *fname,Alignment*A);
248 void output_model_aln  ( char *fname,Alignment*A );
249 char * output_fasta_sub_aln (char *fname, Alignment*A, int ns, int *ls  );
250 char * output_fasta_sub_aln2 (char *fname, Alignment*A, int *ns, int **ls  );
251
/* Alignment writers: each takes an output file name and the Alignment to write. */
252 void ouput_suchard_aln ( char *fname,Alignment*A); /* NOTE(review): "ouput" (sic) -- every sibling is spelled output_*; the name must match the definition and callers, so it is not renamed here */
253 void output_fasta_aln  ( char *fname, Alignment *A);
254 void output_mfasta_aln  ( char *fname, Alignment *A);
255
256 void output_xmfa_aln  ( char *fname, Alignment *A);
257 void output_msf_aln    ( char *fname,Alignment*B);
258 FILE * output_generic_interleaved_aln (FILE *fp, Alignment *B, int line, char gap, char *mode);
259 void output_stockholm_aln (char *file, Alignment *A, Alignment *ST);
260 void output_clustal_aln( char *name, Alignment*B);
261 void output_strict_clustal_aln( char *name, Alignment*B);
262 void output_generic_clustal_aln( char *name, Alignment*B, char *format);
263 void output_saga_aln   ( char *name, Alignment*B);
264 void output_rphylip_aln ( char *name, Alignment*B);
265 void output_phylip_aln ( char *name, Alignment*B);
266 void output_mocca_aln  ( char *name, Alignment*B,Alignment*S);
267 void output_rnalign    (char *out_file, Alignment*A,Sequence *STRUC);
268 void output_pw_lib_saga_aln (char *lib_name, Alignment *A );
269 void output_lib        (char *lib_name, Alignment *A );
270 void output_compact_aln( char *name, Alignment *B);
271
272 void print_sub_aln ( Alignment *B, int *ns, int **ls);
273 void print_aln ( Alignment *B);
274 FILE * output_aln( Alignment *B, FILE *fp);
275
276
277 FILE * output_aln_score ( Alignment *B, FILE *fp);
278 FILE * output_aln_with_res_number ( Alignment *B, FILE *fp);
279
280
281 FILE* output_Alignment ( Alignment *B, FILE *fp);
282 FILE* output_Alignment_without_header ( Alignment *B, FILE *fp);
283 FILE * output_Alignment_score ( Alignment *B, FILE *fp);
284 FILE * output_Alignment_with_res_number ( Alignment *B, FILE *fp);
285 void output_constraints ( char *fname, char *mode, Alignment *A);
286
287 Alignment *input_conc_aln ( char *name, Alignment *A);
288 void output_conc_aln ( char *name, Alignment *B);
289 void output_glalign       ( char *name, Alignment *B, Alignment *S);
290 void output_lalign_header( char *name, Alignment *B);
291 void output_lalign       ( char *name, Alignment *B);
292 void output_lalign_aln   ( char *name, Alignment *B);
293
294 /**************************************************************************************************/
295 /*                                                                                                */
296 /*                                                                                                */
297 /*                               INPUT/OUTPUT MATRICES                                                  */
298 /*                                                                                                */
299 /**************************************************************************************************/
300 int is_blast_matrix (char *fname);
301 int is_pavie_matrix (char *fname);
302 int is_clustalw_matrix (char *fname);
303
304 int is_distance_matrix_file (char *name);
305 int is_similarity_matrix_file (char *name);
306
307 void aln2proba_mat (Sequence *S);
308 void aln2mat (Sequence *S);
309 void aln2mat_diaa (Sequence *S);
310 int **seq2latmat ( Sequence *S, char *fname);
311 int output_mat (int **mat, char *fname, char *alp, int offset);
312 int ** read_blast_matrix ( char *mat_name);
313 int output_blast_mat (int **mat, char *fname);
314 double* mat2cmp (int **mat1, int **mat2);
315
316 void output_pavie_mat (int **mat, char *fname, double gep, char *alp);
317 int ** read_pavie_matrix ( char *mat_name);
318
319 /****************************************************************************************************/
320 /***************************                                    *************************************/
321 /***************************             PROCESSING             *************************************/
322 /***************************                                    *************************************/
323 /*******************************************************************************************/
324 /*                                                                                         */
325 /*                                                                                         */
326 /*                              THREADING                                                  */
327 /***************************************************************************************** */
328
329
330
331
332 Structure * declare_rna_structure_num (Sequence *SA);
333
334 char *thread_aa_seq_on_dna_seq( char *s);
335 void thread_seq_struc2aln ( Alignment *A, Sequence *ST);
336 Alignment *thread_dnaseq_on_prot_aln (Sequence *S, Alignment *A);
337 void cache_id ( Alignment *A);
338
339
340
341 int process_est_sequence ( Sequence *S, int *cluster_list);
342 char * invert_seq ( char *seq);
343 int get_best_match ( char *seq1, char *seq2);
344 int** extract_m_diag_streches ( int ** m, int l1, int l2,char *seq1, char *seq2, int *n_mdiag);
345 int is_strech ( char *AA, char *seq1, char *seq2, int len, int x, int y);
346
347 int search_for_cluster ( int seq, int cluster_number, int *cluster_list, int T, int nseq, int **S);
348 int * SHC ( int nseq, int **NST, int **ST);
349 int mutate_sol (int *sol, int nseq);
350 int evaluate_sol ( int*sol, int nseq, int **ST, int **NST);
351
352
353
354 char **make_symbols ( char *name, int *n);
355 Alignment *code_dna_aln (Alignment *A);
356 char* back_translate_dna_codon ( char aa, int deterministic);
357 int translate_dna_codon ( char *seq, char stop);
358 char* mutate_amino_acid ( char aa, char *mode);
359 Alignment * mutate_aln ( Alignment *A, char *r);
360
361 int extend_seqaln (Sequence *S, Alignment *A);
362 int unextend_seqaln (Sequence *S, Alignment *A);
363 char *extend_seq (char *seq);
364 char *unextend_seq (char *seq);
365
366 Sequence * transform_sequence ( Sequence *S, char *mode);
367 Alignment *translate_splice_dna_aln (Alignment *A,Alignment *ST );
368 Alignment * mutate_cdna_aln ( Alignment *A);
369
370 char *test_dna2gene (char *dna, int *w);
371 Sequence *dnaseq2geneseq (Sequence *S, int **w);
372
373 int ** shift_res_weights ( Sequence *R, int **w, int shift);
374 int res_weights2min(Sequence *R, int **w);
375 int res_weights2max(Sequence *R, int **w);
376 int res_weights2avg(Sequence *R, int **w);
377 int output_wexons (char *name, Alignment *A);
378 int scan_res_weights4ac (Sequence *R, int **w, int start, int end, int step);
379 float *res_weights2accuracy_counts ( Sequence *R, int **w,int T, float *result);
380 float* genepred_seq2accuracy_counts (Sequence *R, Sequence *T,float *result);
381 void genepred_seq2accuracy_counts4all (Sequence *R, Sequence *Ts); //JM
382 float* genepred2accuracy_counts     (char *ref,  char *target , float *result);
383
384 char *dna2gene (char *dna, int *w);
385 char * translate_dna_seq_on3frame (  char *dna_seq, char stop, char *prot);
386
387 char * translate_dna_seq ( char *dna_seq, int frame, char stop, char *prot);
388 int is_stop (char r1, char r2, char r3);
389 int seq2tblastx_db (char *file,Sequence *S, int strand);
390 int seq2blastdb (char *out, Sequence *S);
391 char * back_translate_dna_seq ( char *in_seq,char *out_seq, int mode);
392 Alignment *back_translate_dna_aln (Alignment *A);
393 Sequence  *translate_dna_seqS     (Sequence *S, int frame, int stop);
394 Alignment *translate_dna_aln (Alignment *A, int frame);
395 char *dna_seq2pep_seq (char *seq, int frame);
396
397 Alignment *clean_gdna_aln (Alignment *A);
398 Alignment *clean_cdna_aln (Alignment *A);
399 Alignment *clean_est      (Alignment *A);
400 /**************************************************************************************************/
401 /********************************                      ********************************************/
402 /********************************    PROCESSING        ********************************************/
403 /********************************                      ********************************************/
404 void modify_data  (Sequence_data_struc *D1, Sequence_data_struc *D2, Sequence_data_struc *DST, char **action_list,int n_actions, Action_data_struc *RAD);
405
406 //
407 // Name manipulation
408 //
409
410 Alignment *clean_aln (Alignment *A);
411 Sequence *clean_sequence ( Sequence *S);
412 char ** translate_names (int n, char **name);
413 char * translate_name ( char *name);
414 char *decode_name (char *name, int mode);
415 FILE * display_sequences_names (Sequence *S, FILE *fp, int check_pdb_status, int print_templates);
416 Sequence *add_file2file_list (char *name, Sequence *S);
417
418 #endif