/// NOTE(review): presumably the number of entries added whenever a buffer has to grow - confirm at the use sites.
3 #define ENLARGEMENT_PER_STEP 50
/// NOTE(review): presumably the growth increment for profile storage - confirm at the use sites.
4 #define PROFILE_ENLARGEMENT 550
6 // static char pos2aa[] = {'A','C','D','E','F','G','H','I','K','L','M','N','P','Q','R','S','T','V','W','Y'};
10 * \brief Struct to save a diagonal
15 ///Start of diagonal in seq1.
17 ///Start of diagonal in seq2.
19 ///Length of the diagonal.
21 ///expansion at the beginning;
23 ///expansion at the end;
32 * \brief Struct to save all information of a profile.
37 /// Number of sequences in this profile
39 /// number of the profile
41 ///0 = combination of two profiles, 1 = profile of a single sequence -> name1 = seq_name
43 ///length of the profile
45 ///weight of the sequence
47 ///saves the amount of allocated memory
49 ///the profile itself [alphabet_size][profile_length]
51 ///number_of_sequences
52 int number_of_sequences;
57 * \brief Struct to save all parameters for fastal.
62 /// size of the alphabet
64 /// converting char2position (for profile)
66 /// converting pos2char (for profile)
70 /// gap extension costs
74 ///method to align profile
78 ///saves the diag method -> move to sparse!
84 * \brief Struct to save the parameters and memory for the sparse dynamic programming algorithm.
88 /// saves the diagonals
92 /// list of points to be considered during the alignment process
94 /// number of points in \a list
98 // static char *file_name1 = vtmpnam(NULL);
99 // static char *file_name2 = vtmpnam(NULL);
101 Sparse_dynamic_param;
116 * \brief Struct to save the parameters and memory for the needleman-wunsch algorithm.
120 /// dynamic programming matrix
121 double ** dyn_matrix;
122 /// length of dimension 1
124 /// length of dimension 2
126 /// summed up version of profile
128 /// number of entries in \a sumup_prf
136 * \brief Struct to save the parameters and memory for the Gotoh algorithm.
140 /// dynamic programming matrix
142 /// dynamic programming matrix
144 /// dynamic programming matrix
146 /// length of dimension 1
148 /// length of dimension 2
150 /// summed up version of profile
152 /// number of entries in \a sumup_prf
/// Takes a single int \a number and returns nothing.
/// NOTE(review): presumably generates a random guide tree for \a number sequences - confirm against the definition.
180 void generate_random_tree(int number);
/// Declared to return a Fastal_profile pointer, given a sequence name, the sequence string and an int \a number.
/// NOTE(review): who owns/frees the returned profile is not visible here - confirm (free_fastal_profile() is declared in this header).
183 Fastal_profile* make_profile_of_sequence(char *seq_name, char *sequence, int number);
189 //********************* input/output **********************************
/// NOTE(review): judging by the names, the next three functions convert between file contents and a Fastal_profile - confirm against the definitions.
/// All three take the profile through a pointer and receive the shared Fastal_param settings.
190 void file2profile(FILE* profile_f, Fastal_profile *profile, int prf_number, Fastal_param *param_set);
/// Same parameter list as file2profile() plus a long \a off_set (presumably a byte offset into \a seq_file - TODO confirm).
191 void file_pos2profile(FILE *seq_file, long off_set, Fastal_profile *profile, int prf_number, Fastal_param *param_set);
192 void profile2file(Fastal_profile *profile, FILE* prf_f, Fastal_param *param_set);
/// Takes a file name and a long** out-parameter \a result; returns an int.
/// NOTE(review): presumably \a result receives an array of file positions and the return value is their count - confirm.
195 int make_index_of_file(char *file_name, long **result);
198 //********************* pairwise alignment methods ************************
/// Profile-vs-profile alignment entry point taking a caller-supplied double** matrix and an int**/int* pair for the summed-up profile.
/// NOTE(review): despite its name, \a edit_file_name is a FILE*, not a file name.
/// NOTE(review): meaning of the int return value is not visible here - confirm.
201 int prf_nw(Fastal_profile *profile1, Fastal_profile *profile2, double **prog_matrix, FILE *edit_file_name, int **sumup_prf, int *sumup_length, Fastal_param *param_set);
/// NOTE(review): presumably traces back through \a prog_matrix and writes the edit operations to \a edit_f - confirm.
202 int nw_matrix2edit_file(double **prog_matrix, Fastal_profile *profile1, Fastal_profile *profile2, FILE *edit_f, int **prf_field, int *field_length, Fastal_param *param_set);
/// Takes an int** buffer \a sumup_prf and returns an int**.
/// NOTE(review): whether the returned pointer is the same buffer or a reallocated one is not visible - confirm.
203 int** sumup_profile(Fastal_profile *profile, int **sumup_prf, Fastal_param *param_set);
/// NOTE(review): presumably writes a summed-up profile of \a length columns to \a file - confirm.
204 void write2file(int **sumup_prf, int length, FILE *file, int number, int num_sequences, Fastal_param *param_set);
/// Gotoh helpers: free_gotoh() and fill_arguments_gotoh() take the same (Gotoh_param*, alphabet size) pair.
/// NOTE(review): presumably fill_arguments_gotoh() allocates what free_gotoh() releases - confirm.
207 void free_gotoh(Gotoh_param* method_arguments_p, int alphabet_size);
208 void fill_arguments_gotoh(Gotoh_param* method_arguments_p, int alphabet_size);
/// Profile-vs-profile alignment entry point using a Gotoh_param argument block.
/// NOTE(review): despite its name, \a edit_file_name is a FILE*, not a file name.
/// NOTE(review): meaning of the int return value is not visible here - confirm.
209 int prf_gotoh(Fastal_profile *profile1, Fastal_profile *profile2, FILE *edit_file_name, Gotoh_param *arguments, Fastal_param *param_set);
211 //Sparse dynamic programming
/// free_sparse() and fill_arguments_sparse() both take only the Sparse_dynamic_param pointer.
/// NOTE(review): presumably fill_arguments_sparse() allocates what free_sparse() releases - confirm.
212 void free_sparse(Sparse_dynamic_param* method_arguments_p);
213 void fill_arguments_sparse(Sparse_dynamic_param* method_arguments_p);
/// Takes a flat int array of diagonals plus both sequences; returns an int** and reports a count through \a num_points.
/// NOTE(review): layout of the returned list is not visible here - confirm.
214 int **diagonals2int(int *diagonals, int num_diagonals, char *seq1, char *seq2, int *num_points, Fastal_param *param_set);
/// Takes two sequence file names; \a diagonals and \a dig_length are pointer parameters (presumably in/out - TODO confirm).
215 int seq_pair2blast_diagonal(char *seq_file_name1, char *seq_file_name2, int **diagonals, int *dig_length, int l1, int l2, int is_dna);
/// Takes an array of profile pointers and the method arguments as an untyped void*.
/// NOTE(review): \a method_arguments_p is void* here although the helpers above take Sparse_dynamic_param* - presumably cast inside; confirm.
216 int sparse_dyn(Fastal_profile **profiles, Fastal_param *param_set, void *method_arguments_p, int is_dna, FILE *edit_file, FILE *prof_file, int number);
/// Returns a char* derived from \a profile.
/// NOTE(review): ownership of the returned string is not visible here - confirm.
217 char *profile2consensus(Fastal_profile *profile, Fastal_param *param_set);
/// The next three variants share one signature: the diagonals2int() parameter list extended by the two profiles.
218 int ** diagonals2int_gap_test(int *diagonals, int num_diagonals, char *seq1, char *seq2, Fastal_profile *profile1, Fastal_profile *profile2, int *num_points, Fastal_param *param_set);
219 int ** diagonals2int_euclidf(int *diagonals, int num_diagonals, char *seq1, char *seq2, Fastal_profile *profile1, Fastal_profile *profile2, int *num_points, Fastal_param *param_set);
220 int ** diagonals2int_dot(int *diagonals, int num_diagonals, char *seq1, char *seq2, Fastal_profile *profile1, Fastal_profile *profile2, int *num_points, Fastal_param *param_set);
/// Same parameter list as seq_pair2blast_diagonal().
221 int seq_pair2blastz_diagonal(char *seq_file_name1, char *seq_file_name2, int **diagonals, int *dig_length, int l1, int l2, int is_dna);
/// Takes an int** \a list with \a n entries plus two output FILE handles.
/// NOTE(review): presumably aligns along the listed points and writes edit/profile output for node \a node_number - confirm.
222 int list2linked_pair_wise_fastal(Fastal_profile *prf1, Fastal_profile *prf2, Fastal_param *param_set, int **list, int n, FILE *edit_f, FILE *prof_f, int node_number);
224 //edit_files 2 alignment
/// Takes the sequence file and edit file together with arrays of long positions for each, a node number and a caller-supplied \a aligned_sequence buffer.
/// NOTE(review): presumably replays the stored edit operations to emit the final alignment into \a alignment_file - confirm.
225 void edit2alignment(FILE *sequence_file, long *seq_positions, FILE *edit_file, long *edit_positions, int node_number, int number_of_sequences, char *aligned_sequence, int alignment_length, FILE *edit_seq_file, int offset, FILE* alignment_file);
/// Takes a single long \a sequence_position into \a sequence_file.
/// NOTE(review): presumably writes one aligned sequence to \a alignment_file - confirm.
226 void edit_seq2aligned_seq(char *aligned_sequence, FILE *sequence_file, long sequence_position, FILE *alignment_file);
/// Entry point taking main()-style (argc, argv) arguments.
/// NOTE(review): meaning of the int return value is not visible here - confirm.
230 int fastal(int argc, char **argv);
/// Takes an int** \a alignment of \a alignment_length plus edit and profile FILE handles.
231 void alignment2files(Fastal_profile **profiles, Fastal_param *param_set,int **alignment, int alignment_length, FILE *edit_f, FILE *prof_f, int node_number);
/// Both scoring functions share one signature: alignment file name, int** score matrix, and gap costs \a gop / \a gep as doubles.
/// NOTE(review): presumably gop = gap opening and gep = gap extension penalty - confirm.
234 double calculate_sum_of_pairs_score_affine(char *alignment_file_name, int **score_matrix, double gop, double gep);
235 double calculate_sum_of_pairs_score_affine_test(char *alignment_file_name, int **score_matrix, double gop, double gep);
/// NOTE(review): number of entries expected in \a profile_files / \a profiles is not visible here - confirm.
236 void initiate_profile_files(FILE **profile_files);
237 void initiate_profiles(Fastal_profile **profiles, Fastal_param *param_set);
238 void free_fastal_profile(Fastal_profile *profile, int alphabet_size);
/// Takes the old and new extents of both dimensions and returns a double**.
/// NOTE(review): callers presumably must use the returned pointer in place of \a dyn_matrix - confirm.
239 double **resize_dyn_matrix(double **dyn_matrix, int old_length1, int old_length2, int length1, int length2);
/// NOTE(review): presumably releases the \a length1 rows and then the matrix itself - confirm.
240 void free_dyn_matrix(int length1, double **dyn_matrix);
/// Takes the method, diagonal method and matrix as strings and writes into \a param_set.
241 void fill_parameters(int is_dna, Fastal_param *param_set, char *method, char *diag_method, char *mat);
/// Unlike the seq_pair2blast*_diagonal() functions, this takes the sequences as strings rather than file names, plus a \a word_length.
244 int seq_pair2diagonal_own(char *seq1, char *seq2, int **diagonals, int *dig_length, int l1, int l2, int is_dna, int word_length);
254 ///field saving the positions [x1,y1,l1,x2,y2,l2,...]
256 /// points to the current used segment
258 /// saves the previous diagonal position.
/// Takes an array of Diagonal with its count passed by pointer and returns a Segment pointer.
/// NOTE(review): \a num_diagonals is a pointer, so it may be updated by the call; ownership of the result is not visible - confirm.
266 Segment* extend_diagonals(Diagonal *diagonals, int *num_diagonals, int l1, int l2);
/// Same parameter list as seq_pair2blast_diagonal(), except that \a diagonals is Diagonal** rather than int**.
267 int seq_pair2blast_diagonal2(char *seq_file_name1, char *seq_file_name2, Diagonal **diagonals, int *dig_length, int l1, int l2, int is_dna);
/// Same parameter list as the diagonals2int_*() profile variants, except that \a diagonals is Segment* rather than int*.
269 int ** segments2int(Segment *diagonals, int num_diagonals, char *seq1, char *seq2, Fastal_profile *profile1, Fastal_profile *profile2, int *num_points, Fastal_param *param_set);
270 // #include "string.h"
272 struct Fastal_arguments
/// Takes a single string \a param and the Fastal_arguments structure to update.
/// NOTE(review): presumably parses the tree-related option value - confirm.
314 void tree_parse(struct Fastal_arguments *arguments, char* param);
/// Takes main()-style (argc, argv) and the Fastal_arguments structure to fill.
318 void arg_parse (int argc, char **argv, struct Fastal_arguments *arguments);
/// NOTE(review): this prototype repeats, with an identical signature, the declaration of calculate_sum_of_pairs_score_affine() made earlier in this file; the repeat is redundant but harmless.
323 double calculate_sum_of_pairs_score_affine(char *alignment_file_name, int **score_matrix, double gop, double gep);
326 //compare with reference alignment
/// Takes three file names (sequences, tree, reference alignment) and an int \a num_seq_in_ref.
/// NOTE(review): which of the files are inputs and which are written is not visible here - confirm.
327 void make_ref_alignment(char *seq_file_name, char *tree_file_name, char *ref_aln_name, int num_seq_in_ref);
/// Takes two alignment file names and returns a double.
/// NOTE(review): range and meaning of the score are not visible here - confirm.
328 double agreement_score(char *ref_file_name, char *aln_file_name);
/// Takes a sequence string, a profile to write into and an int lookup table \a char2pos.
331 void seq2profile2(char *seq, Fastal_profile *prf, int *char2pos);
/// NOTE(review): despite its name, \a aln_file_name is a FILE*, not a file name.
/// NOTE(review): presumably splits the alignment into a gap and a no-gap profile according to column \a index - confirm.
332 void split_set(FILE *aln_file_name, Fastal_profile *gap_prf, Fastal_profile *no_gap_prf, char *seq, int index, int *char2pos, char* split_file_name);
/// Takes the method arguments as an untyped void*, input and output alignment file names, and an iteration number.
333 void iterate(Fastal_param *param, void *method_arguments_p, char *aln_file_name, char *out_file_name, int iteration_number);
/// Takes an edit file and two caller-supplied char buffers \a seq1 / \a seq2.
/// NOTE(review): required buffer sizes are not visible here - confirm.
335 void edit2seq_pattern(FILE *edit_file, char *seq1, char *seq2);
/// Takes \a gap_list and returns an int*; \a gap_list_length and \a num_gaps are passed by pointer.
/// NOTE(review): the returned pointer presumably replaces \a gap_list (it may be reallocated) - confirm.
336 int *del_gap_from_profile(Fastal_profile *prf, int alphabet_size, int *gap_list, int *gap_list_length, int *num_gaps);
/// Takes old/new alignment file names, a gap file name, and for each of two sequences a string, a gap list and a gap count.
338 void write_iterated_aln(char* old_aln_file_name, char* new_aln_file_name, char *gap_file_name, char *seq1, int *gap_list1, int num_gap1, char *seq2, int *gap_list2, int num_gap2);
339 /******************************COPYRIGHT NOTICE*******************************/
340 /*© Centro de Regulacio Genomica */
342 /*Cedric Notredame */
343 /*Fri Feb 18 08:27:45 CET 2011 - Revision 596. */
344 /*All rights reserved.*/
345 /*This file is part of T-COFFEE.*/
347 /* T-COFFEE is free software; you can redistribute it and/or modify*/
348 /* it under the terms of the GNU General Public License as published by*/
349 /* the Free Software Foundation; either version 2 of the License, or*/
350 /* (at your option) any later version.*/
352 /* T-COFFEE is distributed in the hope that it will be useful,*/
353 /* but WITHOUT ANY WARRANTY; without even the implied warranty of*/
354 /* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the*/
355 /* GNU General Public License for more details.*/
357 /* You should have received a copy of the GNU General Public License*/
358 /* along with Foobar; if not, write to the Free Software*/
359 /* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA*/
360 /*............................................... |*/
361 /* If you need some more information*/
362 /* cedric.notredame@europe.com*/
363 /*............................................... |*/
367 /******************************COPYRIGHT NOTICE*******************************/