#include #include #include #include "io_lib_header.h" #include "util_lib_header.h" #include "fastal_lib_header.h" /*! * \file diagonal.c * \brief Source code for the calculation and preparation of diagonals */ int diagonal_compare (const void * a, const void * b) { int tmp = ((((Diagonal*)a)->y - ((Diagonal*)a)->x) - (((Diagonal*)b)->y - ((Diagonal*)b)->x)); if (tmp != 0) return tmp; return (((Diagonal*)a)->y - ((Diagonal*)b)->y); } int max(int a, int b) { if (a < b) return b; else return a; } Segment* extend_diagonals(Diagonal *diagonals, int *num_diagonal, int l1, int l2) { // sort diagonal by diagonal number int i; // printf("INPUT_PRE\n"); // for (i = 0; i < num_diagonals; ++i) // { // printf("%i %i %i\n",diagonals[i].x, diagonals[i].y, diagonals[i].length); // } int num_diagonals = *num_diagonal; qsort (diagonals, num_diagonals, sizeof(Diagonal), diagonal_compare); // find nearest diagonal and expand // make shure that overlapping segments on the same diagonal are merged int diff; // printf("INPUT\n"); for (i = 0; i < num_diagonals; ++i) { printf("%i %i %i\n",diagonals[i].x, diagonals[i].y, diagonals[i].length); } for (i = 0; i < num_diagonals; ++i) { diagonals[i].end_exp = 0; diagonals[i].front_exp = 0; } int first = 0; int next = 1; while (next < num_diagonals) { if (!((diagonals[next].y >= diagonals[first].y) && diagonals[next].x <= diagonals[first].x)) { printf("%i %i %i %i %i %i\n",diagonals[first].x, diagonals[first].y, diagonals[next].x, diagonals[next].y, diagonals[first].y - diagonals[first].x, diagonals[next].y - diagonals[next].x); diff = diagonals[next].y - (diagonals[first].y + diagonals[first].length); if (diff > 0) { if ((diagonals[first].y - diagonals[first].x) != ((diagonals[next].y - diagonals[next].x))) { diagonals[first].end_exp = max(diagonals[first].end_exp, diff); diagonals[next].front_exp = max(diagonals[next].front_exp, diff); // if (diagonals[next].x - diagonals[next].front_exp < 0) while (diagonals[++first].x == -1); } else { printf("ICH TUWAS\n"); diagonals[first].length = diagonals[next].y + diagonals[next].length - diagonals[first].y; diagonals[first].end_exp = diagonals[next].end_exp; diagonals[next].x = -1; } } diff = diagonals[first].x - (diagonals[next].x + diagonals[next].length); if (diff > 0) { // diff = diagonals[next].x + diagonals[next].length - diagonals[first].x; diagonals[first].front_exp = max(diagonals[first].front_exp, diff); diagonals[next].end_exp = max(diagonals[next].end_exp, diff); // ++num_segments; // if (diagonals[first].x - diagonals[first].front_exp < 0) while (diagonals[++first].x == -1); } } else ++ first; ++next; } int num_segments =0; Segment *seg = vcalloc(num_diagonals, sizeof(Segment)); int pos = 0; // int diag_num1, diag_num2; int *tmp1; Diagonal *tmp2; for (i = 0; i < num_diagonals; ++i) { if (diagonals[i].x != -1) { ++num_segments; tmp2 = &diagonals[i]; seg[pos].segments = vcalloc(6, sizeof(int)); seg[pos].current_pos = seg[pos].segments; tmp1 = seg[pos].segments; tmp1[0] = tmp2->x - tmp2->front_exp; tmp1[1] = tmp2->y - tmp2->front_exp; tmp1[2] = tmp2->front_exp + tmp2->length + tmp2->end_exp; tmp1[5] = 1; tmp1[4] = l2; tmp1[3] = tmp1[0] + (l2 - tmp1[1]); ++pos; } } for ( i = 0; i < num_segments; ++i ) { printf("%i %i %i || %i %i %i\n", seg[i].current_pos[0], seg[i].current_pos[1], seg[i].current_pos[2], seg[i].current_pos[3], seg[i].current_pos[4], seg[i].current_pos[5]); } // exit(1); vrealloc(seg,num_segments*sizeof(Segment)); *num_diagonal = num_segments; // vfree(diagonals); return seg; } int ** segments2int(Segment *diagonals, int num_diagonals, char *seq1, char *seq2, Fastal_profile *profile1, Fastal_profile *profile2, int *num_points, Fastal_param *param_set) { // printf("NUM: %i\n", num_diagonals); int l1 = strlen(seq1); int l2 = strlen(seq2); int gep = param_set->gep; int dig_length; if (seq1 > seq2) dig_length = l1; else dig_length = l2; int current_size = num_diagonals*dig_length + l1 +l2; int **list = vcalloc(current_size, sizeof(int*)); // int *diags = vcalloc(num_diagonals, sizeof(int)); int i; // for (i = 0; i < num_diagonals; ++i) // { // diags[i] = l1 - diagonals[i*3] + diagonals[i*3+1]; // } // qsort (diags, num_diagonals, sizeof(int), fastal_compare); // int *diagx = vcalloc(num_diagonals, sizeof(int)); // int *diagy = vcalloc(num_diagonals, sizeof(int)); //+1 because diagonals start here at position 1, like in "real" dynamic programming int a = -1, b = -1; for (i = 0; i < num_diagonals; ++i) { if (diagonals[i].current_pos[1] - diagonals[i].current_pos[0] < 0) { // diagx[i] = l1 - diags[i]; // diagy[i] = 0; a= i; } else break; } ++a; b=a-1; for (; i < num_diagonals; ++i) { // diagx[i] = 0; // diagy[i] = diags[i]-l1; diagonals[i].prev_position = diagonals[i].current_pos[1] - diagonals[i].current_pos[0]; b = i; } // vfree(diags); int tmpy_pos; int tmpy_value; int **M = param_set->M; int *last_y = vcalloc(l2+1, sizeof(int)); int *last_x = vcalloc(l1+1, sizeof(int)); last_y[0] = 0; last_x[0] = 0; list[0] = vcalloc(6, sizeof(int)); int list_pos = 1; int dig_num = l1; int tmp_l2 = l2 + 1; //left border for (; list_pos < tmp_l2; ++list_pos) { list[list_pos] = vcalloc(6, sizeof(int)); list[list_pos][0] = 0; list[list_pos][1] = list_pos; last_y[list_pos] = list_pos; list[list_pos][2] = list_pos*gep; list[list_pos][4] = list_pos-1; } int pos_x = 0; int y; int tmp_l1 = l1-1; int tmpx, tmpy; while (pos_x < tmp_l1) { if (list_pos + num_diagonals+2 > current_size) { current_size += num_diagonals*1000; list = vrealloc(list, current_size * sizeof(int*)); } //upper border list[list_pos] = vcalloc(6, sizeof(int)); list[list_pos][0] = ++pos_x; list[list_pos][1] = 0; list[list_pos][2] = pos_x * gep; list[list_pos][3] = last_y[0]; // tmpy_value = list_pos; // tmpy_pos = 0; last_x[pos_x] = list_pos; ++list_pos; //diagonals for (i = a; i <= b; ++i) { if (pos_x == diagonals[i].current_pos[0]) { if (diagonals[i].current_pos[2] == 0) { diagonals[i].current_pos+=3; } list[list_pos] = vcalloc(6, sizeof(int)); tmpx = diagonals[i].current_pos[0]; tmpy = diagonals[i].current_pos[1]; list[list_pos][0] = tmpx; list[list_pos][1] = tmpy; list[list_pos][3] = last_y[tmpy]; list[list_pos][4] = list_pos-1; list[list_pos][5] = diagonals[i].prev_position;//last_y[tmpy-1]; diagonals[i].prev_position = list_pos; // printf("A: %i %i\n",tmpx-1, tmpy-1); list[list_pos][2] = M[toupper(seq1[tmpx-1])-'A'][toupper(seq2[tmpy-1])-'A']; last_y[tmpy] = list_pos; // tmpy_value = list_pos; // tmpy_pos = tmpy; --diagonals[i].current_pos[2]; ++diagonals[i].current_pos[0]; ++diagonals[i].current_pos[1]; ++list_pos; } } // last_y[tmpy_pos] = tmpy_value; //lower border if (list[list_pos-1][1] != l2) { list[list_pos] = vcalloc(6, sizeof(int)); list[list_pos][0] = pos_x; list[list_pos][1] = l2; list[list_pos][3] = last_y[l2]; list[list_pos][2] = -1000; list[list_pos][4] = list_pos-1; if (pos_x > l2) list[list_pos][5] = last_x[pos_x-l2]; else list[list_pos][5] = l2-pos_x; last_y[l2] = list_pos; ++list_pos; } if ((b >= 0) && (diagonals[b].current_pos[1] > l2)) --b; if ((a >0) && (diagonals[a-1].current_pos[0] - diagonals[a-1].current_pos[1] == pos_x)) { --a; diagonals[a].prev_position = last_x[pos_x-1]; } } dig_num = -1; if (list_pos + l2+2 > current_size) { current_size += list_pos + l2 + 2; list = vrealloc(list, current_size * sizeof(int*)); } // right border list[list_pos] = vcalloc(6, sizeof(int)); list[list_pos][0] = l1; list[list_pos][1] = 0; list[list_pos][3] = last_x[l1-1]; list[list_pos][2] = -1000; ++list_pos; for (i = 1; i <= l2; ++i) { list[list_pos] = vcalloc(6, sizeof(int)); list[list_pos][0] = l1; list[list_pos][1] = i; list[list_pos][3] = last_y[i]; list[list_pos][4] = list_pos-1; y = last_y[i-1]; if ((list[y][0] == l1-1) && (list[y][1] == i-1)) { list[list_pos][5] = y; list[list_pos][2] = M[toupper(seq1[l1-1])-'A'][toupper(seq2[i-1])-'A']; } else { if (i <= l1) { list[list_pos][5] = last_x[l1-i]; } else { list[list_pos][5] = i-l1; } list[list_pos][2] = -1000; } ++list_pos; } list[list_pos - l2][2] = -1000; *num_points = list_pos; // vfree(diagx); // vfree(diagy); return list; } int seq_pair2blast_diagonal2(char *seq_file_name1, char *seq_file_name2, Diagonal **diagonals, int *dig_length, int l1, int l2, int is_dna) { char *out_file = vtmpnam(NULL); char blast_command[200]; char blast_command2[200]; if (is_dna) { sprintf(blast_command, "bl2seq -p blastn -i %s -j %s -D 1 -g F -o %s -S 1 -F F", seq_file_name1, seq_file_name2, out_file); } else { sprintf(blast_command, "bl2seq -p blastp -i %s -j %s -D 1 -g F -o %s -F F -S 1", seq_file_name1, seq_file_name2, out_file); } system(blast_command); Diagonal *diags = diagonals[0]; FILE *diag_f = fopen(out_file,"r"); char line[300]; fgets(line, 300, diag_f); fgets(line, 300, diag_f); fgets(line, 300, diag_f); char delims[] = "\t"; int length, pos_q, pos_d; int current_pos = 0; while (fgets(line, 300, diag_f) != NULL) { strtok(line, delims); strtok(NULL, delims); strtok(NULL, delims); length = atoi(strtok(NULL, delims)); strtok(NULL, delims); strtok(NULL, delims); pos_q = atoi(strtok(NULL, delims)); strtok(NULL, delims); pos_d = atoi(strtok(NULL, delims)); if (current_pos >= *dig_length) { (*dig_length) += 40; diags = vrealloc(diags, sizeof(Diagonal)*(*dig_length)); } diags[current_pos].x = pos_q; diags[current_pos].y = pos_d; diags[current_pos++].length = length; } fclose(diag_f); diagonals[0] = diags; return current_pos; } /******************************COPYRIGHT NOTICE*******************************/ /*© Centro de Regulacio Genomica */ /*and */ /*Cedric Notredame */ /*Fri Feb 18 08:27:45 CET 2011 - Revision 596. */ /*All rights reserved.*/ /*This file is part of T-COFFEE.*/ /**/ /* T-COFFEE is free software; you can redistribute it and/or modify*/ /* it under the terms of the GNU General Public License as published by*/ /* the Free Software Foundation; either version 2 of the License, or*/ /* (at your option) any later version.*/ /**/ /* T-COFFEE is distributed in the hope that it will be useful,*/ /* but WITHOUT ANY WARRANTY; without even the implied warranty of*/ /* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the*/ /* GNU General Public License for more details.*/ /**/ /* You should have received a copy of the GNU General Public License*/ /* along with Foobar; if not, write to the Free Software*/ /* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA*/ /*............................................... |*/ /* If you need some more information*/ /* cedric.notredame@europe.com*/ /*............................................... |*/ /**/ /**/ /* */ /******************************COPYRIGHT NOTICE*******************************/