--- /dev/null
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "io_lib_header.h"
+#include "util_lib_header.h"
+#include "fastal_lib_header.h"
+
+
+
+/*!
+ * \file diagonal.c
+ * \brief Source code for the calculation and preparation of diagonals
+ */
+
+
+
+
+
+
+
+int
+diagonal_compare (const void * a, const void * b)
+{
+
+ int tmp = ((((Diagonal*)a)->y - ((Diagonal*)a)->x) - (((Diagonal*)b)->y - ((Diagonal*)b)->x));
+ if (tmp != 0)
+ return tmp;
+ return (((Diagonal*)a)->y - ((Diagonal*)b)->y);
+}
+
+int
+max(int a, int b)
+{
+ if (a < b)
+ return b;
+ else
+ return a;
+}
+
+
+
+Segment*
+extend_diagonals(Diagonal *diagonals, int *num_diagonal, int l1, int l2)
+{
+// sort diagonal by diagonal number
+ int i;
+// printf("INPUT_PRE\n");
+// for (i = 0; i < num_diagonals; ++i)
+// {
+// printf("%i %i %i\n",diagonals[i].x, diagonals[i].y, diagonals[i].length);
+// }
+ int num_diagonals = *num_diagonal;
+ qsort (diagonals, num_diagonals, sizeof(Diagonal), diagonal_compare);
+
+
+// find nearest diagonal and expand
+// make shure that overlapping segments on the same diagonal are merged
+
+ int diff;
+// printf("INPUT\n");
+ for (i = 0; i < num_diagonals; ++i)
+ {
+ printf("%i %i %i\n",diagonals[i].x, diagonals[i].y, diagonals[i].length);
+ }
+ for (i = 0; i < num_diagonals; ++i)
+ {
+ diagonals[i].end_exp = 0;
+ diagonals[i].front_exp = 0;
+ }
+ int first = 0;
+ int next = 1;
+ while (next < num_diagonals)
+ {
+
+ if (!((diagonals[next].y >= diagonals[first].y) && diagonals[next].x <= diagonals[first].x))
+ {
+ printf("%i %i %i %i %i %i\n",diagonals[first].x, diagonals[first].y, diagonals[next].x, diagonals[next].y, diagonals[first].y - diagonals[first].x, diagonals[next].y - diagonals[next].x);
+ diff = diagonals[next].y - (diagonals[first].y + diagonals[first].length);
+ if (diff > 0)
+ {
+ if ((diagonals[first].y - diagonals[first].x) != ((diagonals[next].y - diagonals[next].x)))
+ {
+ diagonals[first].end_exp = max(diagonals[first].end_exp, diff);
+ diagonals[next].front_exp = max(diagonals[next].front_exp, diff);
+ // if (diagonals[next].x - diagonals[next].front_exp < 0)
+
+ while (diagonals[++first].x == -1);
+ }
+ else
+ {
+ printf("ICH TUWAS\n");
+ diagonals[first].length = diagonals[next].y + diagonals[next].length - diagonals[first].y;
+ diagonals[first].end_exp = diagonals[next].end_exp;
+ diagonals[next].x = -1;
+ }
+ }
+ diff = diagonals[first].x - (diagonals[next].x + diagonals[next].length);
+ if (diff > 0)
+ {
+ // diff = diagonals[next].x + diagonals[next].length - diagonals[first].x;
+
+ diagonals[first].front_exp = max(diagonals[first].front_exp, diff);
+ diagonals[next].end_exp = max(diagonals[next].end_exp, diff);
+ // ++num_segments;
+ // if (diagonals[first].x - diagonals[first].front_exp < 0)
+
+ while (diagonals[++first].x == -1);
+ }
+ } else
+ ++ first;
+ ++next;
+
+ }
+
+ int num_segments =0;
+ Segment *seg = vcalloc(num_diagonals, sizeof(Segment));
+ int pos = 0;
+// int diag_num1, diag_num2;
+ int *tmp1;
+ Diagonal *tmp2;
+ for (i = 0; i < num_diagonals; ++i)
+ {
+ if (diagonals[i].x != -1)
+ {
+ ++num_segments;
+ tmp2 = &diagonals[i];
+ seg[pos].segments = vcalloc(6, sizeof(int));
+ seg[pos].current_pos = seg[pos].segments;
+ tmp1 = seg[pos].segments;
+ tmp1[0] = tmp2->x - tmp2->front_exp;
+ tmp1[1] = tmp2->y - tmp2->front_exp;
+ tmp1[2] = tmp2->front_exp + tmp2->length + tmp2->end_exp;
+ tmp1[5] = 1;
+ tmp1[4] = l2;
+ tmp1[3] = tmp1[0] + (l2 - tmp1[1]);
+ ++pos;
+ }
+ }
+
+ for ( i = 0; i < num_segments; ++i )
+ {
+ printf("%i %i %i || %i %i %i\n", seg[i].current_pos[0], seg[i].current_pos[1], seg[i].current_pos[2], seg[i].current_pos[3], seg[i].current_pos[4], seg[i].current_pos[5]);
+ }
+// exit(1);
+ vrealloc(seg,num_segments*sizeof(Segment));
+
+ *num_diagonal = num_segments;
+
+// vfree(diagonals);
+ return seg;
+}
+
+
+int **
+segments2int(Segment *diagonals,
+ int num_diagonals,
+ char *seq1,
+ char *seq2,
+ Fastal_profile *profile1,
+ Fastal_profile *profile2,
+ int *num_points,
+ Fastal_param *param_set)
+{
+// printf("NUM: %i\n", num_diagonals);
+ int l1 = strlen(seq1);
+ int l2 = strlen(seq2);
+ int gep = param_set->gep;
+
+ int dig_length;
+ if (seq1 > seq2)
+ dig_length = l1;
+ else
+ dig_length = l2;
+
+ int current_size = num_diagonals*dig_length + l1 +l2;
+
+ int **list = vcalloc(current_size, sizeof(int*));
+// int *diags = vcalloc(num_diagonals, sizeof(int));
+ int i;
+// for (i = 0; i < num_diagonals; ++i)
+// {
+// diags[i] = l1 - diagonals[i*3] + diagonals[i*3+1];
+// }
+
+// qsort (diags, num_diagonals, sizeof(int), fastal_compare);
+
+
+// int *diagx = vcalloc(num_diagonals, sizeof(int));
+// int *diagy = vcalloc(num_diagonals, sizeof(int));
+
+
+ //+1 because diagonals start here at position 1, like in "real" dynamic programming
+ int a = -1, b = -1;
+ for (i = 0; i < num_diagonals; ++i)
+ {
+ if (diagonals[i].current_pos[1] - diagonals[i].current_pos[0] < 0)
+ {
+// diagx[i] = l1 - diags[i];
+// diagy[i] = 0;
+ a= i;
+ }
+ else
+ break;
+ }
+ ++a;
+ b=a-1;
+ for (; i < num_diagonals; ++i)
+ {
+// diagx[i] = 0;
+// diagy[i] = diags[i]-l1;
+ diagonals[i].prev_position = diagonals[i].current_pos[1] - diagonals[i].current_pos[0];
+ b = i;
+ }
+
+// vfree(diags);
+ int tmpy_pos;
+ int tmpy_value;
+ int **M = param_set->M;
+ int *last_y = vcalloc(l2+1, sizeof(int));
+ int *last_x = vcalloc(l1+1, sizeof(int));
+ last_y[0] = 0;
+
+ last_x[0] = 0;
+ list[0] = vcalloc(6, sizeof(int));
+
+ int list_pos = 1;
+ int dig_num = l1;
+ int tmp_l2 = l2 + 1;
+
+ //left border
+ for (; list_pos < tmp_l2; ++list_pos)
+ {
+ list[list_pos] = vcalloc(6, sizeof(int));
+ list[list_pos][0] = 0;
+ list[list_pos][1] = list_pos;
+ last_y[list_pos] = list_pos;
+ list[list_pos][2] = list_pos*gep;
+ list[list_pos][4] = list_pos-1;
+ }
+
+ int pos_x = 0;
+ int y;
+ int tmp_l1 = l1-1;
+ int tmpx, tmpy;
+ while (pos_x < tmp_l1)
+ {
+ if (list_pos + num_diagonals+2 > current_size)
+ {
+ current_size += num_diagonals*1000;
+ list = vrealloc(list, current_size * sizeof(int*));
+ }
+ //upper border
+ list[list_pos] = vcalloc(6, sizeof(int));
+ list[list_pos][0] = ++pos_x;
+ list[list_pos][1] = 0;
+ list[list_pos][2] = pos_x * gep;
+ list[list_pos][3] = last_y[0];
+// tmpy_value = list_pos;
+// tmpy_pos = 0;
+ last_x[pos_x] = list_pos;
+ ++list_pos;
+
+ //diagonals
+ for (i = a; i <= b; ++i)
+ {
+
+ if (pos_x == diagonals[i].current_pos[0])
+ {
+ if (diagonals[i].current_pos[2] == 0)
+ {
+ diagonals[i].current_pos+=3;
+ }
+ list[list_pos] = vcalloc(6, sizeof(int));
+ tmpx = diagonals[i].current_pos[0];
+ tmpy = diagonals[i].current_pos[1];
+ list[list_pos][0] = tmpx;
+ list[list_pos][1] = tmpy;
+ list[list_pos][3] = last_y[tmpy];
+ list[list_pos][4] = list_pos-1;
+ list[list_pos][5] = diagonals[i].prev_position;//last_y[tmpy-1];
+ diagonals[i].prev_position = list_pos;
+// printf("A: %i %i\n",tmpx-1, tmpy-1);
+ list[list_pos][2] = M[toupper(seq1[tmpx-1])-'A'][toupper(seq2[tmpy-1])-'A'];
+ last_y[tmpy] = list_pos;
+// tmpy_value = list_pos;
+// tmpy_pos = tmpy;
+ --diagonals[i].current_pos[2];
+ ++diagonals[i].current_pos[0];
+ ++diagonals[i].current_pos[1];
+ ++list_pos;
+ }
+ }
+// last_y[tmpy_pos] = tmpy_value;
+
+
+ //lower border
+ if (list[list_pos-1][1] != l2)
+ {
+ list[list_pos] = vcalloc(6, sizeof(int));
+ list[list_pos][0] = pos_x;
+ list[list_pos][1] = l2;
+ list[list_pos][3] = last_y[l2];
+
+ list[list_pos][2] = -1000;
+ list[list_pos][4] = list_pos-1;
+ if (pos_x > l2)
+ list[list_pos][5] = last_x[pos_x-l2];
+ else
+ list[list_pos][5] = l2-pos_x;
+ last_y[l2] = list_pos;
+ ++list_pos;
+
+ }
+
+
+ if ((b >= 0) && (diagonals[b].current_pos[1] > l2))
+ --b;
+
+ if ((a >0) && (diagonals[a-1].current_pos[0] - diagonals[a-1].current_pos[1] == pos_x))
+ {
+ --a;
+ diagonals[a].prev_position = last_x[pos_x-1];
+ }
+ }
+
+
+ dig_num = -1;
+ if (list_pos + l2+2 > current_size)
+ {
+ current_size += list_pos + l2 + 2;
+ list = vrealloc(list, current_size * sizeof(int*));
+ }
+
+
+// right border
+ list[list_pos] = vcalloc(6, sizeof(int));
+ list[list_pos][0] = l1;
+ list[list_pos][1] = 0;
+ list[list_pos][3] = last_x[l1-1];
+ list[list_pos][2] = -1000;
+ ++list_pos;
+
+
+
+ for (i = 1; i <= l2; ++i)
+ {
+ list[list_pos] = vcalloc(6, sizeof(int));
+ list[list_pos][0] = l1;
+ list[list_pos][1] = i;
+ list[list_pos][3] = last_y[i];
+ list[list_pos][4] = list_pos-1;
+ y = last_y[i-1];
+ if ((list[y][0] == l1-1) && (list[y][1] == i-1))
+ {
+ list[list_pos][5] = y;
+ list[list_pos][2] = M[toupper(seq1[l1-1])-'A'][toupper(seq2[i-1])-'A'];
+ }
+ else
+ {
+ if (i <= l1)
+ {
+ list[list_pos][5] = last_x[l1-i];
+ }
+ else
+ {
+ list[list_pos][5] = i-l1;
+ }
+ list[list_pos][2] = -1000;
+ }
+ ++list_pos;
+ }
+
+ list[list_pos - l2][2] = -1000;
+
+ *num_points = list_pos;
+// vfree(diagx);
+// vfree(diagy);
+
+
+ return list;
+}
+
+
+
+
+int
+seq_pair2blast_diagonal2(char *seq_file_name1,
+ char *seq_file_name2,
+ Diagonal **diagonals,
+ int *dig_length,
+ int l1,
+ int l2,
+ int is_dna)
+{
+ char *out_file = vtmpnam(NULL);
+ char blast_command[200];
+ char blast_command2[200];
+
+ if (is_dna)
+ {
+ sprintf(blast_command, "bl2seq -p blastn -i %s -j %s -D 1 -g F -o %s -S 1 -F F", seq_file_name1, seq_file_name2, out_file);
+ }
+ else
+ {
+ sprintf(blast_command, "bl2seq -p blastp -i %s -j %s -D 1 -g F -o %s -F F -S 1", seq_file_name1, seq_file_name2, out_file);
+ }
+ system(blast_command);
+
+ Diagonal *diags = diagonals[0];
+ FILE *diag_f = fopen(out_file,"r");
+ char line[300];
+ fgets(line, 300, diag_f);
+ fgets(line, 300, diag_f);
+ fgets(line, 300, diag_f);
+
+
+ char delims[] = "\t";
+ int length, pos_q, pos_d;
+ int current_pos = 0;
+ while (fgets(line, 300, diag_f) != NULL)
+ {
+ strtok(line, delims);
+ strtok(NULL, delims);
+ strtok(NULL, delims);
+ length = atoi(strtok(NULL, delims));
+ strtok(NULL, delims);
+ strtok(NULL, delims);
+ pos_q = atoi(strtok(NULL, delims));
+ strtok(NULL, delims);
+ pos_d = atoi(strtok(NULL, delims));
+
+ if (current_pos >= *dig_length)
+ {
+ (*dig_length) += 40;
+ diags = vrealloc(diags, sizeof(Diagonal)*(*dig_length));
+ }
+ diags[current_pos].x = pos_q;
+ diags[current_pos].y = pos_d;
+ diags[current_pos++].length = length;
+ }
+ fclose(diag_f);
+ diagonals[0] = diags;
+ return current_pos;
+}
+
+/******************************COPYRIGHT NOTICE*******************************/
+/*© Centro de Regulacio Genomica */
+/*and */
+/*Cedric Notredame */
+/*Fri Feb 18 08:27:45 CET 2011 - Revision 596. */
+/*All rights reserved.*/
+/*This file is part of T-COFFEE.*/
+/**/
+/* T-COFFEE is free software; you can redistribute it and/or modify*/
+/* it under the terms of the GNU General Public License as published by*/
+/* the Free Software Foundation; either version 2 of the License, or*/
+/* (at your option) any later version.*/
+/**/
+/* T-COFFEE is distributed in the hope that it will be useful,*/
+/* but WITHOUT ANY WARRANTY; without even the implied warranty of*/
+/* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the*/
+/* GNU General Public License for more details.*/
+/**/
+/* You should have received a copy of the GNU General Public License*/
+/* along with Foobar; if not, write to the Free Software*/
+/* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA*/
+/*............................................... |*/
+/* If you need some more information*/
+/* cedric.notredame@europe.com*/
+/*............................................... |*/
+/**/
+/**/
+/* */
+/******************************COPYRIGHT NOTICE*******************************/