Delete unneeded directory
[jabaws.git] / website / archive / binaries / mac / src / tcoffee / t_coffee_source / scoring.c
diff --git a/website/archive/binaries/mac/src/tcoffee/t_coffee_source/scoring.c b/website/archive/binaries/mac/src/tcoffee/t_coffee_source/scoring.c
deleted file mode 100644 (file)
index 04a89b9..0000000
+++ /dev/null
@@ -1,655 +0,0 @@
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <math.h>
-#include <ctype.h>
-#include <string.h>
-#include <sys/stat.h>
-#include <dirent.h>
-
-
-#include "io_lib_header.h"
-#include "util_lib_header.h"
-#include "fastal_lib_header.h"
-
-
-
-
-/**
- * \brief Function to calculate the sum of pairs score with quasi affine gap costs.
- *
- * \param alignment_file File with the aligned sequences in fasta_format.
- * \param file_index Positions of the sequences in \a alignment_file.
- * \param number_of_sequences The number of sequences in \a alignment_file.
- * \param score_matrix Matrix containing the scores.
- * \param gop The gap opening costs.
- * \param gep The gep extension costs.
- * \return The score of the alignment.
- */
-double
-calculate_sum_of_pairs_score_affine(char *alignment_file_name,
-                                                                       int **score_matrix,
-                                                                       double gop,
-                                                                       double gep)
-{
-
-       const int LINE_LENGTH = 1000;
-       int number_of_sequences = 0;
-       FILE *alignment_file = fopen(alignment_file_name,"r");
-       if (alignment_file == NULL)
-       {
-               printf("FILE COULD NOT BE OPENED!\n");
-               exit(1);
-       }
-       int alignment_length = 0;
-       char line[LINE_LENGTH+1];
-       line[0] = '0';
-
-       while (line[0] != '>')
-       {
-               fgets(line, LINE_LENGTH, alignment_file);
-       }
-       fgets(line, LINE_LENGTH, alignment_file);
-       while (line[0] != '>')
-       {
-               alignment_length += strlen(line);
-               fgets(line, LINE_LENGTH, alignment_file);
-       }
-
-       int alphabet_size = 28;
-       int **counting = vcalloc(alphabet_size, sizeof(int*));
-
-       int i;
-       for (i = 0; i < alphabet_size; ++i)
-       {
-               counting[i] = vcalloc(alignment_length, sizeof(int));
-       }
-
-       alphabet_size -= 2;
-       char c;
-       int was_gap = 0;
-       int pos_line, pos_alignment = -1;
-       fseek (alignment_file, 0, SEEK_SET);
-       pos_line = 0;
-
-       int gap_line = 0;
-       int current_size = 100;
-       int gap_size = 0;
-       int gap_pos = 0;
-
-       int **gaps = vcalloc(100, sizeof(int*));
-
-       while (fgets(line, LINE_LENGTH, alignment_file) != NULL)
-       {
-               if (line[0] != '>')
-               {
-                       pos_line = -1;
-                       while (((c = line[++pos_line]) != '\n') && (c != '\0'))
-                       {
-                               if (isalpha(c))
-                               {
-                                       ++counting[toupper(c)-'A'][++pos_alignment];
-                                       if (was_gap)
-                                       {
-                                               gaps[gap_line][gap_pos++] = pos_alignment-1;
-                                       }
-                                       was_gap = 0;
-                               }
-                               else
-                               {
-                                       ++counting[alphabet_size][++pos_alignment];
-                                       if (!was_gap)
-                                       {
-                                               ++counting[alphabet_size+1][pos_alignment];
-                                               was_gap = 1;
-                                               if (gap_pos >= gap_size-4)
-                                               {
-                                                       gap_size += 50;
-                                                       gaps[gap_line] = vrealloc(gaps[gap_line], gap_size * sizeof(int));
-                                                       gaps[gap_line][gap_pos++] = pos_alignment;
-                                               }
-                                       }
-                               }
-                       }
-               }
-               else
-               {
-                       if (number_of_sequences != 0)
-                       {
-                               if ((gap_pos % 2) != 0)
-                               {
-                                       gaps[gap_line][gap_pos++] = pos_alignment-1;
-                               }
-                               gaps[gap_line][gap_pos++] = alignment_length+2;
-                               gaps[gap_line][gap_pos] = alignment_length+2;
-                               ++gap_line;
-                       }
-                       if (current_size == gap_line)
-                       {
-                               current_size += 50;
-                               gaps = vrealloc(gaps, current_size * sizeof(int*));
-                       }
-                       gap_size = 2;
-                       gaps[gap_line] = vcalloc(gap_size, sizeof(int));
-                       ++number_of_sequences;
-                       pos_alignment = -1;
-                       was_gap = 0;
-
-                       gap_pos = 0;
-               }
-       }
-
-       gaps[gap_line][gap_pos++] = alignment_length+2;
-       gaps[gap_line][gap_pos] = alignment_length+2;
-
-
-       long double score = 0;
-       int j, k, tmp2;
-       int non_gap;
-       for (i = 0; i < alignment_length; ++i)
-       {
-               for (j = 0; j < alphabet_size; ++j)
-               {
-                       if ((tmp2 = counting[j][i]) >1)
-                       {
-                               while (tmp2 > 1)
-                               {
-                                       score += score_matrix[j][j] * (--tmp2);
-                               }
-                       }
-                       for (k = j+1; k < alphabet_size; ++k)
-                       {
-                               score += score_matrix[j][k] * counting[j][i] * counting[k][i];
-                       }
-
-               }
-
-
-               non_gap = number_of_sequences - counting[alphabet_size][i];
-               score += counting[alphabet_size][i] * non_gap  * gep;
-
-       }
-
-       ++gap_line;
-       alignment_length += 2;
-       unsigned long gap_open = 0;
-
-       int chunk = 500;
-
-//     #pragma omp parallel shared(gaps, chunk, alignment_length, gap_open) private(i, j)
-       {
-               int alignment_length2 = alignment_length;
-//             #pragma omp for schedule(dynamic,chunk) reduction(+:gap_open) nowait
-               for (i = 0; i < gap_line; ++i)
-               {
-                       int *gaps1 = gaps[i];
-                       for (j = i+1; j < gap_line; ++j)
-                       {
-                               int *gaps2 = gaps[j];
-                               int k = 0;
-                               int l = 0;
-                               while ((gaps1[k] != alignment_length2) && (gaps2[l] != alignment_length2))
-                               {
-                                       if (gaps1[k+1] < gaps2[l])
-                                       {
-                                               ++gap_open;
-                                               k+=2;
-                                               continue;
-                                       }
-                                       if (gaps2[l+1] < gaps1[k])
-                                       {
-                                               ++gap_open;
-                                               l+=2;
-                                               continue;
-                                       }
-                                       if (gaps1[k] < gaps2[l])
-                                       {
-                                               if (gaps1[k+1] < gaps2[l+1])
-                                               {
-                                                       ++gap_open;
-                                                       k+=2;
-                                                       continue;
-                                               }
-                                               else
-                                               {
-                                                       l+=2;
-                                                       continue;
-                                               }
-                                       }
-                                       if (gaps1[k] > gaps2[l])
-                                       {
-                                               if (gaps1[k+1] > gaps2[l+1])
-                                               {
-                                                       ++gap_open;
-                                                       l+=2;
-                                                       continue;
-                                               }
-                                               else
-                                               {
-                                                       k+=2;
-                                                       continue;
-                                               }
-                                       }
-                                       if (gaps1[k] == gaps2[l])
-                                       {
-                                               if (gaps1[k+1] == gaps2[l+1])
-                                               {
-                                                       k+=2;
-                                                       l+=2;
-                                                       continue;
-                                               }
-                                               else
-                                               {
-                                                       if (gaps1[k+1] <gaps2[l+1])
-                                                       {
-                                                               k+=2;
-                                                       }
-                                                       else
-                                                       {
-                                                               l+=2;
-                                                       }
-                                               }
-                                       }
-
-                               }
-                               while (gaps1[k] != alignment_length2)
-                               {
-                                       ++gap_open;
-                                       k+=2;
-                               }
-                               while (gaps2[l] != alignment_length2)
-                               {
-                                       ++gap_open;
-                                       l+=2;
-                               }
-                       }
-               }
-       }
-
-       score += gop * gap_open;
-
-
-       return score / number_of_sequences;
-}
-
-
-int
-compare (const void * a, const void * b)
-{
-       return (*(int*)a - *(int*)b);
-}
-
-
-
-int
-get_random_value(int a, int b)
-{
-       return rand() % (b - a + 1) + a;
-//     j = 1 + (int)( 10.0 * rand() / ( RAND_MAX + 1.0 ) );
-}
-
-
-
-
-
-void
-compute_ref_alignments(char *seq_file_name, char* ref_directory, int num_alignments, int num_seq_in_aln)
-{
-
-       printf("%s %s %i %i\n", seq_file_name, ref_directory, num_alignments, num_seq_in_aln);
-       //check if target directory exists, if not make it
-       char directory[500];
-       if (ref_directory[0]== '~')
-       {
-               sprintf(directory,"%s%s",getenv("HOME"),ref_directory+1);
-       }
-       else
-       {
-               sprintf(directory,"%s",ref_directory);
-       }
-       struct stat st;
-       if(stat(directory, &st) != 0)
-       {
-               mkdir(directory,0700);
-       }
-
-       printf("made\n");
-
-       // get sequences & produce alignments
-       //make index
-       long *file_positions = NULL;
-       long **tmp_pos = &file_positions;
-       int number_of_sequences = make_index_of_file(seq_file_name, tmp_pos);
-       char *tmp_file_name = vtmpnam(NULL);
-       FILE *seq_file = fopen(seq_file_name, "r");
-       int *already_taken = vcalloc(num_seq_in_aln, sizeof(int));
-       int i, j, k, tmp, already, num_already;
-       for (i = 0; i < num_alignments; ++i)
-       {
-               //choose randomly the sequences
-
-               num_already = 0;
-               while (num_already < num_seq_in_aln)
-               {
-                       already = 0;
-                       tmp = get_random_value(0, number_of_sequences-1);
-                       for (k = 0; k < num_already; ++k)
-                       {
-                               if (already_taken[k] == tmp)
-                               {
-                                       already = 1;
-                                       break;
-                               }
-                       }
-                       if (!already)
-                       {
-                               already_taken[num_already] = tmp;
-                               ++num_already;
-                       }
-               }
-               //write sequences into temporary file
-               const int LINE_LENGTH = 200;
-               char line[LINE_LENGTH];
-
-
-               FILE* tmp_file = fopen(tmp_file_name, "w");
-               for (k = 0; k < num_already; ++k)
-               {
-                       fseek(seq_file, file_positions[already_taken[k]], SEEK_SET);
-                       fgets(line, LINE_LENGTH, seq_file);
-                       fprintf(tmp_file, "%s", line);
-                       while( (fgets(line, LINE_LENGTH, seq_file)) != NULL)
-                       {
-                               if (line[0] == '>')
-                                       break;
-                               fprintf(tmp_file, "%s", line);
-                       }
-               }
-               fclose(tmp_file);
-
-               //calculate alignment
-               char aln_command[1000];
-               sprintf(aln_command,"t_coffee -in %s -outfile %s/%i.fa -output fasta_aln 2>/dev/null >/dev/null", tmp_file_name, directory, i);
-               system(aln_command);
-       }
-
-       vfree(already_taken);
-}
-
-
-
-// void
-// compute agreement_score(char *alignment_file_name, char *alignment_directory)
-// {
-//
-// }
-
-
-
-
-/**
- * This algorithm choses according to a tree a number of sequenes of which it computes a reference algnment.
- * \param seq_file_name The sequence file.
- * \param tree_file_name The given tree.
- * \param ref_aln_name The file where the reference alignment should be written to.
- * \param num_seq_in_ref The number of sequences to choose.
-**/
-void
-make_ref_alignment(char *seq_file_name, char *tree_file_name, char *ref_aln_name, int num_seq_in_ref)
-{
-       const int LINE_LENGTH = 200;
-       char line[LINE_LENGTH];
-
-       FILE *tree_f = fopen(tree_file_name,"r");
-
-
-       long *file_positions = NULL;
-       long **tmp = &file_positions;
-       int num_sequences = make_index_of_file(seq_file_name, tmp);
-
-
-       int every_x = num_sequences/(num_seq_in_ref);
-
-       int *seq_ids = vcalloc(num_seq_in_ref,sizeof(int));
-       char delims[] = " ";
-
-       int pos = -1;
-       fseek (tree_f, 0, SEEK_SET);
-       int node;
-       int dist = every_x;
-       while(fgets(line, LINE_LENGTH, tree_f)!=NULL)
-       {
-               node = atoi(strtok(line,delims));
-               if (node < num_sequences)
-               {
-                       if (dist == every_x)
-                       {
-
-                               seq_ids[++pos] = node;
-                               dist = 0;
-                       }
-                       else
-                       {
-                               ++dist;
-                       }
-               }
-
-               node = atoi(strtok(NULL,delims));
-               if (node < num_sequences)
-               {
-                       if (dist == every_x)
-                       {
-                               seq_ids[++pos] = node;
-                               dist = 0;
-                       }
-                       else
-                       {
-                               ++dist;
-                       }
-               }
-       }
-       fclose(tree_f);
-
-
-       int i = 0;
-       qsort(seq_ids, num_seq_in_ref, sizeof(int), compare);
-
-       char *seq_ref_name = vtmpnam(NULL);
-       FILE *seq_ref_f = fopen(seq_ref_name, "w");;
-       FILE *seq_f = fopen(seq_file_name, "r");
-
-       for (i = 0; i < num_seq_in_ref; ++i)
-       {
-               fseek (seq_f, file_positions[seq_ids[i]], SEEK_SET);
-               fgets(line, LINE_LENGTH, seq_f);
-               fprintf(seq_ref_f, "%s", line);
-               while(fgets(line, LINE_LENGTH, seq_f)!=NULL)
-               {
-                       if (line[0] == '>')
-                               break;
-                       fprintf(seq_ref_f, "%s", line);
-               }
-       }
-       fclose(seq_ref_f);
-       fclose(seq_f);
-
-       char command[500];
-       sprintf(command, "mafft --quiet %s > %s", seq_ref_name, ref_aln_name);
-       system(command);
-}
-
-
-/**
- * This function calculates the agreement between two alignments in a specified number of sequences.
- * \param ref_file_name The reference alignment.
- * \param aln_file_name The test alignment.
- **/
-double
-agreement_score(char *ref_file_name, char *aln_file_name)
-{
-       const int LINE_LENGTH = 200;
-       char line[LINE_LENGTH];
-
-       int number_of_sequences = 0;
-       FILE *ref_f = fopen(ref_file_name,"r");
-       while(fgets(line, LINE_LENGTH, ref_f)!=NULL)
-       {
-               if (line[0] == '>')
-               {
-                       ++number_of_sequences;
-               }
-       }
-       fseek(ref_f, 0, SEEK_SET);
-       char **seq_names = vcalloc(number_of_sequences, sizeof(char*));
-       int i = 0;
-       while(fgets(line, LINE_LENGTH, ref_f)!=NULL)
-       {
-               if (line[0] == '>')
-               {
-                       seq_names[i] = vcalloc(LINE_LENGTH, sizeof(char));
-                       sprintf(seq_names[i], "%s",line);
-                       ++i;
-               }
-       }
-
-
-       fclose(ref_f);
-       FILE *aln_f = fopen(aln_file_name,"r");
-       char *tmp_name = vtmpnam(NULL);
-       FILE *tmp_f = fopen(tmp_name,"w");
-       int x = 0;
-       long last_pos = -1;
-       while(fgets(line, LINE_LENGTH, aln_f)!=NULL)
-       {
-               if (line[0] == '>')
-               {
-                       for (i = 0; i < number_of_sequences; ++i)
-                       {
-                               if (!strcmp(line, seq_names[i]))
-                               {
-                                       fprintf(tmp_f, "%s", line);
-                                       last_pos = ftell(aln_f);
-                                       while(fgets(line, LINE_LENGTH, aln_f)!=NULL)
-                                       {
-                                               if (line[0] == '>')
-                                                       break;
-                                               fprintf(tmp_f, "%s", line);
-
-                                               last_pos = ftell(aln_f);
-                                       }
-                                       ++x;
-                                       fseek(aln_f, last_pos, SEEK_SET);
-                                       break;
-                               }
-                       }
-               }
-               if (x == number_of_sequences)
-                       break;
-       }
-       fclose(aln_f);
-       fclose(tmp_f);
-       char command[500];
-//     sprintf(command, "cp %s /users/cn/ckemena/Desktop/1.fa", tmp_name);
-//     system(command);
-//     fp = popen("ls -l", "r");
-       sprintf(command, "t_coffee -other_pg aln_compare -al1 %s -al2 %s ", ref_file_name, tmp_name);
-
-
-       FILE *result;
-       result = popen(command, "r");
-       fgets(line, LINE_LENGTH, result);
-       fgets(line, LINE_LENGTH, result);
-       fgets(line, LINE_LENGTH, result);
-
-//     sprintf(command, "cp %s ~/Destkop/1.fa", tmp_name);
-//     system(command);
-
-       char delims[] = " ";
-       char *tmp_str;
-       strtok(line, delims);
-       for (i = 0; i < 3; ++i)
-       {
-               while((tmp_str = strtok(NULL, delims))==NULL);
-       }
-       return(atof(tmp_str));
-}
-
-
-complete_agreement_score(char *aln_file_name, const char *ref_directory)
-{
-       struct dirent *dp;
-       char *name = strrchr(aln_file_name,'/')+1;
-//     printf("%s ",name);
-     // enter existing path to directory below
-       char directory[500];
-       if (ref_directory[0]== '~')
-       {
-               sprintf(directory,"%s%s",getenv("HOME"),ref_directory+1);
-       }
-       else
-       {
-               sprintf(directory,"%s",ref_directory);
-       }
-       DIR *dir = opendir(directory);
-
-       char ref_file_name[200];
-       while ((dp=readdir(dir)) != NULL)
-       {
-               if ((strcmp(dp->d_name,".")) && (strcmp(dp->d_name,"..")))
-               {
-
-                       sprintf(ref_file_name, "%s/%s",directory, dp->d_name);
-                       printf("%s %f\n",name, agreement_score(ref_file_name, aln_file_name));
-//                     printf("%f ", agreement_score(ref_file_name, aln_file_name));
-
-               }
-       }
-       printf("\n");
-       closedir(dir);
-       return 0;
-
-}
-
-
-//     char delims[] = " ";
-//     int node[3];
-//     int alignment_length = -1;
-//     node[2] = -1;
-
-
-       //bottom-up traversal
-//     while(fgets(line, LINE_LENGTH, tree_file)!=NULL)
-// {
-//             //read profiles
-//     node[0] = atoi(strtok(line,delims));
-//     node[1] = atoi(strtok(NULL,delims));
-//     node[2] = atoi(strtok(NULL,delims));
-/******************************COPYRIGHT NOTICE*******************************/
-/*© Centro de Regulacio Genomica */
-/*and */
-/*Cedric Notredame */
-/*Fri Feb 18 08:27:45 CET 2011 - Revision 596. */
-/*All rights reserved.*/
-/*This file is part of T-COFFEE.*/
-/**/
-/*    T-COFFEE is free software; you can redistribute it and/or modify*/
-/*    it under the terms of the GNU General Public License as published by*/
-/*    the Free Software Foundation; either version 2 of the License, or*/
-/*    (at your option) any later version.*/
-/**/
-/*    T-COFFEE is distributed in the hope that it will be useful,*/
-/*    but WITHOUT ANY WARRANTY; without even the implied warranty of*/
-/*    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the*/
-/*    GNU General Public License for more details.*/
-/**/
-/*    You should have received a copy of the GNU General Public License*/
-/*    along with Foobar; if not, write to the Free Software*/
-/*    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA*/
-/*...............................................                                                                                      |*/
-/*  If you need some more information*/
-/*  cedric.notredame@europe.com*/
-/*...............................................                                                                                                                                     |*/
-/**/
-/**/
-/*     */
-/******************************COPYRIGHT NOTICE*******************************/