From: pvtroshin Date: Tue, 21 Jun 2011 14:14:44 +0000 (+0000) Subject: removing tcoffee to update X-Git-Url: http://source.jalview.org/gitweb/?a=commitdiff_plain;h=8140043fefd2101326b3e651e3b7e2d4ac806b99;p=jabaws.git removing tcoffee to update git-svn-id: link to svn.lifesci.dundee.ac.uk/svn/barton/ptroshin/JABA2@4297 e3abac25-378b-4346-85de-24260fe3988d --- diff --git a/binaries/src/tcoffee/README b/binaries/src/tcoffee/README deleted file mode 100644 index e042d9a..0000000 --- a/binaries/src/tcoffee/README +++ /dev/null @@ -1,50 +0,0 @@ -README for T-Coffee installation version 6.70 and higer - -UNIX/LINUX/Mac OSX - -T-Coffee requires gcc, g77, makefile, CPAN, an internet connection and your root password - - - 1. Download and save the Package - 2. gunzip t_coffee.tar.gz - 3. tar -xvf t_coffee.tar - 4. cd T-COFFEE_distribution_Version_X.XX - 5. ./install - -This installation procedure is semi-interactive. It will prompt questions here and there. You can interrupt it any time and resume it later. - - -The install procedure carries out three distinct tasks: Compilation of T-Coffee (C program), Compilation and Installation of SOAP::Lite (Perl Module), Download/Compilation and Installation of all the T-Coffee companion packages required for all possible T-Coffee flavors (tcoffee,expresso,3dcoffee,mcoffee,rcoffee). Except for T-Coffee, the installer will only install the packages that are NOT already on your computer. If you want a lighter or more specific installation, you can try any of the following: - - - 1. ./install tcoffee - 2. ./install rcoffee - 3. ./install expresso - 4. ./install 3dcoffee - 5. ./install 3dcoffee - - -While Installing SOAP::Lite, CPAN will ask you many questions: say Yes to all or type return to keep the default values. If everything went well, the procedure has created in the bin directory two executables: t_coffee and TMalign (Make sure these executables are on your $PATH!). - - -If you have not managed to install SOAP::Lite, you can re-install it anytime (from anywhere) using steps 1-2 - -If you cannot log as root, or if for some reason this procedure does not work, see with your system manager and/or go directly to the CPAN repository of SOAP::Lite. You will still be able to use the most basic functions of T-Coffee. - - - -IMPORTANT: The purpose of SOAP::Lite is to allow T-Coffee the use of the EBI webservices such as webblast. BLAST brings many functionalities to T-Coffee and if you cannot install SOAP we suggest you go to the Installing BLAST for T-Coffee section of the Technical Doumentation (in the Installation section). There you will find alternative ways of using BLAST without SOAP. It is also in this document that you will find all the information required for a full installation of T-Coffee - -Microsoft Windows/Cygwin - -Install Cygwin - - * Download The Installer (NOT Cygwin/X) - * Click on view to list ALL the packages - * Select: gcc-core, make, wget - * Optional: ssh, xemacs, nano - * Run mkpasswd in Cywin (as requested when you start cygwin) - -Install T-Coffee within Cygwin using the Unix procedure (see above) - - diff --git a/binaries/src/tcoffee/install b/binaries/src/tcoffee/install deleted file mode 100644 index 8c7bb60..0000000 --- a/binaries/src/tcoffee/install +++ /dev/null @@ -1,1568 +0,0 @@ -#!/usr/bin/env perl -#Version 2.01 (25/02/03) -use Cwd; -use File::Path; -use FileHandle; -use strict; - - -our (%MODE, %PG, %ENV_SET, %SUPPORTED_OS); - - -our $EXIT_SUCCESS=0; -our $EXIT_FAILURE=1; -our $INTERNET=0; - -#our $CP="cp -d "; -our $CP="cp "; #was causing a crash on MacOSX -our $SILENT=">/dev/null 2>/dev/null"; -our $WEB_BASE="http://www.tcoffee.org"; -our $TCLINKDB_ADDRESS="$WEB_BASE/Resources/tclinkdb.txt"; -our $OS=get_os(); -our $ROOT=&get_root(); -our $CD=cwd(); -our $CDIR=$CD; -our $HOME=$ENV{'HOME'}; -########### DEFINITIONS ############################## -# -# -our $CXX="g++"; -our $CXXFLAGS=""; - -our $CPP="g++"; -our $CPPFLAGS=""; - -our $CC="gcc"; -our $CFLAGS=""; - -our $FC="f77"; -our $FFLAGS=""; - -my $install="all"; -my $default_update_action="no_update"; -######################################################## -my @required_applications=("wget_OR_curl"); -########### Mode Definitions ############################## -# -# -my @smode=("all", "clean", "install"); - -######################################################## -&initialize_PG(); - -#Parse The Command Line -my $cl=join( " ", @ARGV); -if ($#ARGV==-1 || ($cl=~/-h/) ||($cl=~/-H/) ) - { - print "\n!!!!!!! ./install t_coffee --> installs t_coffee only"; - print "\n!!!!!!! ./install all --> installs all the modes [mcoffee, expresso, psicoffee,rcoffee..]"; - print "\n!!!!!!! ./install [mcoffee|rcoffee|..] --> installs the specified mode"; - print "\n!!!!!!! ./install -h --> print usage\n\n"; - if ( $#ARGV==-1){exit ($EXIT_FAILURE);} - } - -if (($cl=~/-h/) ||($cl=~/-H/) ) - { - my $m; - print "\n\n!!!!!!! advanced mode\n"; - foreach $m ((keys (%MODE)),@smode) - { - print "!!!!!!! ./install $m\n"; - } - - print "!!!!!!! ./install [target:package|mode|] [-update|-force|-exec=dir|-dis=dir|-root|-tclinkdb=file|-] [CC=|FCC=|CXX=|CFLAGS=|CXXFLAGS=]\n"; - print "!!!!!!! ./install clean [removes all executables]\n"; - print "!!!!!!! ./install [optional:target] -update [updates package already installed]\n"; - print "!!!!!!! ./install [optional:target] -force [Forces recompilation over everything]\n"; - - print "!!!!!!! ./install [optional:target] -root [You are running as root]\n"; - print "!!!!!!! ./install [optional:target] -exec=/foo/bar/ [address for the T-Coffee executable]\n"; - print "!!!!!!! ./install [optional:target] -dis=/foo/bar/ [Address where distributions should be stored]\n"; - print "!!!!!!! ./install [optional:target] -tclinkdb=foo|update [file containing all the packages to be installed]\n"; - print "!!!!!!! ./install [optional:target] -tclinkdb=foo|update [file containing all the packages to be installed]\n"; - print "!!!!!!! ./install [optional:target] -clean [clean everything]\n"; - print "!!!!!!! ./install [optional:target] -plugins [plugins directory]\n"; - print "!!!!!!! mode:"; - foreach $m (keys(%MODE)){print "$m ";} - print "\n"; - print "!!!!!!! Packages:"; - foreach $m (keys (%PG)){print "$m ";} - print "\n"; - - print "\n\n"; - exit ($EXIT_FAILURE); - } - - - -# parse compiler flags -my (@argl)=($cl=~/(\S+=[^=]+)\s\w+=/g); -push (@argl, ($cl=~/(\S+=[^=]+\S)\s*$/g)); - -foreach $a (@argl) - { - if ( ($cl=~/CXX=(.*)/)){$CXX=$1;} - if ( ($cl=~/-CC=(.*)/ )){$CC=$1;} - if ( ($cl=~/-FC=(.*)/ )){$FC=$1;} - if ( ($cl=~/-CFLAGS=(.*)/)){$CFLAGS=$1;} - if ( ($cl=~/-CXXFLAGS=(.*)/)){$CXXFLAGS=$1;} - } -#parse install flags -our ($ROOT_INSTALL, $NO_QUESTION, $default_update_action,$BINARIES_ONLY,$force, $default_update_action, $INSTALL_DIR, $PLUGINS_DIR, $DISTRIBUTIONS,$tclinkdb, $proxy, $clean); -if ( ($cl=~/-root/)){$ROOT_INSTALL=1;} -if ( ($cl=~/-no_question/)){$NO_QUESTION=1;} -if ( ($cl=~/-update/)){$default_update_action="update";} -if ( ($cl=~/-binaries/)){$BINARIES_ONLY=1;} -if ( ($cl=~/-force/)){$force=1;$default_update_action="update"} -if ( ($cl=~/-exec=\s*(\S+)/)){$INSTALL_DIR=$1;} -if ( ($cl=~/-plugins=\s*(\S+)/)){$PLUGINS_DIR=$1;} -if ( ($cl=~/-dis=\s*(\S+)/)){$DISTRIBUTIONS=$1;} - -if ( ($cl=~/-tclinkdb=\s*(\S+)/)){$tclinkdb=$1;} -if ( ($cl=~/-proxy=\s*(\S+)/)){$proxy=$1;} -if ( ($cl=~/-clean/)){$clean=1;} -#automated update -if ($tclinkdb){&update_tclinkdb ($tclinkdb);} - -#Prepare the T-Coffee directory structure -our $TCDIR=$ENV{DIR_4_TCOFFEE}; -our $TCCACHE=$ENV{CACHE_4_TCOFFEE}; -our $TCTMP=$ENV{CACHE_4_TCOFFEE}; -our $TCM=$ENV{MCOFFEE_4_TCOFFEE}; -our $TCMETHODS=$ENV{METHODS_4_TCOFFEE}; -our $TCPLUGINS=$ENV{PLUGINS_4_TCOFFEE}; -our $PLUGINS_DIR=""; -our $INSTALL_DIR=""; - -&add_dir ($TCDIR="$HOME/.t_coffee"); -&add_dir ($TCCACHE="$TCDIR/cache"); -&add_dir ($TCTMP="$CDIR/tmp"); -&add_dir ($TCM="$TCDIR/mcoffee"); -&add_dir ($TCMETHODS="$TCDIR/methods"); -&add_dir ($TCPLUGINS="$TCDIR/plugins/$OS"); - -#Prepare the Installation Structure - -our $BASE="$CD/bin"; -our $BIN="$BASE/binaries/$OS"; -our $DOWNLOAD_DIR="$BASE/download"; -our $DOWNLOAD_FILE="$DOWNLOAD_DIR/files"; -our $TMP="$BASE/tmp"; - -&add_dir($BASE); -&add_dir($BIN); -&add_dir($DOWNLOAD_DIR); -&add_dir($DOWNLOAD_FILE); -if (!$DISTRIBUTIONS){$DISTRIBUTIONS="$DOWNLOAD_DIR/distributions";} -&add_dir ($DISTRIBUTIONS); -&add_dir ($TMP); - - -#set the directory for the plugins -if (!$PLUGINS_DIR && !$ROOT_INSTALL){$PLUGINS_DIR=$TCPLUGINS;} -elsif (!$PLUGINS_DIR && $ROOT_INSTALL){$PLUGINS_DIR="/usr/local/bin/";} - -#set the directory for t_coffee -if (!$INSTALL_DIR && !$ROOT_INSTALL){$INSTALL_DIR="$HOME/bin/";mkpath ($INSTALL_DIR);} -elsif (!$INSTALL_DIR && $ROOT_INSTALL){$INSTALL_DIR="/usr/local/bin/";} - -#prepare mcoffee files [Only if vanilla installation] -if (-d "mcoffee"){`cp mcoffee/* $TCM`;} - - -#prepare the environement -our $ENV_FILE="$TCDIR/t_coffee_env"; -&env_file2putenv ($ENV_FILE); -&set_proxy($proxy); -my ($target, $p, $r); -$target=$p; - -foreach $p ( ((keys (%PG)),(keys(%MODE)),(@smode)) ) - { - if ($ARGV[0] eq $p && $target eq ""){$target=$p;} - } -if ($target eq ""){exit ($EXIT_FAILURE);} - - -# Check the basic requirements are met -foreach $r (@required_applications) - { - my @app_list; - my $i; - $i=0; - - @app_list=split (/_OR_/, $r); - foreach my $pg (@app_list) - { - $i+=&pg_is_installed ($pg); - } - if ($i==0) - { - print "One of the following packages must be installed to proceed: "; - foreach my $pg (@app_list) - { - print ("$pg "); - } - die; - } - } - - -# Set the mains paths and create directories -# distrib_dir/install/bin -# distrib_dir/install/Downloads -# distrib_dir/install/Downloads/Files -# distrib_dir/install/Downloads/Distributions -# distrib_dir/tmp - -#Directory structure of the installation WITHIN the distribution dir - - - -#sign the license -&sign_license_ni(); - - -#Configure the copilers and their optins -$PG{C}{compiler}=get_C_compiler($CC); -$PG{Fortran}{compiler}=get_F_compiler($FC); -$PG{CXX}{compiler}=$PG{CPP}{compiler}=$PG{GPP}{compiler}=get_CXX_compiler($CXX); -if ($CXXFLAGS){$PG{CPP}{options}=$PG{GPP}{options}=$PG{CXX}{options}=$CXXFLAGS;} -if ($CFLAGS){$PG{C}{options}=$CFLAGS;} -foreach my $c (keys(%PG)) - { - my $arguments; - if ($PG{$c}{compiler}) - { - $arguments="$PG{$c}{compiler_flag}=$PG{$c}{compiler} "; - if ($PG{$c}{options}) - { - $arguments.="$PG{$c}{options_flag}=$PG{$c}{options} "; - } - $PG{$c}{arguments}=$arguments; - } - } - -# select the list of packages to update -if ($PG{$target}){$PG{$target}{install}=1;} -else - { - foreach my $pg (keys(%PG)) - { - if ( $target eq "all" || ($PG{$pg}{mode}=~/$target/)) - { - $PG{$pg} {install}=1; - } - } - } - -foreach my $pg (keys(%PG)) - { - if (!$PG{$pg}{update_action}){$PG{$pg}{update_action}=$default_update_action;} - elsif ($PG{$pg}{update_action} eq "never"){$PG{$pg}{install}=0;} - if ( $force && $PG{$pg}{install}) - { - `rm $BIN/$pg $BIN/$pg.exe $SILENT`; - } - if ($PG{$pg}{update_action} eq "update" && $PG{$pg}{install}){$PG{$pg}{update}=1;} - } - -#Execute the target: install/remove all the selected components -if (($target=~/clean/)) - { - print "------- cleaning executables -----\n"; - `rm bin/* $SILENT`; - exit ($EXIT_SUCCESS); - } - -if ( !$PG{$target}){print "------- Installing T-Coffee Modes\n";} - -#1 - Installing various modes -foreach my $m (keys(%MODE)) - { - if ( $target eq "all" || $target eq $m) - { - print "\n------- The installer will now install the $m components $MODE{$m}{description}\n"; - foreach my $pg (keys(%PG)) - { - if ( $PG{$pg}{mode} =~/$m/ && $PG{$pg}{install}) - { - if ($PG{$pg}{touched}){print "------- $PG{$pg}{dname}: already processed\n";} - else {$PG{$pg}{success}=&install_pg($pg);$PG{$pg}{touched}=1;} - } - } - } - } - -#2 - Installing Various Packages -if ( $PG{$target}){print "------- Installing Individual Package\n";} -foreach my $pg (keys (%PG)) - { - - if ( $PG{$pg}{install} && !$PG{$pg}{touched}) - { - print "\n------- Install $pg\n"; - $PG{$pg}{success}=&install_pg($pg);$PG{$pg}{touched}=1; - } - } -print "------- Finishing The installation\n"; -my $final_report=&install ($INSTALL_DIR); - -print "\n"; -print "*********************************************************************\n"; -print "******** INSTALLATION SUMMARY *****************\n"; -print "*********************************************************************\n"; -print "------- SUMMARY package Installation:\n"; -foreach my $pg (keys(%PG)) - { - if ( $PG{$pg}{install}) - { - my $bin_status=($PG{$pg}{from_binary} && $PG{$pg}{success})?"[from binary]":""; - if ( $PG{$pg}{new} && !$PG{$pg}{old}) {print "*------ $PG{$pg}{dname}: installed $bin_status\n"; $PG{$pg}{status}=1;} - elsif ( $PG{$pg}{new} && $PG{$pg}{old}) {print "*------ $PG{$pg}{dname}: updated $bin_status\n" ; $PG{$pg}{status}=1;} - elsif (!$PG{$pg}{new} && $PG{$pg}{old} && !$PG{$pg}{update}){print "*------ $PG{$pg}{dname}: previous\n" ; $PG{$pg}{status}=1;} - elsif (!$PG{$pg}{new} && $PG{$pg}{old} && $PG{$pg}{update}){print "*------ $PG{$pg}{dname}: failed update (previous installation available)\n";$PG{$pg}{status}=0;} - else {print "*------ $PG{$pg}{dname}: failed installation";$PG{$pg}{status}=0;} - } - } - -if ( !$PG{$target}){print "*------ SUMMARY mode Installation:\n";} -foreach my $m (keys(%MODE)) - { - if ( $target eq "all" || $target eq $m) - { - my $succesful=1; - foreach my $pg (keys(%PG)) - { - if (($PG{$pg}{mode}=~/$m/) && $PG{$pg}{install} && $PG{$pg}{status}==0) - { - $succesful=0; - print "*!!!!!! $PG{$pg}{dname}: Missing\n"; - } - } - if ( $succesful) - { - $MODE{$m}{status}=1; - print "*------ MODE $MODE{$m}{dname} SUCCESFULY installed\n"; - } - else - { - $MODE{$m}{status}=0; - print "*!!!!!! MODE $MODE{$m}{dname} UNSUCCESFULY installed\n"; - } - } - } - -if ($clean==1 && ($BASE=~/install4tcoffee/) ){print "*------ Clean Installation Directory: $BASE\n";`rm -rf $BASE`;} -#failure if one program was not well installed -foreach my $pg (keys(%PG)){if ($PG{$pg}{install} && $PG{$pg}{status}==0){exit ($EXIT_FAILURE);}} -exit ($EXIT_SUCCESS); - -################################################################################# -# # -# # -# # -# GENERIC INSTALLATION # -# # -# # -# # -################################################################################# -sub get_CXX_compiler - { - my $c=@_[0]; - my (@clist)=("g++"); - - return get_compil ($c, @clist); - } -sub get_C_compiler - { - my $c=@_[0]; - my (@clist)=("gcc", "cc", "icc"); - - return get_compil ($c, @clist); - } - -sub get_F_compiler - { - my ($c)=@_[0]; - my @clist=("f77", "g77", "gfortran", "ifort"); - return get_compil ($c, @clist); - } - -sub get_compil - { - my ($fav,@clist)=(@_); - - #return the first compiler found installed in the system. Check first the favorite - foreach my $c ($fav,@clist) - { - if (&pg_is_installed ($c)){return $c;} - } - return ""; - } -sub exit_if_pg_not_installed - { - my (@arg)=(@_); - - foreach my $p (@arg) - { - if ( !&pg_is_installed ($p)) - { - print "!!!!!!!! The $p utility must be installed for this installation to proceed [FATAL]\n"; - die; - } - } - return 1; - } -sub set_proxy - { - my ($proxy)=(@_); - my (@list,$p); - - @list= ("HTTP_proxy", "http_proxy", "HTTP_PROXY", "ALL_proxy", "all_proxy","HTTP_proxy_4_TCOFFEE","http_proxy_4_TCOFFEE"); - - if (!$proxy) - { - foreach my $p (@list) - { - if ( ($ENV_SET{$p}) || $ENV{$p}){$proxy=$ENV{$p};} - } - } - foreach my $p(@list){$ENV{$p}=$proxy;} - } - -sub check_internet_connection - { - my $internet; - - if ( -e "x"){unlink ("x");} - if (&pg_is_installed ("wget")){`wget www.google.com -Ox >/dev/null 2>/dev/null`;} - elsif (&pg_is_installed ("curl")){`curl www.google.com -ox >/dev/null 2>/dev/null`;} - else - { - printf stderr "\nERROR: No pg for remote file fetching [wget or curl][FATAL]\n"; - exit ($EXIT_FAILURE); - } - - if ( !-e "x" || -s "x" < 10){$internet=0;} - else {$internet=1;} - if (-e "x"){unlink "x";} - return $internet; - } -sub url2file - { - my ($cmd, $file,$wget_arg, $curl_arg)=(@_); - my ($exit,$flag, $pg, $arg); - - if ($INTERNET || check_internet_connection ()){$INTERNET=1;} - else - { - print STDERR "ERROR: No Internet Connection [FATAL:install.pl]\n"; - exit ($EXIT_FAILURE); - } - - if (&pg_is_installed ("wget")){$pg="wget"; $flag="-O";$arg=$wget_arg;} - elsif (&pg_is_installed ("curl")){$pg="curl"; $flag="-o";$arg=$curl_arg;} - else - { - printf stderr "\nERROR: No pg for remote file fetching [wget or curl][FATAL]\n"; - exit ($EXIT_FAILURE); - } - - - if (-e $file){unlink($file);} - $exit=system "$pg $cmd $flag$file $arg"; - return $exit; - } - -sub pg_is_installed - { - my ($p, $dir)=(@_); - my ($r,$m); - my ($supported, $language, $compil); - - if ( $PG{$p}) - { - $language=$PG{$p}{language2}; - $compil=$PG{$language}{compiler}; - } - - if ( $compil eq "CPAN") - { - if ( system ("perl -M$p -e 1")==$EXIT_SUCCESS){return 1;} - else {return 0;} - } - elsif ($dir) - { - if (-e "$dir/$p" || -e "$dir/$p\.exe"){return 1;} - else {return 0;} - } - elsif (-e "$PLUGINS_DIR/$p" || -e "$PLUGINS_DIR/$p.exe"){return 1;} - else - { - $r=`which $p 2>/dev/null`; - if ($r eq ""){return 0;} - else {return 1;} - } - return 0; - } -sub install - { - my ($new_bin)=(@_); - my ($copied, $report); - - - if (!$ROOT_INSTALL) - { - - if (-e "$BIN/t_coffee"){`$CP $BIN/t_coffee $INSTALL_DIR`}; - `cp $BIN/* $PLUGINS_DIR`; - $copied=1; - } - else - { - $copied=&root_run ("You must be root to finalize the installation", "$CP $BIN/* $INSTALL_DIR $SILENT"); - } - - - if ( !$copied) - { - $report="*!!!!!! Installation unsuccesful. The executables have been left in $BASE/bin\n"; - } - elsif ( $copied && $ROOT) - { - $report="*------ Installation succesful. Your executables have been copied in $new_bin and are on your PATH\n"; - } - elsif ( $copied && !$ROOT) - { - $report= "*!!!!!! T-Coffee and associated packages have been copied in: $new_bin\n"; - $report.="*!!!!!! This address is NOT in your PATH sytem variable\n"; - $report.="*!!!!!! You can do so by adding the following line in your ~/.bashrc file:\n"; - $report.="*!!!!!! export PATH=$new_bin:\$PATH\n"; - } - return $report; -} - -sub sign_license_ni - { - my $F=new FileHandle; - open ($F, "license.txt"); - while (<$F>) - { - print "$_"; - } - close ($F); - - return; - } -################################################################################# -# # -# # -# # -# INDIVIDUAL MULTIPLE SEQUENCE ALIGNMNT PACKAGES INSTALLATION # -# # -# # -# # -################################################################################# - -sub install_pg - { - my ($pg)=(@_); - my ($report, $previous, $language, $compiler, $return); - - if (!$PG{$pg}{install}){return 1;} - - $previous=&pg_is_installed ($pg); - - if ($PG{$pg}{update_action} eq "no_update" && $previous) - { - $PG{$pg}{old}=1; - $PG{$pg}{new}=0; - $return=1; - } - else - { - $PG{$pg}{old}=$previous; - - if ($PG{$pg} {language2} eq "Perl"){&install_perl_package ($pg);} - elsif ($BINARIES_ONLY && &install_binary_package ($pg)){$PG{$pg}{from_binary}=1;} - elsif (&install_source_package ($pg)){;} - else - { - - if (!&supported_os($OS)) - { - print "!!!!!!!! $pg compilation failed, binary unsupported for $OS\n"; - } - elsif (!($PG{$pg}{from_binary}=&install_binary_package ($pg))) - { - print "!!!!!!!! $pg compilation and binary installation failed\n"; - } - } - $PG{$pg}{new}=$return=&pg_is_installed ($pg,$BIN); - } - - - return $return; - } -sub install_perl_package - { - my ($pg)=(@_); - my ($report, $language, $compiler); - - $language=$PG{$pg} {language2}; - $compiler=$PG{$language}{compiler}; - - if (!&pg_is_installed ($pg)) - { - if ( $OS eq "windows"){`perl -M$compiler -e 'install $pg'`;} - elsif ( $ROOT eq "sudo"){system ("sudo perl -M$compiler -e 'install $pg'");} - else {system ("su root -c perl -M$compiler -e 'install $pg'");} - } - return &pg_is_installed ($pg); - } - - - -sub install_source_package - { - my ($pg)=(@_); - my ($report, $download, $arguments, $language, $address, $name, $ext, $main_dir, $distrib); - my $wget_tmp="$TMP/wget.tmp"; - my (@fl); - if ( -e "$BIN/$pg" || -e "$BIN/$pg.exe"){return 1;} - - if ($pg eq "t_coffee") {return &install_t_coffee ($pg);} - elsif ($pg eq "TMalign"){return &install_TMalign ($pg);} - - chdir $DISTRIBUTIONS; - - $download=$PG{$pg}{source}; - - if (($download =~/tgz/)) - { - ($address,$name,$ext)=($download=~/(.+\/)([^\/]+)(\.tgz)/); - } - elsif (($download=~/tar\.gz/)) - { - ($address,$name,$ext)=($download=~/(.+\/)([^\/]+)(\.tar\.gz)/); - } - elsif (($download=~/tar/)) - { - ($address,$name,$ext)=($download=~/(.+\/)([^\/]+)(\.tar)/); - } - else - { - ($address,$name)=($download=~/(.+\/)([^\/]+)/); - $ext=""; - } - $distrib="$name$ext"; - - if ( !-d $pg){mkdir $pg;} - chdir $pg; - - #get the distribution if available - if ( -e "$DOWNLOAD_DIR/$distrib") - { - `$CP $DOWNLOAD_DIR/$distrib .`; - } - #UNTAR and Prepare everything - if (!-e "$name.tar" && !-e "$name") - { - &check_rm ($wget_tmp); - print "\n------- Downloading/Installing $pg\n"; - if (!-e $distrib && &url2file ("$download", "$wget_tmp")==$EXIT_SUCCESS) - { - - `mv $wget_tmp $distrib`; - `$CP $distrib $DOWNLOAD_DIR/`; - } - - if (!-e $distrib) - { - print "!!!!!!! Download of $pg distribution failed\n"; - print "!!!!!!! Check Address: $PG{$pg}{source}\n"; - return 0; - } - print "\n------- unzipping/untaring $name\n"; - if (($ext =~/z/)) - { - &flush_command ("gunzip $name$ext"); - - } - if (($ext =~/tar/) || ($ext =~/tgz/)) - { - &flush_command("tar -xvf $name.tar"); - } - } - #Guess and enter the distribution directory - @fl=ls($p); - foreach my $f (@fl) - { - if (-d $f) - { - $main_dir=$f; - } - } - if (-d $main_dir) - {chdir $main_dir;} - - print "\n------- Compiling/Installing $pg\n"; - `make clean $SILENT`; - #sap - if ($pg eq "sap") - { - `rm *.o sap sap.exe ./util/aa/*.o ./util/wt/.o $SILENT`; - &flush_command ("make $arguments sap"); - &check_cp ($pg, "$BIN"); - } - elsif ($pg eq "clustalw2") - { - &flush_command("./configure"); - &flush_command("make $arguments"); - &check_cp ("./src/$pg", "$BIN"); - - } - elsif ($pg eq "clustalw") - { - &flush_command("make $arguments clustalw"); - `$CP $pg $BIN $SILENT`; - } - - elsif ($pg eq "mafft") - { - my $base=cwd(); - my $c; - - #compile core - mkpath ("./mafft/bin"); - mkpath ("./mafft/lib"); - chdir "$base/core"; - `make clean $SILENT`; - &flush_command ("make $arguments"); - &flush_command ("make install LIBDIR=../mafft/lib BINDIR=../mafft/bin"); - - #compile extension - chdir "$base/extensions"; - `make clean $SILENT`; - &flush_command ("make $arguments"); - &flush_command ("make install LIBDIR=../mafft/lib BINDIR=../mafft/bin"); - - #put everything in mafft and copy the coompiled stuff in bin - chdir "$base"; - if ($ROOT_INSTALL) - { - &root_run ("You Must be Roor to Install MAFFT\n", "mkdir /usr/local/mafft/;$CP mafft/lib/* /usr/local/mafft;$CP mafft/lib/mafft* /usr/local/bin ;$CP mafft/bin/mafft /usr/local/bin/; "); - } - else - { - `$CP mafft/lib/* $BIN`; - `$CP mafft/bin/mafft $BIN`; - } - `tar -cvf mafft.tar mafft`; - `gzip mafft.tar`; - `mv mafft.tar.gz $BIN`; - } - elsif ( $pg eq "dialign-tx") - { - my $f; - my $base=cwd(); - - chdir "./source"; - &flush_command (" make CPPFLAGS='-O3 -funroll-loops' all"); - - chdir ".."; - &check_cp ("./source/$pg", "$BIN"); - &check_cp ("./source/$pg", "$BIN/dialign-t"); - } - elsif ($pg eq "poa") - { - &flush_command ("make $arguments poa"); - &check_cp ("$pg", "$BIN"); - } - elsif ( $pg eq "probcons") - { - `rm *.exe $SILENT`; - &flush_command ("make $arguments probcons"); - &check_cp("$pg", "$BIN/$pg"); - } - elsif ( $pg eq "probcons" || $pg eq "probconsRNA") - { - `rm *.exe $SILENT`; - &flush_command ("make $arguments probcons"); - &check_cp("probcons", "$BIN/$pg"); - } - - elsif ( $pg eq "muscle") - { - `rm *.o muscle muscle.exe $SILENT`; - &flush_command ("make $arguments all"); - &check_cp("$pg", "$BIN"); - } - elsif ( $pg eq "pcma") - { - &flush_command ("make $arguments pcma"); - &check_cp("$pg", "$BIN"); - } - elsif ($pg eq "kalign") - { - &flush_command ("./configure"); - &flush_command("make $arguments"); - &check_cp ("$pg",$BIN); - } - elsif ( $pg eq "amap") - { - chdir "align"; - `make clean $SILENT`; - &flush_command ("make $arguments all"); - &check_cp ("$pg", $BIN); - } - elsif ( $pg eq "proda") - { - &flush_command ("make $arguments all"); - &check_cp ("$pg", $BIN); - } - elsif ( $pg eq "prank") - { - &flush_command ("make $arguments all"); - &check_cp ("$pg", $BIN); - } - elsif ( $pg eq "mustang") - { - &flush_command ("make $arguments all"); - if ( $OS=~/windows/){&check_cp("./bin/MUSTANG_v.3", "$BIN/mustang.exe");} - else {&check_cp("./bin/MUSTANG_v.3", "$BIN/mustang");} - } - elsif ( $pg eq "RNAplfold") - { - &flush_command("./configure"); - &flush_command ("make $arguments all"); - &check_cp("./Progs/RNAplfold", "$BIN"); - } - chdir $CDIR; - return &pg_is_installed ($pg, $BIN); - } - -sub install_t_coffee - { - my ($pg)=(@_); - my ($report,$cflags, $arguments, $language, $compiler) ; - #1-Install T-Coffee - chdir "t_coffee_source"; - &flush_command ("make clean"); - print "\n------- Compiling T-Coffee\n"; - $language=$PG{$pg} {language2}; - $arguments=$PG{$language}{arguments}; - if (!($arguments =~/CFLAGS/)){$arguments .= " CFLAGS=-O2 ";} - - if ( $CC ne ""){&flush_command ("make -i $arguments t_coffee");} - &check_cp ($pg, $BIN); - - chdir $CDIR; - return &pg_is_installed ($pg, $BIN); - } -sub install_TMalign - { - my ($pg)=(@_); - my $report; - chdir "t_coffee_source"; - print "\n------- Compiling TMalign\n"; - `rm TMalign TMalign.exe $SILENT`; - if ( $FC ne ""){&flush_command ("make -i $PG{Fortran}{arguments} TMalign");} - &check_cp ($pg, $BIN); - if ( !-e "$BIN/$pg" && pg_has_binary_distrib ($pg)) - { - print "!!!!!!! Compilation of $pg impossible. Will try to install from binary\n"; - return &install_binary_package ($pg); - } - chdir $CDIR; - return &pg_is_installed ($pg, $BIN); - } - -sub pg_has_binary_distrib - { - my ($pg)=(@_); - if ($PG{$pg}{windows}){return 1;} - elsif ($PG{$pg}{osx}){return 1;} - elsif ($PG{$pg}{linux}){return 1;} - return 0; - } -sub install_binary_package - { - my ($pg)=(@_); - my ($base,$report,$name, $download, $arguments, $language, $dir); - my $isdir; - &input_os(); - - if (!&supported_os($OS)){return 0;} - if ( $PG{$pg}{binary}){$name=$PG{$pg}{binary};} - else - { - $name=$pg; - if ( $OS eq "windows"){$name.=".exe";} - } - - $download="$WEB_BASE/Packages/Binaries/$OS/$name"; - - $base=cwd(); - chdir $TMP; - - if (!-e $name) - { - `rm x $SILENT`; - if ( url2file("$download","x")==$EXIT_SUCCESS) - { - `mv x $name`; - } - } - - if (!-e $name) - { - print "!!!!!!! $PG{$pg}{dname}: Download of $pg binary failed\n"; - print "!!!!!!! $PG{$pg}{dname}: Check Address: $download\n"; - return 0; - } - print "\n------- Installing $pg\n"; - - if ($name =~/tar\.gz/) - { - `gunzip $name`; - `tar -xvf $pg.tar`; - chdir $pg; - if ( $pg eq "mafft") - { - if ($ROOT_INSTALL) - { - &root_run ("You Must be Roor to Install MAFFT\n", "$CP mafft/bin/* /usr/local/mafft;mkdir /usr/local/mafft/; $CP mafft/lib/* /usr/local/bin/"); - } - else - { - `$CP $TMP/$pg/bin/* $BIN $SILENT`; - `$CP $TMP/$pg/lib/* $BIN $SILENT`; - } - } - else - { - if (-e "$TMP/$pg/data"){`$CP $TMP/$pg/data/* $TCM $SILENT`;} - if (!($pg=~/\*/)){`rm -rf $pg`;} - } - } - else - { - &check_cp ("$pg", "$BIN"); - `chmod u+x $BIN/$pg`; - unlink ($pg); - } - chdir $base; - $PG{$pg}{from_binary}=1; - return &pg_is_installed ($pg, $BIN); - } - -################################################################################ -# # -# # -# # -# Simple Utilities # -# # -# # -# # -################################################################################# -sub add_dir - { - my $dir=@_[0]; - - if (!-e $dir && !-d $dir) - { - return mkpath ($dir); - } - else - { - return 0; - } - } -sub check_rm - { - my ($file)=(@_); - - if ( -e $file) - { - return unlink($file); - } - return 0; - } -sub check_cp - { - my ($from, $to)=(@_); - if ( !-e $from && -e "$from\.exe"){$from="$from\.exe";} - if ( !-e $from){return 0;} - - `$CP $from $to`; - return 1; - } -sub check_file_list_exists - { - my ($base, @flist)=(@_); - my $f; - - foreach $f (@flist) - { - if ( !-e "$base/$f"){return 0;} - } - return 1; - } -sub ls - { - my $f=@_[0]; - my @fl; - chomp(@fl=`ls -1 $f`); - return @fl; - } -sub flush_command - { - my $command=@_[0]; - my $F=new FileHandle; - open ($F, "$command|"); - while (<$F>){print " --- $_";} - close ($F); - } - -sub input_installation_directory - { - my $dir=@_[0]; - my $new; - - print "------- The current installation directory is: [$dir]\n"; - print "??????? Return to keep the default or new value:"; - - if ($NO_QUESTION==0) - { - chomp ($new=); - while ( $new ne "" && !input_yes ("You have entered $new. Is this correct? ([y]/n):")) - { - print "???????New installation directory:"; - chomp ($new=); - } - $dir=($new eq "")?$dir:$new; - $dir=~s/\/$//; - } - - if ( -d $dir){return $dir;} - elsif (&root_run ("You must be root to create $dir","mkdir $dir")==$EXIT_SUCCESS){return $dir;} - else - { - print "!!!!!!! $dir could not be created\n"; - if ( $NO_QUESTION) - { - return ""; - } - elsif ( &input_yes ("??????? Do you want to provide a new directory([y]/n)?:")) - { - return input_installation_directory ($dir); - } - else - { - return ""; - } - } - - } -sub input_yes - { - my $question =@_[0]; - my $answer; - - if ($NO_QUESTION==1){return 1;} - - if ($question eq ""){$question="??????? Do you wish to proceed ([y]/n)?:";} - print $question; - chomp($answer=lc()); - if (($answer=~/^y/) || $answer eq ""){return 1;} - elsif ( ($answer=~/^n/)){return 0;} - else - { - return input_yes($question); - } - } -sub root_run - { - my ($txt, $cmd)=(@_); - - if ( system ($cmd)==$EXIT_SUCCESS){return $EXIT_SUCCESS;} - else - { - print "------- $txt\n"; - if ( $ROOT eq "sudo"){return system ("sudo $cmd");} - else {return system ("su root -c \"$cmd\"");} - } - } -#analyze environement -sub get_root - { - if (&pg_is_installed ("sudo")){return "sudo";} - else {return "su";} - } - -sub get_os - { - my $raw_os=`uname`; - my $os; - - $raw_os=lc ($raw_os); - - if ($raw_os =~/cygwin/){$os="windows";} - elsif ($raw_os =~/linux/){$os="linux";} - elsif ($raw_os =~/osx/){$os="macosx";} - elsif ($raw_os =~/darwin/){$os="macosx";} - else - { - $os=$raw_os; - } - return $os; - } -sub input_os - { - my $answer; - if ($OS) {return $OS;} - - print "??????? which os do you use: [w]indows, [l]inux, [m]acosx:?"; - $answer=lc(); - - if (($answer=~/^m/)){$OS="macosx";} - elsif ( ($answer=~/^w/)){$OS="windows";} - elsif ( ($answer=~/^linux/)){$OS="linux";} - - else - { - return &input_os(); - } - return $OS; - } - -sub supported_os - { - my ($os)=(@_[0]); - return $SUPPORTED_OS{$os}; - } - - -################################################################################ -# # -# # -# # -# update/initialize links # -# # -# # -# # -################################################################################# - - -sub update_tclinkdb - { - my $file =@_[0]; - my $name; - my $F=new FileHandle; - my ($download, $address, $name, $l, $db); - - if ( $file eq "update"){$file=$TCLINKDB_ADDRESS;} - - if ( $file =~/http:\/\// || $file =~/ftp:\/\//) - { - ($address, $name)=($download=~/(.*)\/([^\/]+)$/); - `rm x $SILENT`; - if (&url2file ($file,"x")==$EXIT_SUCCESS) - { - print "------- Susscessful upload of $name"; - `mv x $name`; - $file=$name; - } - } - open ($F, "$file"); - while (<$F>) - { - my $l=$_; - if (($l =~/^\/\//) || ($db=~/^#/)){;} - elsif ( !($l =~/\w/)){;} - else - { - my @v=split (/\s+/, $l); - if ( $l=~/^MODE/) - { - $MODE{$v[1]}{$v[2]}=$v[3]; - } - elsif ($l=~/^PG/) - { - $PG{$v[1]}{$v[2]}=$v[3]; - } - } - } - close ($F); - &post_process_PG(); - return; - } - - - -sub initialize_PG - { - -$PG{"t_coffee"}{"4_TCOFFEE"}="TCOFFEE"; -$PG{"t_coffee"}{"type"}="sequence_multiple_aligner"; -$PG{"t_coffee"}{"ADDRESS"}="http://www.tcoffee.org"; -$PG{"t_coffee"}{"language"}="C"; -$PG{"t_coffee"}{"language2"}="C"; -$PG{"t_coffee"}{"source"}="http://www.tcoffee.org/Packages/T-COFFEE_distribution.tar.gz"; -$PG{"t_coffee"}{"update_action"}="always"; -$PG{"t_coffee"}{"mode"}="tcoffee,mcoffee,rcoffee,expresso,3dcoffee"; -$PG{"clustalw2"}{"4_TCOFFEE"}="CLUSTALW2"; -$PG{"clustalw2"}{"type"}="sequence_multiple_aligner"; -$PG{"clustalw2"}{"ADDRESS"}="http://www.clustal.org"; -$PG{"clustalw2"}{"language"}="C++"; -$PG{"clustalw2"}{"language2"}="CXX"; -$PG{"clustalw2"}{"source"}="http://www.clustal.org/download/2.0.10/clustalw-2.0.10-src.tar.gz"; -$PG{"clustalw2"}{"mode"}="mcoffee,rcoffee"; -$PG{"clustalw"}{"4_TCOFFEE"}="CLUSTALW"; -$PG{"clustalw"}{"type"}="sequence_multiple_aligner"; -$PG{"clustalw"}{"ADDRESS"}="http://www.clustal.org"; -$PG{"clustalw"}{"language"}="C"; -$PG{"clustalw"}{"language2"}="C"; -$PG{"clustalw"}{"source"}="http://www.clustal.org/download/1.X/ftp-igbmc.u-strasbg.fr/pub/ClustalW/clustalw1.82.UNIX.tar.gz"; -$PG{"clustalw"}{"mode"}="mcoffee,rcoffee"; -$PG{"dialign-t"}{"4_TCOFFEE"}="DIALIGNT"; -$PG{"dialign-t"}{"type"}="sequence_multiple_aligner"; -$PG{"dialign-t"}{"ADDRESS"}="http://dialign-tx.gobics.de/"; -$PG{"dialign-t"}{"DIR"}="/usr/share/dialign-tx/"; -$PG{"dialign-t"}{"language"}="C"; -$PG{"dialign-t"}{"language2"}="C"; -$PG{"dialign-t"}{"source"}="http://dialign-tx.gobics.de/DIALIGN-TX_1.0.1.tar.gz"; -$PG{"dialign-t"}{"mode"}="mcoffee"; -$PG{"dialign-t"}{"binary"}="dialign-t"; -$PG{"dialign-tx"}{"4_TCOFFEE"}="DIALIGNTX"; -$PG{"dialign-tx"}{"type"}="sequence_multiple_aligner"; -$PG{"dialign-tx"}{"ADDRESS"}="http://dialign-tx.gobics.de/"; -$PG{"dialign-tx"}{"DIR"}="/usr/share/dialign-tx/"; -$PG{"dialign-tx"}{"language"}="C"; -$PG{"dialign-tx"}{"language2"}="C"; -$PG{"dialign-tx"}{"source"}="http://dialign-tx.gobics.de/DIALIGN-TX_1.0.1.tar.gz"; -$PG{"dialign-tx"}{"mode"}="mcoffee"; -$PG{"dialign-tx"}{"binary"}="dialign-tx"; -$PG{"poa"}{"4_TCOFFEE"}="POA"; -$PG{"poa"}{"type"}="sequence_multiple_aligner"; -$PG{"poa"}{"ADDRESS"}="http://www.bioinformatics.ucla.edu/poa/"; -$PG{"poa"}{"language"}="C"; -$PG{"poa"}{"language2"}="C"; -$PG{"poa"}{"source"}="http://downloads.sourceforge.net/poamsa/poaV2.tar.gz"; -$PG{"poa"}{"DIR"}="/usr/share/"; -$PG{"poa"}{"FILE1"}="blosum80.mat"; -$PG{"poa"}{"mode"}="mcoffee"; -$PG{"poa"}{"binary"}="poa"; -$PG{"probcons"}{"4_TCOFFEE"}="PROBCONS"; -$PG{"probcons"}{"type"}="sequence_multiple_aligner"; -$PG{"probcons"}{"ADDRESS"}="http://probcons.stanford.edu/"; -$PG{"probcons"}{"language2"}="CXX"; -$PG{"probcons"}{"language"}="C++"; -$PG{"probcons"}{"source"}="http://probcons.stanford.edu/probcons_v1_12.tar.gz"; -$PG{"probcons"}{"mode"}="mcoffee"; -$PG{"probcons"}{"binary"}="probcons"; -$PG{"mafft"}{"4_TCOFFEE"}="MAFFT"; -$PG{"mafft"}{"type"}="sequence_multiple_aligner"; -$PG{"mafft"}{"ADDRESS"}="http://align.bmr.kyushu-u.ac.jp/mafft/online/server/"; -$PG{"mafft"}{"language"}="C"; -$PG{"mafft"}{"language"}="C"; -$PG{"mafft"}{"source"}="http://align.bmr.kyushu-u.ac.jp/mafft/software/mafft-6.603-with-extensions-src.tgz"; -$PG{"mafft"}{"windows"}="http://align.bmr.kyushu-u.ac.jp/mafft/software/mafft-6.603-mingw.tar"; -$PG{"mafft"}{"mode"}="mcoffee,rcoffee"; -$PG{"mafft"}{"binary"}="mafft.tar.gz"; -$PG{"muscle"}{"4_TCOFFEE"}="MUSCLE"; -$PG{"muscle"}{"type"}="sequence_multiple_aligner"; -$PG{"muscle"}{"ADDRESS"}="http://www.drive5.com/muscle/"; -$PG{"muscle"}{"language"}="C++"; -$PG{"muscle"}{"language2"}="GPP"; -$PG{"muscle"}{"source"}="http://www.drive5.com/muscle/downloads3.6/muscle3.6_src.tar.gz"; -$PG{"muscle"}{"windows"}="http://www.drive5.com/muscle/downloads3.6/muscle3.6_win32.zip"; -$PG{"muscle"}{"linux"}="http://www.drive5.com/muscle/downloads3.6/muscle3.6_linux_ia32.tar.gz"; -$PG{"muscle"}{"mode"}="mcoffee,rcoffee"; -$PG{"pcma"}{"4_TCOFFEE"}="PCMA"; -$PG{"pcma"}{"type"}="sequence_multiple_aligner"; -$PG{"pcma"}{"ADDRESS"}="ftp://iole.swmed.edu/pub/PCMA/"; -$PG{"pcma"}{"language"}="C"; -$PG{"pcma"}{"language2"}="C"; -$PG{"pcma"}{"source"}="ftp://iole.swmed.edu/pub/PCMA/pcma.tar.gz"; -$PG{"pcma"}{"mode"}="mcoffee"; -$PG{"kalign"}{"4_TCOFFEE"}="KALIGN"; -$PG{"kalign"}{"type"}="sequence_multiple_aligner"; -$PG{"kalign"}{"ADDRESS"}="http://msa.cgb.ki.se"; -$PG{"kalign"}{"language"}="C"; -$PG{"kalign"}{"language2"}="C"; -$PG{"kalign"}{"source"}="http://msa.cgb.ki.se/downloads/kalign/current.tar.gz"; -$PG{"kalign"}{"mode"}="mcoffee"; -$PG{"amap"}{"4_TCOFFEE"}="AMAP"; -$PG{"amap"}{"type"}="sequence_multiple_aligner"; -$PG{"amap"}{"ADDRESS"}="http://bio.math.berkeley.edu/amap/"; -$PG{"amap"}{"language"}="C++"; -$PG{"amap"}{"language2"}="CXX"; -$PG{"amap"}{"source"}="http://baboon.math.berkeley.edu/amap/download/amap.2.2.tar.gz"; -$PG{"amap"}{"mode"}="mcoffee"; -$PG{"proda"}{"4_TCOFFEE"}="PRODA"; -$PG{"proda"}{"type"}="sequence_multiple_aligner"; -$PG{"proda"}{"ADDRESS"}="http://proda.stanford.edu"; -$PG{"proda"}{"language"}="C++"; -$PG{"proda"}{"language2"}="CXX"; -$PG{"proda"}{"source"}="http://proda.stanford.edu/proda_1_0.tar.gz"; -$PG{"proda"}{"mode"}="mcoffee"; -$PG{"prank"}{"4_TCOFFEE"}="PRANK"; -$PG{"prank"}{"type"}="sequence_multiple_aligner"; -$PG{"prank"}{"ADDRESS"}="http://www.ebi.ac.uk/goldman-srv/prank/"; -$PG{"prank"}{"language"}="C++"; -$PG{"prank"}{"language2"}="CXX"; -$PG{"prank"}{"source"}="http://www.ebi.ac.uk/goldman-srv/prank/src/old/prank.src.081202.tgz"; -$PG{"prank"}{"mode"}="mcoffee"; -$PG{"sap"}{"4_TCOFFEE"}="SAP"; -$PG{"sap"}{"type"}="structure_pairwise_aligner"; -$PG{"sap"}{"ADDRESS"}="http://mathbio.nimr.mrc.ac.uk/wiki/Software"; -$PG{"sap"}{"language"}="C"; -$PG{"sap"}{"language2"}="C"; -$PG{"sap"}{"source"}="http://www.tcoffee.org/Packages/sap_distribution_TCC_0.6.tar.gz"; -$PG{"sap"}{"mode"}="expresso,3dcoffee"; -$PG{"TMalign"}{"4_TCOFFEE"}="TMALIGN"; -$PG{"TMalign"}{"type"}="structure_pairwise_aligner"; -$PG{"TMalign"}{"ADDRESS"}="http://zhang.bioinformatics.ku.edu/TM-align/TMalign.f"; -$PG{"TMalign"}{"language"}="Fortran"; -$PG{"TMalign"}{"language2"}="Fortran"; -$PG{"TMalign"}{"source"}="http://zhang.bioinformatics.ku.edu/TM-align/TMalign.f"; -$PG{"TMalign"}{"linux"}="http://zhang.bioinformatics.ku.edu/TM-align/TMalign_32.gz"; -$PG{"TMalign"}{"mode"}="expresso,3dcoffee"; -$PG{"mustang"}{"4_TCOFFEE"}="MUSTANG"; -$PG{"mustang"}{"type"}="structure_pairwise_aligner"; -$PG{"mustang"}{"ADDRESS"}="http://www.cs.mu.oz.au/~arun/mustang"; -$PG{"mustang"}{"language"}="C++"; -$PG{"mustang"}{"language2"}="CXX"; -$PG{"mustang"}{"source"}="http://www.cs.mu.oz.au/~arun/mustang/mustang_v.3.tgz"; -$PG{"mustang"}{"mode"}="expresso,3dcoffee"; -$PG{"lsqman"}{"4_TCOFFEE"}="LSQMAN"; -$PG{"lsqman"}{"type"}="structure_pairwise_aligner"; -$PG{"lsqman"}{"ADDRESS"}="empty"; -$PG{"lsqman"}{"language"}="empty"; -$PG{"lsqman"}{"language2"}="empty"; -$PG{"lsqman"}{"source"}="empty"; -$PG{"lsqman"}{"update_action"}="never"; -$PG{"lsqman"}{"mode"}="expresso,3dcoffee"; -$PG{"align_pdb"}{"4_TCOFFEE"}="ALIGN_PDB"; -$PG{"align_pdb"}{"type"}="structure_pairwise_aligner"; -$PG{"align_pdb"}{"ADDRESS"}="empty"; -$PG{"align_pdb"}{"language"}="empty"; -$PG{"align_pdb"}{"language2"}="empty"; -$PG{"align_pdb"}{"source"}="empty"; -$PG{"align_pdb"}{"update_action"}="never"; -$PG{"align_pdb"}{"mode"}="expresso,3dcoffee"; -$PG{"fugueali"}{"4_TCOFFEE"}="FUGUE"; -$PG{"fugueali"}{"type"}="structure_pairwise_aligner"; -$PG{"fugueali"}{"ADDRESS"}="http://www-cryst.bioc.cam.ac.uk/fugue/download.html"; -$PG{"fugueali"}{"language"}="empty"; -$PG{"fugueali"}{"language2"}="empty"; -$PG{"fugueali"}{"source"}="empty"; -$PG{"fugueali"}{"update_action"}="never"; -$PG{"fugueali"}{"mode"}="expresso,3dcoffee"; -$PG{"dalilite.pl"}{"4_TCOFFEE"}="DALILITEc"; -$PG{"dalilite.pl"}{"type"}="structure_pairwise_aligner"; -$PG{"dalilite.pl"}{"ADDRESS"}="built_in"; -$PG{"dalilite.pl"}{"ADDRESS2"}="http://www.ebi.ac.uk/Tools/webservices/services/dalilite"; -$PG{"dalilite.pl"}{"language"}="Perl"; -$PG{"dalilite.pl"}{"language2"}="Perl"; -$PG{"dalilite.pl"}{"source"}="empty"; -$PG{"dalilite.pl"}{"update_action"}="never"; -$PG{"dalilite.pl"}{"mode"}="expresso,3dcoffee"; -$PG{"probconsRNA"}{"4_TCOFFEE"}="PROBCONSRNA"; -$PG{"probconsRNA"}{"type"}="RNA_multiple_aligner"; -$PG{"probconsRNA"}{"ADDRESS"}="http://probcons.stanford.edu/"; -$PG{"probconsRNA"}{"language"}="C++"; -$PG{"probconsRNA"}{"language2"}="CXX"; -$PG{"probconsRNA"}{"source"}="http://probcons.stanford.edu/probconsRNA.tar.gz"; -$PG{"probconsRNA"}{"mode"}="mcoffee,rcoffee"; -$PG{"sfold"}{"4_TCOFFEE"}="CONSAN"; -$PG{"sfold"}{"type"}="RNA_pairwise_aligner"; -$PG{"sfold"}{"ADDRESS"}="http://selab.janelia.org/software/consan/"; -$PG{"sfold"}{"language"}="empty"; -$PG{"sfold"}{"language2"}="empty"; -$PG{"sfold"}{"source"}="empty"; -$PG{"sfold"}{"update_action"}="never"; -$PG{"sfold"}{"mode"}="rcoffee"; -$PG{"RNAplfold"}{"4_TCOFFEE"}="RNAPLFOLD"; -$PG{"RNAplfold"}{"type"}="RNA_secondarystructure_predictor"; -$PG{"RNAplfold"}{"ADDRESS"}="http://www.tbi.univie.ac.at/~ivo/RNA/"; -$PG{"RNAplfold"}{"language"}="C"; -$PG{"RNAplfold"}{"language2"}="C"; -$PG{"RNAplfold"}{"source"}="http://www.tbi.univie.ac.at/~ivo/RNA/ViennaRNA-1.7.2.tar.gz"; -$PG{"RNAplfold"}{"mode"}="rcoffee"; -$PG{"hmmtop"}{"4_TCOFFEE"}="HMMTOP"; -$PG{"hmmtop"}{"type"}="protein_secondarystructure_predictor"; -$PG{"hmmtop"}{"ADDRESS"}="www.enzim.hu/hmmtop/"; -$PG{"hmmtop"}{"language"}="C"; -$PG{"hmmtop"}{"language2"}="C"; -$PG{"hmmtop"}{"source"}="empty"; -$PG{"hmmtop"}{"update_action"}="never"; -$PG{"hmmtop"}{"mode"}="tcoffee"; -$PG{"gorIV"}{"4_TCOFFEE"}="GOR4"; -$PG{"gorIV"}{"type"}="protein_secondarystructure_predictor"; -$PG{"gorIV"}{"ADDRESS"}="http://mig.jouy.inra.fr/logiciels/gorIV/"; -$PG{"gorIV"}{"language"}="C"; -$PG{"gorIV"}{"language2"}="C"; -$PG{"gorIV"}{"source"}="http://mig.jouy.inra.fr/logiciels/gorIV/GOR_IV.tar.gz"; -$PG{"gorIV"}{"update_action"}="never"; -$PG{"gorIV"}{"mode"}="tcoffee"; -$PG{"wublast.pl"}{"4_TCOFFEE"}="EBIWUBLASTc"; -$PG{"wublast.pl"}{"type"}="protein_homology_predictor"; -$PG{"wublast.pl"}{"ADDRESS"}="built_in"; -$PG{"wublast.pl"}{"ADDRESS2"}="http://www.ebi.ac.uk/Tools/webservices/services/wublast"; -$PG{"wublast.pl"}{"language"}="Perl"; -$PG{"wublast.pl"}{"language2"}="Perl"; -$PG{"wublast.pl"}{"source"}="empty"; -$PG{"wublast.pl"}{"update_action"}="never"; -$PG{"wublast.pl"}{"mode"}="psicoffee,expresso,3dcoffee"; -$PG{"blastpgp.pl"}{"4_TCOFFEE"}="EBIBLASTPGPc"; -$PG{"blastpgp.pl"}{"type"}="protein_homology_predictor"; -$PG{"blastpgp.pl"}{"ADDRESS"}="built_in"; -$PG{"blastpgp.pl"}{"ADDRESS2"}="http://www.ebi.ac.uk/Tools/webservices/services/blastpgp"; -$PG{"blastpgp.pl"}{"language"}="Perl"; -$PG{"blastpgp.pl"}{"language2"}="Perl"; -$PG{"blastpgp.pl"}{"source"}="empty"; -$PG{"blastpgp.pl"}{"update_action"}="never"; -$PG{"blastpgp.pl"}{"mode"}="psicoffee,expresso,3dcoffee"; -$PG{"blastcl3"}{"4_TCOFFEE"}="NCBIWEBBLAST"; -$PG{"blastcl3"}{"type"}="protein_homology_predictor"; -$PG{"blastcl3"}{"ADDRESS"}="ftp://ftp.ncbi.nih.gov/blast/executables/LATEST"; -$PG{"blastcl3"}{"language"}="C"; -$PG{"blastcl3"}{"language2"}="C"; -$PG{"blastcl3"}{"source"}="empty"; -$PG{"blastcl3"}{"update_action"}="never"; -$PG{"blastcl3"}{"mode"}="psicoffee,expresso,3dcoffee"; -$PG{"blastpgp"}{"4_TCOFFEE"}="NCBIBLAST"; -$PG{"blastpgp"}{"type"}="protein_homology_predictor"; -$PG{"blastpgp"}{"ADDRESS"}="ftp://ftp.ncbi.nih.gov/blast/executables/LATEST"; -$PG{"blastpgp"}{"language"}="C"; -$PG{"blastpgp"}{"language2"}="C"; -$PG{"blastpgp"}{"source"}="empty"; -$PG{"blastpgp"}{"update_action"}="never"; -$PG{"blastpgp"}{"mode"}="psicoffee,expresso,3dcoffee"; -$PG{"SOAP::Lite"}{"4_TCOFFEE"}="SOAPLITE"; -$PG{"SOAP::Lite"}{"type"}="library"; -$PG{"SOAP::Lite"}{"ADDRESS"}="http://cpansearch.perl.org/src/MKUTTER/SOAP-Lite-0.710.08/Makefile.PL"; -$PG{"SOAP::Lite"}{"language"}="Perl"; -$PG{"SOAP::Lite"}{"language2"}="Perl"; -$PG{"SOAP::Lite"}{"source"}="empty"; -$PG{"SOAP::Lite"}{"mode"}="psicoffee,expresso,3dcoffee"; -$MODE{"tcoffee"}{"name"}="tcoffee"; -$MODE{"rcoffee"}{"name"}="rcoffee"; -$MODE{"3dcoffee"}{"name"}="3dcoffee"; -$MODE{"mcoffee"}{"name"}="mcoffee"; -$MODE{"expresso"}{"name"}="expresso"; -#TclinkdbEnd End tag for the list updating - -########### Compilers ############################## -# -# - -$PG{C}{compiler}="gcc"; -$PG{C}{compiler_flag}="CC"; -$PG{C}{options}=""; -$PG{C}{options_flag}="CFLAGS"; -$PG{C}{type}="compiler"; - -$PG{"CXX"}{compiler}="g++"; -$PG{"CXX"}{compiler_flag}="CXX"; -$PG{"CXX"}{options}=""; -$PG{"CXX"}{options_flag}="CXXFLAGS"; -$PG{CXX}{type}="compiler"; - -$PG{"CPP"}{compiler}="g++"; -$PG{"CPP"}{compiler_flag}="CPP"; -$PG{"CPP"}{options}=""; -$PG{"CPP"}{options_flag}="CPPFLAGS"; -$PG{CPP}{type}="compiler"; - -$PG{"GPP"}{compiler}="g++"; -$PG{"GPP"}{compiler_flag}="GPP"; -$PG{"GPP"}{options}=""; -$PG{"GPP"}{options_flag}="CFLAGS"; -$PG{GPP}{type}="compiler"; - -$PG{Fortran}{compiler}="g77"; -$PG{Fortran}{compiler_flag}="FCC"; -$PG{Fortran}{type}="compiler"; - -$PG{Perl}{compiler}="CPAN"; -$PG{Perl}{type}="compiler"; - -$SUPPORTED_OS{macox}="Macintosh"; -$SUPPORTED_OS{linux}="Linux"; -$SUPPORTED_OS{windows}="Cygwin"; - - - -$MODE{t_coffee}{description}=" for regular multiple sequence alignments"; -$MODE{rcoffee} {description}=" for RNA multiple sequence alignments"; - -$MODE{psicoffee} {description}=" for Homology Extended multiple sequence alignments"; -$MODE{expresso}{description}=" for very accurate structure based multiple sequence alignments"; -$MODE{"3dcoffee"}{description}=" for multiple structure alignments"; -$MODE{mcoffee} {description}=" for combining alternative multiple sequence alignment packages\n------- into a unique meta-package. The installer will upload several MSA packages and compile them\n -"; - - -&post_process_PG(); -return; -} - -sub post_process_PG - { - my $p; - - %PG=&name2dname (%PG); - %MODE=&name2dname(%MODE); - foreach $p (keys(%PG)){if ( $PG{$p}{type} eq "compiler"){$PG{$p}{update_action}="never";}} - - } - -sub name2dname - { - my (%L)=(@_); - my ($l, $ml); - - foreach my $pg (keys(%L)) - { - $l=length ($pg); - if ( $l>$ml){$ml=$l;} - } - $ml+=1; - foreach my $pg (keys(%L)) - { - my $name; - $l=$ml-length ($pg); - $name=$pg; - for ( $b=0; $b<$l; $b++) - { - $name .=" "; - } - $L{$pg}{dname}=$name; - } - return %L; - } - -sub env_file2putenv - { - my $f=@_[0]; - my $F=new FileHandle; - my $n; - - open ($F, "$f"); - while (<$F>) - { - my $line=$_; - my($var, $value)=($_=~/(\S+)\=(\S*)/); - $ENV{$var}=$value; - $ENV_SET{$var}=1; - $n++; - } - close ($F); - return $n; - } - diff --git a/binaries/src/tcoffee/license.txt b/binaries/src/tcoffee/license.txt deleted file mode 100644 index 3827054..0000000 --- a/binaries/src/tcoffee/license.txt +++ /dev/null @@ -1,348 +0,0 @@ -----------------------------------COPYRIGHT NOTICE---------------------------------/ - ACADEMIC LICENCE AGREEMENT - - © Centro de Regulacio Genomica and Cedric Notredame ( Tue Oct 27 10:12:26 WEST 2009). - GNU GENERAL PUBLIC LICENSE - Version 2, June 1991 - - Copyright (C) 1989, 1991 Free Software Foundation, Inc. - 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - Everyone is permitted to copy and distribute verbatim copies - of this license document, but changing it is not allowed. - - Preamble - - The licenses for most software are designed to take away your -freedom to share and change it. By contrast, the GNU General Public -License is intended to guarantee your freedom to share and change free -software--to make sure the software is free for all its users. This -General Public License applies to most of the Free Software -Foundation's software and to any other program whose authors commit to -using it. (Some other Free Software Foundation software is covered by -the GNU Library General Public License instead.) You can apply it to -your programs, too. - - When we speak of free software, we are referring to freedom, not -price. Our General Public Licenses are designed to make sure that you -have the freedom to distribute copies of free software (and charge for -this service if you wish), that you receive source code or can get it -if you want it, that you can change the software or use pieces of it -in new free programs; and that you know you can do these things. - - To protect your rights, we need to make restrictions that forbid -anyone to deny you these rights or to ask you to surrender the rights. -These restrictions translate to certain responsibilities for you if you -distribute copies of the software, or if you modify it. - - For example, if you distribute copies of such a program, whether -gratis or for a fee, you must give the recipients all the rights that -you have. You must make sure that they, too, receive or can get the -source code. And you must show them these terms so they know their -rights. - - We protect your rights with two steps: (1) copyright the software, and -(2) offer you this license which gives you legal permission to copy, -distribute and/or modify the software. - - Also, for each author's protection and ours, we want to make certain -that everyone understands that there is no warranty for this free -software. If the software is modified by someone else and passed on, we -want its recipients to know that what they have is not the original, so -that any problems introduced by others will not reflect on the original -authors' reputations. - - Finally, any free program is threatened constantly by software -patents. We wish to avoid the danger that redistributors of a free -program will individually obtain patent licenses, in effect making the -program proprietary. To prevent this, we have made it clear that any -patent must be licensed for everyone's free use or not licensed at all. - - The precise terms and conditions for copying, distribution and -modification follow. - - GNU GENERAL PUBLIC LICENSE - TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION - - 0. This License applies to any program or other work which contains -a notice placed by the copyright holder saying it may be distributed -under the terms of this General Public License. The "Program", below, -refers to any such program or work, and a "work based on the Program" -means either the Program or any derivative work under copyright law: -that is to say, a work containing the Program or a portion of it, -either verbatim or with modifications and/or translated into another -language. (Hereinafter, translation is included without limitation in -the term "modification".) Each licensee is addressed as "you". - -Activities other than copying, distribution and modification are not -covered by this License; they are outside its scope. The act of -running the Program is not restricted, and the output from the Program -is covered only if its contents constitute a work based on the -Program (independent of having been made by running the Program). -Whether that is true depends on what the Program does. - - 1. You may copy and distribute verbatim copies of the Program's -source code as you receive it, in any medium, provided that you -conspicuously and appropriately publish on each copy an appropriate -copyright notice and disclaimer of warranty; keep intact all the -notices that refer to this License and to the absence of any warranty; -and give any other recipients of the Program a copy of this License -along with the Program. - -You may charge a fee for the physical act of transferring a copy, and -you may at your option offer warranty protection in exchange for a fee. - - 2. You may modify your copy or copies of the Program or any portion -of it, thus forming a work based on the Program, and copy and -distribute such modifications or work under the terms of Section 1 -above, provided that you also meet all of these conditions: - - a) You must cause the modified files to carry prominent notices - stating that you changed the files and the date of any change. - - b) You must cause any work that you distribute or publish, that in - whole or in part contains or is derived from the Program or any - part thereof, to be licensed as a whole at no charge to all third - parties under the terms of this License. - - c) If the modified program normally reads commands interactively - when run, you must cause it, when started running for such - interactive use in the most ordinary way, to print or display an - announcement including an appropriate copyright notice and a - notice that there is no warranty (or else, saying that you provide - a warranty) and that users may redistribute the program under - these conditions, and telling the user how to view a copy of this - License. (Exception: if the Program itself is interactive but - does not normally print such an announcement, your work based on - the Program is not required to print an announcement.) - -These requirements apply to the modified work as a whole. If -identifiable sections of that work are not derived from the Program, -and can be reasonably considered independent and separate works in -themselves, then this License, and its terms, do not apply to those -sections when you distribute them as separate works. But when you -distribute the same sections as part of a whole which is a work based -on the Program, the distribution of the whole must be on the terms of -this License, whose permissions for other licensees extend to the -entire whole, and thus to each and every part regardless of who wrote it. - -Thus, it is not the intent of this section to claim rights or contest -your rights to work written entirely by you; rather, the intent is to -exercise the right to control the distribution of derivative or -collective works based on the Program. - -In addition, mere aggregation of another work not based on the Program -with the Program (or with a work based on the Program) on a volume of -a storage or distribution medium does not bring the other work under -the scope of this License. - - 3. You may copy and distribute the Program (or a work based on it, -under Section 2) in object code or executable form under the terms of -Sections 1 and 2 above provided that you also do one of the following: - - a) Accompany it with the complete corresponding machine-readable - source code, which must be distributed under the terms of Sections - 1 and 2 above on a medium customarily used for software interchange; or, - - b) Accompany it with a written offer, valid for at least three - years, to give any third party, for a charge no more than your - cost of physically performing source distribution, a complete - machine-readable copy of the corresponding source code, to be - distributed under the terms of Sections 1 and 2 above on a medium - customarily used for software interchange; or, - - c) Accompany it with the information you received as to the offer - to distribute corresponding source code. (This alternative is - allowed only for noncommercial distribution and only if you - received the program in object code or executable form with such - an offer, in accord with Subsection b above.) - -The source code for a work means the preferred form of the work for -making modifications to it. For an executable work, complete source -code means all the source code for all modules it contains, plus any -associated interface definition files, plus the scripts used to -control compilation and installation of the executable. However, as a -special exception, the source code distributed need not include -anything that is normally distributed (in either source or binary -form) with the major components (compiler, kernel, and so on) of the -operating system on which the executable runs, unless that component -itself accompanies the executable. - -If distribution of executable or object code is made by offering -access to copy from a designated place, then offering equivalent -access to copy the source code from the same place counts as -distribution of the source code, even though third parties are not -compelled to copy the source along with the object code. - - 4. You may not copy, modify, sublicense, or distribute the Program -except as expressly provided under this License. Any attempt -otherwise to copy, modify, sublicense or distribute the Program is -void, and will automatically terminate your rights under this License. -However, parties who have received copies, or rights, from you under -this License will not have their licenses terminated so long as such -parties remain in full compliance. - - 5. You are not required to accept this License, since you have not -signed it. However, nothing else grants you permission to modify or -distribute the Program or its derivative works. These actions are -prohibited by law if you do not accept this License. Therefore, by -modifying or distributing the Program (or any work based on the -Program), you indicate your acceptance of this License to do so, and -all its terms and conditions for copying, distributing or modifying -the Program or works based on it. - - 6. Each time you redistribute the Program (or any work based on the -Program), the recipient automatically receives a license from the -original licensor to copy, distribute or modify the Program subject to -these terms and conditions. You may not impose any further -restrictions on the recipients' exercise of the rights granted herein. -You are not responsible for enforcing compliance by third parties to -this License. - - 7. If, as a consequence of a court judgment or allegation of patent -infringement or for any other reason (not limited to patent issues), -conditions are imposed on you (whether by court order, agreement or -otherwise) that contradict the conditions of this License, they do not -excuse you from the conditions of this License. If you cannot -distribute so as to satisfy simultaneously your obligations under this -License and any other pertinent obligations, then as a consequence you -may not distribute the Program at all. For example, if a patent -license would not permit royalty-free redistribution of the Program by -all those who receive copies directly or indirectly through you, then -the only way you could satisfy both it and this License would be to -refrain entirely from distribution of the Program. - -If any portion of this section is held invalid or unenforceable under -any particular circumstance, the balance of the section is intended to -apply and the section as a whole is intended to apply in other -circumstances. - -It is not the purpose of this section to induce you to infringe any -patents or other property right claims or to contest validity of any -such claims; this section has the sole purpose of protecting the -integrity of the free software distribution system, which is -implemented by public license practices. Many people have made -generous contributions to the wide range of software distributed -through that system in reliance on consistent application of that -system; it is up to the author/donor to decide if he or she is willing -to distribute software through any other system and a licensee cannot -impose that choice. - -This section is intended to make thoroughly clear what is believed to -be a consequence of the rest of this License. - - 8. If the distribution and/or use of the Program is restricted in -certain countries either by patents or by copyrighted interfaces, the -original copyright holder who places the Program under this License -may add an explicit geographical distribution limitation excluding -those countries, so that distribution is permitted only in or among -countries not thus excluded. In such case, this License incorporates -the limitation as if written in the body of this License. - - 9. The Free Software Foundation may publish revised and/or new versions -of the General Public License from time to time. Such new versions will -be similar in spirit to the present version, but may differ in detail to -address new problems or concerns. - -Each version is given a distinguishing version number. If the Program -specifies a version number of this License which applies to it and "any -later version", you have the option of following the terms and conditions -either of that version or of any later version published by the Free -Software Foundation. If the Program does not specify a version number of -this License, you may choose any version ever published by the Free Software -Foundation. - - 10. If you wish to incorporate parts of the Program into other free -programs whose distribution conditions are different, write to the author -to ask for permission. For software which is copyrighted by the Free -Software Foundation, write to the Free Software Foundation; we sometimes -make exceptions for this. Our decision will be guided by the two goals -of preserving the free status of all derivatives of our free software and -of promoting the sharing and reuse of software generally. - - NO WARRANTY - - 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY -FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN -OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES -PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED -OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF -MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS -TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE -PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, -REPAIR OR CORRECTION. - - 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING -WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR -REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, -INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING -OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED -TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY -YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER -PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE -POSSIBILITY OF SUCH DAMAGES. - - END OF TERMS AND CONDITIONS - - How to Apply These Terms to Your New Programs - - If you develop a new program, and you want it to be of the greatest -possible use to the public, the best way to achieve this is to make it -free software which everyone can redistribute and change under these terms. - - To do so, attach the following notices to the program. It is safest -to attach them to the start of each source file to most effectively -convey the exclusion of warranty; and each file should have at least -the "copyright" line and a pointer to where the full notice is found. - - - Copyright (C) - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - - -Also add information on how to contact you by electronic and paper mail. - -If the program is interactive, make it output a short notice like this -when it starts in an interactive mode: - - Gnomovision version 69, Copyright (C) year name of author - Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. - This is free software, and you are welcome to redistribute it - under certain conditions; type `show c' for details. - -The hypothetical commands `show w' and `show c' should show the appropriate -parts of the General Public License. Of course, the commands you use may -be called something other than `show w' and `show c'; they could even be -mouse-clicks or menu items--whatever suits your program. - -You should also get your employer (if you work as a programmer) or your -school, if any, to sign a "copyright disclaimer" for the program, if -necessary. Here is a sample; alter the names: - - Yoyodyne, Inc., hereby disclaims all copyright interest in the program - `Gnomovision' (which makes passes at compilers) written by James Hacker. - - , 1 April 1989 - Ty Coon, President of Vice - -This General Public License does not permit incorporating your program into -proprietary programs. If your program is a subroutine library, you may -consider it more useful to permit linking proprietary applications with the -library. If this is what you want to do, use the GNU Library General -Public License instead of this License. - - - -----------------------------------COPYRIGHT NOTICE---------------------------------/ diff --git a/binaries/src/tcoffee/t_coffee_source/CUSTOM_evaluate_for_struc.c b/binaries/src/tcoffee/t_coffee_source/CUSTOM_evaluate_for_struc.c deleted file mode 100644 index 62e3312..0000000 --- a/binaries/src/tcoffee/t_coffee_source/CUSTOM_evaluate_for_struc.c +++ /dev/null @@ -1,460 +0,0 @@ -#include -#include -#include -#include - - -#include "io_lib_header.h" -#include "util_lib_header.h" -#include "define_header.h" - -#include "dp_lib_header.h" - - -/* - 23/06/00, Cedric Notredame - - - 1-Content of the data structures. - 2-Implementing your own function in pdb_align. - 3-Using that function with T-Coffee (multiple Sequence Alignment). - 4-Syntax rules as defined by Philipp Bucher (19/06/00). - 5-Current Shortcomings - 6-Enquiries. - - 1-Content of the data structures - - This file only contains a dummy function to help you create - your own matching potential function (Step 2 in the Notations RULES - - int evaluate_match_score ( Constraint_list *CL, int A, int i, int B, int j) - - returns a score, expected to be between -100 and 100, that corresponds to the matching of - A_i with B_j. - - Most needed parameters are included in the data structure CL, - This Data Structure is declared in util_constraint_list.h - The following, non exhaustive list explains the most common parameters - - The neighborhood is computed using: - ((CL->T[A])->pdb_param)->maximum_distance as a radius for the Bubble - ((CL->T[A])->pdb_param)->n_excluded_nb are excluded around the central residue - i.e i-1 and i+1 for n_excluded_nb=1. - - - ((CL->T[A])->Bubble)->nb[i][0] --> Number of residues in the bubble around A_i - ((CL->T[A])->Bubble)->nb[i][k]=j --> Index of the kth residue in the bubble - Residues are sorted according to the Ca chain - ((CL->T[A])->Bubble)->d_nb[i][k]=d --> Distance between A_i and A_j equals d; - - ((CL->T[A])->ca[i]->x -----------> Coordinates of the Ca A_i - ((CL->T[A])->ca[i]->y - ((CL->T[A])->ca[i]->z - - - - ((CL->T[A])->len -----------> Length of Chain A. - ((CL->T[A])->n_atom -----------> n atoms in A. - - - Unspecified parameters can be passed from the command line: - - align_pdb -extra_parameters=10, 10.3, 11, 12.4, my_file - - The values of these parameters can be accessed in: - - ((CL->T[A])->pdb_param)->n_extra_param=4 - ((CL->T[A])->pdb_param)->extra_param[0]="10" - ((CL->T[A])->pdb_param)->extra_param[1]="10.3" - ((CL->T[A])->pdb_param)->extra_param[2]="11.6" - ((CL->T[A])->pdb_param)->extra_param[3]="my_file" - - These parameters contain strings! To get the real values, in C, use atoi and atof: - atoi ( ((CL->T[A])->pdb_param)->extra_param[0])=10; - atof ( ((CL->T[A])->pdb_param)->extra_param[1])=10.3; - - The maximum number of parameters is currently 1000... - - - - 2-Implementing your own function - - all you need to do is to edit this file and recompile align_pdb. - There is no need to prototype any function. - - 10 functions holders exist, that correspond to the 10 dummy functions - declared in this file: - custom_pair_score_function1 - custom_pair_score_function2 - custom_pair_score_function3 - custom_pair_score_function4 - ..... - custom_pair_score_function10 - - Let us imagine, you want to use custom_pair_function1. - - 1-In CUSTOM_evaluate_for_struc.c, modify custom_pair_function1, - so that it computes the score you need. - - 2-If you need extra parameters, get them from ((CL->T[A])->pdb_param)->extra_param. - 3-Recompile pdb_align: - -put it in your bin - -rehash or whatever - - 4-run the program as follows: - - align_pdb -in -hasch_mode=custom_pair_score_function1 - -extra_param=10, 12, 0.4, matrix... - - 5-My advice for a first time: make a very simple dummy function that spits - out the content of extra_param. - - 6-Remember it is your responsability to control the number of extra parameters - and their proper order, and type. Do not forget to apply atoi and atof to the parameters - - 7-Remember that the modifications you made to CUSTOM_evaluate_for_sytructure - must be preserved by you!!! They may disappear if you update align_pdb, save them - appart as your own patch. - - - - -3-Using that function with T-Coffee (multiple Sequence Alignment). - - 1- setenv ALIGN_PDB_4_TCOFFEE - - 2- run t_coffee - To do so, you will NOT NEED to recompile T-Coffee, simply type: - t_coffee -in ... custom1_align_pdb_pair - - - -4-Syntax rules as defined by Philipp Bucher (19/06/00). - - Proposed ascii text notation for align_pdb - - First, let us summarize the align pdb algorithm in plain - english: - - Given are two protein structures A and B. - - Step 1: For each residue in each structure extract - the local structural neighbourhood. A neighbourhood - is simply a subset of (usually non-consecutive) - residues from one of the structures. - - Step 2: For all possible pairs of residues between structures - A and B, compute the optimal neighbourhood alignment - score. This score, which is also referred to as - local neighbourhood similarity (LNS) score indicates - whether two residues have similar local stuctural - environemnts. - - Step 3: Generate one (or multiple) optimal structural alignment(s) - for A and B based on LNS scores plus some gap penalty - function. - - Now, some rules for ascii/email notation: - - - Whenever possible use a style which fits on one line (because it - is painful to modify formulas that span over several lines). Example: - - Use: ( a**2 + b**2 )**0.5 - ________ - | 2 2 - instead of: \| a + b - - Introduce local variables/functions to split long expressions over - several lines, e.g. - - Score = Sum(0 Step 1: For each residue in each structure, extract - > the local structural neighbourhood. A neighbourhood - > is simply a subset of (usually non-consecutive) - > residues from the same structure. - - The result is something like: - - P(i) = P_1(i) .. P_k(i) .. P_K_i(i) - Q(i) = Q_1(j) .. Q_l(j) .. Q_L_j(j) - - These are all ordered integer arrays. The P's and Q's indicate - residue positions in sequence space. For the C-alpha coordinates, - we use: - - C(i) = C_1(i) .. C_k(i) .. C_K_i(i) - D(i) = D_1(j) .. D_l(j) .. D_L_i(j) - - > Step 2: For all possible pairs of residues between structures - > A and B, compute the optimal neighbourhood alignment - > score. This score, which is also referred to as - > local neighbourhood similarity (NSL) score indicates - > whether two residues have similar local stuctural - > environemnts. - - We have to define a similarity score: - - S(i,j) = function[A,B,P(i),Q(j)] - - More specifically, S(i,j) is the score of an opimal alignment between - two subsets of C-alpha coordinates from A and B, defined by P(i) and Q(j). - We use the following notation for an alignment between two neighbourhoods. - - R = (k_1,l_1) .. (k_m,l_m) .. (k_M, l_M) - - This is pretty abstract and requires some explanation. - - The alignment consists of M pairs of residues from two neighbourhoods. - The paired residues are numbered 1,2...K and 1,2...L, respectively. - Obviously M <= K,L. For K=9 and L=7, a possible alignment would - look as follows: - - R = (1,2) , (2,3) , (5,4) , (6,5) , (9,7) - - This alignment consists of five paired residues, the first - residue of neighbourhood P(i) is aligned with with the second residue - of Q(j), etc. - - The score of an alignment Z(R) is a function that can be - defined in many different ways. But independently of its - definition: - - S(i,j) = Z(R*,A,B,P(i),Q(j)) - R* = argmax Z(R,A,B,P(i),Q(j)) - - This is just a complicated way of saying that the LNS score - S(i,j) is an optimal alignment score. A simple alignment - scoring function would be: - - Z = Sum(m=1..M) [ 2 - |C_(k_m) - D_l_m)| ] - - A more complex function could be the sum of the sums of "pair-weights", - "pair-connection-weights", and unpaired-residue-weights": - - Z = Sum(m=1 .. M) [ PW (i,P_k_m,Q_l_m,C_k_m, D_l_m) ] - + Sum(m=2 .. M ) [ PCW(j,P_k_m,P_l_m,Q_k_m-1,Q_l_m-1,C_k_m,D_l_m,C_k_m-1,D_j_m-1 ] - + Sum(over k for all C_i(k) unpaired) UPRW [P_k, C_k ] - + Sum(over l for all C_i(l) unpaired) UPRW [Q_l, D_l)) ] - - Here, the terms P_k_m ... denote sequence positions, the terms C_k_m ... - denote coordinates. i and j, the sequence position of the center residues - of the neighbourhoods under consideration) are included in the argument - lists of the functions because they are necessary to decide whether - a residue A_k_m occurs before or after the residue A_i in sequence space. - We don not want to align a residue A_k_m that occurs before A_i with - a residue B_j_l that occurs after B_j and vice-versa. - - The LNS score could also be defined by a recursive equation system - defining a dynamic programming algorithm. However, I find the - above formulation more helpful for designing appropriate alignment - scoring functions. - - > Step 3: Generate one (or multiple) optimal structural alignment(s)r - > for A and B based on NLS scores plus some gap penalty - > function. - - This is now pretty simple. We use essentially the same notation as - for the neighbourhood alignments. - - R = (i_1,j_1) .. (i_n,j_n) .. (i_N, j_N) - - X* = X(R*,A,B) - R* = argmax X(R,A,B) - - The alignment scoring functing X is the sum of the LNS scores - of the pairs minus some gap penalties. - - -5-Current Shortcomings - - At present, it is impossible to use the extra_param flag with T-Coffee. This means that - the actual values of your parameters must be HARD-CODED within the custom_pair_score_function - you are using. - - On request, I will implement a solution to solve that problem. - -6-Contact - For any enquiry, please send a mail to: - cedric.notredame@europe.com - */ - - - - - - - - -int custom_pair_score_function1 (Constraint_list *CL, int s1, int r1, int s2, int r2) - { - int score=0; - int a; - FILE *fp; - - fp=vfopen ( "test_file", "w"); - for ( a=0; a< ((CL->T[0])->pdb_param)->n_extra_param; a++) - fprintf (fp, "\n\t%s", ((CL->T[0])->pdb_param)->extra_param[a]); - - fprintf ( fp, "\nTEST OK"); - vfclose ( fp); - exit (1); - - return score; - - } -int custom_pair_score_function2 (Constraint_list *CL, int s1, int r1, int s2, int r2) - { - int score=0; - - return score; - - } -int custom_pair_score_function3 (Constraint_list *CL, int s1, int r1, int s2, int r2) - { - int score=0; - - return score; - - } -int custom_pair_score_function4 (Constraint_list *CL, int s1, int r1, int s2, int r2) - { - int score=0; - - return score; - - } -int custom_pair_score_function5 (Constraint_list *CL, int s1, int r1, int s2, int r2) - { - int score=0; - - return score; - - } -int custom_pair_score_function6 (Constraint_list *CL, int s1, int r1, int s2, int r2) - { - int score=0; - - return score; - - } -int custom_pair_score_function7 (Constraint_list *CL, int s1, int r1, int s2, int r2) - { - int score=0; - - return score; - - } -int custom_pair_score_function8 (Constraint_list *CL, int s1, int r1, int s2, int r2) - { - int score=0; - - return score; - - } - -int custom_pair_score_function9 (Constraint_list *CL, int s1, int r1, int s2, int r2) - { - int score=0; - - return score; - - } -int custom_pair_score_function10 (Constraint_list *CL, int s1, int r1, int s2, int r2) - { - int score=0; - - return score; - - } -/*********************************COPYRIGHT NOTICE**********************************/ -/*© Centro de Regulacio Genomica */ -/*and */ -/*Cedric Notredame */ -/*Tue Oct 27 10:12:26 WEST 2009. */ -/*All rights reserved.*/ -/*This file is part of T-COFFEE.*/ -/**/ -/* T-COFFEE is free software; you can redistribute it and/or modify*/ -/* it under the terms of the GNU General Public License as published by*/ -/* the Free Software Foundation; either version 2 of the License, or*/ -/* (at your option) any later version.*/ -/**/ -/* T-COFFEE is distributed in the hope that it will be useful,*/ -/* but WITHOUT ANY WARRANTY; without even the implied warranty of*/ -/* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the*/ -/* GNU General Public License for more details.*/ -/**/ -/* You should have received a copy of the GNU General Public License*/ -/* along with Foobar; if not, write to the Free Software*/ -/* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA*/ -/*............................................... |*/ -/* If you need some more information*/ -/* cedric.notredame@europe.com*/ -/*............................................... |*/ -/**/ -/**/ -/* */ -/*********************************COPYRIGHT NOTICE**********************************/ diff --git a/binaries/src/tcoffee/t_coffee_source/TMalign.f b/binaries/src/tcoffee/t_coffee_source/TMalign.f deleted file mode 100644 index 5920db1..0000000 --- a/binaries/src/tcoffee/t_coffee_source/TMalign.f +++ /dev/null @@ -1,2554 +0,0 @@ -************************************************************************* -* This program is to identify the best alignment of two protein -* structures to give the best TM-score. By default, TM-score is -* normalized by the second protein. The program can be freely -* copied or modified or redistributed. -* (For comments, please email to: yzhang@ku.edu) -* -* Reference: -* Yang Zhang, Jeffrey Skolnick, Nucl. Acid Res. 2005 33: 2303-9 -* -************************ updating history ******************************* -* 2005/06/01: A small bug of two-point superposition was fixed. -* 2005/10/19: the program was reformed so that the alignment -* results are not dependent on the specific compilers. -* 2006/06/20: select 'A' if there is altLoc when reading PDB file. -* 2007/02/27: rotation matrix from Chain-1 to Chain-2 is added. -* 2007/04/18: add options with TM-score normalized by average -* length, shorter length, or longer length of two -* structures. -* 2007/05/23: add additional output file 'TM.sup_all' for showing -* all atoms while 'TM.sup' is only for aligned atoms -* 2007/09/19: add a new feature alignment to deal with the problem -* of aligning fractional structures (e.g. protein -* interfaces). -************************************************************************* - - program compares - PARAMETER(nmax=5000) - PARAMETER(nmax2=10000) - - COMMON/BACKBONE/XA(3,nmax,0:1) - common/dpc/score(nmax,nmax),gap_open,invmap(nmax) - common/alignrst/invmap0(nmax) - common/length/nseq1,nseq2 - common/d0/d0,anseq - common/d0min/d0_min - common/d00/d00,d002 - - character*100 fnam,pdb(100),outname - character*3 aa(-1:20),aanam,ss1(nmax),ss2(nmax) - character*100 s,du - character*200 outnameall_tmp,outnameall - character seq1(0:nmax),seq2(0:nmax) - character aseq1(nmax2),aseq2(nmax2),aseq3(nmax2) - - dimension xx(nmax),yy(nmax),zz(nmax) - dimension m1(nmax),m2(nmax) - dimension xtm1(nmax),ytm1(nmax),ztm1(nmax) - dimension xtm2(nmax),ytm2(nmax),ztm2(nmax) - common/init/invmap_i(nmax) - - common/TM/TM,TMmax - common/n1n2/n1(nmax),n2(nmax) - common/d8/d8 - common/initial4/mm1(nmax),mm2(nmax) - -ccc RMSD: - double precision r_1(3,nmax),r_2(3,nmax),r_3(3,nmax),w(nmax) - double precision u(3,3),t(3),rms,drms !armsd is real - data w /nmax*1.0/ -ccc - - data aa/ 'BCK','GLY','ALA','SER','CYS','VAL','THR','ILE', - & 'PRO','MET','ASP','ASN','LEU', - & 'LYS','GLU','GLN','ARG', - & 'HIS','PHE','TYR','TRP','CYX'/ - character*1 slc(-1:20) - data slc/'X','G','A','S','C','V','T','I', - & 'P','M','D','N','L','K','E','Q','R', - & 'H','F','Y','W','C'/ - - call getarg(1,fnam) - if(fnam.eq.' '.or.fnam.eq.'?'.or.fnam.eq.'-h')then - write(*,*) - write(*,*)'Brief instruction for running TM-align program:' - write(*,*)'(For detail: Zhang & Skolnick, Nucl. Acid Res.', - & '2005 33, 2303)' - write(*,*) - write(*,*)'1. Align ''structure.pdb'' to ''target.pdb''' - write(*,*)' (By default, TM-score is normalized by the ', - & 'length of ''target.pdb'')' - write(*,*)' >TMalign structure.pdb target.pdb' - write(*,*) - write(*,*)'2. Run TM-align and output the superposition ', - & 'to ''TM.sup'' and ''TM.sup_all'':' - write(*,*)' >TMalign structure.pdb target.pdb -o TM.sup' - write(*,*)' To view the superimposed structures of the', - & ' aligned regions by rasmol:' - write(*,*)' >rasmol -script TM.sup)' - write(*,*)' To view the superimposed structures of all', - & ' regions by rasmol:' - write(*,*)' >rasmol -script TM.sup_all)' - write(*,*) - write(*,*)'3. If you want TM-score normalized by ', - & 'an assigned length, e.g. 100 aa:' - write(*,*)' >TMalign structure.pdb target.pdb -L 100' - write(*,*)' If you want TM-score normalized by the ', - & 'average length of two structures:' - write(*,*)' >TMalign structure.pdb target.pdb -a' - write(*,*)' If you want TM-score normalized by the ', - & 'shorter length of two structures:' - write(*,*)' >TMalign structure.pdb target.pdb -b' - write(*,*)' If you want TM-score normalized by the ', - & 'longer length of two structures:' - write(*,*)' >TMalign structure.pdb target.pdb -c' - write(*,*) -c write(*,*)'5. If you want to set a minimum cutoff for d0, ', -c & 'e.g. d0>3.0' -c write(*,*)' (By default d0>0.5, this option need ', -c & 'be considered only when L<35 aa)' -c write(*,*)' >TMalign structure.pdb target.pdb -dmin 3.0' -c write(*,*) - write(*,*)'(All above options does not change the ', - & 'final structure alignment result)' - write(*,*) - goto 9999 - endif - -******* options -----------> - m_out=-1 !decided output - m_fix=-1 !fixed length-scale only for output - m_ave=-1 !using average length - m_d0_min=-1 !diminum d0 for search - m_d0=-1 !given d0 for both search and output - narg=iargc() - i=0 - j=0 - 115 continue - i=i+1 - call getarg(i,fnam) - if(fnam.eq.'-o')then - m_out=1 - i=i+1 - call getarg(i,outname) - elseif(fnam.eq.'-L')then !change both L_all and d0 - m_fix=1 - i=i+1 - call getarg(i,fnam) - read(fnam,*)L_fix - elseif(fnam.eq.'-dmin')then - m_d0_min=1 - i=i+1 - call getarg(i,fnam) - read(fnam,*)d0_min_input - elseif(fnam.eq.'-d0')then - m_d0=1 - i=i+1 - call getarg(i,fnam) - read(fnam,*)d0_fix - elseif(fnam.eq.'-a')then ! this will change the superposed output but not the alignment - m_ave=1 - i=i+1 - elseif(fnam.eq.'-b')then - m_ave=2 - i=i+1 - elseif(fnam.eq.'-c')then - m_ave=3 - i=i+1 - else - j=j+1 - pdb(j)=fnam - endif - if(i.lt.narg)goto 115 - -ccccccccc read data from first CA file: - open(unit=10,file=pdb(1),status='old') - i=0 - do while (.true.) - read(10,9001,end=1010) s - if(i.gt.0.and.s(1:3).eq.'TER')goto 1010 - if(s(1:3).eq.'ATO')then - if(s(13:16).eq.'CA '.or.s(13:16).eq.' CA '.or - & .s(13:16).eq.' CA')then - if(s(17:17).eq.' '.or.s(17:17).eq.'A')then - i=i+1 - read(s,9000)du,aanam,du,mm1(i),du, - $ xa(1,i,0),xa(2,i,0),xa(3,i,0) - do j=-1,20 - if(aanam.eq.aa(j))seq1(i)=slc(j) - enddo - ss1(i)=aanam - if(i.ge.nmax)goto 1010 - endif - endif - endif - enddo - 1010 continue - 9000 format(A17,A3,A2,i4,A4,3F8.3) - 9001 format(A100) - close(10) - nseq1=i -c^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -ccccccccc read data from the second CA file: - open(unit=10,file=pdb(2),status='old') - i=0 - do while (.true.) - read(10,9001,end=1011) s - if(i.gt.0.and.s(1:3).eq.'TER')goto 1011 - if(s(1:3).eq.'ATO')then - if(s(13:16).eq.'CA '.or.s(13:16).eq.' CA '.or. - & s(13:16).eq.' CA')then - if(s(17:17).eq.' '.or.s(17:17).eq.'A')then - i=i+1 - read(s,9000)du,aanam,du,mm2(i),du, - $ xa(1,i,1),xa(2,i,1),xa(3,i,1) - do j=-1,20 - if(aanam.eq.aa(j))seq2(i)=slc(j) - enddo - ss2(i)=aanam - if(i.ge.nmax)goto 1011 - endif - endif - endif - enddo - 1011 continue - close(10) - nseq2=i -c^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -*!!! Scale of TM-score in search is based on the smaller protein ---------> - d0_min=0.5 - if(m_d0_min.eq.1)then - d0_min=d0_min_input !for search - endif - anseq_min=min(nseq1,nseq2) - anseq=anseq_min !length for defining TMscore in search - d8=1.5*anseq_min**0.3+3.5 !remove pairs with dis>d8 during search & final - if(anseq.gt.15)then - d0=1.24*(anseq-15)**(1.0/3.0)-1.8 !scale for defining TM-score - else - d0=d0_min - endif - if(d0.lt.d0_min)d0=d0_min - if(m_d0.eq.1)d0=d0_fix - d00=d0 !for quickly calculate TM-score in searching - if(d00.gt.8)d00=8 - if(d00.lt.4.5)d00=4.5 - d002=d00**2 - nseq=max(nseq1,nseq2) - do i=1,nseq - n1(i)=i - n2(i)=i - enddo - -***** do alignment ************************** - CALL super_align !to find invmap(j) - -************************************************************ -*** resuperpose to find residues of dis - n_al=0 - do j=1,nseq2 - if(invmap0(j).gt.0)then - i=invmap0(j) - n_al=n_al+1 - xtm1(n_al)=xa(1,i,0) - ytm1(n_al)=xa(2,i,0) - ztm1(n_al)=xa(3,i,0) - xtm2(n_al)=xa(1,j,1) - ytm2(n_al)=xa(2,j,1) - ztm2(n_al)=xa(3,j,1) - m1(n_al)=i !for recording residue order - m2(n_al)=j - endif - enddo - d0_input=d0 - call TMscore8(d0_input,n_al,xtm1,ytm1,ztm1,n1,n_al, - & xtm2,ytm2,ztm2,n2,TM,Rcomm,Lcomm) !TM-score with dis - d0_min=0.5 !for output - anseq=nseq2 !length for defining final TMscore - if(m_ave.eq.1)anseq=(nseq1+nseq2)/2.0 ! - if(m_ave.eq.2)anseq=min(nseq1,nseq2) - if(m_ave.eq.3)anseq=max(nseq1,nseq2) - if(anseq.lt.anseq_min)anseq=anseq_min - if(m_fix.eq.1)anseq=L_fix !input length - if(anseq.gt.15)then - d0=1.24*(anseq-15)**(1.0/3.0)-1.8 !scale for defining TM-score - else - d0=d0_min - endif - if(d0.lt.d0_min)d0=d0_min - if(m_d0.eq.1)d0=d0_fix - -*** remove dis>d8 in normal TM-score calculation for final report-----> - j=0 - n_eq=0 - do i=1,n_al - dis2=sqrt((xtm1(i)-xtm2(i))**2+(ytm1(i)-ytm2(i))**2+ - & (ztm1(i)-ztm2(i))**2) - if(dis2.le.d8)then - j=j+1 - xtm1(j)=xtm1(i) - ytm1(j)=ytm1(i) - ztm1(j)=ztm1(i) - xtm2(j)=xtm2(i) - ytm2(j)=ytm2(i) - ztm2(j)=ztm2(i) - m1(j)=m1(i) - m2(j)=m2(i) - if(ss1(m1(i)).eq.ss2(m2(i)))then - n_eq=n_eq+1 - endif - endif - enddo - seq_id=float(n_eq)/(n_al+0.00000001) - n8_al=j - d0_input=d0 - call TMscore(d0_input,n8_al,xtm1,ytm1,ztm1,n1,n8_al, - & xtm2,ytm2,ztm2,n2,TM8,Rcomm,Lcomm) !normal TMscore - rmsd8_al=Rcomm - TM8=TM8*n8_al/anseq !TM-score after cutoff - -********* for output summary ****************************** - write(*,*) - write(*,*)'*****************************************************', - & '*********************' - write(*,*)'* TM-align ', - & ' *' - write(*,*)'* A protein structural alignment algorithm based on T', - & 'M-score *' - write(*,*)'* Reference: Y. Zhang and J. Skolnick, Nucl. Acids Re', - & 's. 2005 33, 2302-9 *' - write(*,*)'* Comments on the program, please email to: yzhang@ku', - & '.edu *' - write(*,*)'*****************************************************', - & '*********************' - write(*,*) - write(*,101)pdb(1),nseq1 - 101 format('Chain 1:',A10,' Size=',I4) - write(*,102)pdb(2),nseq2,int(anseq) - 102 format('Chain 2:',A10,' Size=',I4, - & ' (TM-score is normalized by ',I4,')') - write(*,*) - write(*,103)n8_al,rmsd8_al,TM8,seq_id - 103 format('Aligned length=',I4,', RMSD=',f6.2, - & ', TM-score=',f7.5,', ID=',f5.3) - write(*,*) - -********* extract rotation matrix ------------> - L=0 - do i=1,n8_al - k=m1(i) - L=L+1 - r_1(1,L)=xa(1,k,0) - r_1(2,L)=xa(2,k,0) - r_1(3,L)=xa(3,k,0) - r_2(1,L)=xtm1(i) - r_2(2,L)=ytm1(i) - r_2(3,L)=ztm1(i) - enddo - if(L.gt.3)then - call u3b(w,r_1,r_2,L,1,rms,u,t,ier) !u rotate r_1 to r_2 - armsd=dsqrt(rms/L) - write(*,*)'-------- rotation matrix to rotate Chain-1 to ', - & 'Chain-2 ------' - write(*,*)'i t(i) u(i,1) u(i,2) ', - & ' u(i,3)' - do i=1,3 - write(*,204)i,t(i),u(i,1),u(i,2),u(i,3) - enddo -c do i=1,nseq1 -c ax=t(1)+u(1,1)*xa(1,i,0)+u(1,2)*xa(2,i,0)+u(1,3)*xa(3,i,0) -c ay=t(2)+u(2,1)*xa(1,i,0)+u(2,2)*xa(2,i,0)+u(2,3)*xa(3,i,0) -c az=t(3)+u(3,1)*xa(1,i,0)+u(3,2)*xa(2,i,0)+u(3,3)*xa(3,i,0) -c enddo - write(*,*) - endif - 204 format(I2,f18.10,f15.10,f15.10,f15.10) - -********* for output superposition ****************************** - if(m_out.eq.1)then - 1237 format('ATOM ',i5,' CA ',A3,I6,4X,3F8.3) - 1238 format('TER') - 1239 format('CONECT',I5,I5) - 900 format(A) - 901 format('select atomno=',I4) - 104 format('REMARK Chain 1:',A10,' Size=',I4) - 105 format('REMARK Chain 2:',A10,' Size=',I4, - & ' (TM-score is normalized by ',I4,')') - 106 format('REMARK Aligned length=',I4,', RMSD=',f6.2, - & ', TM-score=',f7.5,', ID=',f5.3) - OPEN(unit=7,file=outname,status='unknown') !pdb1.aln + pdb2.aln -*** script: - write(7,900)'load inline' - write(7,900)'select atomno<2000' - write(7,900)'wireframe .45' - write(7,900)'select none' - write(7,900)'select atomno>2000' - write(7,900)'wireframe .20' - write(7,900)'color white' - do i=1,n8_al - dis2=sqrt((xtm1(i)-xtm2(i))**2+ - & (ytm1(i)-ytm2(i))**2+(ztm1(i)-ztm2(i))**2) - if(dis2.le.5)then - write(7,901)m1(i) - write(7,900)'color red' - write(7,901)2000+m2(i) - write(7,900)'color red' - endif - enddo - write(7,900)'select all' - write(7,900)'exit' - write(7,104)pdb(1),nseq1 - write(7,105)pdb(2),nseq2,int(anseq) - write(7,106)n8_al,rmsd8_al,TM8,seq_id -*** chain1: - do i=1,n8_al - write(7,1237)m1(i),ss1(m1(i)),mm1(m1(i)), - & xtm1(i),ytm1(i),ztm1(i) - enddo - write(7,1238) !TER - do i=2,n8_al - write(7,1239)m1(i-1),m1(i) !connect atoms - enddo -*** chain2: - do i=1,n8_al - write(7,1237)2000+m2(i),ss2(m2(i)),mm2(m2(i)), - $ xtm2(i),ytm2(i),ztm2(i) - enddo - write(7,1238) - do i=2,n8_al - write(7,1239)2000+m2(i-1),2000+m2(i) - enddo - close(7) -ccc - k=0 - outnameall_tmp=outname//'_all' - outnameall='' - do i=1,200 - if(outnameall_tmp(i:i).ne.' ')then - k=k+1 - outnameall(k:k)=outnameall_tmp(i:i) - endif - enddo - OPEN(unit=8,file=outnameall,status='unknown') !pdb1.aln + pdb2.aln -*** script: - write(8,900)'load inline' - write(8,900)'select atomno<2000' - write(8,900)'wireframe .45' - write(8,900)'select none' - write(8,900)'select atomno>2000' - write(8,900)'wireframe .20' - write(8,900)'color white' - do i=1,n8_al - dis2=sqrt((xtm1(i)-xtm2(i))**2+ - & (ytm1(i)-ytm2(i))**2+(ztm1(i)-ztm2(i))**2) - if(dis2.le.5)then - write(8,901)m1(i) - write(8,900)'color red' - write(8,901)2000+m2(i) - write(8,900)'color red' - endif - enddo - write(8,900)'select all' - write(8,900)'exit' - write(8,104)pdb(1),nseq1 - write(8,105)pdb(2),nseq2,int(anseq) - write(8,106)n8_al,rmsd8_al,TM8,seq_id -*** chain1: - do i=1,nseq1 - ax=t(1)+u(1,1)*xa(1,i,0)+u(1,2)*xa(2,i,0)+u(1,3)*xa(3,i,0) - ay=t(2)+u(2,1)*xa(1,i,0)+u(2,2)*xa(2,i,0)+u(2,3)*xa(3,i,0) - az=t(3)+u(3,1)*xa(1,i,0)+u(3,2)*xa(2,i,0)+u(3,3)*xa(3,i,0) - write(8,1237)i,ss1(i),mm1(i),ax,ay,az - enddo - write(8,1238) !TER - do i=2,nseq1 - write(8,1239)i-1,i - enddo -*** chain2: - do i=1,nseq2 - write(8,1237)2000+i,ss2(i),mm2(i), - $ xa(1,i,1),xa(2,i,1),xa(3,i,1) - enddo - write(8,1238) - do i=2,nseq2 - write(8,1239)2000+i-1,2000+i - enddo - close(8) - endif -*^^^^^^^^^^^^^^^^^^ output finished ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -************ output aligned sequences ************************** - ii=0 - i1_old=1 - i2_old=1 - do i=1,n8_al - do j=i1_old,m1(i)-1 - ii=ii+1 - aseq1(ii)=seq1(j) - aseq2(ii)='-' - aseq3(ii)=' ' - enddo - do j=i2_old,m2(i)-1 - ii=ii+1 - aseq1(ii)='-' - aseq2(ii)=seq2(j) - aseq3(ii)=' ' - enddo - ii=ii+1 - aseq1(ii)=seq1(m1(i)) - aseq2(ii)=seq2(m2(i)) - dis2=sqrt((xtm1(i)-xtm2(i))**2+ - & (ytm1(i)-ytm2(i))**2+(ztm1(i)-ztm2(i))**2) - if(dis2.le.5)then - aseq3(ii)=':' - else - aseq3(ii)='.' - endif - i1_old=m1(i)+1 - i2_old=m2(i)+1 - enddo - do i=i1_old,nseq1 - ii=ii+1 - aseq1(ii)=seq1(i) - aseq2(ii)='-' - aseq3(ii)=' ' - enddo - do i=i2_old,nseq2 - ii=ii+1 - aseq1(ii)='-' - aseq2(ii)=seq2(i) - aseq3(ii)=' ' - enddo - write(*,50) - 50 format('(":" denotes the residue pairs of distance < 5.0 ', - & 'Angstrom)') - write(*,10)(aseq1(i),i=1,ii) - write(*,10)(aseq3(i),i=1,ii) - write(*,10)(aseq2(i),i=1,ii) - 10 format(10000A1) - write(*,*) - -c^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - 9999 END - -*********************************************************************** -*********************************************************************** -* Structure superposition -*********************************************************************** -*********************************************************************** -*********************************************************************** - SUBROUTINE super_align - PARAMETER(nmax=5000) - COMMON/BACKBONE/XA(3,nmax,0:1) - common/length/nseq1,nseq2 - common/dpc/score(nmax,nmax),gap_open,invmap(nmax) - common/alignrst/invmap0(nmax) - common/zscore/zrms,n_al,rmsd_al - common/TM/TM,TMmax - common/init/invmap_i(nmax) - dimension gapp(100) - - TMmax=0 - n_gapp=2 - gapp(1)=-0.6 - gapp(2)=0 - -c n_gapp=11 -c do i=1,n_gapp -c gapp(i)=-(n_gapp-i) -c enddo - -*11111111111111111111111111111111111111111111111111111111 -* get initial alignment from gapless threading -********************************************************** - call get_initial !gapless threading - do i=1,nseq2 - invmap(i)=invmap_i(i) !with highest zcore - enddo - call get_score !TM, matrix score(i,j) - if(TM.gt.TMmax)then - TMmax=TM - do j=1,nseq2 - invmap0(j)=invmap(j) - enddo - endif - -***************************************************************** -* initerative alignment, for different gap_open: -***************************************************************** - DO 111 i_gapp=1,n_gapp !different gap panalties - GAP_OPEN=gapp(i_gapp) !gap panalty - do 222 id=1,30 !maximum interation is 200 - call DP(NSEQ1,NSEQ2) !produce alignment invmap(j) -* Input: score(i,j), and gap_open -* Output: invmap(j) - - call get_score !calculate TM-score, score(i,j) -c record the best alignment in whole search ----------> - if(TM.gt.TMmax)then - TMmax=TM - do j=1,nseq2 - invmap0(j)=invmap(j) - enddo - endif - if(id.gt.1)then - diff=abs(TM-TM_old) - if(diff.lt.0.000001)goto 33 - endif - TM_old=TM - 222 continue - 33 continue - 111 continue - -*222222222222222222222222222222222222222222222222222222222 -* get initial alignment from secondary structure alignment -********************************************************** - call get_initial2 !DP for secondary structure - do i=1,nseq2 - invmap(i)=invmap_i(i) !with highest zcore - enddo - call get_score !TM, score(i,j) - if(TM.gt.TMmax)then - TMmax=TM - do j=1,nseq2 - invmap0(j)=invmap(j) - enddo - endif - -***************************************************************** -* initerative alignment, for different gap_open: -***************************************************************** - DO 1111 i_gapp=1,n_gapp !different gap panalties - GAP_OPEN=gapp(i_gapp) !gap panalty - do 2222 id=1,30 !maximum interation is 200 - call DP(NSEQ1,NSEQ2) !produce alignment invmap(j) -* Input: score(i,j), and gap_open -* Output: invmap(j) - - call get_score !calculate TM-score, score(i,j) -c write(*,21)gap_open,rmsd_al,n_al,TM -c record the best alignment in whole search ----------> - if(TM.gt.TMmax)then - TMmax=TM - do j=1,nseq2 - invmap0(j)=invmap(j) - enddo - endif - if(id.gt.1)then - diff=abs(TM-TM_old) - if(diff.lt.0.000001)goto 333 - endif - TM_old=TM - 2222 continue - 333 continue - 1111 continue - -*333333333333333333333333333333333333333333333333333333333333 -* get initial alignment from invmap0+SS -************************************************************* - call get_initial3 !invmap0+SS - do i=1,nseq2 - invmap(i)=invmap_i(i) !with highest zcore - enddo - call get_score !TM, score(i,j) - if(TM.gt.TMmax)then - TMmax=TM - do j=1,nseq2 - invmap0(j)=invmap(j) - enddo - endif - -***************************************************************** -* initerative alignment, for different gap_open: -***************************************************************** - DO 1110 i_gapp=1,n_gapp !different gap panalties - GAP_OPEN=gapp(i_gapp) !gap panalty - do 2220 id=1,30 !maximum interation is 200 - call DP(NSEQ1,NSEQ2) !produce alignment invmap(j) -* Input: score(i,j), and gap_open -* Output: invmap(j) - - call get_score !calculate TM-score, score(i,j) -c write(*,21)gap_open,rmsd_al,n_al,TM -c record the best alignment in whole search ----------> - if(TM.gt.TMmax)then - TMmax=TM - do j=1,nseq2 - invmap0(j)=invmap(j) - enddo - endif - if(id.gt.1)then - diff=abs(TM-TM_old) - if(diff.lt.0.000001)goto 330 - endif - TM_old=TM - 2220 continue - 330 continue - 1110 continue - -*444444444444444444444444444444444444444444444444444444444 -* get initial alignment of pieces from gapless threading -********************************************************** - call get_initial4 !gapless threading - do i=1,nseq2 - invmap(i)=invmap_i(i) !with highest zcore - enddo - call get_score !TM, matrix score(i,j) - if(TM.gt.TMmax)then - TMmax=TM - do j=1,nseq2 - invmap0(j)=invmap(j) - enddo - endif - -***************************************************************** -* initerative alignment, for different gap_open: -***************************************************************** - DO 44 i_gapp=2,n_gapp !different gap panalties - GAP_OPEN=gapp(i_gapp) !gap panalty - do 444 id=1,2 !maximum interation is 200 - call DP(NSEQ1,NSEQ2) !produce alignment invmap(j) -* Input: score(i,j), and gap_open -* Output: invmap(j) - - call get_score !calculate TM-score, score(i,j) -c record the best alignment in whole search ----------> - if(TM.gt.TMmax)then - TMmax=TM - do j=1,nseq2 - invmap0(j)=invmap(j) - enddo - endif - 444 continue - 44 continue - -c^^^^^^^^^^^^^^^ best alignment invmap0(j) found ^^^^^^^^^^^^^^^^^^ - RETURN - END - -************************************************************** -* get initial alignment invmap0(i) from gapless threading -************************************************************** - subroutine get_initial - PARAMETER(nmax=5000) - COMMON/BACKBONE/XA(3,nmax,0:1) - common/length/nseq1,nseq2 - common/dpc/score(nmax,nmax),gap_open,invmap(nmax) - common/alignrst/invmap0(nmax) - common/zscore/zrms,n_al,rmsd_al - common/TM/TM,TMmax - common/init/invmap_i(nmax) - - aL=min(nseq1,nseq2) - idel=aL/2.5 !minimum size of considered fragment - if(idel.le.5)idel=5 - n1=-nseq2+idel - n2=nseq1-idel - GL_max=0 - do ishift=n1,n2 - L=0 - do j=1,nseq2 - i=j+ishift - if(i.ge.1.and.i.le.nseq1)then - L=L+1 - invmap(j)=i - else - invmap(j)=-1 - endif - enddo - if(L.ge.idel)then - call get_GL(GL) - if(GL.gt.GL_max)then - GL_max=GL - do i=1,nseq2 - invmap_i(i)=invmap(i) - enddo - endif - endif - enddo - - return - end - -************************************************************** -* get initial alignment invmap0(i) from secondary structure -************************************************************** - subroutine get_initial2 - PARAMETER(nmax=5000) - COMMON/BACKBONE/XA(3,nmax,0:1) - common/length/nseq1,nseq2 - common/dpc/score(nmax,nmax),gap_open,invmap(nmax) - common/alignrst/invmap0(nmax) - common/zscore/zrms,n_al,rmsd_al - common/TM/TM,TMmax - common/sec/isec(nmax),jsec(nmax) - common/init/invmap_i(nmax) - -********** assign secondary structures *************** -c 1->coil, 2->helix, 3->turn, 4->strand - do i=1,nseq1 - isec(i)=1 - j1=i-2 - j2=i-1 - j3=i - j4=i+1 - j5=i+2 - if(j1.ge.1.and.j5.le.nseq1)then - dis13=diszy(0,j1,j3) - dis14=diszy(0,j1,j4) - dis15=diszy(0,j1,j5) - dis24=diszy(0,j2,j4) - dis25=diszy(0,j2,j5) - dis35=diszy(0,j3,j5) - isec(i)=make_sec(dis13,dis14,dis15,dis24,dis25,dis35) - endif - enddo - do i=1,nseq2 - jsec(i)=1 - j1=i-2 - j2=i-1 - j3=i - j4=i+1 - j5=i+2 - if(j1.ge.1.and.j5.le.nseq2)then - dis13=diszy(1,j1,j3) - dis14=diszy(1,j1,j4) - dis15=diszy(1,j1,j5) - dis24=diszy(1,j2,j4) - dis25=diszy(1,j2,j5) - dis35=diszy(1,j3,j5) - jsec(i)=make_sec(dis13,dis14,dis15,dis24,dis25,dis35) - endif - enddo - call smooth !smooth the assignment - -********** score matrix ************************** - do i=1,nseq1 - do j=1,nseq2 - if(isec(i).eq.jsec(j))then - score(i,j)=1 - else - score(i,j)=0 - endif - enddo - enddo - -********** find initial alignment: invmap(j) ************ - gap_open=-1.0 !should be -1 - call DP(NSEQ1,NSEQ2) !produce alignment invmap(j) - do i=1,nseq2 - invmap_i(i)=invmap(i) - enddo - -*^^^^^^^^^^^^ initial alignment done ^^^^^^^^^^^^^^^^^^^^^^ - return - end - -************************************************************** -* get initial alignment invmap0(i) from secondary structure -* and previous alignments -************************************************************** - subroutine get_initial3 - PARAMETER(nmax=5000) - COMMON/BACKBONE/XA(3,nmax,0:1) - common/length/nseq1,nseq2 - common/dpc/score(nmax,nmax),gap_open,invmap(nmax) - common/alignrst/invmap0(nmax) - common/zscore/zrms,n_al,rmsd_al - common/TM/TM,TMmax - common/sec/isec(nmax),jsec(nmax) - common/init/invmap_i(nmax) - -********** score matrix ************************** - do i=1,nseq2 - invmap(i)=invmap0(i) - enddo - call get_score1 !get score(i,j) using RMSD martix - do i=1,nseq1 - do j=1,nseq2 - if(isec(i).eq.jsec(j))then - score(i,j)=0.5+score(i,j) - else - score(i,j)=score(i,j) - endif - enddo - enddo - -********** find initial alignment: invmap(j) ************ - gap_open=-1.0 !should be -1 - call DP(NSEQ1,NSEQ2) !produce alignment invmap(j) - do i=1,nseq2 - invmap_i(i)=invmap(i) - enddo - -*^^^^^^^^^^^^ initial alignment done ^^^^^^^^^^^^^^^^^^^^^^ - return - end - -************************************************************** -* get initial alignment invmap0(i) from fragment gapless threading -************************************************************** - subroutine get_initial4 - PARAMETER(nmax=5000) - COMMON/BACKBONE/XA(3,nmax,0:1) - common/length/nseq1,nseq2 - common/dpc/score(nmax,nmax),gap_open,invmap(nmax) - common/alignrst/invmap0(nmax) - common/zscore/zrms,n_al,rmsd_al - common/TM/TM,TMmax - common/init/invmap_i(nmax) - common/initial4/mm1(nmax),mm2(nmax) - logical contin - - dimension ifr2(2,nmax,nmax),Lfr2(2,nmax),Lfr_max2(2),i_fr2(2) - dimension ifr(nmax) - dimension mm(2,nmax) - - fra_min=4 !>=4,minimum fragment for search - fra_min1=fra_min-1 !cutoff for shift, save time - dcu0=3.85 - dcu_min=3.65 - -ccc Find the smallest continuous fragments --------> - do i=1,nseq1 - mm(1,i)=mm1(i) - enddo - do i=1,nseq2 - mm(2,i)=mm2(i) - enddo - do k=1,2 - dcu=dcu0 - if(k.eq.1)then - nseq0=nseq1 - r_min=nseq1/3.0 !minimum fragment, in case too small protein - else - nseq0=nseq2 - r_min=nseq2/3.0 !minimum fragment, in case too small protein - endif - if(r_min.gt.fra_min)r_min=fra_min - 20 nfr=1 !number of fragments - j=1 !number of residue at nf-fragment - ifr2(k,nfr,j)=1 !what residue - Lfr2(k,nfr)=j !length of the fragment - do i=2,nseq0 - dis=diszy(k-1,i-1,i) - contin=.false. - if(dcu.gt.dcu0)then - if(dis.lt.dcu)then - if(dis.gt.dcu_min)then - contin=.true. - endif - endif - elseif(mm(k,i).eq.(mm(k,i-1)+1))then - if(dis.lt.dcu)then - if(dis.gt.dcu_min)then - contin=.true. - endif - endif - endif - if(contin)then - j=j+1 - ifr2(k,nfr,j)=i - Lfr2(k,nfr)=j - else - nfr=nfr+1 - j=1 - ifr2(k,nfr,j)=i - Lfr2(k,nfr)=j - endif - enddo - Lfr_max=0 - i_fr2(k)=1 !ID of the maximum piece - do i=1,nfr - if(Lfr_max.lt.Lfr2(k,i))then - Lfr_max=Lfr2(k,i) - i_fr2(k)=i - endif - enddo - if(Lfr_max.lt.r_min)then - dcu=1.1*dcu - goto 20 - endif - enddo -c^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -ccc select what piece will be used (this may araise ansysmetry, but -ccc only when L1=L2 and Lfr1=Lfr2 and L1 ne Lfr1 -ccc if L1=Lfr1 and L2=Lfr2 (normal proteins), it will be the same as initial1 - mark=1 - if(Lfr2(1,i_fr2(1)).lt.Lfr2(2,i_fr2(2)))then - mark=1 - elseif(Lfr2(1,i_fr2(1)).gt.Lfr2(2,i_fr2(2)))then - mark=2 - else !Lfr1=Lfr2 - if(nseq1.le.nseq2)then - mark=1 - else - mark=2 - endif - endif -ccc - L_fr=Lfr2(mark,i_fr2(mark)) - do i=1,L_fr - ifr(i)=ifr2(mark,i_fr2(mark),i) - enddo -ccc - if(mark.eq.1)then !non-redundant to get_initial1 - nseq0=nseq1 - else - nseq0=nseq2 - endif - if(L_fr.eq.nseq0)then - n1=int(nseq0*0.1) !0 - n2=int(nseq0*0.89) !2 - j=0 - do i=n1,n2 - j=j+1 - ifr(j)=ifr(n1+j) - enddo - L_fr=j - endif - -ccc get initial -------------> - if(mark.eq.1)then !nseq1 as the smallest one - nseq1_=L_fr - aL=min(nseq1_,nseq2) - idel=aL/2.5 !minimum size of considered fragment - if(idel.le.fra_min1)idel=fra_min1 - n1=-nseq2+idel !shift1 - n2=nseq1_-idel !shift2 - GL_max=0 - do ishift=n1,n2 - L=0 - do j=1,nseq2 - i=j+ishift - if(i.ge.1.and.i.le.nseq1_)then - L=L+1 - invmap(j)=ifr(i) - else - invmap(j)=-1 - endif - enddo - if(L.ge.idel)then - call get_GL(GL) - if(GL.gt.GL_max)then - GL_max=GL - do i=1,nseq2 - invmap_i(i)=invmap(i) - enddo - endif - endif - enddo - else !@@@@@@@@@@@@@@@@@@@@ - nseq2_=L_fr - aL=min(nseq1,nseq2_) - idel=aL/2.5 !minimum size of considered fragment - if(idel.le.fra_min1)idel=fra_min1 - n1=-nseq2_+idel - n2=nseq1-idel - GL_max=0 - do ishift=n1,n2 - L=0 - do j=1,nseq2 - invmap(j)=-1 - enddo - do j=1,nseq2_ - i=j+ishift - if(i.ge.1.and.i.le.nseq1)then - L=L+1 - invmap(ifr(j))=i - endif - enddo - if(L.ge.idel)then - call get_GL(GL) - if(GL.gt.GL_max)then - GL_max=GL - do i=1,nseq2 - invmap_i(i)=invmap(i) - enddo - endif - endif - enddo - endif - - return - end - -************************************************************** -* smooth the secondary structure assignment -************************************************************** - subroutine smooth - PARAMETER(nmax=5000) - common/sec/isec(nmax),jsec(nmax) - common/length/nseq1,nseq2 - -*** smooth single --------------> -*** --x-- => ----- - do i=1,nseq1 - if(isec(i).eq.2.or.isec(i).eq.4)then - j=isec(i) - if(isec(i-2).ne.j)then - if(isec(i-1).ne.j)then - if(isec(i+1).ne.j)then - if(isec(i+1).ne.j)then - isec(i)=1 - endif - endif - endif - endif - endif - enddo - do i=1,nseq2 - if(jsec(i).eq.2.or.jsec(i).eq.4)then - j=jsec(i) - if(jsec(i-2).ne.j)then - if(jsec(i-1).ne.j)then - if(jsec(i+1).ne.j)then - if(jsec(i+1).ne.j)then - jsec(i)=1 - endif - endif - endif - endif - endif - enddo - -*** smooth double --------------> -*** --xx-- => ------ - do i=1,nseq1 - if(isec(i).ne.2)then - if(isec(i+1).ne.2)then - if(isec(i+2).eq.2)then - if(isec(i+3).eq.2)then - if(isec(i+4).ne.2)then - if(isec(i+5).ne.2)then - isec(i+2)=1 - isec(i+3)=1 - endif - endif - endif - endif - endif - endif - - if(isec(i).ne.4)then - if(isec(i+1).ne.4)then - if(isec(i+2).eq.4)then - if(isec(i+3).eq.4)then - if(isec(i+4).ne.4)then - if(isec(i+5).ne.4)then - isec(i+2)=1 - isec(i+3)=1 - endif - endif - endif - endif - endif - endif - enddo - do i=1,nseq2 - if(jsec(i).ne.2)then - if(jsec(i+1).ne.2)then - if(jsec(i+2).eq.2)then - if(jsec(i+3).eq.2)then - if(jsec(i+4).ne.2)then - if(jsec(i+5).ne.2)then - jsec(i+2)=1 - jsec(i+3)=1 - endif - endif - endif - endif - endif - endif - - if(jsec(i).ne.4)then - if(jsec(i+1).ne.4)then - if(jsec(i+2).eq.4)then - if(jsec(i+3).eq.4)then - if(jsec(i+4).ne.4)then - if(jsec(i+5).ne.4)then - jsec(i+2)=1 - jsec(i+3)=1 - endif - endif - endif - endif - endif - endif - enddo - -*** connect --------------> -*** x-x => xxx - do i=1,nseq1 - if(isec(i).eq.2)then - if(isec(i+1).ne.2)then - if(isec(i+2).eq.2)then - isec(i+1)=2 - endif - endif - endif - - if(isec(i).eq.4)then - if(isec(i+1).ne.4)then - if(isec(i+2).eq.4)then - isec(i+1)=4 - endif - endif - endif - enddo - do i=1,nseq2 - if(jsec(i).eq.2)then - if(jsec(i+1).ne.2)then - if(jsec(i+2).eq.2)then - jsec(i+1)=2 - endif - endif - endif - - if(jsec(i).eq.4)then - if(jsec(i+1).ne.4)then - if(jsec(i+2).eq.4)then - jsec(i+1)=4 - endif - endif - endif - enddo - - return - end - -************************************************************* -* assign secondary structure: -************************************************************* - function diszy(i,i1,i2) - PARAMETER(nmax=5000) - COMMON/BACKBONE/XA(3,nmax,0:1) - diszy=sqrt((xa(1,i1,i)-xa(1,i2,i))**2 - & +(xa(2,i1,i)-xa(2,i2,i))**2 - & +(xa(3,i1,i)-xa(3,i2,i))**2) - return - end - -************************************************************* -* assign secondary structure: -************************************************************* - function make_sec(dis13,dis14,dis15,dis24,dis25,dis35) - make_sec=1 - delta=2.1 - if(abs(dis15-6.37).lt.delta)then - if(abs(dis14-5.18).lt.delta)then - if(abs(dis25-5.18).lt.delta)then - if(abs(dis13-5.45).lt.delta)then - if(abs(dis24-5.45).lt.delta)then - if(abs(dis35-5.45).lt.delta)then - make_sec=2 !helix - return - endif - endif - endif - endif - endif - endif - delta=1.42 - if(abs(dis15-13).lt.delta)then - if(abs(dis14-10.4).lt.delta)then - if(abs(dis25-10.4).lt.delta)then - if(abs(dis13-6.1).lt.delta)then - if(abs(dis24-6.1).lt.delta)then - if(abs(dis35-6.1).lt.delta)then - make_sec=4 !strand - return - endif - endif - endif - endif - endif - endif - if(dis15.lt.8)then - make_sec=3 - endif - - return - end - -**************************************************************** -* quickly calculate TM-score with given invmap(i) in 3 iterations -**************************************************************** - subroutine get_GL(GL) - PARAMETER(nmax=5000) - common/length/nseq1,nseq2 - COMMON/BACKBONE/XA(3,nmax,0:1) - common/dpc/score(nmax,nmax),gap_open,invmap(nmax) - common/zscore/zrms,n_al,rmsd_al - common/d0/d0,anseq - dimension xtm1(nmax),ytm1(nmax),ztm1(nmax) - dimension xtm2(nmax),ytm2(nmax),ztm2(nmax) - common/TM/TM,TMmax - common/n1n2/n1(nmax),n2(nmax) - common/d00/d00,d002 - - dimension xo1(nmax),yo1(nmax),zo1(nmax) - dimension xo2(nmax),yo2(nmax),zo2(nmax) - dimension dis2(nmax) - -ccc RMSD: - double precision r_1(3,nmax),r_2(3,nmax),r_3(3,nmax),w(nmax) - double precision u(3,3),t(3),rms,drms !armsd is real - data w /nmax*1.0/ -ccc - -c calculate RMSD between aligned structures and rotate the structures --> - n_al=0 - do j=1,NSEQ2 - i=invmap(j) !j aligned to i - if(i.gt.0)then - n_al=n_al+1 - r_1(1,n_al)=xa(1,i,0) - r_1(2,n_al)=xa(2,i,0) - r_1(3,n_al)=xa(3,i,0) - r_2(1,n_al)=xa(1,j,1) - r_2(2,n_al)=xa(2,j,1) - r_2(3,n_al)=xa(3,j,1) - xo1(n_al)=xa(1,i,0) - yo1(n_al)=xa(2,i,0) - zo1(n_al)=xa(3,i,0) - xo2(n_al)=xa(1,j,1) - yo2(n_al)=xa(2,j,1) - zo2(n_al)=xa(3,j,1) - endif - enddo - call u3b(w,r_1,r_2,n_al,1,rms,u,t,ier) !u rotate r_1 to r_2 - GL=0 - do i=1,n_al - xx=t(1)+u(1,1)*xo1(i)+u(1,2)*yo1(i)+u(1,3)*zo1(i) - yy=t(2)+u(2,1)*xo1(i)+u(2,2)*yo1(i)+u(2,3)*zo1(i) - zz=t(3)+u(3,1)*xo1(i)+u(3,2)*yo1(i)+u(3,3)*zo1(i) - dis2(i)=(xx-xo2(i))**2+(yy-yo2(i))**2+(zz-zo2(i))**2 - GL=GL+1/(1+dis2(i)/(d0**2)) - enddo -ccc for next iteration-------------> - d002t=d002 - 21 j=0 - do i=1,n_al - if(dis2(i).le.d002t)then - j=j+1 - r_1(1,j)=xo1(i) - r_1(2,j)=yo1(i) - r_1(3,j)=zo1(i) - r_2(1,j)=xo2(i) - r_2(2,j)=yo2(i) - r_2(3,j)=zo2(i) - endif - enddo - if(j.lt.3.and.n_al.gt.3)then - d002t=d002t+.5 - goto 21 - endif - L=j - call u3b(w,r_1,r_2,L,1,rms,u,t,ier) !u rotate r_1 to r_2 - G2=0 - do i=1,n_al - xx=t(1)+u(1,1)*xo1(i)+u(1,2)*yo1(i)+u(1,3)*zo1(i) - yy=t(2)+u(2,1)*xo1(i)+u(2,2)*yo1(i)+u(2,3)*zo1(i) - zz=t(3)+u(3,1)*xo1(i)+u(3,2)*yo1(i)+u(3,3)*zo1(i) - dis2(i)=(xx-xo2(i))**2+(yy-yo2(i))**2+(zz-zo2(i))**2 - G2=G2+1/(1+dis2(i)/(d0**2)) - enddo -ccc for next iteration-------------> - d002t=d002+1 - 22 j=0 - do i=1,n_al - if(dis2(i).le.d002t)then - j=j+1 - r_1(1,j)=xo1(i) - r_1(2,j)=yo1(i) - r_1(3,j)=zo1(i) - r_2(1,j)=xo2(i) - r_2(2,j)=yo2(i) - r_2(3,j)=zo2(i) - endif - enddo - if(j.lt.3.and.n_al.gt.3)then - d002t=d002t+.5 - goto 22 - endif - L=j - call u3b(w,r_1,r_2,L,1,rms,u,t,ier) !u rotate r_1 to r_2 - G3=0 - do i=1,n_al - xx=t(1)+u(1,1)*xo1(i)+u(1,2)*yo1(i)+u(1,3)*zo1(i) - yy=t(2)+u(2,1)*xo1(i)+u(2,2)*yo1(i)+u(2,3)*zo1(i) - zz=t(3)+u(3,1)*xo1(i)+u(3,2)*yo1(i)+u(3,3)*zo1(i) - dis2(i)=(xx-xo2(i))**2+(yy-yo2(i))**2+(zz-zo2(i))**2 - G3=G3+1/(1+dis2(i)/(d0**2)) - enddo - if(G2.gt.GL)GL=G2 - if(G3.gt.GL)GL=G3 - -c^^^^^^^^^^^^^^^^ GL done ^^^^^^^^^^^^^^^^^^^^^^^^^^^ - return - end - -**************************************************************** -* with invmap(i) calculate TM-score and martix score(i,j) for rotation -**************************************************************** - subroutine get_score - PARAMETER(nmax=5000) - common/length/nseq1,nseq2 - COMMON/BACKBONE/XA(3,nmax,0:1) - common/dpc/score(nmax,nmax),gap_open,invmap(nmax) - common/zscore/zrms,n_al,rmsd_al - common/d0/d0,anseq - dimension xtm1(nmax),ytm1(nmax),ztm1(nmax) - dimension xtm2(nmax),ytm2(nmax),ztm2(nmax) - common/TM/TM,TMmax - common/n1n2/n1(nmax),n2(nmax) - -ccc RMSD: - double precision r_1(3,nmax),r_2(3,nmax),r_3(3,nmax),w(nmax) - double precision u(3,3),t(3),rms,drms !armsd is real - data w /nmax*1.0/ -ccc - -c calculate RMSD between aligned structures and rotate the structures --> - n_al=0 - do j=1,NSEQ2 - i=invmap(j) !j aligned to i - if(i.gt.0)then - n_al=n_al+1 -ccc for TM-score: - xtm1(n_al)=xa(1,i,0) !for TM-score - ytm1(n_al)=xa(2,i,0) - ztm1(n_al)=xa(3,i,0) - xtm2(n_al)=xa(1,j,1) - ytm2(n_al)=xa(2,j,1) - ztm2(n_al)=xa(3,j,1) -ccc for rotation matrix: - r_1(1,n_al)=xa(1,i,0) - r_1(2,n_al)=xa(2,i,0) - r_1(3,n_al)=xa(3,i,0) - endif - enddo -*** calculate TM-score for the given alignment-----------> - d0_input=d0 - call TMscore8_search(d0_input,n_al,xtm1,ytm1,ztm1,n1, - & n_al,xtm2,ytm2,ztm2,n2,TM,Rcomm,Lcomm) !simplified search engine - TM=TM*n_al/anseq !TM-score -*** calculate score matrix score(i,j)------------------> - do i=1,n_al - r_2(1,i)=xtm1(i) - r_2(2,i)=ytm1(i) - r_2(3,i)=ztm1(i) - enddo - call u3b(w,r_1,r_2,n_al,1,rms,u,t,ier) !u rotate r_1 to r_2 - do i=1,nseq1 - xx=t(1)+u(1,1)*xa(1,i,0)+u(1,2)*xa(2,i,0)+u(1,3)*xa(3,i,0) - yy=t(2)+u(2,1)*xa(1,i,0)+u(2,2)*xa(2,i,0)+u(2,3)*xa(3,i,0) - zz=t(3)+u(3,1)*xa(1,i,0)+u(3,2)*xa(2,i,0)+u(3,3)*xa(3,i,0) - do j=1,nseq2 - dd=(xx-xa(1,j,1))**2+(yy-xa(2,j,1))**2+(zz-xa(3,j,1))**2 - score(i,j)=1/(1+dd/d0**2) - enddo - enddo - -c^^^^^^^^^^^^^^^^ score(i,j) done ^^^^^^^^^^^^^^^^^^^^^^^^^^^ - return - end - -**************************************************************** -* with invmap(i) calculate score(i,j) using RMSD rotation -**************************************************************** - subroutine get_score1 - PARAMETER(nmax=5000) - common/length/nseq1,nseq2 - COMMON/BACKBONE/XA(3,nmax,0:1) - common/dpc/score(nmax,nmax),gap_open,invmap(nmax) - common/zscore/zrms,n_al,rmsd_al - common/d0/d0,anseq - common/d0min/d0_min - dimension xtm1(nmax),ytm1(nmax),ztm1(nmax) - dimension xtm2(nmax),ytm2(nmax),ztm2(nmax) - common/TM/TM,TMmax - common/n1n2/n1(nmax),n2(nmax) - -ccc RMSD: - double precision r_1(3,nmax),r_2(3,nmax),r_3(3,nmax),w(nmax) - double precision u(3,3),t(3),rms,drms !armsd is real - data w /nmax*1.0/ -ccc - -c calculate RMSD between aligned structures and rotate the structures --> - n_al=0 - do j=1,NSEQ2 - i=invmap(j) !j aligned to i - if(i.gt.0)then - n_al=n_al+1 -ccc for rotation matrix: - r_1(1,n_al)=xa(1,i,0) - r_1(2,n_al)=xa(2,i,0) - r_1(3,n_al)=xa(3,i,0) - r_2(1,n_al)=xa(1,j,1) - r_2(2,n_al)=xa(2,j,1) - r_2(3,n_al)=xa(3,j,1) - endif - enddo -*** calculate score matrix score(i,j)------------------> - call u3b(w,r_1,r_2,n_al,1,rms,u,t,ier) !u rotate r_1 to r_2 - d01=d0+1.5 - if(d01.lt.d0_min)d01=d0_min - d02=d01*d01 - do i=1,nseq1 - xx=t(1)+u(1,1)*xa(1,i,0)+u(1,2)*xa(2,i,0)+u(1,3)*xa(3,i,0) - yy=t(2)+u(2,1)*xa(1,i,0)+u(2,2)*xa(2,i,0)+u(2,3)*xa(3,i,0) - zz=t(3)+u(3,1)*xa(1,i,0)+u(3,2)*xa(2,i,0)+u(3,3)*xa(3,i,0) - do j=1,nseq2 - dd=(xx-xa(1,j,1))**2+(yy-xa(2,j,1))**2+(zz-xa(3,j,1))**2 - score(i,j)=1/(1+dd/d02) - enddo - enddo - -c^^^^^^^^^^^^^^^^ score(i,j) done ^^^^^^^^^^^^^^^^^^^^^^^^^^^ - return - end - - -************************************************************************* -************************************************************************* -* This is a subroutine to compare two structures and find the -* superposition that has the maximum TM-score. -* -* L1--Length of the first structure -* (x1(i),y1(i),z1(i))--coordinates of i'th residue at the first structure -* n1(i)--Residue sequence number of i'th residue at the first structure -* L2--Length of the second structure -* (x2(i),y2(i),z2(i))--coordinates of i'th residue at the second structure -* n2(i)--Residue sequence number of i'th residue at the second structure -* TM--TM-score of the comparison -* Rcomm--RMSD of two structures in the common aligned residues -* Lcomm--Length of the common aligned regions -* -* Note: -* 1, Always put native as the second structure, by which TM-score -* is normalized. -* 2, The returned (x1(i),y1(i),z1(i)) are the rotated structure after -* TM-score superposition. -************************************************************************* -************************************************************************* -*** dis<8, simplified search engine - subroutine TMscore8_search(dx,L1,x1,y1,z1,n1,L2,x2,y2,z2,n2, - & TM,Rcomm,Lcomm) - PARAMETER(nmax=5000) - common/stru/xt(nmax),yt(nmax),zt(nmax),xb(nmax),yb(nmax),zb(nmax) - common/nres/nresA(nmax),nresB(nmax),nseqA,nseqB - common/para/d,d0 - common/d0min/d0_min - common/align/n_ali,iA(nmax),iB(nmax) - common/nscore/i_ali(nmax),n_cut ![1,n_ali],align residues for the score - dimension k_ali(nmax),k_ali0(nmax) - dimension L_ini(100),iq(nmax) - common/scores/score - double precision score,score_max - dimension xa(nmax),ya(nmax),za(nmax) - dimension iL0(nmax) - - dimension x1(nmax),y1(nmax),z1(nmax),n1(nmax) - dimension x2(nmax),y2(nmax),z2(nmax),n2(nmax) - -ccc RMSD: - double precision r_1(3,nmax),r_2(3,nmax),r_3(3,nmax),w(nmax) - double precision u(3,3),t(3),rms,drms !armsd is real - data w /nmax*1.0/ -ccc - -********* convert input data *************************** -* because L1=L2 in this special case----------> - nseqA=L1 - nseqB=L2 - do i=1,nseqA - xa(i)=x1(i) - ya(i)=y1(i) - za(i)=z1(i) - nresA(i)=n1(i) - xb(i)=x2(i) - yb(i)=y2(i) - zb(i)=z2(i) - nresB(i)=n2(i) - iA(i)=i - iB(i)=i - enddo - n_ali=L1 !number of aligned residues - Lcomm=L1 - -************///// -* parameters: -***************** -*** d0-------------> - d0=dx - if(d0.lt.d0_min)d0=d0_min -*** d0_search -----> - d0_search=d0 - if(d0_search.gt.8)d0_search=8 - if(d0_search.lt.4.5)d0_search=4.5 -*** iterative parameters -----> - n_it=20 !maximum number of iterations - d_output=5 !for output alignment - n_init_max=6 !maximum number of L_init - n_init=0 - L_ini_min=4 - if(n_ali.lt.4)L_ini_min=n_ali - do i=1,n_init_max-1 - n_init=n_init+1 - L_ini(n_init)=n_ali/2**(n_init-1) - if(L_ini(n_init).le.L_ini_min)then - L_ini(n_init)=L_ini_min - goto 402 - endif - enddo - n_init=n_init+1 - L_ini(n_init)=L_ini_min - 402 continue - -****************************************************************** -* find the maximum score starting from local structures superposition -****************************************************************** - score_max=-1 !TM-score - do 333 i_init=1,n_init - L_init=L_ini(i_init) - iL_max=n_ali-L_init+1 - k=0 - do i=1,iL_max,40 !this is the simplification! - k=k+1 - iL0(k)=i - enddo - if(iL0(k).lt.iL_max)then - k=k+1 - iL0(k)=iL_max - endif - n_shift=k - do 300 i_shift=1,n_shift - iL=iL0(i_shift) - LL=0 - ka=0 - do i=1,L_init - k=iL+i-1 ![1,n_ali] common aligned - r_1(1,i)=xa(iA(k)) - r_1(2,i)=ya(iA(k)) - r_1(3,i)=za(iA(k)) - r_2(1,i)=xb(iB(k)) - r_2(2,i)=yb(iB(k)) - r_2(3,i)=zb(iB(k)) - LL=LL+1 - ka=ka+1 - k_ali(ka)=k - enddo - call u3b(w,r_1,r_2,LL,1,rms,u,t,ier) !u rotate r_1 to r_2 - if(i_init.eq.1)then !global superposition - armsd=dsqrt(rms/LL) - Rcomm=armsd - endif - do j=1,nseqA - xt(j)=t(1)+u(1,1)*xa(j)+u(1,2)*ya(j)+u(1,3)*za(j) - yt(j)=t(2)+u(2,1)*xa(j)+u(2,2)*ya(j)+u(2,3)*za(j) - zt(j)=t(3)+u(3,1)*xa(j)+u(3,2)*ya(j)+u(3,3)*za(j) - enddo - d=d0_search-1 - call score_fun8 !init, get scores, n_cut+i_ali(i) for iteration - if(score_max.lt.score)then - score_max=score - ka0=ka - do i=1,ka0 - k_ali0(i)=k_ali(i) - enddo - endif -*** iteration for extending ----------------------------------> - d=d0_search+1 - do 301 it=1,n_it - LL=0 - ka=0 - do i=1,n_cut - m=i_ali(i) ![1,n_ali] - r_1(1,i)=xa(iA(m)) - r_1(2,i)=ya(iA(m)) - r_1(3,i)=za(iA(m)) - r_2(1,i)=xb(iB(m)) - r_2(2,i)=yb(iB(m)) - r_2(3,i)=zb(iB(m)) - ka=ka+1 - k_ali(ka)=m - LL=LL+1 - enddo - call u3b(w,r_1,r_2,LL,1,rms,u,t,ier) !u rotate r_1 to r_2 - do j=1,nseqA - xt(j)=t(1)+u(1,1)*xa(j)+u(1,2)*ya(j)+u(1,3)*za(j) - yt(j)=t(2)+u(2,1)*xa(j)+u(2,2)*ya(j)+u(2,3)*za(j) - zt(j)=t(3)+u(3,1)*xa(j)+u(3,2)*ya(j)+u(3,3)*za(j) - enddo - call score_fun8 !get scores, n_cut+i_ali(i) for iteration - if(score_max.lt.score)then - score_max=score - ka0=ka - do i=1,ka - k_ali0(i)=k_ali(i) - enddo - endif - if(it.eq.n_it)goto 302 - if(n_cut.eq.ka)then - neq=0 - do i=1,n_cut - if(i_ali(i).eq.k_ali(i))neq=neq+1 - enddo - if(n_cut.eq.neq)goto 302 - endif - 301 continue !for iteration - 302 continue - 300 continue !for shift - 333 continue !for initial length, L_ali/M - -******** return the final rotation **************** - LL=0 - do i=1,ka0 - m=k_ali0(i) !record of the best alignment - r_1(1,i)=xa(iA(m)) - r_1(2,i)=ya(iA(m)) - r_1(3,i)=za(iA(m)) - r_2(1,i)=xb(iB(m)) - r_2(2,i)=yb(iB(m)) - r_2(3,i)=zb(iB(m)) - LL=LL+1 - enddo - call u3b(w,r_1,r_2,LL,1,rms,u,t,ier) !u rotate r_1 to r_2 - do j=1,nseqA - x1(j)=t(1)+u(1,1)*xa(j)+u(1,2)*ya(j)+u(1,3)*za(j) - y1(j)=t(2)+u(2,1)*xa(j)+u(2,2)*ya(j)+u(2,3)*za(j) - z1(j)=t(3)+u(3,1)*xa(j)+u(3,2)*ya(j)+u(3,3)*za(j) - enddo - TM=score_max - -c^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - return - END - -************************************************************************* -************************************************************************* -* This is a subroutine to compare two structures and find the -* superposition that has the maximum TM-score. -* -* L1--Length of the first structure -* (x1(i),y1(i),z1(i))--coordinates of i'th residue at the first structure -* n1(i)--Residue sequence number of i'th residue at the first structure -* L2--Length of the second structure -* (x2(i),y2(i),z2(i))--coordinates of i'th residue at the second structure -* n2(i)--Residue sequence number of i'th residue at the second structure -* TM--TM-score of the comparison -* Rcomm--RMSD of two structures in the common aligned residues -* Lcomm--Length of the common aligned regions -* -* Note: -* 1, Always put native as the second structure, by which TM-score -* is normalized. -* 2, The returned (x1(i),y1(i),z1(i)) are the rotated structure after -* TM-score superposition. -************************************************************************* -************************************************************************* -*** dis<8, but same search engine - subroutine TMscore8(dx,L1,x1,y1,z1,n1,L2,x2,y2,z2,n2, - & TM,Rcomm,Lcomm) - PARAMETER(nmax=5000) - common/stru/xt(nmax),yt(nmax),zt(nmax),xb(nmax),yb(nmax),zb(nmax) - common/nres/nresA(nmax),nresB(nmax),nseqA,nseqB - common/para/d,d0 - common/d0min/d0_min - common/align/n_ali,iA(nmax),iB(nmax) - common/nscore/i_ali(nmax),n_cut ![1,n_ali],align residues for the score - dimension k_ali(nmax),k_ali0(nmax) - dimension L_ini(100),iq(nmax) - common/scores/score - double precision score,score_max - dimension xa(nmax),ya(nmax),za(nmax) - - dimension x1(nmax),y1(nmax),z1(nmax),n1(nmax) - dimension x2(nmax),y2(nmax),z2(nmax),n2(nmax) - -ccc RMSD: - double precision r_1(3,nmax),r_2(3,nmax),r_3(3,nmax),w(nmax) - double precision u(3,3),t(3),rms,drms !armsd is real - data w /nmax*1.0/ -ccc - -********* convert input data *************************** -* because L1=L2 in this special case----------> - nseqA=L1 - nseqB=L2 - do i=1,nseqA - xa(i)=x1(i) - ya(i)=y1(i) - za(i)=z1(i) - nresA(i)=n1(i) - xb(i)=x2(i) - yb(i)=y2(i) - zb(i)=z2(i) - nresB(i)=n2(i) - iA(i)=i - iB(i)=i - enddo - n_ali=L1 !number of aligned residues - Lcomm=L1 - -************///// -* parameters: -***************** -*** d0-------------> - d0=dx - if(d0.lt.d0_min)d0=d0_min -*** d0_search -----> - d0_search=d0 - if(d0_search.gt.8)d0_search=8 - if(d0_search.lt.4.5)d0_search=4.5 -*** iterative parameters -----> - n_it=20 !maximum number of iterations - d_output=5 !for output alignment - n_init_max=6 !maximum number of L_init - n_init=0 - L_ini_min=4 - if(n_ali.lt.4)L_ini_min=n_ali - do i=1,n_init_max-1 - n_init=n_init+1 - L_ini(n_init)=n_ali/2**(n_init-1) - if(L_ini(n_init).le.L_ini_min)then - L_ini(n_init)=L_ini_min - goto 402 - endif - enddo - n_init=n_init+1 - L_ini(n_init)=L_ini_min - 402 continue - -****************************************************************** -* find the maximum score starting from local structures superposition -****************************************************************** - score_max=-1 !TM-score - do 333 i_init=1,n_init - L_init=L_ini(i_init) - iL_max=n_ali-L_init+1 - do 300 iL=1,iL_max !on aligned residues, [1,nseqA] - LL=0 - ka=0 - do i=1,L_init - k=iL+i-1 ![1,n_ali] common aligned - r_1(1,i)=xa(iA(k)) - r_1(2,i)=ya(iA(k)) - r_1(3,i)=za(iA(k)) - r_2(1,i)=xb(iB(k)) - r_2(2,i)=yb(iB(k)) - r_2(3,i)=zb(iB(k)) - LL=LL+1 - ka=ka+1 - k_ali(ka)=k - enddo - call u3b(w,r_1,r_2,LL,1,rms,u,t,ier) !u rotate r_1 to r_2 - if(i_init.eq.1)then !global superposition - armsd=dsqrt(rms/LL) - Rcomm=armsd - endif - do j=1,nseqA - xt(j)=t(1)+u(1,1)*xa(j)+u(1,2)*ya(j)+u(1,3)*za(j) - yt(j)=t(2)+u(2,1)*xa(j)+u(2,2)*ya(j)+u(2,3)*za(j) - zt(j)=t(3)+u(3,1)*xa(j)+u(3,2)*ya(j)+u(3,3)*za(j) - enddo - d=d0_search-1 - call score_fun8 !init, get scores, n_cut+i_ali(i) for iteration - if(score_max.lt.score)then - score_max=score - ka0=ka - do i=1,ka0 - k_ali0(i)=k_ali(i) - enddo - endif -*** iteration for extending ----------------------------------> - d=d0_search+1 - do 301 it=1,n_it - LL=0 - ka=0 - do i=1,n_cut - m=i_ali(i) ![1,n_ali] - r_1(1,i)=xa(iA(m)) - r_1(2,i)=ya(iA(m)) - r_1(3,i)=za(iA(m)) - r_2(1,i)=xb(iB(m)) - r_2(2,i)=yb(iB(m)) - r_2(3,i)=zb(iB(m)) - ka=ka+1 - k_ali(ka)=m - LL=LL+1 - enddo - call u3b(w,r_1,r_2,LL,1,rms,u,t,ier) !u rotate r_1 to r_2 - do j=1,nseqA - xt(j)=t(1)+u(1,1)*xa(j)+u(1,2)*ya(j)+u(1,3)*za(j) - yt(j)=t(2)+u(2,1)*xa(j)+u(2,2)*ya(j)+u(2,3)*za(j) - zt(j)=t(3)+u(3,1)*xa(j)+u(3,2)*ya(j)+u(3,3)*za(j) - enddo - call score_fun8 !get scores, n_cut+i_ali(i) for iteration - if(score_max.lt.score)then - score_max=score - ka0=ka - do i=1,ka - k_ali0(i)=k_ali(i) - enddo - endif - if(it.eq.n_it)goto 302 - if(n_cut.eq.ka)then - neq=0 - do i=1,n_cut - if(i_ali(i).eq.k_ali(i))neq=neq+1 - enddo - if(n_cut.eq.neq)goto 302 - endif - 301 continue !for iteration - 302 continue - 300 continue !for shift - 333 continue !for initial length, L_ali/M - -******** return the final rotation **************** - LL=0 - do i=1,ka0 - m=k_ali0(i) !record of the best alignment - r_1(1,i)=xa(iA(m)) - r_1(2,i)=ya(iA(m)) - r_1(3,i)=za(iA(m)) - r_2(1,i)=xb(iB(m)) - r_2(2,i)=yb(iB(m)) - r_2(3,i)=zb(iB(m)) - LL=LL+1 - enddo - call u3b(w,r_1,r_2,LL,1,rms,u,t,ier) !u rotate r_1 to r_2 - do j=1,nseqA - x1(j)=t(1)+u(1,1)*xa(j)+u(1,2)*ya(j)+u(1,3)*za(j) - y1(j)=t(2)+u(2,1)*xa(j)+u(2,2)*ya(j)+u(2,3)*za(j) - z1(j)=t(3)+u(3,1)*xa(j)+u(3,2)*ya(j)+u(3,3)*za(j) - enddo - TM=score_max - -c^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - return - END - -ccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc -c 1, collect those residues with dis - nseqA=L1 - nseqB=L2 - do i=1,nseqA - xa(i)=x1(i) - ya(i)=y1(i) - za(i)=z1(i) - nresA(i)=n1(i) - xb(i)=x2(i) - yb(i)=y2(i) - zb(i)=z2(i) - nresB(i)=n2(i) - iA(i)=i - iB(i)=i - enddo - n_ali=L1 !number of aligned residues - Lcomm=L1 - -************///// -* parameters: -***************** -*** d0-------------> -c d0=1.24*(nseqB-15)**(1.0/3.0)-1.8 - d0=dx - if(d0.lt.d0_min)d0=d0_min -*** d0_search -----> - d0_search=d0 - if(d0_search.gt.8)d0_search=8 - if(d0_search.lt.4.5)d0_search=4.5 -*** iterative parameters -----> - n_it=20 !maximum number of iterations - d_output=5 !for output alignment - n_init_max=6 !maximum number of L_init - n_init=0 - L_ini_min=4 - if(n_ali.lt.4)L_ini_min=n_ali - do i=1,n_init_max-1 - n_init=n_init+1 - L_ini(n_init)=n_ali/2**(n_init-1) - if(L_ini(n_init).le.L_ini_min)then - L_ini(n_init)=L_ini_min - goto 402 - endif - enddo - n_init=n_init+1 - L_ini(n_init)=L_ini_min - 402 continue - -****************************************************************** -* find the maximum score starting from local structures superposition -****************************************************************** - score_max=-1 !TM-score - do 333 i_init=1,n_init - L_init=L_ini(i_init) - iL_max=n_ali-L_init+1 - do 300 iL=1,iL_max !on aligned residues, [1,nseqA] - LL=0 - ka=0 - do i=1,L_init - k=iL+i-1 ![1,n_ali] common aligned - r_1(1,i)=xa(iA(k)) - r_1(2,i)=ya(iA(k)) - r_1(3,i)=za(iA(k)) - r_2(1,i)=xb(iB(k)) - r_2(2,i)=yb(iB(k)) - r_2(3,i)=zb(iB(k)) - LL=LL+1 - ka=ka+1 - k_ali(ka)=k - enddo - call u3b(w,r_1,r_2,LL,1,rms,u,t,ier) !u rotate r_1 to r_2 - if(i_init.eq.1)then !global superposition - armsd=dsqrt(rms/LL) - Rcomm=armsd - endif - do j=1,nseqA - xt(j)=t(1)+u(1,1)*xa(j)+u(1,2)*ya(j)+u(1,3)*za(j) - yt(j)=t(2)+u(2,1)*xa(j)+u(2,2)*ya(j)+u(2,3)*za(j) - zt(j)=t(3)+u(3,1)*xa(j)+u(3,2)*ya(j)+u(3,3)*za(j) - enddo - d=d0_search-1 - call score_fun !init, get scores, n_cut+i_ali(i) for iteration - if(score_max.lt.score)then - score_max=score - ka0=ka - do i=1,ka0 - k_ali0(i)=k_ali(i) - enddo - endif -*** iteration for extending ----------------------------------> - d=d0_search+1 - do 301 it=1,n_it - LL=0 - ka=0 - do i=1,n_cut - m=i_ali(i) ![1,n_ali] - r_1(1,i)=xa(iA(m)) - r_1(2,i)=ya(iA(m)) - r_1(3,i)=za(iA(m)) - r_2(1,i)=xb(iB(m)) - r_2(2,i)=yb(iB(m)) - r_2(3,i)=zb(iB(m)) - ka=ka+1 - k_ali(ka)=m - LL=LL+1 - enddo - call u3b(w,r_1,r_2,LL,1,rms,u,t,ier) !u rotate r_1 to r_2 - do j=1,nseqA - xt(j)=t(1)+u(1,1)*xa(j)+u(1,2)*ya(j)+u(1,3)*za(j) - yt(j)=t(2)+u(2,1)*xa(j)+u(2,2)*ya(j)+u(2,3)*za(j) - zt(j)=t(3)+u(3,1)*xa(j)+u(3,2)*ya(j)+u(3,3)*za(j) - enddo - call score_fun !get scores, n_cut+i_ali(i) for iteration - if(score_max.lt.score)then - score_max=score - ka0=ka - do i=1,ka - k_ali0(i)=k_ali(i) - enddo - endif - if(it.eq.n_it)goto 302 - if(n_cut.eq.ka)then - neq=0 - do i=1,n_cut - if(i_ali(i).eq.k_ali(i))neq=neq+1 - enddo - if(n_cut.eq.neq)goto 302 - endif - 301 continue !for iteration - 302 continue - 300 continue !for shift - 333 continue !for initial length, L_ali/M - -******** return the final rotation **************** - LL=0 - do i=1,ka0 - m=k_ali0(i) !record of the best alignment - r_1(1,i)=xa(iA(m)) - r_1(2,i)=ya(iA(m)) - r_1(3,i)=za(iA(m)) - r_2(1,i)=xb(iB(m)) - r_2(2,i)=yb(iB(m)) - r_2(3,i)=zb(iB(m)) - LL=LL+1 - enddo - call u3b(w,r_1,r_2,LL,1,rms,u,t,ier) !u rotate r_1 to r_2 - do j=1,nseqA - x1(j)=t(1)+u(1,1)*xa(j)+u(1,2)*ya(j)+u(1,3)*za(j) - y1(j)=t(2)+u(2,1)*xa(j)+u(2,2)*ya(j)+u(2,3)*za(j) - z1(j)=t(3)+u(3,1)*xa(j)+u(3,2)*ya(j)+u(3,3)*za(j) - enddo - TM=score_max - -c^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - return - END - -ccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc -c 1, collect those residues with dis -#include -#include -#include -#include -#include - -#include "io_lib_header.h" -#include "util_lib_header.h" -#include "dp_lib_header.h" -#include "define_header.h" - - -int aln_compare ( int argc, char *argv[]) - { - int a, b, c,f; - - Alignment *A, *B; - Sequence *SA, *SB, *TOT_SEQ=NULL; - Sequence *defined_residueA; - Sequence *defined_residueB; - char **seq_list; - int n_seq_file; - - - Sequence *S=NULL; - Structure *ST=NULL; - Result *R=NULL; - char *buf1; - char *buf2; - -/*PARAMETERS*/ - char ***grep_list; - int n_greps; - char compare_mode[STRING]; - char sim_aln[STRING]; - char *alignment1_file; - char *alignment2_file; - - char *pep1_file; - char *pep2_file; - - char io_format[STRING]; - - int n_structure; - char **struct_file; - char **struct_format; - int *n_symbol; - char ***symbol_list; - int pep_compare; - int aln_compare; - int count; - int output_aln; - int output_aln_threshold; - char output_aln_file[LONG_STRING]; - char output_aln_format[LONG_STRING]; - char output_aln_modif[LONG_STRING]; -/*LIST VARIABLES*/ - Constraint_list *CL_A; - Constraint_list *CL_B; - CLIST_TYPE *clist_entry; - int pos_in_clist; - Sequence *CLS; - - -/*Column Comparison Variables*/ - int **posA; - int **posB; - int **seq_cache; - int is_same; - int n; -/*RESULTS_VARIABLES*/ - int **tot_count; - int **pos_count; - int ***pw_tot_count; - int ***pw_pos_count; - int *glob; - int **pw_glob; - int s1, r1, s2, r2; -/*IO VARIABLES*/ - int n_categories; - char ***category; - char *category_list; - int *n_sub_categories; - char sep_l; - char sep_r; - char io_file[STRING]; - FILE *fp; - int **aln_output_count; - int **aln_output_tot; - -/*Sims VARIABLES*/ - float **sim; - float **sim_param; - char sim_matrix[STRING]; - - int sim_n_categories; - char ***sim_category; - char *sim_category_list; - int *sim_n_sub_categories; - - - if ( argc==1|| strm6 ( argv[1], "h", "-h", "help", "-help", "-man", "?")) - { - output_informations(); - } - - argv=standard_initialisation (argv, &argc); -/*Declarations and Initializations*/ - alignment1_file=vcalloc ( LONG_STRING, sizeof (char)); - alignment2_file=vcalloc ( LONG_STRING, sizeof (char)); - - pep1_file=vcalloc ( LONG_STRING, sizeof (char)); - pep2_file=vcalloc ( LONG_STRING, sizeof (char)); - - - sprintf (compare_mode, "sp"); - count=0; - pep_compare=0; - aln_compare=1; - - - grep_list=vcalloc ( STRING, sizeof (char**)); - for ( a=0; a< STRING; a++)grep_list[a]=declare_char (3, STRING); - n_greps=0; - - n_structure=0; - struct_file=declare_char ( MAX_N_STRUC, LONG_STRING); - struct_format=declare_char (MAX_N_STRUC, STRING); - - n_symbol=vcalloc ( MAX_N_STRUC, sizeof (int)); - symbol_list=vcalloc (MAX_N_STRUC, sizeof (char**)); - for ( a=0; a< MAX_N_STRUC; a++)symbol_list[a]=declare_char ( 100, 100); - - - - - n_categories=1; - category=vcalloc ( MAX_N_CATEGORIES, sizeof (char**)); - for ( a=0; a< MAX_N_CATEGORIES; a++)category[a]=declare_char(100, STRING); - n_sub_categories=vcalloc ( 100, sizeof (int)); - category_list=vcalloc ( LONG_STRING, sizeof (char)); - sprintf ( category_list, "[*][*]=[ALL]"); - - - sim_n_categories=1; - sim_category=vcalloc ( MAX_N_CATEGORIES, sizeof (char**)); - for ( a=0; a< MAX_N_CATEGORIES; a++)sim_category[a]=declare_char(100, STRING); - sim_n_sub_categories=vcalloc ( 100, sizeof (int)); - sim_category_list=vcalloc ( LONG_STRING, sizeof (char)); - sprintf ( sim_category_list, "[*][*]=[ALL]"); - sprintf ( sim_aln, "al1"); - sim_matrix[0]='\0'; - sprintf ( sim_category_list, "[*][*]=[ALL]"); - - sprintf ( io_format, "ht"); - sprintf ( io_file, "stdout"); - sep_l='['; - sep_r=']'; - - output_aln=0; - output_aln_threshold=100; - sprintf ( output_aln_file, "stdout"); - sprintf ( output_aln_format, "clustalw"); - sprintf ( output_aln_modif, "lower"); -/*END OF INITIALIZATION*/ - - - - -/*PARAMETERS INPUT*/ - - for ( a=1; a< argc; a++) - { - if (strcmp ( argv[a], "-f")==0) - { - sprintf (io_file,"%s", argv[++a]); - } - else if ( strcmp ( argv[a], "-sim_aln")==0) - { - sprintf (sim_aln,"%s", argv[++a]); - } - else if ( strcmp ( argv[a], "-sim_matrix")==0) - { - sprintf (sim_matrix,"%s", argv[++a]); - } - else if ( strm ( argv[a], "-compare_mode")) - { - sprintf ( compare_mode, "%s", argv[++a]); - } - else if ( strcmp ( argv[a], "-sim_cat")==0) - { - if ( argv[++a][0]!='[') - { - if ( strcmp ( argv[a], "3d_ali")==0)sprintf ( sim_category_list, "[b][b]+[h][h]=[struc]"); - else - { - fprintf ( stderr, "\n%s: Unknown category for distance measure", argv[a]); - } - - } - else - { - sprintf ( sim_category_list, "%s", argv[a]); - } - } - else if ( strcmp ( argv[a], "-grep_value")==0) - { - sprintf ( grep_list[n_greps][0], "%s", argv[++a]); - sprintf ( grep_list[n_greps][1], "%s", argv[++a]); - n_greps++; - - } - else if ( strcmp ( argv[a], "-al1")==0) - { - - sprintf ( alignment1_file, "%s", argv[++a]); - - } - else if ( strcmp ( argv[a], "-al2")==0) - { - - sprintf ( alignment2_file, "%s", argv[++a]); - - } - else if ( strcmp ( argv[a], "-pep1")==0) - { - pep_compare=1; - sprintf ( pep1_file, "%s", argv[++a]); - } - else if ( strcmp ( argv[a], "-pep2")==0) - { - pep_compare=1; - sprintf ( pep2_file, "%s", argv[++a]); - } - else if ( strcmp ( argv[a], "-pep")==0) - { - pep_compare=1; - } - else if ( strcmp ( argv[a], "-count")==0) - { - count=1; - } - else if ( strcmp ( argv[a], "-output_aln")==0) - { - output_aln=1; - } - else if ( strcmp ( argv[a], "-output_aln_threshold")==0) - { - output_aln_threshold=atoi(argv[++a]); - } - else if ( strcmp ( argv[a], "-output_aln_file")==0) - { - sprintf (output_aln_file,"%s",argv[++a]); - } - else if ( strcmp ( argv[a], "-output_aln_format")==0) - { - sprintf (output_aln_format,"%s",argv[++a]); - } - else if ( strcmp ( argv[a], "-output_aln_modif")==0) - { - sprintf (output_aln_modif,"%s",argv[++a]); - } - else if ( strcmp ( argv[a], "-st")==0) - { - sprintf ( struct_file [n_structure], "%s", argv[++a]); - if (!NEXT_ARG_IS_FLAG && is_a_struc_format (argv[a+1])) - sprintf ( struct_format[n_structure], "%s", argv[++a]); - else - sprintf ( struct_format[n_structure], "%s", "pep"); - - if ( !NEXT_ARG_IS_FLAG && strcmp ( argv[a+1], "conv")==0) - { - a++; - while(!NEXT_ARG_IS_FLAG) - { - sprintf ( symbol_list[n_structure][n_symbol[n_structure]], "%s", argv[++a]); - n_symbol[n_structure]++; - } - } - else if (!NEXT_ARG_IS_FLAG) - { - symbol_list[n_structure]=make_symbols ( argv[++a], &n_symbol[n_structure]); - } - - else - { - symbol_list[n_structure]=make_symbols ( "any", &n_symbol[n_structure]); - } - - n_structure++; - - } - else if ( strcmp (argv[a], "-sep")==0) - { - if ( !NEXT_ARG_IS_FLAG) - get_separating_char ( argv[++a][0], &sep_l, &sep_r); - else - sep_l=sep_r=' '; - } - else if ( strncmp ( argv[a], "-io_format",5)==0) - { - sprintf ( io_format, "%s", argv[++a]); - } - else if ( strcmp ( argv[a], "-io_cat")==0) - { - if ( argv[++a][0]!='[') - { - if ( strcmp ( argv[a], "3d_ali")==0)sprintf ( category_list, "[b][b]+[h][h]=[struc];[*][*]=[tot]"); - } - else - { - sprintf ( category_list, "%s", argv[a]); - } - } - else - { - fprintf ( stdout, "\nOPTION %s UNKNOWN[FATAL]\n", argv[a]); - myexit (EXIT_FAILURE); - } - } - -/*PARAMETER PROCESSING*/ - -if ( pep_compare==1 || count==1)aln_compare=0; -if ( aln_compare==1)pep_compare=0; - -/*READ THE TOTAL SEQUENCES*/ - seq_list=declare_char ( 100,STRING); - n_seq_file=0; - - if ( alignment1_file[0] && !check_file_exists ( alignment1_file)) - { - fprintf (stderr, "\nERROR: %s DOES NOT EXIST[FATAL:%s]\n", alignment1_file, PROGRAM); - myexit(EXIT_FAILURE); - } - if ( alignment2_file[0] && !check_file_exists ( alignment2_file)) - { - fprintf (stderr, "\nERROR: %s DOES NOT EXIST[FATAL:%s]\n", alignment2_file, PROGRAM); - myexit(EXIT_FAILURE); - } - if ( pep1_file[0] && !check_file_exists ( pep1_file)) - { - fprintf (stderr, "\nERROR: %s DOES NOT EXIST[FATAL:%s]\n", pep1_file, PROGRAM); - myexit(EXIT_FAILURE); - } - if ( pep2_file[0] && !check_file_exists ( pep2_file)) - { - fprintf (stderr, "\nERROR: %s DOES NOT EXIST[FATAL:%s]\n", pep2_file, PROGRAM); - myexit(EXIT_FAILURE); - } - - if ( alignment1_file[0])sprintf ( seq_list[n_seq_file++], "A%s", alignment1_file); - if ( alignment2_file[0])sprintf ( seq_list[n_seq_file++], "A%s", alignment2_file); - if ( pep1_file[0])sprintf ( seq_list[n_seq_file++], "S%s", pep1_file); - if ( pep2_file[0])sprintf ( seq_list[n_seq_file++], "S%s", pep2_file); - - - - TOT_SEQ=read_seq_in_n_list ( seq_list, n_seq_file, NULL, NULL); - - A=declare_aln (TOT_SEQ); - B=declare_aln (TOT_SEQ); - - -/*1 COMPARISON OF THE SEQUENCES*/ - if ( pep_compare==1 || count==1) - { - f=0; - - if ( pep1_file[0]!='\0')SA=main_read_seq (pep1_file); - else if (alignment1_file[0]!='\0') - { - main_read_aln ( alignment1_file, A); - SA=aln2seq ( A); - } - else - { - main_read_aln ("stdin", A); - sprintf ( alignment1_file, "stdin"); - SA=aln2seq ( A); - } - if ( pep2_file[0]!='\0')SB=main_read_seq (pep2_file); - else if (alignment2_file[0]!='\0') - { - main_read_aln ( alignment2_file, B); - - SB=aln2seq (B); - } - else - { - SB=SA; - sprintf ( alignment2_file, "%s", alignment1_file ); - } - buf1=(pep1_file[0]!='\0')?pep1_file: alignment1_file; - buf2=(pep2_file[0]!='\0')?pep2_file: alignment2_file; - /*Output of the Results*/ - - fp=vfopen ( io_file, "w"); - - - if ( count==1) - { - fprintf (fp, "Number of seq: %d %d\n", SA->nseq,SA->nseq); - for ( a=0; a< SA->nseq; a++) fprintf (fp, "%-15s %d %d\n", SA->name[a], (int)strlen (SA->seq[a]), (int)strlen (SA->seq[a])); - } - - if (SA->nseq!=SB->nseq) - { - - fprintf ( fp, "DIFFERENCE TYPE 1: Different number of sequences %3d/%3d",SA->nseq,SB->nseq); - f=1; - } - - trim_seq ( SA, SB); - for ( a=0; a< SA->nseq; a++) - { - lower_string (SA->seq[a]); - lower_string (SB->seq[a]); - ungap ( SA->seq[a]); - ungap ( SB->seq[a]); - - if ( strcmp ( SA->seq[a], SB->seq[a])!=0) - { - fprintf ( fp, "DIFFERENCE TYPE 2: %s is different in the 2 files\n", SA->name[a]); - f=1; - } - } - for ( a=0; a< SA->nseq; a++) - { - lower_string (SA->seq[a]); - lower_string (SB->seq[a]); - ungap ( SA->seq[a]); - ungap ( SB->seq[a]); - - if ( strlen ( SA->seq[a])!= strlen (SB->seq[a])) - { - fprintf ( fp, "DIFFERENCE TYPE 3: %s has != length in the 2 files (%d-%d)\n", SA->name[a],(int)strlen ( SA->seq[a]), (int)strlen (SB->seq[a])); - f=1; - } - } - if ( f==1) - { - fprintf ( fp, "\nDIFFERENCES found between:\n\t%s\n\t%s\n**********\n\n",buf1, buf2); - } - fclose (fp); - } - -/*2 COMPARISON OF THE ALIGNMENTS*/ - else if ( aln_compare==1) - { - - n_categories=parse_category_list ( category_list, category, n_sub_categories); - sim_n_categories=parse_category_list ( sim_category_list, sim_category, sim_n_sub_categories); - - main_read_aln ( alignment1_file, A); - main_read_aln ( alignment2_file, B); - CLS=trim_aln_seq ( A, B); - - - defined_residueA=get_defined_residues (A); - defined_residueB=get_defined_residues (B); - - - A=thread_defined_residues_on_aln(A, defined_residueA); - A=thread_defined_residues_on_aln(A, defined_residueB); - B=thread_defined_residues_on_aln(B, defined_residueA); - B=thread_defined_residues_on_aln(B, defined_residueB); - - - CL_A=declare_constraint_list ( CLS, NULL, NULL, 0, NULL, NULL); - CL_B=declare_constraint_list ( CLS, NULL, NULL, 0, NULL, NULL); - - - CL_A=aln2constraint_list (A,CL_A, "sim"); - CL_B=aln2constraint_list (B,CL_B, "sim"); - - clist_entry=vcalloc ( CL_A->entry_len, CL_A->el_size); - - glob=vcalloc ( A->nseq+1, sizeof (int)); - pw_glob=declare_int ( A->nseq+1, A->nseq+1); - - - if ( strm( compare_mode, "sp")) - { - for ( b=0,a=0; ane; a++) - { - s1=vread_clist(CL_A, a, SEQ1); - s2=vread_clist(CL_A, a, SEQ2); - clist_entry=extract_entry ( clist_entry, a, CL_A); - - glob[0]++; - glob[s1+1]++; - glob[s2+1]++; - pw_glob[s1][s2]++; - pw_glob[s2][s1]++; - - clist_entry=extract_entry ( clist_entry, a, CL_A); - - if ((main_search_in_list_constraint (clist_entry,&pos_in_clist,4,CL_B))!=NULL) - { - vwrite_clist ( CL_A, a, MISC, 1); - b++; - } - else - { - vwrite_clist ( CL_A, a, MISC, 0); - } - } - } - else if ( strm( compare_mode, "column")) - { - posA=aln2pos_simple_2(A); - posB=aln2pos_simple_2(B); - seq_cache=declare_int ( A->nseq, A->len_aln+1); - for ( n=0,a=0; a< A->len_aln; a++) - for ( b=0; blen_aln; b++) - { - is_same=compare_pos_column(posA, a, posB, b, A->nseq); - - n+=is_same; - if (is_same) - { - for (c=0; c< A->nseq;c++)if ( posA[c][a]>0)seq_cache[c][posA[c][a]]=1; - } - } - - for ( a=0,b=0; a< CL_A->ne; a++) - { - s1=vread_clist(CL_A, a, SEQ1); - s2=vread_clist(CL_A, a, SEQ2); - glob[0]++; - glob[s1+1]++; - glob[s2+1]++; - pw_glob[s1][s2]++; - pw_glob[s2][s1]++; - r1=vread_clist(CL_A, a, R1); - if (seq_cache[s1][r1]){b++;vwrite_clist ( CL_A, a, MISC, 1);} - } - free_int (posA, -1); - free_int (posB, -1); - free_int (seq_cache, -1); - - } - - for ( a=0; a< n_structure; a++) - { - ST=read_structure (struct_file[a],struct_format[a], A,B,ST,n_symbol[a], symbol_list[a]); - } - - /*RESULT ARRAY DECLARATION*/ - - tot_count=declare_int (n_categories+1, A->nseq+1); - pos_count=declare_int (n_categories+1, A->nseq+1); - pw_tot_count=vcalloc ( A->nseq, sizeof (int**)); - for ( a=0; a< A->nseq; a++)pw_tot_count[a]=declare_int ( A->nseq, n_categories); - - - pw_pos_count=vcalloc ( A->nseq, sizeof (int**)); - for ( a=0; a< A->nseq; a++)pw_pos_count[a]=declare_int ( A->nseq, n_categories); - - /*COMPARISON MODULE*/ - for ( a=0; a< n_categories; a++) - { - for (b=0; bne; b++) - { - s1=vread_clist(CL_A, b, SEQ1); - s2=vread_clist(CL_A, b, SEQ2); - - r1=vread_clist(CL_A, b, R1); - r2=vread_clist(CL_A, b, R2); - - c=vread_clist(CL_A, b, MISC); - - if ( is_in_struct_category ( s1, s2, r1, r2, ST, category[a], n_sub_categories[a])) - { - - tot_count[a][0]++; - tot_count[a][s1+1]++; - tot_count[a][s2+1]++; - pw_tot_count[s1][s2][a]++; - pw_tot_count[s2][s1][a]++; - if ( c==1) - { - pw_pos_count[s1][s2][a]++; - pw_pos_count[s2][s1][a]++; - pos_count[a][0]++; - pos_count[a][s1+1]++; - pos_count[a][s2+1]++; - } - } - - } - } - - - - - - /*Measure of Aligned Sequences Similarity*/ - - sim=get_aln_compare_sim ((strcmp (sim_aln, "al1")==0)?A:B, ST,sim_category[0], sim_n_sub_categories[0], sim_matrix); - sim_param=analyse_sim ((strcmp (sim_aln, "al1")==0)?A:B, sim); - - - /*Fill the Result_structure*/ - R=vcalloc ( 1, sizeof (Result)); - - R->grep_list=grep_list; - R->n_greps=n_greps; - R->A=A; - R->B=B; - - R->S=S; - R->ST=ST; - R->sim_aln=sim_aln; - R->alignment1_file=alignment1_file; - R->alignment2_file=alignment2_file; - R->io_format=io_format; - R->n_structure=n_structure; - R->struct_file=struct_file; - R->struct_format=struct_format; - R->n_symbol=n_symbol; - R->symbol_list=symbol_list; - - - R->tot_count=tot_count; - R->pos_count=pos_count; - R->pw_tot_count=pw_tot_count; - R->pw_pos_count=pw_pos_count; - R->glob=glob; - R->pw_glob=pw_glob; - R->n_categories=n_categories; - R->category=category; - R->category_list=category_list; - R->n_sub_categories=n_sub_categories; - R->sim=sim; - R->sim_param=sim_param; - R->sim_matrix=sim_matrix; - R->sim_n_categories=sim_n_categories; - R->sim_category=sim_category; - R->sim_category_list=sim_category_list; - R->sim_n_sub_categories=sim_n_sub_categories; - R->sep_r=sep_r; - R->sep_l=sep_l; - - /*Output of the Results*/ - - fp=vfopen ( io_file, "w"); - fp=output_format (io_format, fp, R); - vfclose ( fp); - - - /*Rewriting of Alignment A*/ - if ( output_aln) - { - A->residue_case=2; - aln_output_tot =declare_int ( A->nseq, A->len_aln+1); - aln_output_count=declare_int ( A->nseq, A->len_aln+1); - - for ( a=0; a< CL_A->ne; a++) - { - clist_entry=extract_entry ( clist_entry, a, CL_A); - aln_output_tot[clist_entry[SEQ1]][clist_entry[R1]]++; - aln_output_tot[clist_entry[SEQ2]][clist_entry[R2]]++; - - aln_output_count[clist_entry[SEQ1]][clist_entry[R1]]+=clist_entry[MISC]; - aln_output_count[clist_entry[SEQ2]][clist_entry[R2]]+=clist_entry[MISC]; - } - for ( a=0; a< A->nseq; a++) - { - - for (c=0, b=0; b< A->len_aln; b++) - { - if ( !is_gap(A->seq_al[a][b])) - { - c++; - if ( aln_output_tot[a][c] && ((aln_output_count[a][c]*100)/aln_output_tot[a][c])seq_al[a][b]=tolower(A->seq_al[a][b]); - else - A->seq_al[a][b]=output_aln_modif[0]; - } - else A->seq_al[a][b]=toupper(A->seq_al[a][b]); - } - - } - } - A->score_aln=(int)(R->tot_count[0][0]==0)?0:((R->pos_count[0][0]*100)/(float)R->tot_count[0][0]); - - output_format_aln (output_aln_format,A,NULL,output_aln_file); - - free_int ( aln_output_tot, -1); - free_int ( aln_output_count, -1 ); - } - } - return EXIT_SUCCESS; - } -/************************************************************************************/ -/* */ -/* OUTPUT */ -/* */ -/* */ -/************************************************************************************/ -FILE *output_format (char *iof,FILE *fp, Result *R) - { - int a; - int l; - - /* - H: files Header - h: basic header; - s: sequence results - t: total results (global); - p: pairwise_results; - */ - l=strlen ( iof); - - for ( a=0; a< l; a++) - { - if ( iof[a]=='H')fp=output_large_header (fp,R); - else if ( iof[a]=='h')fp=output_header (fp,R); - else if ( iof[a]=='t')fp=output_total_results (fp, R); - else if ( iof[a]=='s')fp=output_sequence_results (fp,R); - else if ( iof[a]=='p')fp=output_pair_wise_sequence_results(fp,R); - } - return fp; - } - -FILE *output_pair_wise_sequence_results (FILE *fp, Result *R) - { - int a,c,d; - - - for ( c=0; c<(R->A)->nseq-1; c++) - { - for ( d=c+1; d< (R->A)->nseq; d++) - { - fprintf (fp, "%-10s %-10s%s",(R->A)->name[c],(R->A)->name[d],SSPACE); - fprintf (fp, "%5.1f%s", R->sim[c][d], SSPACE); - - for (a=0; a< R->n_categories; a++) - { - fprintf ( fp, "%5.1f ",(R->pw_tot_count[c][d][a]==0)?0:((float)(R->pw_pos_count[c][d][a]*100)/(float)R->pw_tot_count[c][d][a])); - fprintf ( fp, "%c%5.1f%c%s",(R->sep_l),(R->pw_glob[c][d]==0)?0:((float)(R->pw_tot_count[c][d][a]*100)/(float)R->pw_glob[c][d]),(R->sep_r),SSPACE); - } - fprintf ( fp, "%c%5d%c\n",(R->sep_l), R->pw_glob[c][d],(R->sep_r)); - } - } - - return fp; - } -FILE *output_sequence_results (FILE *fp, Result *R) - { - int a,c; - - for ( c=1; c<=R->A->nseq; c++) - { - fprintf (fp, "%-10s %-10s%s",(R->A)->name[c-1], "..",SSPACE); - fprintf (fp, "%5.1f%s", R->sim_param[c-1][0],SSPACE); - for (a=0; a< R->n_categories; a++) - { - fprintf ( fp, "%5.1f ",(R->tot_count[a][c]==0)?0:((float)(R->pos_count[a][c]*100)/(float)R->tot_count[a][c])); - fprintf ( fp, "%c%5.1f%c%s",(R->sep_l),(R->glob[c]==0)?0:((float)(R->tot_count[a][c]*100)/(float)R->glob[c]),(R->sep_r), SSPACE); - } - fprintf ( fp, "%c%5d%c\n",(R->sep_l), R->glob[c],(R->sep_r)); - } - return fp; - } - - -FILE *output_total_results (FILE *fp, Result *R) - { - int a; - - - fprintf ( fp, "%-13s %-7d%s",extract_suffixe (R->alignment1_file),(R->A)->nseq, SSPACE); - fprintf (fp, "%5.1f%s", R->sim_param[(R->A)->nseq][0], SSPACE); - for (a=0; a< R->n_categories; a++) - { - fprintf ( fp, "%5.1f ",(R->tot_count[a][0]==0)?0:((float)(R->pos_count[a][0]*100)/(float)R->tot_count[a][0])); - fprintf ( fp, "%c%5.1f%c%s",(R->sep_l),(R->glob[0]==0)?0:((float)(R->tot_count[a][0]*100)/(float)R->glob[0]),(R->sep_r), SSPACE); - } - fprintf ( fp, "%c%5d%c\n",(R->sep_l), R->glob[0],(R->sep_r)); - return fp; - } -FILE *output_header (FILE *fp, Result *R) - { - int a; - - - fprintf ( fp, "%s\n",generate_string ( R->n_categories*(13+strlen(SSPACE))+31+2*strlen(SSPACE),'*')); - - fprintf ( fp, "%-10s %-10s %s%-3s%s", "seq1", "seq2",SSPACE,"Sim",SSPACE); - - for ( a=0; a< R->n_categories; a++) - fprintf ( fp, "%-12s%s ",R->category[a][0], SSPACE); - fprintf (fp, "%-5s", "Tot"); - fprintf (fp, "\n"); - return fp; - } -FILE *output_large_header ( FILE *fp, Result *R) - { - int a, b; - - fprintf ( fp, "AL1: %s\n", R->alignment1_file); - fprintf ( fp, "AL2: %s\n", R->alignment2_file); - for ( a=0; a< R->n_structure; a++) - { - fprintf (fp, "ST %d: %s [%s]/[", a, R->struct_file[a], R->struct_format[a]); - for ( b=0; b< R->n_symbol[a]; b++)fprintf (fp, "%s ", R->symbol_list[a][b]); - fprintf ( fp, "]\n"); - } - return (fp); - } - -void get_separating_char ( char s, char *l, char *r) - { - if ( s=='{' || s=='}') - { - l[0]='{'; - r[0]='}'; - return; - } - else if ( s==']' || s=='[') - { - l[0]='['; - r[0]=']'; - return; - } - else if ( s==')' || s=='(') - { - l[0]='('; - r[0]=')'; - return; - } - else - { - l[0]=s; - r[0]=s; - return; - } - } - -/************************************************************************************/ -/* */ -/* SIM MEASURE */ -/* */ -/* */ -/************************************************************************************/ -float **get_aln_compare_sim ( Alignment *A, Structure *ST, char **cat, int n_cat, char *matrix) - { - int a, b, c; - - float **sim; - char cr1, cr2; - int r1, r2; - int p1, p2; - float pos,tot; - - sim=declare_float ( A->nseq, A->nseq); - - for ( a=0; anseq-1; a++) - { - for (b=a+1; b< A->nseq; b++) - { - for ( r1=0, r2=0,tot=0, pos=0,c=0; c< A->len_aln; c++) - { - p1=is_gap(A->seq_al[a][c]); - p2=is_gap(A->seq_al[b][c]); - - r1+=1-p1; - r2+=1-p2; - cr1=A->seq_al[a][c]; - cr2=A->seq_al[b][c]; - if (!p1 && !p2) - { - if (is_in_struct_category (a, b, r1, r2, ST, cat, n_cat)) - { - tot++; - pos+=is_in_same_group_aa ( cr1, cr2, 0, NULL, matrix); - } - } - } - sim[a][b]=sim[b][a]=(tot==0)?0:((pos*100)/tot); - } - } - return sim; - } -float **analyse_sim ( Alignment *A, float **sim) - { - int a,b,c; - - float **an, d; - - - an=declare_float ( A->nseq+1,2); - - for (d=0, a=0; a< A->nseq;a++) - { - for ( b=0; b< A->nseq; b++) - { - if ( b!=a) - { - an[a][0]+=sim[a][b]; - an[A->nseq][0]+=sim[a][b]; - d++; - } - } - an[a][0]=((float)an[a][0]/(float)(A->nseq-1)); - } - - - an[A->nseq][0]=an[A->nseq][0]/d; - - for ( d=0,a=0; a< A->nseq; a++) - { - for ( b=0; b< A->nseq; b++) - { - if ( b!=a) - { - c=an[a][0]-sim[a][b]; - an[a][1]+=(c>0)?c:-c; - an[A->nseq][1]+=(c>0)?c:-c; - d++; - } - } - an[a][1]=((float)an[a][1]/(float)(A->nseq-1)); - } - - an[A->nseq][1]=an[A->nseq][1]/d; - - return an; - } - - - - - - - - -/************************************************************************************/ -/* */ -/* STRUC ANALYSE */ -/* */ -/* */ -/************************************************************************************/ -int is_in_struct_category ( int s1, int s2, int r1, int r2, Structure *ST, char **cat, int n_cat) - { - int a; - static char *struc_r1; - static char *struc_r2; - char first[STRING]; - char second[STRING]; - static int **r; - - - - - if ( ST==NULL)return 1; - if ( struc_r1!=NULL) - { - vfree ( struc_r1); - vfree ( struc_r2); - } - - if ( r==NULL)r=declare_int (2, 2); - else r[0][0]=r[1][1]=r[1][0]=r[0][1]=0; - - - struc_r1=get_structure_residue ( s1, r1, ST); - struc_r2=get_structure_residue ( s2, r2, ST); - - - for ( a=1; a< n_cat; a+=2) - { - sprintf ( first, "%s", cat[a]); - sprintf ( second,"%s", cat[a+1]); - r[0][0]=struc_matches_pattern ( struc_r1, first); - r[0][1]=struc_matches_pattern ( struc_r2, first); - r[1][0]=struc_matches_pattern ( struc_r1, second); - r[1][1]=struc_matches_pattern ( struc_r2, second); - - if ( (r[0][0]&&r[1][1])||(r[1][0]&&r[0][1]))return 1; - } - return 0; - } - -char * get_structure_residue (int seq, int res, Structure *ST) - { - int a; - char *s; - - s=vcalloc ( ST->n_fields+1, sizeof (char)); - for (a=0; a< ST->n_fields; a++) - s[a]=ST->struc[seq][res][a]; - s[a]='\0'; - return s; - } - -int struc_matches_pattern ( char *struc, char *pattern) - { - char p[STRING]; - char *y; - - int b,l; - - - sprintf ( p, "%s", pattern); - - if ( strcmp (p, "*")==0)return 1; - else - { - l=strlen ( struc); - y=strtok ( p, "."); - - for ( b=0; bnseq, A->seq_al); - else - ST=extend_structure ( ST); - - StrucAln=declare_Alignment(NULL); - if (strm ( format, "pep")) - { - SEQ=main_read_seq ( fname); - StrucAln=seq2aln(SEQ,StrucAln,0); - } - - else if ( strcmp ( format, "aln")==0) - { - StrucAln=main_read_aln (fname, StrucAln); - } - - reorder_aln(StrucAln, A->name, A->nseq); - - SA=aln2seq(StrucAln); - string_array_convert (SA->seq, SA->nseq, n_symbols, symbol_table); - seq2struc (SA, ST); - - free_aln(StrucAln); - if (SEQ)free_sequence (SEQ, SEQ->nseq); - - return ST; - } - -int parse_category_list ( char *category_list_in, char ***category, int*n_sub_categories) - { - int n,a; - char *category_list; - char *y,*z; - category_list=vcalloc ( strlen(category_list_in)+1, sizeof (char)); - sprintf (category_list, "%s", category_list_in); - - n=0; - z=category_list; - while ((y=strtok(z, ";"))!=NULL) - { - sprintf ( category[n++][2], "%s", y); - z=NULL; - } - - - for ( a=0; a< n; a++) - { - sprintf (category_list,"%s",strtok(category[a][2], "=")); - sprintf (category[a][0],"%s",strtok (NULL, "=")); - sprintf ( category[a][++n_sub_categories[a]],"%s", strtok(category_list, "[]+")); - while ( ((y=strtok(NULL, "[]+"))!=NULL)) - { - if ( strcmp (y, "#")==0)y=category[a][n_sub_categories[a]]; - sprintf ( category[a][++n_sub_categories[a]],"%s",y); - } - } - return n; - } - - -int is_a_struc_format (char *format) - { - if ( strcmp ( format, "pep")==0)return 1; - if ( strcmp ( format, "aln")==0)return 1; - return 0; - } -/************************************************************************************/ -/* */ -/* Informations */ -/* */ -/* */ -/************************************************************************************/ -void output_informations () -{ -fprintf ( stderr, "\nPROGRAM: %s (%s)\n",PROGRAM,VERSION); -fprintf ( stderr, "******INPUT***************************"); -fprintf ( stderr, "\n-al1 al1_file"); -fprintf ( stderr, "\n-al2 al2_file"); -fprintf ( stderr, "\n-compare_mode [sp] or column"); -fprintf ( stderr, "\n-pep (compare only the sequences"); -fprintf ( stderr, "\n-count"); -fprintf ( stderr, "\n-pep1 pep1_file"); -fprintf ( stderr, "\n-pep1 pep2_file"); -fprintf ( stderr, "\n-st str_file st_format conversion"); -fprintf ( stderr, "\n **st_format: aln, pep"); -fprintf ( stderr, "\n **conversion: 3d_ali, conv abcZ #X"); -fprintf ( stderr, "\nNOTE: Several structures in a row are possible"); - -fprintf ( stderr, "\n\n****DISTANCE MEASURE*****************"); -fprintf ( stderr, "\n-sim_cat category_format or category_name"); -fprintf ( stderr, "\n **category_format: [*][*]=[tot]"); -fprintf ( stderr, "\n **category_name : 3d_ali (<=>[h][h]+[e][e]=[Struc]"); -fprintf ( stderr, "\n-sim_matrix matrix_name"); -fprintf ( stderr, "\n **matrix_name: idmat,pam250mt.."); -fprintf ( stderr, "\n-sim_aln al1 or al2"); -fprintf ( stderr, "\n\n****COMPARISON***********************"); -fprintf ( stderr, "\n-io_cat category_format or category_name"); -fprintf ( stderr, "\n **category_format: [*][*]=[tot]"); -fprintf ( stderr, "\n **category_name : 3d_ali(<=>[h][h]+[e][e]=[Struc];[*][*]=[Tot]"); -fprintf ( stderr, "\n\nNOTE: if two structures:"); -fprintf ( stderr, "\n[he.123][#]=[he123VShe123];[beh.*][he.2345]=[other]"); -fprintf ( stderr, "\n\n****OUTPUT****************************"); -fprintf ( stderr, "\n-f stdout"); -fprintf ( stderr, "\n stderr"); -fprintf ( stderr, "\n file_name"); -fprintf ( stderr, "\n-io_format hts"); -fprintf ( stderr, "\n H ->large Header"); -fprintf ( stderr, "\n h ->small Header"); -fprintf ( stderr, "\n t->global (average)results"); -fprintf ( stderr, "\n s ->average results for each sequence"); -fprintf ( stderr, "\n p ->results for each pair of sequences"); -fprintf ( stderr, "\n-output_aln Outputs al1 with conserved bits in Upper"); -fprintf ( stderr, "\n-output_aln_threshold [100]"); -fprintf ( stderr, "\n-output_aln_file [stdout]"); -fprintf ( stderr, "\n-output_aln_format [clustalw]"); -fprintf ( stderr, "\n-output_aln_modif [lower]"); -fprintf ( stderr, "\n"); -myexit (EXIT_SUCCESS); -} - -/*********************************COPYRIGHT NOTICE**********************************/ -/*© Centro de Regulacio Genomica */ -/*and */ -/*Cedric Notredame */ -/*Tue Jul 1 10:00:54 WEST 2008. */ -/*All rights reserved.*/ -/*This file is part of T-COFFEE.*/ -/**/ -/* T-COFFEE is free software; you can redistribute it and/or modify*/ -/* it under the terms of the GNU General Public License as published by*/ -/* the Free Software Foundation; either version 2 of the License, or*/ -/* (at your option) any later version.*/ -/**/ -/* T-COFFEE is distributed in the hope that it will be useful,*/ -/* but WITHOUT ANY WARRANTY; without even the implied warranty of*/ -/* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the*/ -/* GNU General Public License for more details.*/ -/**/ -/* You should have received a copy of the GNU General Public License*/ -/* along with Foobar; if not, write to the Free Software*/ -/* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA*/ -/*............................................... |*/ -/* If you need some more information*/ -/* cedric.notredame@europe.com*/ -/*............................................... |*/ -/**/ -/**/ -/* */ -/*********************************COPYRIGHT NOTICE**********************************/ -/*********************************COPYRIGHT NOTICE**********************************/ -/*© Centro de Regulacio Genomica */ -/*and */ -/*Cedric Notredame */ -/*Tue Oct 27 10:12:26 WEST 2009. */ -/*All rights reserved.*/ -/*This file is part of T-COFFEE.*/ -/**/ -/* T-COFFEE is free software; you can redistribute it and/or modify*/ -/* it under the terms of the GNU General Public License as published by*/ -/* the Free Software Foundation; either version 2 of the License, or*/ -/* (at your option) any later version.*/ -/**/ -/* T-COFFEE is distributed in the hope that it will be useful,*/ -/* but WITHOUT ANY WARRANTY; without even the implied warranty of*/ -/* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the*/ -/* GNU General Public License for more details.*/ -/**/ -/* You should have received a copy of the GNU General Public License*/ -/* along with Foobar; if not, write to the Free Software*/ -/* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA*/ -/*............................................... |*/ -/* If you need some more information*/ -/* cedric.notredame@europe.com*/ -/*............................................... |*/ -/**/ -/**/ -/* */ -/*********************************COPYRIGHT NOTICE**********************************/ diff --git a/binaries/src/tcoffee/t_coffee_source/aln_convertion_util.c b/binaries/src/tcoffee/t_coffee_source/aln_convertion_util.c deleted file mode 100644 index 4bdbb51..0000000 --- a/binaries/src/tcoffee/t_coffee_source/aln_convertion_util.c +++ /dev/null @@ -1,17537 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include "io_lib_header.h" -#include "util_lib_header.h" -#include "dp_lib_header.h" -#include "define_header.h" - -int aln_has_stockholm_structure (Alignment *A) -{ - return name_is_in_list ("#=GC SS_cons", A->name, A->nseq, 100); -} - -int get_aln_stockholm_structure (Alignment *A) -{ - int i; - if ((i=aln_has_stockholm_structure(A))==-1) - A=add_alifold2aln (A, NULL); - return aln_has_stockholm_structure(A); -} -int ** update_RNAfold_list (Alignment *A, int **pos, int s, int **l) -{ - int a=0; - while (l[a]) - { - if (!is_gap(A->seq_al[s][l[a][0]]) && !is_gap (A->seq_al[s][l[a][1]])) - { - l[a][2]=pos[s][l[a][0]]; - l[a][3]=pos[s][l[a][1]]; - } - else - { - l[a][2]=l[a][3]=-1; - } - a++; - } - return l; -} - -Alignment *compare_RNA_fold ( Alignment *A, Alignment *B) -{ - int i1, i2, i; - int **l1, **l2; - int **pos1, **pos2; - int a, b; - int tot_ol=0, tot_l=0; - - i1=get_aln_stockholm_structure (A); - i2=get_aln_stockholm_structure (B); - - l1=vienna2list (A->seq_al[i1]); - l2=vienna2list (B->seq_al[i2]); - - pos1=aln2pos_simple(A, A->nseq); - pos2=aln2pos_simple(B, B->nseq); - - - - for (a=0; a< A->nseq; a++) - { - char **lu; - int ol=0, ll1=0, ll2=0; - if ( A->name[a][0]=='#')continue; - i=name_is_in_list (A->name[a], B->name, B->nseq, 100); - if (i!=-1) - { - l1=update_RNAfold_list (A,pos1,a, l1); - l2=update_RNAfold_list (B,pos2,i, l2); - lu=declare_char (A->len_aln, B->len_aln); - - b=0; - while (l2[b]) - { - - if (l2[b][2]==-1 || l2[b][3]==-1); - else - { - ll2++; - lu[l2[b][2]][l2[b][3]]=1; - - } - b++; - } - b=0; - - while (l1[b]) - { - - if (l1[b][2]==-1 || l1[b][3]==-1); - else - { - ll1++; - if (lu[l1[b][2]][l1[b][3]]==1) - { - A->seq_al[a][l1[b][0]]='6'; - A->seq_al[a][l1[b][1]]='6'; - ol++; - } - else - { - A->seq_al[a][l1[b][0]]='0'; - A->seq_al[a][l1[b][1]]='0'; - } - } - b++; - } - - free_char (lu, -1); - } - tot_ol+=ol; - tot_l+=ll1; - tot_l+=ll2; - fprintf ( stdout, "@@ Seq: %s Overalp: %.2f Al1: %.2f Al2: %.2f \n", A->name[a], (float)(ol*200)/(ll1+ll2), (float)(ol*100)/ll1,(float)(ol*100)/ll2); - } - - fprintf ( stdout, "@@ Seq: Tot Overalp: %.2f \n", (float)(tot_ol*200)/(tot_l)); - - return A; -} -int is_neutral(char c1, char c2); -int is_watson (char c1, char c2); -int is_watson2 (char c1, char c2); -int is_watson (char c1, char c2) -{ - c1=tolower (c1); - c2=tolower (c2); - if ( is_watson2 (c1, c2)) return 1; - else return is_watson2 (c2, c1); -} -int is_watson2 (char c1, char c2) -{ - - if ( c1=='g' && c2=='c')return 1; - else if (c1=='a' && (c2=='t' || c2=='u'))return 1; - return 0; -} -int is_neutral (char c1, char c2) -{ - - c1=tolower (c1); - c2=tolower (c2); - if (is_watson (c1, c2)) return 1; - else if (c1=='g' && (c2=='t' || c2=='u'))return 1; - else if ((c1=='t' || c1=='u') && c2=='g')return 1; - return 0; -} - -int ** vienna2list ( char *seq) -{ - int a, b, i, i2,l; - int **list; - l=strlen (seq); - list=declare_int (l+1, 8); - for (i=0,a=0; a=0; b++) - { - if (seq[b]=='(')i2++; - else if (seq[b]==')')i2--; - } - list[i][1]=b-1; - i++; - } - } - - list[i]=NULL; - return list; -} -Alignment *aln2alifold(Alignment *A) -{ - char *tmp1; - char *tmp2; - - tmp1=vtmpnam (NULL); - tmp2=vtmpnam (NULL); - output_clustal_aln (tmp1,A); - printf_system ("RNAalifold %s >%s 2>/dev/null", tmp1, tmp2); - return alifold2aln (tmp2); -} - -Alignment *add_alifold2aln (Alignment *A, Alignment *ST) -{ - int a,b,c,d,p1,p2; - int r1, rr1, r2, rr2; - int watson, comp,tot; - int **compmat; - int max, p,k; - int minseq=3; - int **list; - int ncomp=0, nwatson=0; - int cons_l, fold_l; - int i,l; - - if (!ST) - { - char *tmp1, *tmp2; - int f; - Alignment *T; - T=copy_aln (A, NULL); - tmp1=vtmpnam (NULL); - tmp2=vtmpnam (NULL); - cons_l=A->len_aln; - for (a=0; alen_aln; a++) - { - for (f=0,b=0; bnseq && f==0; b++) - { - if (is_gap (A->seq_al[b][a]))f=1; - - } - if (f) - { - cons_l--; - for (b=0; bnseq; b++)T->seq_al[b][a]='-'; - } - } - ST=aln2alifold (T); - } - - - //add or Replace the structure - l=strlen (ST->seq_al[0]); - - if ( l!=A->len_aln) - { - HERE ("\n%s\n%s\n", ST->seq_al[0], A->seq_al[0]); - printf_exit ( EXIT_FAILURE, stderr, "ERROR the predicted structure and the multiple alignment do not have the same length [FATAL:%s]\n", PROGRAM); - - } - - for (a=0; a< l; a++)if (ST->seq_al[0][a]==STOCKHOLM_CHAR)ST->seq_al[0][a]='.'; - if ((i=name_is_in_list ("#=GC SS_cons", A->name, A->nseq, 100))!=-1) - { - sprintf (A->seq_al[i], "%s", ST->seq_al[0]); - } - else - { - A=realloc_aln2 ( A, A->nseq+1, A->len_aln+1); - sprintf (A->name[A->nseq], "#=GC SS_cons"); - sprintf (A->seq_al[A->nseq], "%s", ST->seq_al[0]); - A->nseq++; - } - return A; -} -Alignment * alifold2analyze (Alignment *A, Alignment *ST, char *mode) -{ - int s; - int **list; - int usegap; - - s=name_is_in_list ("#=GC SS_cons", A->name,A->nseq, 100); - - if (s==-1) - { - A=add_alifold2aln (A,ST); - s=name_is_in_list ("#=GC SS_cons", A->name,A->nseq, 100); - } - - list=vienna2list (A->seq_al[s]); - list=alifold_list2cov_list (A, list); - - usegap=0; //do not use gaped positions by default - if (mode && strstr (mode, "usegap"))usegap=1;//count positions with gaps - - if (!mode) - { - A=alifold2cov_stat (A, list,usegap); - } - else - { - if ( strstr (mode, "stat")) A=alifold2cov_stat (A, list, usegap); - if ( strstr (mode, "list")) A=alifold2cov_list (A, list, usegap); - if ( strstr (mode, "aln")) A=alifold2cov_aln (A, list, usegap); - if ( strstr (mode, "color") ) - { - Alignment *C; - C=copy_aln (A, NULL); - C=alifold2cov_cache (C, list, usegap); - A=alifold2cov_aln (A, list, usegap); - if ( strstr ( mode, "ps")) - output_color_ps (A, C, "stdout"); - else - output_color_html (A, C, "stdout"); - exit (EXIT_SUCCESS); - } - } - return A; -} - - -int ** alifold_list2cov_list (Alignment *A, int **list) -{ - int a,b,c,d,p1,p2,s; - int r1, rr1, r2, rr2; - int neutral,watson, comp,tot, occupancy; - int **compmat; - int max, p,k; - int minseq=3; - - int ncomp=0, nwatson=0, nneutral=0, ncomp_wc=0; - int cons_l, fold_l; - int nseq; - - - - for (nseq=0,a=0; a< A->nseq; a++)if ( A->name[a][0]!='#')nseq++; - max=((nseq*(nseq-1))/2); - a=0; - while (list[a]) - { - p1=list[a][0]; - p2=list[a][1]; - watson=0; - comp=0; - neutral=0; - tot=0; - occupancy=0; - for (c=0; cnseq-1; c++) - { - if (A->name[c][0]=='#')continue; - r1=tolower(A->seq_al[c][p1]); - r2=tolower(A->seq_al[c][p2]); - if (is_gap(r1) || is_gap(r2))continue; - for (d=c+1; dnseq; d++) - { - if (A->name[d][0]=='#')continue; - rr1=tolower(A->seq_al[d][p1]); - rr2=tolower(A->seq_al[d][p2]); - if (is_gap(rr1) || is_gap(rr2))continue; - if (is_watson (r1, r2))watson++; - if (is_watson (rr1, rr2))watson++; - if (is_neutral (r1, r2))neutral++; - if (is_neutral (rr1, rr2))neutral++; - if (r1!=rr1 && r2!=rr2)comp++; - occupancy++; - } - - } - watson=(watson*100)/(occupancy*2); - comp=(comp*100)/occupancy; - neutral=(neutral*100)/(occupancy*2); - occupancy=(occupancy*100)/max; - list[a][3]=neutral; - list[a][4]=watson; - list[a][5]=comp; - list[a][6]=occupancy; - - if (list[a][3]<100) - { - if (list[a][5]>0)list[a][7]='I';//compensated incompatible pair - else list[a][7]='i'; //non compensated incompatible pair - } - else - { - list[a][7]='N';//Neutral pair - if (list[a][4]==100) - { - list[a][7]='W';//Watson and Crick - if ( list[a][5]>0)list[a][7]='C'; //Watson and crick compensated - } - else if ( list[a][5]>0) - { - list[a][7]='c';//compensated - } - } - a++; - } - - return list; -} -Alignment *alifold2cov_aln (Alignment *inA,int **list, int ug) -{ - int a=0; - a=0; - Alignment *A; - - A=copy_aln (inA, NULL); - A=realloc_aln2 ( A, A->nseq+1, A->len_aln+1); - sprintf (A->name[A->nseq], "#=GC SS_analyze"); - sprintf (A->seq_al[A->nseq], "%s", A->seq_al[A->nseq-1]); - A->nseq++; - while (list[a]) - { - char s; - if (list[a][6]<100 && !ug); - else - { - s=list[a][7]; - A->seq_al[A->nseq-1][list[a][0]]=s; - A->seq_al[A->nseq-1][list[a][1]]=s; - } - a++; - } - return A; -} -Alignment *alifold2cov_stat (Alignment *A,int **list, int ug) -{ - int fold=0,watson=0, comp=0, compwc=0, incomp=0, neutral=0; - int a; - - a=0; - while (list[a]) - { - int s; - fold++; - if (list[a][6]<100 && !ug); - else - { - s=list[a][7]; - watson +=(s=='W')?1:0; - compwc +=(s=='C')?1:0; - comp +=(s=='c')?1:0; - neutral+=(s=='N')?1:0; - incomp +=(s=='I')?1:0; - } - a++; - } - fprintf ( stdout, "@@ TOT Nseq:%d tot_len: %d fold: %d neutral: %d watson: %d CorWC: %d cor: %d CorIncompatible: %d\n",A->nseq-1, A->len_aln,fold, neutral,watson, compwc,comp,incomp); - return A; -} -Alignment *alifold2cov_cache (Alignment *inA, int **list, int ug) -{ - int a,b, c; - Alignment *A; - - A=copy_aln (inA, NULL); - a=0; - while (list[a]) - { - int v, s; - if (list[a][6]<100 && !ug); - else - { - s=list[a][7]; - if (s=='C')v=9; //red - else if ( s=='c')v=7; //orange - else if ( s=='W')v=5; //Yellow - else if ( s=='N')v=2; //green - else if ( s=='I')v=0; //blue; - for (b=0;bnseq; b++) - { - if (A->name[b][0]=='#'); - else - { - for (c=0; c<2; c++) - { - A->seq_al[b][list[a][c]]='0'+v; - } - } - } - } - a++; - } - return A; -} - -Alignment *alifold2cov_list (Alignment *A,int **list, int ug) -{ - int a,b, s; - - a=0; - while (list[a]) - { - s=list[a][7]; - if (list[a][6]<100 && !ug); - else if (s=='C') - { - fprintf ( stdout, "@@ WC Compensated pair: %4d %4d =>", list[a][0]+1, list [a][1]+1); - for (b=0; bnseq; b++)if (A->name[b][0]!='#')fprintf ( stdout, "[%c%c]", toupper (A->seq_al[b][list[a][0]]), toupper(A->seq_al[b][list[a][1]])); - fprintf (stdout,"\n"); - } - else if (s=='c') - { - fprintf ( stdout, "@@ Neural Compensated pair: %4d %4d =>", list[a][0]+1, list [a][1]+1); - for (b=0; bnseq; b++)if (A->name[b][0]!='#')fprintf ( stdout, "[%c%c]", toupper (A->seq_al[b][list[a][0]]), toupper(A->seq_al[b][list[a][1]])); - fprintf (stdout,"\n"); - } - else if (s=='W') - { - fprintf ( stdout, "@@ WC pair: %4d %4d =>", list[a][0]+1, list [a][1]+1); - for (b=0; bnseq; b++)if (A->name[b][0]!='#')fprintf ( stdout, "[%c%c]", toupper (A->seq_al[b][list[a][0]]), toupper(A->seq_al[b][list[a][1]])); - fprintf (stdout,"\n"); - } - else if (s=='N') - { - fprintf ( stdout, "@@ Neutral pair: %4d %4d =>", list[a][0]+1, list [a][1]+1); - for (b=0; bnseq; b++)if (A->name[b][0]!='#')fprintf ( stdout, "[%c%c]", toupper (A->seq_al[b][list[a][0]]), toupper(A->seq_al[b][list[a][1]])); - fprintf (stdout,"\n"); - } - else if (s=='I') - { - fprintf ( stdout, "@@ incompatible pair: %4d %4d =>", list[a][0]+1, list [a][1]+1); - for (b=0; bnseq; b++)if (A->name[b][0]!='#')fprintf ( stdout, "[%c%c]", toupper (A->seq_al[b][list[a][0]]), toupper(A->seq_al[b][list[a][1]])); - fprintf (stdout,"\n"); - } - a++; - } - - return A; -} - - -Alignment *aln2sample (Alignment *A, int n) -{ - Alignment *B; - int a, b, p; - int **pos; - - B=copy_aln (A, NULL); - - vsrand(0); - - pos=declare_int (A->len_aln, 2); - for (a=0; alen_aln; a++){pos[a][0]=a;pos[a][1]=rand()%(1000*A->len_aln);} - - sort_int (pos, 2, 1, 0, A->len_aln-1); - - n=(n==0)?A->len_aln:(MIN (n, (A->len_aln))); - for (a=0; anseq; b++) - A->seq_al[b][a]=B->seq_al[b][pos[a][0]]; - for (b=0; bnseq; b++) - A->seq_al[b][n]='\0'; - A->len_aln=n; - - free_aln (B); - free_int (pos, -1); - return A; -} -Alignment *aln2bootstrap (Alignment *A, int n) -{ - Alignment *B; - int a, b, p; - - if (n==0)n=A->len_aln; - else A=realloc_aln (A, n+1); - vsrand(0); - B=copy_aln (A, NULL); - for (a=0; alen_aln; - for (b=0; bnseq; b++) - A->seq_al[b][a]=B->seq_al[b][p]; - } - for ( b=0; bnseq; b++)A->seq_al[b][n]='\0'; - A->len_aln=n; - - free_aln (B); - return A; - -} - - -Alignment * aln2random_aln (Alignment *A, char *smode) - -{ - int a, b, n, **res; - int max; - - - - if ( smode==NULL) - { - smode=vcalloc (4, sizeof (char)); - sprintf ( smode, "SCR");//Sequences, Column Residues - } - else if ( strm (smode, "NO"))return A; - - vsrand(0); - max=A->nseq*1000; - - if ( strstr ( smode, "S")) - { - A=aln2scramble_seq (A); - } - if ( strstr ( smode, "C")) - { - - res=declare_int (A->nseq, 2); - for (a=0; a< A->len_aln; a++) - { - for (n=0,b=0;bnseq; b++) - { - if ( !is_gap(A->seq_al[b][a])) - { - res[n][0]=A->seq_al[b][a]; - res[n][1]=rand()%max; - n++; - } - sort_int (res, 2, 1, 0, n-1); - } - for (n=0,b=0;bnseq; b++) - { - if ( !is_gap(A->seq_al[b][a]))A->seq_al[b][a]=res[n++][0]; - } - } - free_int (res, -a); - } - - - //Redistributes the residues randomly without changing the gap pattern - if ( strstr ( smode, "R")) - { - max=A->len_aln*A->nseq; - res=declare_int (max, 2); - - for (n=0,a=0; a< A->len_aln; a++) - { - for (b=0;bnseq; b++) - { - if ( !is_gap(A->seq_al[b][a])) - { - res[n][0]=A->seq_al[b][a]; - res[n][1]=rand()%max; - n++; - } - - } - } - sort_int (res, 2, 1, 0, n-1); - for (n=0,a=0; a< A->len_aln; a++) - { - for (b=0;bnseq; b++) - { - if ( !is_gap(A->seq_al[b][a])) - { - A->seq_al[b][a]=res[n++][0]; - } - - } - } - - free_int (res, -1); - } - - return A; -} -Alignment *score_aln2score_ascii_aln (Alignment *A, Alignment *C) -{ - //Convert the output of T-Coffee evaluate into a printable score_ascii alignment*/ - //A and C must be sorted - //sets to 0 lone residues - int a, b; - - for (a=0; anseq; a++) - for (b=0; blen_aln; b++) - { - - int rC=C->seq_al[a][b]; - int rA=A->seq_al[a][b]; - if ( !strm (A->name[a], C->name[a])){HERE ("Unsorted aln in score_aln2score_ascii"); exit (EXIT_FAILURE);} - - if ( rA=='x' || rA=='X')C->seq_al[a][b]='9'; - else if ( rC >='0' && rC<='9'); - else if ( rC<10)C->seq_al[a][b]='0'+rC; - else if ( rC==NO_COLOR_RESIDUE && !is_gap(rA)) C->seq_al[a][b]='0'; - else if ( rC==NO_COLOR_RESIDUE && is_gap(rA))C->seq_al[a][b]='-'; - } - return C; -} -Alignment*aln2gap_cache (Alignment *A, int val) -{ - Alignment *B; - int a, b, c, nr; - - B=copy_aln (A, NULL); - for (b=0; blen_aln; b++) - { - for (nr=0,a=0; anseq; a++)nr+=!is_gap (A->seq_al[a][b]); - for (a=0; anseq; a++)if (!is_gap(A->seq_al[a][b]))B->seq_al[a][b]=(nr==1)?'0'+val:'1'; - } - return B; -} - -Alignment* aln2case_aln (Alignment *B, char *upper, char *lower) -{ - int a, b, c, up, lo; - Alignment *A; - - A=copy_aln (B, NULL); - - up=(upper)?upper[0]:'u'; - lo=(lower)?lower[0]:'l'; - - for (a=0; anseq; a++) - for (b=0; blen_aln; b++) - { - c=A->seq_al[a][b]; - - if ( is_gap(c)); - else A->seq_al[a][b]=(isupper (c))?up:lo; - } - return A; -} -Alignment *aln2scale (Alignment *A, char *coffset) -{ - int a, b, t, v, n; - char *s1, *s2; - char s[1000]; - int offset; - - if (coffset)offset=atoi(coffset); - else offset=0; - - sprintf (s, "%d", A->len_aln+offset); - n=strlen (s); - - A=realloc_aln2 (A, A->nseq+n, A->len_aln+1); - s1=vcalloc ( n+1, sizeof (char)); - s2=vcalloc ( n+1, sizeof (char)); - - for (a=0; aname[A->nseq+a], "%s", s2); - } - - for (a=0; alen_aln; a++) - { - sprintf (s1, "%d", a+1+offset); - s2=invert_string (s1); - t=strlen (s2); - - for (b=0; b<=n; b++) - { - if (b>=t) v='0'; - else v=s2[b]; - - A->seq_al[A->nseq+b][a]=v; - } - } - - A->nseq+=n; - return A; -} - - - - -int * pos2list (int * pos, int len, int *nl) -{ - int *list; - int a; - nl[0]=0; - list=vcalloc (len, sizeof (int)); - for (a=0; anseq+((B)?B->nseq:0), sizeof (int)); - pos=aln2pos_simple_2 (A); - if (B) - { - n=B->nseq; - for ( a=0; anseq; a++) - { - list[a]=name_is_in_list(B->name[a], A->name, A->nseq, 100); - } - } - else - { - for ( a=0; anseq; a++) - list[a]=a; - n=A->nseq; - } - - - fprintf ( fp, "#"); - for ( b=0; bname[s]); - } - fprintf (fp, "\n"); - - for ( a=0; alen_aln; a++) - { - for ( b=0; bx, where x is the position of residue z of seq1/S1 in S2->seq[index[Seq1/S1]] - */ - int a; - int **index; - char *seq1=NULL, *seq2=NULL; - Alignment *Profile; - - index=vcalloc ( S1->nseq, sizeof (int*)); - - for (a=0; a< S1->nseq; a++) - { - int len1, len2, b, c; - - seq1=S1->seq[a]; - - if (name_index[a][0]==-1) - seq2=NULL; - else if (name_index[a][1]==-1) - { - seq2=S2->seq[name_index[a][0]]; - } - else if ((Profile=seq2R_template_profile (S2, name_index[a][0])) !=NULL) - { - seq2=Profile->seq_al[name_index[a][1]]; - } - - len1=strlen (seq1);len2=strlen (seq2); - index[a]=vcalloc (len2, sizeof(int)); - - - for (c=0,b=0; bx if seq1 is the xth sequence of S2 - ->-1 if seq1 is nowhere to be found - index[seq1 of S1][1]->z if seq1 is the zth sequence within the xth profile of S2 - */ - int **index; - int a, b, x, z; - Alignment *Profile; - index=declare_int (S1->nseq, 2); - - - for ( a=0; anseq; a++) - { - index[a][0]=index[a][1]=-1; - x=name_is_in_list (S1->name[a],S2->name,S2->nseq,100); - if ( x!=-1){index[a][0]=x;index[a][1]=-1;} - for ( b=0; bnseq; b++) - { - if ((Profile=seq2R_template_profile (S2,b))) - { - z=name_is_in_list (S1->name[a],Profile->name,Profile->nseq,100); - if ( z!=-1){index[a][0]=b;index[a][1]=z;b=S2->nseq;} - } - } - } - return index; -} - - - - -int *get_name_index (char **l1, int n1, char **l2, int n2) -{ - int *r; - int a; - /*return Array[Index_L1]=Index_L2 */ - r=vcalloc ( n1, sizeof (int)); - for ( a=0; a< n1; a++) - r[a]=name_is_in_list (l1[a],l2,n2,100); - return r; -} - -int* get_res_index (char *seq0, char *seq1) -{ - int *coor, a; - - if ( !seq0 || !seq1) return NULL; - - - coor=vcalloc ( strlen (seq0)+1, sizeof (int)); - if (!strm (seq0, seq1)) - { - int r0, r1 , isr0, isr1; - int l0=0, l1=0; - Alignment *A; - A=align_two_sequences (seq0,seq1,"pam250mt",-5,-1, "myers_miller_pair_wise"); - - for ( a=0; a< A->len_aln; a++) - { - r0=A->seq_al[0][a];r1=A->seq_al[1][a]; - isr0=!is_gap(r0); - isr1=!is_gap(r1); - l0+= isr0; - l1+= isr1; - if (isr0 && isr1)coor[l0-1]=l1-1; - else if (isr0) coor[l0-1]=-1; - } - free_aln (A); - } - else - { - int l0; - - l0=strlen (seq0); - for ( a=0;a< l0; a++) - coor[a]=a; - } - - return coor; -} - -int change_residue_coordinate ( char *in_seq1, char *in_seq2, int v) -{ - /*Expresses the coordinate of a residue in seq1, in the coordinate system of seq2*/ - - - static char *seq1, *seq2; - static int *coor; - - - if ( seq1 !=in_seq1 || seq2 !=in_seq2) - { - int r0, r1 , isr0, isr1; - int l0=0, l1=0; - Alignment *A; - int a; - - vfree (coor); - seq1=in_seq1, seq2=in_seq2; - A=align_two_sequences (seq1,seq2,"pam250mt", -14, -2, "myers_miller_pair_wise"); - - coor=vcalloc ( A->len_aln, sizeof (int)); - for ( a=0; a< A->len_aln; a++) - { - r0=A->seq_al[0][a];r1=A->seq_al[1][a]; - - isr0=!is_gap(r0); - isr1=!is_gap(r1); - l0+= isr0; - l1+= isr1; - - if (isr0 && isr1)coor[l0-1]=l1-1; - else if (isr0) coor[l0-1]=-1; - } - free_aln (A); - } - return coor[v]; -} - - -int ** minimise_repeat_coor (int **coor, int nseq, Sequence *S) - { - int **new_coor; - int a, min; - new_coor=declare_int ( nseq, 3); - min=return_min_int (coor, nseq, 2); - for ( a=0; a< nseq; a++) - { - new_coor[a][0]=coor[a][0]; - new_coor[a][1]=coor[a][1]; - new_coor[a][2]=min; - } - return new_coor; - } -int ** get_nol_seq ( Constraint_list *CL, int **coor, int nseq, Sequence *S) - { - int a, s, p, l, nl; - int **buf; - int **new_coor; - - new_coor=declare_int ( nseq+1, 3); - - - buf=get_undefined_list ( CL); - - - - for ( a=0; a< nseq; a++)buf[coor[a][0]][coor[a][1]]=1; - - - for ( a=0; a< nseq; a++) - { - s=coor[a][0]; - p=coor[a][1]+1; - l=strlen(S->seq[s]); - nl=0; - while ( p<=l && !buf[s][p++])nl++; - new_coor[a][0]=s; - new_coor[a][1]=coor[a][1]; - new_coor[a][2]=nl; - } - free_int ( buf, -1); - return new_coor; - } - - - -int compare_pos_column( int **pos1,int p1, int **pos2,int p2, int nseq) - { - int a,v1, v2; - int identical=0; - - - - for ( a=0; a< nseq; a++) - { - - v1=pos1[a][p1]; - v2=pos2[a][p2]; - - if (v1>0 || v2>0) - { - if ( v1!=v2)return 0; - else identical=1; - } - } - - return identical; - } - -char *seq2alphabet (Sequence *S) -{ - return array2alphabet (S->seq, S->nseq, ""); -} - -char *aln2alphabet (Alignment *A) -{ - return array2alphabet (A->seq_al, A->nseq, ""); -} - -char *array2alphabet (char **array, int n, char *forbiden) -{ - int a, b, l; - int *hasch; - char *alphabet; - - hasch=vcalloc (256, sizeof (int)); - alphabet=vcalloc ( 257, sizeof (char)); - - - for ( a=0; anseq; a++) - { - char s; - s=Pred->seq_al[a][pos]; - if (!is_gap(s)) - { - score[tolower(s)]++; - tot++; - } - } - - if ( score['h']>score['i'] && score['h']>score['o'])cons='h'; - - else if ( score['i']>score['o'])cons='i'; - else cons='o'; - if (tot==0) return ""; - - - if (mode==VERBOSE)sprintf (result, " H: %3d I: %3d O: %3d P: %c", (score['h']*100)/tot, (score['i']*100)/tot, (score['o']*100)/tot, cons); - else if (mode == SHORT)sprintf ( result, "%c", cons); - score['h']=score['o']=score['i']=0; - return result; -} - - -Alignment * aln2hmmtop_pred (Alignment *A) - { - int a, b, c; - char *buf, *pred; - Alignment *PA; - - PA=copy_aln (A, NULL); - buf=vcalloc ( A->len_aln+1, sizeof (char)); - - for ( a=0; a< A->nseq; a++) - { - sprintf (buf, "%s", A->seq_al[a]); - pred=seq2tmstruc (buf); - for (c=0,b=0; blen_aln; b++) - { - if (!is_gap (PA->seq_al[a][b]))PA->seq_al[a][b]=pred[c++]; - } - vfree (pred); - } - vfree (buf); - return PA; - } - -char * seq2tmstruc ( char *seq) - { - static Sequence *S; - char *seqfile, *predfile, *buf; - FILE *fp; - - seqfile=vtmpnam (NULL); - predfile=vtmpnam (NULL); - - fp=vfopen (seqfile, "w"); - fprintf ( fp, ">seq1\n%s", seq); - vfclose (fp); - - - printf_system ( "fasta_seq2hmmtop_fasta.pl -in=%s -out=%s -arch=%s/%s -psv=%s/%s", seqfile, predfile, get_mcoffee_4_tcoffee(), "hmmtop.arch", get_mcoffee_4_tcoffee(), "hmmtop.psv"); - S=get_fasta_sequence (predfile, NULL); - buf=vcalloc ( strlen (S->seq[0])+1, sizeof (char)); - sprintf ( buf, "%s", S->seq[0]); - - free_sequence (S, S->nseq); - - return buf; - } - -char * set_blast_default_values() -{ - set_string_variable ("blast_server", (getenv ("blast_server_4_TCOFFEE"))?getenv ("blast_server_4_TCOFFEE"):"EBI"); - set_string_variable ("pdb_db", (getenv ("pdb_db_4_TCOFFEE"))?getenv ("pdb_db_4_TCOFFEE"):"pdb"); - set_string_variable ("prot_db", (getenv ("prot_db_4_TCOFFEE"))?getenv ("prot_db_4_TCOFFEE"):"uniprot"); - set_int_variable ("prot_min_sim", 0); - set_int_variable ("prot_max_sim", 100); - - set_int_variable ("prot_min_cov", 0); - set_int_variable ("prot_max_cov", 100); - - set_int_variable ("pdb_min_sim", 0); - set_int_variable ("pdb_max_sim", 100); - set_int_variable ("pdb_min_cov", 0); - set_int_variable ("pdb_max_cov", 100); - - return; -} - -char * seq2pdb (Sequence *S) -{ - set_blast_default_values(); - S->nseq=1; - S=seq2template_seq (S, "PDB", NULL); - return seq2P_pdb_id(S,0); -} - -Alignment * seq2blast ( Sequence *S) -{ - Alignment *A; - set_blast_default_values(); - - if (S->nseq==1) - { - S=seq2template_seq (S, "BLAST", NULL); - A=seq2R_template_profile(S,0); - sprintf ( A->name[0], "%s", S->name[0]); - } - else - { - int a; - for (a=0; a< S->nseq; a++) - { - Sequence *NS; - char name[1000]; - NS=fill_sequence_struc(1, &(S->seq[a]), &(S->name[a])); - NS=seq2template_seq (NS, "BLAST", NULL); - A=seq2R_template_profile(NS,0); - sprintf ( name, "%s.prf", S->name[a]); - - output_fasta_aln (name,A); - fprintf (stdout, "\nOUTPUT %s\n", name); - } - exit (EXIT_SUCCESS); - } - return A; -} - - - - -Sequence * seq2unique_name_seq ( Sequence *S) -{ - int a; - if ((a=name_list2unique_name_list (S->nseq, S->name))) - { - add_warning ( stderr, "\nWarning: Sequence %s is duplicated in file %s. The sequence will be renamed", S->name[a-1], S->file[a-1]); - } - return S; -} -Alignment * aln2unique_name_aln ( Alignment *S) -{ - int a; - if ((a=name_list2unique_name_list (S->nseq, S->name))) - { - add_warning ( stderr, "\nWarning: Sequence %s is duplicated in file %s. The sequence will be renamed", S->name[a-1], S->file[a-1]); - } - return S; -} - - -int name_list2unique_name_list (int n, char **name) -{ - int duplicate=0; - int a, b; - - for (a=0; a%s\naggggg\n", name[a]); - vfclose (fp); - printf_system ("fasta_aln2fasta_aln_unique_name.pl %s > %s", tmp1, tmp2); - S=get_fasta_sequence (tmp2, NULL); - for (a=0; aname[a])+1)); - sprintf ( name[a], "%s", S->name [a]); - } - free_sequence(S, -1); - } - return duplicate; -} - -Sequence* seq2clean_seq (Sequence *S, char *alp) -{ - int a, b, c, d, l; - - for (a=0; a< S->nseq; a++) - { - l=strlen (S->seq[a]); - for (d=0,b=0; bseq[a][b]; - if ( alp==NULL && !strchr (AA_ALPHABET, c) && !strchr (DNA_ALPHABET, c)); - else if (alp && strchr (alp, c)); - else S->seq[a][d++]=c; - } - S->seq[a][d]='\0'; - S->len[a]=strlen (S->seq[a]); - } - return S; -} -int ** seq2aln_pos (Alignment *A, int *ns, int **l_s) - { - int **code; - int a, b,c, d,l, p , g; - - - l=MAX(strlen (A->seq_al[l_s[0][0]]), strlen (A->seq_al[l_s[1][0]])); - code=declare_int ((A->S)->nseq,l+1); - - for (c=0; c<2; c++) - { - l=strlen (A->seq_al[l_s[c][0]]); - for (d=0; dorder[l_s[c][d]][0]; - for (p=0, b=0; bseq_al[l_s[c][d]][b]); - if (!g){p++; code[a][p]=b+1;} - } - } - } - return code; - } - -Alignment *local_maln2global_maln (char *seq, Alignment *A) - { - /*inputs a BLAST alignmnent where the master sequence may be partila - outputs the same alignment, while amkeing sure the profile is perfectly in sink with its master sequence - */ - - int a, b, c; - int start, end, rend; - char qname[100], *p; - Alignment *B=NULL; - - sprintf ( qname, "%s", A->name[0]); - p=strtok (qname, "_"); - if ( !strm (p, "QUERY")) - { - fprintf ( stderr, "\nUnappropriate format for the alignment [%s:FATAL]", PROGRAM); - myexit (EXIT_FAILURE); - } - - start=atoi(strtok (NULL, "_")); - end=atoi(strtok (NULL, "_")); - rend=strlen (seq); - - B=copy_aln (A,NULL); - if ( start>1 || endseq_al[0][a]=seq[a]; - for ( b=1; b< A->nseq; b++)A->seq_al[b][a]='-'; - } - - for (c=0,a=start-1; a< end; a++, c++) - { - A->seq_al[0][a]=seq[a]; - for ( b=1; b< A->nseq; b++) - { - A->seq_al[b][a]=B->seq_al[b][c]; - } - } - for ( a=end; aseq_al[0][a]=seq[a]; - for ( b=1; b< A->nseq; b++)A->seq_al[b][a]='-'; - } - for ( a=0; a< A->nseq; a++) A->seq_al[a][rend]='\0'; - free_aln (B); - - A->len_aln=rend; - return A; - } - -int ** aln2inv_pos ( Alignment *A) -{ - int **pos,a; - pos=vcalloc (A->nseq, sizeof (char*)); - for (a=0; a< A->nseq; a++)pos[a]=seq2inv_pos (A->seq_al[a]); - return pos; -} -int * seq2inv_pos ( char *seq) -{ - /*returns a list where each value gives the index of the corresponding residue in seq*/ - /*Numbering: 1 to L : Analogy to the aln2pos*/ - - int a,l1, l2; - int *pos; - - l1=strlen ( seq); - for ( l2=a=0; a< l1; a++)l2+=1-is_gap(seq[a]); - pos=vcalloc (l2+1, sizeof (int)); - for ( l2=a=0; a< l1; a++)if (!is_gap(seq[a]))pos[++l2]=a+1; - return pos; -} - - -int ** aln2pos_simple_2 (Alignment *A) - { - int **pos1; - int **pos2; - pos1=aln2pos_simple (A, A->nseq); - pos2=duplicate_int (pos1, A->nseq,read_size_int (pos1[0],sizeof (int))); - pos1=aln2pos_simple (NULL, 0); - return pos2; - } -int ** aln2pos_simple (Alignment *A, int n_nseq, ...) - { - /* - function documentation: start - int ** aln2pos_simple (Alignment *A, int n_nseq, ...) - -####with two parameter only: Alignment *A, int n_nseq - - this function turns A into pos, a matrix where each residue is replace by its index according to the complete sequence. - the indices in pos are computed using A->order[x][1] that contains the indice of the first residue of seq x of A - - n_nseq MUST not be null - -####with more than two param: - int ** aln2pos_simple (Alignment *A, int n_nseq, int *ns, int **ls) - n_nseq must be set to 0 for the param 3 and four to be read - - ns[x]=number seq in group - ls[x]=list of the sequences in group x ( size=ns[x]) - - The computation of the indices is only carried out on the scpecified residues - -####IMPORTANT - in pos, the numbering of the residues goes from 1 to L: - pos[0][0]=3, means that the first position of the first sequence - in the alignmnet contains residue #3 from sequence A->order[0][0]; - - function documentation: end - */ - - int a, b,c, p, g,l; - int **T; - - int max_nseq; - int n_len=0; - - int *list=NULL; - int *ns=NULL; - int **ls=NULL; - - - - va_list ap; - - - if ( A==NULL) - { - return NULL; - } - else - { - if ( n_nseq>0) - { - list=vcalloc(n_nseq, sizeof (int)); - for ( a=0; a< n_nseq; a++)list[a]=a; - } - else - { - va_start (ap, n_nseq); - ns=va_arg(ap, int * ); - ls=va_arg(ap, int **); - va_end(ap); - list=vcalloc ( ns[0]+ns[1], sizeof (int)); - n_nseq=0; - for ( a=0; a< ns[0]; a++)list[n_nseq++]=ls[0][a]; - for ( a=0; a< ns[1]; a++)list[n_nseq++]=ls[1][a]; - - } - max_nseq=MAX(read_size_int(A->order,sizeof (int*)),return_max_int (A->order, read_size_int(A->order,sizeof (int*)),0))+1; - n_len=get_longest_string ( A->seq_al,A->max_n_seq, NULL, NULL)+1; - - - T=declare_int (max_nseq, n_len); - for ( c=0; c< n_nseq; c++) - { - a=list[c]; - l=strlen ( A->seq_al[a]); - - for ( p=A->order[a][1],b=0; bseq_al[a][b]); - p+=g; - T[a][b]=(g==1)?p:-(1+p); - if ( A->seq_al[a][b]==UNDEFINED_RESIDUE)T[a][b]=0; - if ( A->seq_cache && T[a][b]>0)T[a][b]=A->seq_cache[A->order[a][0]][T[a][b]]; - } - } - vfree (list); - } - - return T; - } -Alignment ** split_seq_in_aln_list ( Alignment **aln, Sequence *S, int n_seq, char **seq_list) - { - int a, b, c; - char * long_seq=NULL; - int len,l; - int **translation; - int **table; - - - - - if ( aln==NULL)return NULL; - translation=declare_int ( S->nseq,2); - - for (len=0,a=0; a< S->nseq; a++) - { - if((b=name_is_in_list (S->name[a],seq_list, n_seq, 100))!=-1) - { - l=strlen(S->seq[a])+1; - long_seq=vrealloc(long_seq,(len+l+1)*sizeof(char)); - long_seq=strcat(long_seq, S->seq[a]); - long_seq=strcat(long_seq, "*"); - - translation[a][0]=b; - translation[a][1]=len; - len+=l; - } - else translation[a][0]=-1; - } - - long_seq[len-1]='\0'; - len--; - - table=declare_int ( len+1, 2); - - for ( b=0,a=0; a< S->nseq; a++) - { - if ( translation[a][0]!=-1) - { - c=1; - while (long_seq[b]!='\0' && long_seq[b]!='*') - { - table[b+1][1]=c++; - table[b+1][0]=translation[a][0]; - b++; - } - table[b][1]=c++; - table[b][0]=translation[a][0]; - b++; - } - } - - for ( a=0; a< (aln[-1])->nseq; a++) - { - for ( b=0; b< (aln[a])->nseq; b++) - { - - (aln[a])->order[b][0]=table[(aln[a])->order[b][1]][0]; - (aln[a])->order[b][1]=table[(aln[a])->order[b][1]][1]; - sprintf ( (aln[a])->name[b],"%s_%d_%d", S->name[(aln[a])->order[b][0]],a+1,b+1); - } - } - free_int (translation, -1); - free_int (table, -1); - return aln; - } - - - -Sequence * fill_sequence_struc ( int nseq, char **sequences, char **seq_name) - { - int a; - Sequence *S; - int shortest, longuest; - - if (!sequences) - { - shortest=longuest=0; - } - else if ( nseq>1) - { - shortest=get_shortest_string( sequences, nseq, NULL, NULL); - longuest=get_longest_string (sequences, nseq, NULL, NULL); - } - else if ( nseq==1) - { - shortest=longuest=strlen (sequences[0]); - } - else - { - return NULL; - } - - - S=declare_sequence (shortest, longuest,nseq); - S->nseq=nseq; - - if (sequences)S->seq=copy_char ( sequences, S->seq, nseq, -1); - else S->seq=declare_char (S->nseq, 1); - - S->name=copy_char ( seq_name, S->name,nseq, -1); - - ungap_array (S->seq,nseq); - for ( a=0; a< S->nseq; a++)S->len[a]=strlen(S->seq[a]); - return S; - } - - -Alignment * thread_profile_files2aln (Alignment *A, char *template_file, Fname *F) -{ - - Alignment *P; - int a; - - if (!A->S)A->S=aln2seq (A); - if (template_file)A->S=seq2template_seq (A->S, template_file,F); - for ( a=0; a< A->nseq; a++) - { - P=seq2R_template_profile (A->S, a); - if ( P) - { - P->expand=1; - sprintf ( P->name[0], "%s", A->name[a]); - } - } - - return expand_aln (A); -} - - - - -Alignment * expand_aln (Alignment *A) - { - /*This function expands the profiles within an alignment*/ - - - int a, b, d, e; - Alignment *MAIN=NULL, *SUB=NULL; - int n_sub_seq=0; - int new_nseq=0; - int *list; - Alignment *Profile; - - if ( !A)return A; - - - - list=vcalloc (A->nseq, sizeof (int)); - for ( a=0; a< A->nseq; a++) - { - Profile=seq2R_template_profile (A->S, A->order[a][0]); - if (Profile && Profile->expand) - { - new_nseq+=Profile->nseq; - } - else - { - new_nseq++; - list[n_sub_seq++]=a; - } - } - - if ( n_sub_seq==A->nseq){vfree(list);return A;} - else if (n_sub_seq==0){MAIN=copy_aln (A, MAIN);MAIN->nseq=0;} - else - { - MAIN=extract_sub_aln (A, n_sub_seq, list); - } - vfree(list); - - - for ( a=0; a< A->nseq; a++) - { - Profile=seq2R_template_profile (A->S, A->order[a][0]); - if ( Profile && Profile->expand) - { - SUB=copy_aln (Profile,SUB); - SUB=realloc_aln2(SUB, SUB->nseq, A->len_aln+1); - - for ( e=0,b=0; b< A->len_aln; b++) - { - if ( is_gap(A->seq_al[a][b])) - {for (d=0; d< SUB->nseq; d++)SUB->seq_al[d][b]='-';} - else - { - for(d=0; dnseq; d++)SUB->seq_al[d][b]=Profile->seq_al[d][e]; - e++; - } - - } - MAIN=stack_aln(MAIN, SUB); - } - } - free_aln (A); - free_aln (SUB); - return MAIN; - } -Alignment * expand_number_aln (Alignment *A,Alignment *EA) - { - /*This function expands the profiles within an alignment*/ - - - int a, b, d, e; - Alignment *MAIN=NULL, *SUB=NULL, *C=NULL; - int n_sub_seq=0; - int new_nseq=0; - int *list; - Alignment *Profile; - - if ( !EA || !A)return EA; - - if ( EA->nseqnseq) - { - fprintf (stderr, "\n[ERROR:expand_number_aln] Using as a master an expanded aln (%d %d) [FATAL:%s]", EA->nseq, A->nseq,PROGRAM); - EA->A=A->A=NULL; - print_aln (EA); - print_aln (A); - myexit (EXIT_FAILURE); - } - - - list=vcalloc (EA->nseq, sizeof (int)); - for ( a=0; a< EA->nseq; a++) - { - Profile=seq2R_template_profile (EA->S, EA->order[a][0]); - if (Profile && Profile->expand)new_nseq+=Profile->nseq; - else - { - new_nseq++; - list[n_sub_seq++]=a; - } - } - - if ( n_sub_seq==EA->nseq){vfree(list);return EA;} - else if (n_sub_seq==0){MAIN=copy_aln (EA, MAIN);MAIN->nseq=0;} - else - { - MAIN=extract_sub_aln (EA, n_sub_seq, list); - } - - - list[0]=EA->nseq; - C=extract_sub_aln (EA,1, list); - vfree(list); - - - - for ( a=0; a< EA->nseq; a++) - { - Profile=seq2R_template_profile (EA->S, EA->order[a][0]); - if ( Profile && Profile->expand) - { - SUB=copy_aln (Profile,SUB); - SUB=realloc_aln2(SUB, SUB->nseq, EA->len_aln+1); - - for ( e=0,b=0; b<= EA->len_aln; b++) - { - if (is_gap(A->seq_al[a][b])) - { - for ( d=0; dnseq; d++) - SUB->seq_al[d][b]=NO_COLOR_RESIDUE; - } - else - { - for ( d=0; dnseq; d++) - { - - if ( is_gap (Profile->seq_al[d][e])) - { - SUB->seq_al[d][b]=NO_COLOR_RESIDUE; - } - else SUB->seq_al[d][b]=EA->seq_al[a][b]; - } - e++; - } - } - for (d=0; d< SUB->nseq; d++)SUB->score_seq[d]=EA->score_seq[a]; - - MAIN=stack_aln(MAIN, SUB); - } - } - - MAIN=stack_aln(MAIN, C); - MAIN->nseq--; - MAIN->score=MAIN->score_aln=EA->score_aln; - - free_aln (SUB); - free_aln (EA); - - free_aln (C); - - return MAIN; - } - -Alignment * probabilistic_rm_aa ( Alignment *A, int pos, int len) -{ - int random_len=0; - int a, b; - int left, right; - - if ( len<0) - { - random_len=1; - len=-len; - } - - vsrand(0); - - if (pos==0)pos= (rand()%(A->len_aln-(2*len+len))) +len; - - - for ( a=0; a< A->nseq; a++) - { - if (random_len)left =rand()%len; - else left=len; - if (random_len)right=rand()%len; - else right=len; - if ( (pos-right)<0 || (pos+left)>A->len_aln) - { - add_warning ( stderr, "\nWarning: probabilistic_rm_aa, pos out of range [%s]\n", PROGRAM); - } - else - for ( b=pos-right; bseq_al[a][b]=(b==pos)?'~':'*'; - } - - ungap_aln (A); - free_sequence ( A->S, A->nseq); - A->S=aln2seq (A); - return A; - -} - -Alignment * remove_gap_column ( Alignment *A, char *mode) - { - int a, b; - char *p; - int *seq_list; - int nseq=0; - int keep_col, cl; - - - seq_list =vcalloc ( A->nseq, sizeof (int)); - while ( (p=strtok(mode, ":"))) - { - mode=NULL; - if (p[0]=='#') - { - seq_list[nseq++]=atoi(p+1)-1; - } - else if ( (a=name_is_in_list (p, A->name, A->nseq, 100))!=-1) - { - seq_list[nseq++]=a; - } - } - - if ( nseq==0) - { - for ( a=0; a< A->nseq; a++)seq_list[a]=a; - nseq=A->nseq; - } - - for ( cl=0,a=0; a<=A->len_aln; a++) - { - for (keep_col=1, b=0; b< nseq && keep_col; b++) - { - keep_col=(is_gap(A->seq_al[seq_list[b]][a]))?0:keep_col; - } - - if ( keep_col) - { - for ( b=0; b< A->nseq; b++) - { - A->seq_al[b][cl]=A->seq_al[b][a]; - } - cl++; - } - else - { - for ( b=0; b< A->nseq; b++) - { - A->seq_al[b][cl]='-'; - } - cl++; - } - } - A->len_aln=cl; - vfree (seq_list); - - return A; - } - - -Alignment * ungap_sub_aln (Alignment *A, int ns, int *ls) - { - - int a, b, c,t; - int len; - - len=strlen ( A->seq_al[ls[0]]); - - for ( c=0,a=0; aseq_al[ls[b]][a]); - if (t==ns); - else - { - for ( b=0; bseq_al[ls[b]][c]=A->seq_al[ls[b]][a]; - c++; - } - } - for ( b=0; bseq_al[ls[b]][c]='\0'; - return A; - } - -Sequence * ungap_seq ( Sequence *S) - { - int a; - - if ( !S)return NULL; - ungap(S->seq[0]); - S->max_len=S->min_len=strlen (S->seq[0]); - for ( a=0; a< S->nseq; a++) - { - ungap(S->seq[a]); - S->len[a]=strlen (S->seq[a]); - S->max_len=MAX(S->max_len,S->len[a]); - S->min_len=MAX(S->min_len,S->len[a]); - } - return S; - - } -Alignment * unalign_aln (Alignment *A, Alignment *C, int t) -{ - int a, b, pos, len; - Sequence *S; - - for (a=0; anseq; a++) - for (b=0; blen_aln; b++) - { - int res=C->seq_al[a][b]; - A->seq_al[a][b]=toupper(A->seq_al[a][b]); - if ((isdigit (res) && (res-'0')<=t)) - A->seq_al[a][b]=tolower(A->seq_al[a][b]); - } - - - for (pos=-1, a=0; anseq; a++) - { - b=0; - while ( C->seq_al[a][b]) - { - int res=C->seq_al[a][b]; - if ((isdigit (res) && (res-'0')<=t)) - { - if (pos==-1){pos=b;len=1;} - else len++; - } - else if (pos!=-1) - { - - C=unalign_aln_pos(C,a,pos, len); - pos=-1; - } - b++; - } - if ( pos!=-1){C=unalign_aln_pos(C,a,pos, len);pos=-1;} - } - S=aln2seq (A); - thread_seq_struc2aln (C, S); - A=realloc_aln2 (A, A->nseq, C->len_aln+1); - A->len_aln=C->len_aln; - for (a=0; anseq; a++)sprintf ( A->seq_al[a], "%s", C->seq_al[a]); - ungap_aln (A); - - free_sequence (S, -1); - return A; -} -Alignment * unalign_aln_pos (Alignment *A, int s, int p, int l) -{ - int a; - char *buf; - int unalign=0; - - - buf=vcalloc (l+1, sizeof (char)); - for (a=0; aseq_al[s][p+a]; - A->seq_al[s][p+a]='-'; - } - - - A=insert_gap_col (A,p, l); - for (a=0; aseq_al[s][p+a]=buf[a]; - } - vfree (buf); - return A; -} -Alignment * insert_gap_col (Alignment *A, int p, int l) -{ - int a, c; - char *buf; - char *gap; - - gap=generate_null(l); - if ( !A || p>=A->len_aln || p<0 || p<=0)return A; - - buf=vcalloc (A->len_aln+l+1, sizeof (char)); - A=realloc_aln2(A,A->nseq, A->len_aln+l+1); - for (a=0; anseq; a++) - { - c=A->seq_al[a][p]; - A->seq_al[a][p]='\0'; - sprintf ( buf, "%s%s%c%s", A->seq_al[a],gap,c,A->seq_al[a]+p+1); - sprintf (A->seq_al[a], "%s", buf); - } - vfree (buf); - A->len_aln+=l; - return A; -} -Alignment * unalign_residues (Alignment *A, int si1, int si2) -{ - char *s1, *s2, *ns1, *ns2; - int l, a, b,r1, r2; - - s1=A->seq_al[si1];s2=A->seq_al[si2]; - l=strlen (s1); - - ns1=vcalloc (2*l+1, sizeof (char)); - ns2=vcalloc (2*l+1, sizeof (char)); - - for (b=a=0; a< l; a++) - { - r1=s1[a]; r2=s2[a]; - if (is_gap(r1) || is_gap(r2) || isupper (r1) || isupper(r2)) - { - ns1[b]=(r1=='.')?'-':r1; - ns2[b]=(r2=='.')?'-':r2; - b++; - } - else - { - ns1[b]=r1; - ns2[b]='-'; - b++; - ns2[b]=r2; - ns1[b]='-'; - b++; - } - } - ns1[b]='\0'; - ns2[b]='\0'; - A->seq_al[si1]=ns1; - A->seq_al[si2]=ns2; - - - A->len_aln=strlen (ns1); - return A; -} -Alignment *degap_aln (Alignment *A) -{ - //Reomove all the gaps - int a; - for ( a=0; a< A->nseq; a++)ungap (A->seq_al[a]); - return A; -} - -Alignment *ungap_aln_n ( Alignment *A, int p) - { -/*remove all the columns of gap-only within an alignment*/ - int a, b, c; - int t; - int gp; - - if ( A->nseq==0)return A; - - for ( c=0,a=0; a< A->len_aln; a++) - { - for ( t=0,b=0; bnseq; b++) - t+=is_gap(A->seq_al[b][a]); - gp=(t*100)/A->nseq; - if (p>0 && (gp>=p || (t==A->nseq && p==100) || (t && p==1)));//Remove columns containing more than p% gaps - else if (p<0 && (gp<=p || (t==0 && p==-100) ||(t && p==-1)));//remove columns containing less than p% gaps - else - { - for ( b=0; bnseq; b++) - A->seq_al[b][c]=A->seq_al[b][a]; - c++; - } - } - for ( b=0; bnseq; b++)A->seq_al[b][c]='\0'; - A->len_aln=c; - return A; - } - -Alignment *ungap_aln ( Alignment *A) -{ - return ungap_aln_n (A, 100); -} -/* -Alignment *ungap_aln ( Alignment *A) - { - int a, b, c,t; - - for ( c=0,a=0; a< A->len_aln; a++) - { - for ( t=0,b=0; bnseq; b++) - t+=is_gap(A->seq_al[b][a]); - if (t==A->nseq); - else - { - for ( b=0; bnseq; b++) - A->seq_al[b][c]=A->seq_al[b][a]; - c++; - } - } - for ( b=0; bnseq; b++)A->seq_al[b][c]='\0'; - A->len_aln=c; - return A; - - } -*/ - - -Alignment *remove_end (Alignment *A) - { - int a, b, d; - int left, right; - - for (a=0; a< A->len_aln; a++) - { - for ( b=0, d=0; b< A->nseq; b++) - if ( !is_gap(A->seq_al[b][a]))d++; - if ( d>1)break; - } - left=a; - for (a=A->len_aln-1; a>0; a--) - { - for ( b=0, d=0; b< A->nseq; b++) - if ( !is_gap(A->seq_al[b][a]))d++; - if ( d>1)break; - } - right=a; - - return extract_aln(A, left, right+1); - } - -Alignment* condense_aln (Alignment *A) -{ - /* condense complementarz columns: - X- X - -X ....>X - X- X - - */ - int a, b, plen, n,m, r1, r2; - - plen=0; - while ( A->len_aln !=plen) - { - plen=A->len_aln; - for ( a=0; a< A->len_aln-1; a++) - { - for ( n=m=b=0; b< A->nseq; b++) - { - r1=is_gap(A->seq_al[b][a]); - r2=is_gap(A->seq_al[b][a+1]); - n+=(r1 || r2); - m+=r1; - } - - if ( n==A->nseq && m!=A->nseq) - { - for (b=0; b< A->nseq; b++) - { - if (!is_gap(A->seq_al[b][a+1])) - { - A->seq_al[b][a]=A->seq_al[b][a+1]; - A->seq_al[b][a+1]='-'; - } - } - a++; - } - } - } - A=ungap_aln(A); - return A; -} - - - - -void compress_aln ( Alignment *A) - { - - /*remove all the columns of gap-only within an alignment*/ - int a, b, c, d; - - - - for (c=0, a=0; a< A->len_aln; a++) - { - for ( b=0, d=0; b< A->nseq; b++) - if ( A->seq_al[b][a]!='-'){d=1; break;} - if ( d==0); - else - { - for (b=0; b< A->nseq; b++) - A->seq_al[b][c]=A->seq_al[b][a]; - c++; - } - } - A->len_aln=c; - - for ( a=0; a< A->nseq; a++) - A->seq_al[a][c]='\0'; - } - -Alignment *seq_coor2aln ( Sequence *S, Alignment *A, int **coor, int nseq) - { - int a; - char *buf; - - A=realloc_alignment2(A, nseq, return_maxlen ( S->seq, S->nseq)+1); - for ( a=0; a< S->nseq; a++)sprintf ( A->file[a], "%s", S->file[a]); - for ( a=0; a< nseq; a++) - { - sprintf (A->name[a], "Repeat_%d_%d", a, coor[a][0]); - buf=extract_char ( S->seq[coor[a][0]], coor[a][1]-1, coor[a][2]); - sprintf ( A->seq_al[a],"%s", buf); - vfree(buf); - A->order[a][0]=0; - A->order[a][1]=coor[a][1]-1; - } - A->nseq=nseq; - return A; - } - -Alignment *strings2aln (int nseq,...) - { - /*strings2aln(nseq, , , , ....)*/ - va_list ap; - char **list, **list2; - char **name, **name2; - Sequence *S; - Alignment *A; - int a, max; - - va_start(ap, nseq); - list=vcalloc (nseq, sizeof (char*)); - name=vcalloc (nseq, sizeof (char*)); - for ( a=0; a< nseq; a++) - { - name[a]=va_arg(ap,char*); - list[a]=va_arg(ap,char*); - - } - va_end(ap); - - for ( max=0,a=0; a< nseq; a++) - { - max=(strlen (list[a])>max)?strlen(list[a]):max; - } - list2=declare_char (nseq, max+1); - name2=declare_char (nseq, MAXNAMES+1); - - for ( a=0; a< nseq; a++) - { - sprintf ( list2[a], "%s", list[a]); - sprintf ( name2[a], "%s", name[a]); - } - - - S=fill_sequence_struc(nseq,list2,name2); - - free_char (list2, -1); - free_char (name2, -1); - vfree (list); - vfree(name); - A=seq2aln(S,NULL, 1); - return A; - } -Alignment *seq2aln ( Sequence *S, Alignment *A,int rm_gap) - { - int a; - - A=realloc_alignment2(A, S->nseq, S->max_len+1); - for ( a=0; a< S->nseq; a++)sprintf ( A->file[a], "%s", S->file[a]); - A->nseq=S->nseq; - A->max_len=S->max_len; - A->min_len=S->min_len; - - for ( a=0; a< S->nseq; a++) - { - A->order[a][0]=a; - A->order[a][1]=0; - - sprintf ( A->seq_comment[a], "%s", S->seq_comment[a]); - sprintf ( A->aln_comment[a], "%s", S->aln_comment[a]); - - sprintf ( A->name[a], "%s", S->name[a]); - sprintf ( A->seq_al[a], "%s", S->seq[a]); - - ungap ( A->seq_al[a]); - A->len[a]=strlen ( A->seq_al[a]); - - if ( rm_gap==0 || rm_gap==NO_PAD)sprintf ( A->seq_al[a], "%s", S->seq[a]); - - } - if (rm_gap!=NO_PAD)padd_aln (A); - A->S=S; - return A; - } - -Alignment *padd_aln ( Alignment *A) -{ - A->seq_al=padd_string (A->seq_al, A->nseq, '-'); - A->len_aln=strlen (A->seq_al[0]); - return A; -} - -char **padd_string ( char **string, int n,char pad) -{ - /*Pads a the strings so that they all have the same length*/ - - int max_len, a; - char *buf; - - max_len=get_longest_string (string,n, NULL, NULL); - for (a=0; anseq; a++) sprintf (A->name[a], "tmpname_%d", seqindex++); - - R=copy_aln (A, NULL); - for (c=0, a=0; a< A->len_aln; a++) - { - if ( is_gap (A->seq_al[0][a])); - else - { - for ( b=0; bnseq; b++) - R->seq_al[b][c]=A->seq_al[b][a]; - c++; - } - } - for ( a=0; a< A->nseq; a++)R->seq_al[a][c]='\0'; - R->len_aln=c; - R->S=aln2seq (R); - - free_aln (S); - free_aln (P); - free_aln (A); - - return R; -} - -Alignment * add_align_seq2aln ( Alignment *A, char *seq, char *seq_name) - { - if ( !A) - { - A=declare_aln (NULL); - A=realloc_aln2 ( A, 1, strlen (seq)+1); - A->nseq=0; - sprintf ( A->name[A->nseq], "%s", seq_name); - sprintf ( A->seq_al[A->nseq], "%s", seq); - A->nseq++; - - } - else if ( strlen (seq)!=A->len_aln) - { - fprintf ( stderr, "\nError: Attempt to stack incompatible aln and aligned sequence[FATAL]\n"); - myexit (EXIT_FAILURE); - A=NULL; - } - else - { - - A=realloc_aln2 ( A, A->nseq+1, A->len_aln+1); - sprintf ( A->name[A->nseq], "%s", seq_name); - sprintf ( A->seq_al[A->nseq], "%s", seq); - A->nseq++; - } - return A; - } - - -Alignment *aln2number (Alignment *A) - { - A->seq_al=char_array2number(A->seq_al, A->nseq); - return A; - } -Sequence *seq2number (Sequence *A) - { - A->seq=char_array2number(A->seq, A->nseq); - return A; - } - -Sequence * aln2seq (Alignment *A) -{ - return aln2seq_main(A, RM_GAP); -} -Sequence * aln2seq_main (Alignment *A, int mode) - { - Sequence *LS; - int a; - int maxlen; - - if ( !A) return NULL; - else if ( A->nseq==0)return NULL; - for (maxlen=0,a=0; anseq; a++)maxlen=MAX(maxlen, strlen (A->seq_al[a])); - - - LS=declare_sequence ( maxlen+1, maxlen+1, A->nseq); - LS->nseq=A->nseq; - for ( a=0; a< LS->nseq; a++) - { - sprintf (LS->file[a],"%s", A->file[a]); - - sprintf ( LS->seq[a], "%s", A->seq_al[a]); - - if (mode==RM_GAP)ungap ( LS->seq[a]); - - LS->len[a]=strlen ( LS->seq[a]); - - sprintf ( LS->seq_comment[a], A->seq_comment[a]); - sprintf ( LS->aln_comment[a], A->aln_comment[a]); - sprintf ( LS->name[a], "%s", A->name[a]); - } - return LS; - } - -Sequence *keep_residues_in_seq ( Sequence *S, char *list, char replacement) -{ - Alignment *A=NULL; - int a; - - A=seq2aln (S, A,1); - A=keep_residues_in_aln ( A, list, replacement); - for ( a=0; a< A->nseq; a++) - { - ungap (A->seq_al[a]); - sprintf ( S->seq[a], "%s", A->seq_al[a]); - } - free_aln (A); - return S; -} - - -Alignment *aln2short_aln ( Alignment *A, char *list, char *new, int spacer) -{ - int a, b, r, cl, l; - char *buf; - - for ( a=0; a< A->nseq; a++) - { - buf=vcalloc ( strlen (A->seq_al[a])+1, sizeof (char)); - - for (l=0,cl=0, b=0; b< A->len_aln; b++) - { - r=A->seq_al[a][b]; - if ( is_gap(r)); - else if ( is_in_set (r, list)) - { - if (cl){cl=0; buf[l++]=new[0];} - buf[l++]=r; - } - else - { - if ( cl==spacer){buf[l++]=new[0];cl=0;} - cl++; - } - - } - - buf[l]='\0'; - sprintf (A->seq_al[a], "%s", buf); - vfree (buf); - } - return A; -} - -Alignment *keep_residues_in_aln ( Alignment *A, char *list, char replacement) -{ - return filter_keep_residues_in_aln (A,NULL, 0, -1, list, replacement); -} -Alignment *filter_keep_residues_in_aln ( Alignment *A,Alignment *ST, int use_cons, int value, char *list, char replacement) -{ - char **sl; - int n, a; - - n=strlen (list); - sl=declare_char (n+1, 256); - for (a=0; a< n; a++) - sprintf ( sl[a], "%c%c", list[a], list[a]); - sprintf ( sl[a],"#%c", replacement); - A=filter_aln_convert (A, ST,use_cons,value, n+1, sl); - free_char (sl, -1); - return A; -} - - -Alignment *filter_convert_aln ( Alignment *A,Alignment *ST, int use_cons, int value, int n, ...) -{ - va_list ap; - char **sl; - int a; - va_start (ap, n); - sl=vcalloc ( n,sizeof(char*)); - for ( a=0; a< n; a++) - { - sl[a]=va_arg(ap, char * ); - } - va_end(ap); - A=filter_aln_convert (A,ST,use_cons,value, n,sl); - vfree(sl); - return A; -} - -Alignment * filter_aln ( Alignment *A, Alignment *ST, int value) - { - return filter_aln_convert (A, ST,0,value,DELETE, NULL); - } -Alignment * filter_aln_switchcase ( Alignment *A, Alignment *ST,int use_cons, int value) - { - return filter_aln_convert (A, ST,0,value,SWITCHCASE, NULL); - } -Alignment * filter_aln_upper_lower ( Alignment *A, Alignment *ST,int use_cons, int value) - { - return filter_aln_convert (A, ST,use_cons,value, LOWER, NULL); - } -Alignment * filter_aln_lower_upper ( Alignment *A, Alignment *ST,int use_cons, int value) - { - - return filter_aln_convert (A, ST,use_cons,value, UPPER, NULL); - } -Alignment * STseq2STaln ( Alignment *A, Alignment *ST) - { - int a, i=0; - - if (ST && ST->len_aln !=A->len_aln) - { - Sequence *S_T, *S_A; - - S_T=aln2seq (ST); - S_A=aln2seq (A); - - for (a=0; a< A->nseq; a++) - { - i=name_is_in_list (A->name[a], S_T->name,S_T->nseq, 100); - if (i!=-1) - { - char *s1, *s2; - s1=(S_T)->seq[i];ungap(s1); - s2=(S_A)->seq[a];ungap(s2); - - if ( strlen (s1)!=strlen(s2)) - { - fprintf ( stderr, "%s\n%s\n", s1, s2); - printf_exit (EXIT_FAILURE, stderr, "ERROR: Sequence %s has different length in the alignment and in the structure Alignment [FATAL:%s]\n", A->name[a], PROGRAM); - } - } - } - ST=copy_aln (A, ST); - thread_seq_struc2aln (ST,S_T); - } - - return ST; - } -Alignment * merge_annotation ( Alignment *A, Alignment *ST, char *seq) -{ - int s, a, b; - - ST=STseq2STaln (A, ST); - if ( seq==NULL)s=0; - else - s=name_is_in_list ( seq, A->name, A->nseq, 100); - - if (s==-1) - { - add_warning ( stderr, "\nERROR: %s is not in your MSA [FATAL: %s]", PROGRAM); - myexit (EXIT_FAILURE); - } - - for (a=0; alen_aln; a++) - { - int t, r; - - t=A->seq_al[s][a]; - if (is_gap (t))continue; - for (b=0; bnseq; b++) - { - t=A->seq_al[s][a]; - r=ST->seq_al[b][a]; - if ( isdigit (r)) - { - if (!isdigit(t) || (isdigit (t) && tseq_al[s][a]=r; - } - } - } - return A; -} - - - -Alignment * filter_aln_convert ( Alignment *A, Alignment *ST,int use_cons, int value, int n_symbol,char **symbol_list) - { - int a, b, c; - int st; - int cons=0; - - - ST=STseq2STaln (A, ST); - if ( ST && use_cons) - { - cons=name_is_in_list ("con", ST->name,ST->nseq+1, 100); - if ( cons==-1)cons=name_is_in_list ("cons", ST->name,ST->nseq+1, 100); - if ( cons==-1)cons=name_is_in_list ("Cons", ST->name,ST->nseq+1, 100); - if ( cons==-1) - { - use_cons=0; - fprintf (stderr, "WARNING: Could Not Use the Consensus Sequence [WARNING:%s]\n", PROGRAM); - } - } - - A->residue_case=KEEP_CASE; - for ( a=0; a< A->nseq; a++) - { - if(value!=10 && ST && !use_cons) - { - c=name_is_in_list (A->name[a], ST->name, ST->nseq,100); - if (c==-1)st=11; - } - - for ( b=0; b< A->len_aln; b++) - { - if ( value==10 || !ST)st=11; - else if ( ST && use_cons) - { - st=(isdigit(ST->seq_al[cons][b]))?ST->seq_al[cons][b]-'0':ST->seq_al[cons][b]; - } - else st=(isdigit(ST->seq_al[c][b]))?ST->seq_al[c][b]-'0':ST->seq_al[c][b]; - - - if ( st==value || value==-1 || st==NO_COLOR_RESIDUE) - { - if ( n_symbol==UPPER && !symbol_list)A->seq_al[a][b]=toupper (A->seq_al[a][b]); - else if ( n_symbol==LOWER && !symbol_list)A->seq_al[a][b]=tolower (A->seq_al[a][b]); - else if ( n_symbol==SWITCHCASE && !symbol_list) - { - if ( !isalpha(A->seq_al[a][b])); - else if (isupper (A->seq_al[a][b]))A->seq_al[a][b]=tolower (A->seq_al[a][b]); - else if (islower (A->seq_al[a][b]))A->seq_al[a][b]=toupper (A->seq_al[a][b]); - } - else if ( n_symbol==DELETE && !symbol_list)A->seq_al[a][b]='-'; - else - { - A->seq_al[a][b]=convert(A->seq_al[a][b],n_symbol,symbol_list); - } - } - - } - } - return A; - } - - -char ** sar_aln2motif (Alignment *A, Alignment *B, int *pos, int c); -char ** sar_aln2motif (Alignment *A, Alignment *B, int *pos, int c) -{ - static Alignment *I; - static Alignment *O; - int a, b, o, i; - - float tp,tn,fp,fn,best, sp, sn, sen2; - float best_pred=-1; - int best_motif=0; - - - int n1; - static char ***alp; - static int *alp_size; - - char ***motif_list; - int n; - - - if (!I) - { - I=copy_aln(A, NULL); - O=copy_aln(A, NULL); - } - - - - I->nseq=O->nseq=I->len_aln=O->len_aln=0; - for (a=0; alen_aln; a++) - { - if (pos[a]) - { - for (i=o=0,b=0; bnseq; b++) - { - - if ( is_gap(A->seq_al[b][a]))return 0; - if (B->seq_al[b][c]=='I')I->seq_al[i++][I->len_aln]=A->seq_al[b][a]; - else O->seq_al[o++][O->len_aln]=A->seq_al[b][a]; - } - I->len_aln++; - O->len_aln++; - } - } - - if (O->len_aln==0 || I->len_aln==0) return 0; - O->nseq=o; - I->nseq=i; - for (a=0; aseq_al[a][O->len_aln]='\0'; - for (a=0; aseq_al[a][I->len_aln]='\0'; - - alp=vcalloc ( sizeof (char**), I->len_aln); - alp_size= vcalloc ( I->len_aln, sizeof (int)); - for (a=0; alen_aln; a++) - { - char *col; - alp[a]=string2alphabet ( (col=aln_column2string (I,a)),2, &alp_size[a]); - vfree (col); - } - - - - motif_list=generate_array_string_list (I->len_aln, alp, alp_size, &n, NULL, OVERLAP); - best_pred=best_motif=0; - for (a=0; anseq; b++) - { - if (match_motif (I->seq_al[b], motif_list[a]))tp++; - else fn++; - } - for (b=0; bnseq; b++) - { - if (match_motif (O->seq_al[b], motif_list[a]))fp++; - else tn++; - } - rates2sensitivity (tp, tn, fp, fn, &sp, &sn, &sen2, &best); - - if (best> best_pred) - { - best_pred=best; - best_motif=a; - } - } - - output_Alignment_without_header ( I, stdout); - fprintf ( stdout, "\n"); - output_Alignment_without_header ( O, stdout); - - - fprintf ( stdout, "\nMotifCompound %d pred: %.2f motif: ", c, best_pred); - for (n1=0, a=0; alen_aln; a++) - { - char *m; - int l; - m=motif_list[best_motif][a]; - fprintf ( stdout, "[%s]-", m); - l=strlen (m); - n1+=(l==1 && !strm ("*",m) )?1:0; - } - fprintf (stdout, "SCORE: %d", n1); - - for (a=0; alen_aln) - { - fprintf ( stdout, "\n W:"); - for (a=0; alen_aln; a++)fprintf ( stdout, "%d", array[a]); - fprintf ( stdout, " %.4f",(float)sar_aln2r(A,B,array,0)); - return; - } - else - { - for ( a=0; alen_aln=w; - for ( a=0; alen_aln-w;a++) - { - for (b=0; bnseq; b++) - { - for (c=0; cseq_al[b][c]=B->seq_al[b][a+c]; - } - S->seq_al[b][c]='\0'; - } - - s=search_best_combo (A, S); - fprintf ( stdout,"\nP: XXXX \nP: XXXXX A=%d / %d", a, B->len_aln); - - } - -} - -float search_best_combo(Alignment *A, Alignment *B) -{ - int a, b, c, d, best_pos,nl, max; - float best_score, score; - int *list, *pos; - - int w; - int combo_mode=1; //1: greedy 2: consider all thw w combinations; - FILE *fp2; - static int **M; - max=2; - int delta=0; - w=1; - - pos=vcalloc ( A->len_aln, sizeof (int)); - list=vcalloc (A->len_aln, sizeof (int)); - nl=0; - - if ( combo_mode==1) - { - for (a=0; a< max; a++) - { - for (best_score=-9999,best_pos=0,b=0; b< A->len_aln-w; b++) - { - for (c=0; cbest_score) - { - best_score=score; - best_pos=b; - } - for (c=0; clen_aln-1, 1,NULL, tmpf); - printf_system ( "cp %s testfile", tmpf); - buf=vcalloc ( 1000, sizeof (char)); - fp=vfopen (tmpf, "r"); - best_score=-99999; - - n_preset=0; - preset=vcalloc (A->len_aln, sizeof (int)); - preset[n_preset++]=353; - preset[n_preset++]=361; - //preset[n_preset++]=365; - //preset[n_preset++]=187; - //preset[n_preset++]=397; - //preset[n_preset++]=492; - - - while ( (buf=vfgets ( buf, fp))!=NULL) - { - - array=string2num_list (buf); - - for (a=1; a<=max; a++) - { - pos[array[a]]=1; - } - for ( a=0; abest_score) - { - best_score=score; - fprintf ( stdout, "\n"); - for (a=0; alen_aln; c++) - { - sar_aln2motif (A,B,pos, c); - - } - exit (0); - HERE ("***************"); - fp2=vfopen ("aln.aln", "w"); - for (a=0; anseq; a++) - { - fprintf (fp2, ">%s\n", A->name[a]); - for ( b=0; bseq_al[a][list[b]]); - fprintf ( fp2, "\n"); - } - vfclose (fp2); - HERE ("Output aln.aln"); - if (1) - { - float tp=0, tn=0, fp=0, fn=0, pp2=0,pp=0, sn,sn2, sp; - int **result,**result2,**compound_score, *ref_score,n2,n, s, p, c; - Alignment *AI, *AO; - int simI, simO; - - compound_score=declare_int (B->len_aln, 2); - ref_score=vcalloc (nl, sizeof (int)); - - result=declare_int (B->len_aln*A->nseq*A->nseq, 2); - result2=declare_int (B->len_aln*A->nseq*A->nseq, 2); - - for (n2=c=0; c< B->len_aln; c++) - { - - int sar1, sar2; - pp=tp=tn=fp=fn=0; - if (!M)M=read_matrice ("blosum62mt"); - for (n=0,a=0; anseq-1; a++) - { - for (b=a+1; bnseq;b++) - { - for (s=0,p=0; pseq_al[a][list[p]]; - r2=A->seq_al[b][list[p]]; - if ( !is_gap (r1) && !is_gap(r2))s+=M[r1-'A'][r2-'A']; - } - result2[n2][0]=result[n][0]=s; - - sar1=B->seq_al[a][c];sar2=B->seq_al[b][c]; - - if (sar1=='I' && sar1==sar2) - { - result2[n2][1]=result[n][1]=1; - pp++;pp2++; - n++;n2++; - } - else if ( sar1==sar2 && sar1=='O') - { - ; - } - else - { - result2[n2][1]=result[n][1]=0; - n++;n2++; - } - //else if ( s1==s2=='O')result[n][1]=-1; - } - } - - if (pp==0)continue; - sort_int_inv (result, 2, 0, 0, n-1); - - - for (tp=0,a=0; alen_aln-1); - - fp2=vfopen ("compound.fasta", "w"); - for (d=0; dnseq; a++) - for (b=0; bnseq; b++) - { - r1= A->seq_al[b][list[d]]; - r2= A->seq_al[b][list[d]]; - if (is_gap(r1) || is_gap(r2))continue; - else - { - ref_score[d]+=M[r1-'A'][r2-'A']; - n++; - } - } - ref_score[d]/=n; - } - AO=copy_aln (A, NULL); - AI=copy_aln (A,NULL); - AO->len_aln=AI->len_aln=nl; - for (a=0; anseq; a++)AO->seq_al[a][nl]=AI->seq_al[a][nl]='\0'; - - for (a=0; alen_aln; a++) - { - fprintf (stdout, "\n>%4d %4d ", compound_score[a][0], compound_score[a][1]); - for (b=0; bnseq; b++) fprintf (stdout, "%c", B->seq_al[b][compound_score[a][0]]); - fprintf ( stdout, "\n"); - - for (AI->nseq=0,b=0; bnseq; b++) - { - if (B->seq_al[b][compound_score[a][0]]=='O')continue; - fprintf ( stdout, "\n\t"); - for (c=0; cseq_al[b][list[c]]); - AI->seq_al[AI->nseq][c]=A->seq_al[b][list[c]]; - } - AI->nseq++; - } - fprintf ( stdout, "\n\t"); - for (d=0; dnseq; b++) - { - if (B->seq_al[b][compound_score[a][0]]=='O')continue; - for (c=0; cnseq; c++) - { - if (B->seq_al[c][compound_score[a][0]]=='O')continue; - { - int r1, r2; - - r1= A->seq_al[b][list[d]]; - r2= A->seq_al[b][list[d]]; - if (is_gap(r1) || is_gap(r2))continue; - else score+=M[r1-'A'][r2-'A']; - n++; - } - } - } - score/=n; - if ((float)score/(float)ref_score[d]>1.2)fprintf ( stdout, "*"); - else fprintf ( stdout, " "); - } - for (AO->nseq=0,b=0; bnseq; b++) - { - if (B->seq_al[b][compound_score[a][0]]=='I')continue; - fprintf ( stdout, "\n\t"); - for (c=0; cseq_al[AO->nseq][c]=A->seq_al[b][list[c]]; - fprintf ( stdout, "%c", A->seq_al[b][list[c]]); - } - AO->nseq++; - } - simI=aln2sim (AI, "blosum62mt"); simO=aln2sim (AO, "blosum62mt"); - fprintf ( stdout, "\nDELTA: I: %d O: %d %d",simI,simO, simI-simO); - delta+=simI-simO; - } - - for ( a=0; anseq; a++) - { - - fprintf ( fp2, ">%s\n", B->name[a]); - for (b=0; blen_aln/2; b++) - fprintf ( fp2, "%c", B->seq_al[a][compound_score[b][0]]); - fprintf (fp2, "\n"); - } - vfclose (fp2); - HERE ("OUTPUT compound.fasta"); - result=result2; - n=n2; - pp=pp2; - - sort_int_inv (result, 2, 0, 0, n-1); - - - for (tp=0,a=0; anseq; b++) - C->seq_al[b][a]=A->seq_al[b][list[a]]; - C->len_aln=nl; - array=vcalloc (C->len_aln, sizeof (int)); - explore_weight_matrix (C, B, 6,0, array); - */ - - return best_score; -} - - -void count_misc (Alignment *A, Alignment *B) -{ - int **done, a, b, c, d, e,f, g, *list, n, score; - double **slist, *r; - int *pos; - int w=1; - - search_best_combo (A,B); - exit (0); - pos=vcalloc (A->len_aln+1, sizeof (int)); - /* - pos[354]=1; - pos[362]=1; - pos[366]=1; - pos[398]=1; - pos[476]=1; - - - fprintf ( stdout, "\nR: %3f " ,(float)sar_aln2r(A,B,pos,1));exit (0); - */ - for (a=0; a< A->len_aln-w; a++) - { - for (c=0; clen_aln-1; a++) - { - pos[a-w]=0; - pos[a]=1; - fprintf ( stdout, "\nP: %3d W:2 R: %3f ",a, (float)sar_aln2r(A,B,pos,0)); - } - - exit (0); - pos[2]=1; - pos[3]=1; - - - - explore_weight_matrix (A, B,3, 0,pos); - exit (0); - - for (a=0; alen_aln; a++) - for ( b=0; blen_aln; b++) - for (c=0; clen_aln; c++) - for (d=0; dlen_aln; d++) - for (f=0; flen_aln; f++) - for (g=0; glen_aln; g++) - { - e=0; - pos[e++]=a; - pos[e++]=b; - pos[e++]=c; - pos[e++]=d; - pos[e++]=f; - pos[e++]=g; - pos[e++]=-1; - fprintf ( stdout, "\n%d %d %d %d %d %d %.3f", a, b,c,d,f, g, sar_aln2r(A,B, pos,0)); - - } - - exit (0); - - - slist=declare_double (A->nseq*A->nseq*10, 2); - done=declare_int (256, 256); - list=vcalloc ( A->nseq, sizeof (int)); - - for (a=0; alen_aln-1; a++) - { - for (b =0; b<256; b++)for (c=0; c<256; c++)done[b][c]=0; - - for (b=0; bnseq-1; b++) - { - int r1, r2; - r1=A->seq_al[b][a]; - r2=A->seq_al[b][a+1]; - if (done[r1][r2])continue; - n=0; - done[r1][r2]=1; - list[n++]=b; - fprintf ( stdout, "\n%3d %c%c: %s ",a+1, r1, r2, A->name[b]); - for ( c=b+1; cnseq; c++) - { - if (r1==A->seq_al[c][a] && r2==A->seq_al[c][a+1]) - { - fprintf ( stdout, "%s ", A->name[c]); - list[n++]=c; - } - - } - if (B && n>1) - { - for (e=0,score=0,c=0; cseq_al[list[c]], B->seq_al[list[d]]); - fprintf ( stdout, " Score=%d", score/e); - } - } - } - for (score=0,e=0,a=0; anseq-1; a++) - for (b=a+1; bnseq; b++,e++) - { - score+=get_sar_sim2(B->seq_al[a], B->seq_al[b]); - } - fprintf (stdout,"AVG=%d", score/e); - for (n=0,a=0; a< A->nseq-1; a++) - { - static int **M; - int sim; - if (!M)M=read_matrice ("blosum62mt"); - - - for (b=a+1; bnseq; b++) - { - int n11, n01, n10, n00, n1; - - for (sim=d=0;dlen_aln; d++) - { - int r1, r2; - r1=A->seq_al[a][d]; - r2=A->seq_al[b][d]; - sim+=(r1==r2)?1:0; - //sim +=(M[r1-'A'][r2-'A']>0)?1:0; - } - - sim=(100*sim)/(A->len_aln);//+rand()%10; - for (n1=n00=n11=n10=n01=score=0, d=0; dlen_aln; d++) - { - int r1, r2; - r1=B->seq_al[a][d]; - r2=B->seq_al[b][d]; - n11+=(r1=='I' && r2=='I'); - n00+=(r1=='O' && r2=='O'); - n10+=(r1=='I' && r2=='0'); - n01+=(r1=='O' && r2=='I'); - n1+=(r1=='I' || r2=='I'); - } - score =((n11+n00)*100)/B->len_aln; - - //score=get_sar_sim2(B->seq_al[a], B->seq_al[b]); - - fprintf ( stdout, "\nSIM: %d SC: %d", sim, score); - slist[n][0]=(double)sim; - slist[n][1]=(double)score; - n++; - } - } - r=return_r(slist, n); - fprintf ( stdout, "\nR= %.4f", (float)r[0]); - exit (0); -} - -int aln2ngap ( Alignment *A) -{ - int ngap=0, a, b; - for (a=0; a< A->len_aln; a++) - for (b=0; bnseq; b++) ngap+=is_gap (A->seq_al[b][a]); - return ngap; -} -int * count_in_aln ( Alignment *A, Alignment *ST, int value, int n_symbol,char **symbol_list, int *table) - { - int a, b, c=0, d; - int st; - - if (!table)table=vcalloc (n_symbol, sizeof (int)); - - A->residue_case=KEEP_CASE; - for ( a=0; a< A->nseq; a++) - { - if(value!=10 && ST)for ( c=0; c< ST->nseq; c++)if ( strm(ST->name[c], A->name[a]))break; - for ( b=0; b< A->len_aln; b++) - { - if ( value==10 || !ST)st=11; - else st=(isdigit(ST->seq_al[c][b]))?ST->seq_al[c][b]-'0':ST->seq_al[c][b]; - if ( st==value || value==-1) - { - for ( d=0; dseq_al[a][b], symbol_list[d]); - } - } - } - return table; - } - -char *dna_aln2cons_seq ( Alignment *A) - { - int a, b, best; - static int **column_count; - static int **old_tot_count; - static int **new_tot_count; - static char *string1, *string2; - int **count_buf; - char r1, r2,*seq; - int NA=0, NG=1, NC=2, NT=3, IGAP=4; - static int MAX_EST_SIZE=10000; - static int size_increment=1000; - static int first; - int overlap=0, best_overlap=0; - - - seq=vcalloc ( A->len_aln+1, sizeof (char)); - - if (!column_count ) - { - column_count=vcalloc(MAX_EST_SIZE, sizeof (int*)); - for ( a=0; a< MAX_EST_SIZE; a++) - column_count[a]=vcalloc (5, sizeof (int)); - - old_tot_count=vcalloc(MAX_EST_SIZE, sizeof (int*)); - new_tot_count=vcalloc(MAX_EST_SIZE, sizeof (int*)); - A->P=declare_profile( "agct-",MAX_EST_SIZE); - string1=vcalloc (MAX_EST_SIZE, sizeof (char)); - string2=vcalloc (MAX_EST_SIZE, sizeof (char)); - } - else if (A->len_aln>MAX_EST_SIZE) - { - if ( column_count) - { - for ( a=0; a< MAX_EST_SIZE; a++) - vfree(column_count[a]); - vfree(column_count); - vfree(old_tot_count); - vfree(new_tot_count); - vfree(string1); - vfree(string2); - } - - column_count=vcalloc(MAX_EST_SIZE+ size_increment, sizeof (int*)); - for ( a=0; a< MAX_EST_SIZE+ size_increment; a++) - column_count[a]=vcalloc (5, sizeof (int)); - - old_tot_count=vcalloc(MAX_EST_SIZE+ size_increment, sizeof (int*)); - new_tot_count=vcalloc(MAX_EST_SIZE+ size_increment, sizeof (int*)); - - for (a=0; a< MAX_EST_SIZE; a++) - { - old_tot_count[a]=*(column_count++); - for ( b=0; b<5; b++)old_tot_count[a][b]=(A->P)->count[b][a]; - } - free_int ( (A->P)->count, -1); - - (A->P)->count=declare_int (5, MAX_EST_SIZE+ size_increment); - (A->P)->max_len=MAX_EST_SIZE+ size_increment; - MAX_EST_SIZE+= size_increment; - string1=vcalloc (MAX_EST_SIZE, sizeof (char)); - string2=vcalloc (MAX_EST_SIZE, sizeof (char)); - } - - - sprintf ( string1, "%s",A->seq_al[0]); - sprintf ( string2, "%s",A->seq_al[1]); - - - string1=mark_internal_gaps(string1,'.'); - string2=mark_internal_gaps(string2,'.'); - - - - for (b=0,a=0; a< A->len_aln; a++) - { - r1=string1[a]; - r2=string2[a]; - - if ( r1==r2) - { - overlap++; - } - else - { - best_overlap=MAX(overlap, best_overlap); - overlap=0; - } - - - if (!is_gap(r1) && first==1)new_tot_count[a]=old_tot_count[b++]; - else if (is_gap(r1) || first==0){new_tot_count[a]=*column_count;column_count++;}; - - if ( first==0) - { - if(r1=='a') new_tot_count[a][NA]++; - else if ( r1=='g')new_tot_count[a][NG]++; - else if ( r1=='c')new_tot_count[a][NC]++; - else if ( r1=='t')new_tot_count[a][NT]++; - else if (is_gap(r1)); - else - { - new_tot_count[a][NA]++; - new_tot_count[a][NG]++; - new_tot_count[a][NC]++; - new_tot_count[a][NT]++; - } - } - if ( a> 0 && alen_aln-1 && r1=='.') - { - new_tot_count[a][IGAP]+=((new_tot_count[a-1][NA]+new_tot_count[a-1][NG]+new_tot_count[a-1][NC]+new_tot_count[a-1][NT])); - } - - - if(r2=='a') new_tot_count[a][NA]++; - else if ( r2=='g')new_tot_count[a][NG]++; - else if ( r2=='c')new_tot_count[a][NC]++; - else if ( r2=='t')new_tot_count[a][NT]++; - else if ( r2=='.')new_tot_count[a][IGAP]++; - else if ( r2=='-'); - else - { - new_tot_count[a][NA]++; - new_tot_count[a][NG]++; - new_tot_count[a][NC]++; - new_tot_count[a][NT]++; - } - (A->P)->count[0][a]=new_tot_count[a][NA]; - (A->P)->count[1][a]=new_tot_count[a][NG]; - (A->P)->count[2][a]=new_tot_count[a][NC]; - (A->P)->count[3][a]=new_tot_count[a][NT]; - (A->P)->count[4][a]=new_tot_count[a][IGAP]; - - best_int(4,1, &best,new_tot_count[a][NA], new_tot_count[a][NG],new_tot_count[a][NC],new_tot_count[a][NT]); - if( best==0) seq[a]='a'; - else if ( best==1)seq[a]='g'; - else if ( best==2)seq[a]='c'; - else if ( best==3)seq[a]='t'; - } - - first=1; - - seq[a]='\0'; - fprintf ( stderr, "[Best Overlap: %d Residues]", best_overlap); - count_buf=old_tot_count; - old_tot_count=new_tot_count; - new_tot_count=count_buf; - - return seq; - - } - -char *aln2cons_maj ( Alignment *A, int ns, int *ls, int n_groups, char **group_list) - { - char *seq; - int a, b; - int len; - int clean_ls=0; - static int *aa; - - if ( !aa) aa=vcalloc (1000, sizeof (int)); - - len=strlen (A->seq_al[ls[0]]); - seq=vcalloc (len+1, sizeof (char)); - - if ( ns==0) - { - ns=A->nseq; - ls=vcalloc ( A->nseq, sizeof (int)); - for ( a=0; a< A->nseq; a++)ls[a]=a; - clean_ls=1; - } - - for ( a=0; aseq_al[ls[b]][a]); - aa[r]++; - if (!is_gap(r) && aa[r]>best_s) - { - best_s=aa[r]; - best_aa=r; - } - seq[a]=best_aa; - } - for (best_s=0, best_aa=0,b=0; b< ns; b++) - { - aa[tolower(A->seq_al[ls[b]][a])]=0; - } - } - if ( clean_ls)vfree(ls); - seq[a]='\0'; - - return seq; - } - -char *aln2cons_seq ( Alignment *A, int ns, int *ls, int n_groups, char **group_list) - { - char *seq; - int a, b, c; - int best_group=0; - int aa_group=0; - int *group; - int len; - int clean_ls=0; - - len=strlen (A->seq_al[ls[0]]); - seq=vcalloc (len+1, sizeof (char)); - - if ( ns==0) - { - ns=A->nseq; - ls=vcalloc ( A->nseq, sizeof (int)); - for ( a=0; a< A->nseq; a++)ls[a]=a; - clean_ls=1; - } - - - if ( !group_list) - { - group_list=declare_char ( 26, 2); - for ( a=0; a<26; a++)group_list[a][0]=a+'a'; - n_groups=26; - aa_group=1; - } - - - for ( a=0; aseq_al[ls[b]][a])) - { - for (c=0; c< n_groups; c++) - if ( is_in_set (tolower(A->seq_al[ls[b]][a]), group_list[c])) - {group[c]++; - best_group=(group[c]>group[best_group])?c:best_group; - } - } - seq[a]=group_list[best_group][0]; - } - vfree (group); - } - seq[a]='\0'; - if ( aa_group) free_char (group_list, -1); - - if ( clean_ls)vfree(ls); - - return seq; - } - -Alignment *aln2conservation ( Alignment *A, int threshold,char *seq) -{ - int a, b, c, d, i, c1, c2; - int *pos; - float *eval; - float tot=0; - float tn=0; - int **sim; - int w=0; - - pos =vcalloc (A->len_aln, sizeof (int)); - eval=vcalloc (A->len_aln, sizeof (int)); - sim=aln2sim_mat (A, "idmat"); - if (seq)i=name_is_in_list (seq, A->name, A->nseq, 100); - else i=0; - - if ( i==-1) {HERE ("%s is an unknown:sequence [FATAL]"); exit (EXIT_FAILURE);} - - for (a=0; alen_aln; a++) - { - double s; - int e; - for (c=0,e=a-w; e<=a+w; e++) - { - if (e<0 || e==A->len_aln)continue; - c1=toupper (A->seq_al[i][e]); - for (b=0; bnseq; b++) - { - c2=toupper (A->seq_al[b][a]); - if (c1==c2) - { - c++; - s=(double)((double)sim[i][b]/(double)(100)); - - } - else - { - s=(double)(((double)100-(double)sim[i][b])/(double)(100)); - } - eval[a]+=(s==0)?0:log(s); - } - } - pos[a]=(c*100)/A->nseq; - if (!is_gap(c1)){tot+=pos[a]; tn++;} - - if (pos[a]>=threshold)A->seq_al[i][a]=toupper (A->seq_al[i][a]); - else A->seq_al[i][a]=tolower (A->seq_al[i][a]); - } - fprintf (stdout, ">%s %s [i=%d]\n%s\n", A->name[i],A->aln_comment[i],i, A->seq_al[i]); - tot=(tn>0)?(float)tot/(float)tn:0; - - for (d=0,a=0; alen_aln; a++) - { - fprintf (stdout, "# %c %4d", A->seq_al[i][a],pos[a]); - - - if ( !is_gap (A->seq_al[i][a])) - { - fprintf (stdout, " LogOdd: %6.2f ", (tot==0 || pos[a]==0)?0:(float)log((float)pos[a]/tot)); - fprintf ( stdout, " Pos: %5d E-Val: %9.2f", ++d, eval[a]/(A->nseq)); - } - fprintf ( stdout, "\n"); - } - fprintf ( stdout, "#average conservation: %.2f", tot); - exit (EXIT_SUCCESS); -} -char *aln2cons_seq_mat ( Alignment *A, char *mat_name) -{ - return sub_aln2cons_seq_mat (A, A->nseq, NULL, mat_name); -} -char *sub_aln2cons_seq_mat2 ( Alignment *A,int ns, char **ls, char *mat_name) -{ - char *cons; - int *list; - list=name_array2index_array(ls, ns, A->name, A->nseq); - cons=sub_aln2cons_seq_mat ( A,ns, list, mat_name); - vfree (list); - return cons; -} - -char *sub_aln2cons_seq_mat ( Alignment *A,int ns, int *ls, char *mat_name) -{ - int a, b, c, s; - char *seq, r1, r2; - int **mat; - int score=0, best_score=0, best_r=0; - int len; - int naa; - - mat=read_matrice (mat_name); - len=strlen ( A->seq_al[(ls==NULL)?0:ls[0]]); - seq=vcalloc (len+1, sizeof (char)); - for ( a=0; aseq_al[s][a]))continue; - else - { - naa++; - r2=A->seq_al[s][a]; - score+=mat[r1-'A'][r2-'A']; - } - } - if (naa==0)best_r='-'; - if ( b==0 || score>best_score){best_score=score; best_r=r1;} - } - seq[a]=best_r; - } - free_int (mat, -1); - return seq; -} - -int seq_list2in_file ( TC_method *M, Sequence *S, char *list, char *file) -{ - X_template *T=NULL; - - if ( !S)return 0; - else - { - int t; - t=tolower(M->seq_type[0]); - - if ( t=='s') - { - return seq_list2fasta_file ( S, list, file); - - } - else - { - FILE *fp, *fp2; - int a, n, s, c; - int *slist; - - - - fp=vfopen ( file, "w"); - slist=string2num_list (list); - n=slist[0]; - - if (strlen (M->seq_type) >1) - { - add_warning( stderr, "\nERROR: Mixed seq_type not supported for external methods\n[FATAL:%s]", PROGRAM); - } - - for ( a=2; aT[s])->P; - else if (t=='r')T=(S->T[s])->R; - else if (t=='g')T=(S->T[s])->G; - - if (!T && t=='r') - { - fprintf ( fp, ">%s\n%s%s", S->name[s], S->seq[s], LINE_SEPARATOR); - } - else if ( T && T->template_file && T->template_file[0]) - { - fp2=vfopen (T->template_file, "r"); - while ( (c=fgetc (fp2))!=EOF) - { - fprintf ( fp, "%c", c); - } - fprintf (fp, "%s", LINE_SEPARATOR); - vfclose (fp2); - } - } - - fprintf (fp, "TARGET_SEQ_NAME: "); - for (a=2; aname[slist[a]])); - fprintf ( fp, "%s", LINE_SEPARATOR); - - vfclose (fp); vfree (slist); - - } - - return 1; - } -} - -int seq_list2fasta_file( Sequence *S, char *list, char *file) - { - FILE *fp; - int n, a, s; - static char *buf; - static int blen; - int l; - - - /*Buf is used because cmalloced functions cannot go through strtok*/ - if ( !S)return 0; - else - { - fp=vfopen ( file, "w"); - if ( !list) - { - for ( a=0; anseq; a++) - { - fprintf ( fp, ">%s %s\n%s\n", decode_name (S->name[a], CODE),S->name[a], S->seq[a]); - } - } - else - { - l=strlen (list); - if ( l>blen) - { - if (buf)vfree(buf); - buf=vcalloc ( strlen (list)+1, sizeof (char)); - sprintf ( buf, "%s", list); - blen=l; - } - n=atoi(strtok (list,SEPARATORS)); - for ( a=0; a< n; a++) - { - s=atoi(strtok (NULL, SEPARATORS)); - fprintf ( fp, ">%s %s\n%s\n", decode_name (S->name[s], CODE), S->name[a],S->seq[s]); - } - } - vfclose (fp); - } - return 1; - } -Structure * seq2struc ( Sequence *S, Structure *ST) - { - int a, b; - - for ( a=0; a< S->nseq; a++) - for ( b=0; b< S->len[a]; b++) - ST->struc[a][b+1][ST->n_fields-1]=S->seq[a][b]; - return ST; - } - -void aln2struc (Alignment *A, Structure *ST) - { - int a, b, c; - - for ( a=0; a< A->nseq; a++) - for (c=0, b=0; b< A->len_aln; b++) - { - if ( !is_gap (A->seq_al[a][b])) - { - ST->struc[a][c][ST->n_fields-1]=A->seq_al[a][b]; - c++; - } - } - } -Alignment *stack_aln (Alignment *A, Alignment *B) - { - int a,b; - int max_len=0, max_nseq=0; - if ( B==NULL)return A; - if ( A==NULL)return B; - - max_nseq=A->nseq+B->nseq; - for (a=0; a< A->nseq; a++)max_len=MAX(strlen(A->seq_al[a]),max_len); - for (a=0; a< B->nseq; a++)max_len=MAX(strlen(B->seq_al[a]),max_len); - - A=realloc_aln2 ( A,max_nseq,max_len+1); - - for (a=A->nseq,b=0; b< B->nseq; b++, a++) - { - sprintf ( A->seq_comment[a] , "%s", B->seq_comment[b]); - sprintf ( A->aln_comment[a] , "%s", B->aln_comment[b]); - - sprintf ( A->seq_al [a] , "%s", B->seq_al [b]); - sprintf ( A->name [a] , "%s", B->name[b]); - sprintf ( A->file [a], "%s" , B->file[b]); - A->order[a][0]=B->order[b][0]; - A->order[a][1]=B->order[b][1]; - A->score_seq[a]=B->score_seq[b]; - A->len[a]=B->len[b]; - } - - A->len_aln=MAX(A->len_aln, B->len_aln); - A->nseq=A->nseq+B->nseq; - A->score_aln=A->score_aln+B->score_aln; - - A->finished=A->finished+B->finished; - return A; - } - -Alignment *chseqIaln(char *name, int seq_n, int start,int len,Sequence *S, int seqIaln, Alignment *A) - { - char *seq; - - seq=extract_char ( S->seq[seq_n], start, len); - A=realloc_aln2 (A, (A==NULL)?(seqIaln+1):MAX(A->nseq,seqIaln+1), ((A==NULL)?(strlen (seq)):MAX(strlen (seq),A->len_aln))+1); - - - sprintf ( A->seq_al[seqIaln], "%s",seq); - - - A->order[seqIaln][0]=seq_n; - A->order[seqIaln][1]=start; - sprintf ( A->name[seqIaln], "%s", name); - A->nseq=MAX(A->nseq, seqIaln+1); - A->len_aln=return_maxlen(A->seq_al, A->nseq); - A->S=S; - vfree (seq); - return A; - } - -Alignment * aln_gap2random_aa(Alignment *A) - { - int a, b,l; - char alp[200]; - - if (strm ( (A->S)->type, "PROTEIN")) - sprintf ( alp, "acefghiklmnpqrstuvwy"); - else if ( strm ( (A->S)->type, "DNA") ||strm ( (A->S)->type, "RNA") ) - sprintf ( alp, "agct"); - l=strlen (alp); - - - for (a=0; anseq; a++) - for ( b=0; blen_aln; b++) - if ( is_gap (A->seq_al[a][b]))A->seq_al[a][b]=alp[(int)rand()%(l)]; - return A; - } - -Alignment * make_random_aln(Alignment *A,int nseq, int len, char *alphabet) - { - int a; - - - A=realloc_aln2(A, nseq, len+1); - - A->nseq=0; - A->len_aln=len; - for ( a=0; a< A->nseq; a++)sprintf ( A->file[a], "random alignment"); - for ( a=0; a< nseq; a++) - A=add_random_sequence2aln(A,alphabet); - return A; - } -Alignment * add_random_sequence2aln( Alignment *A, char *alphabet) - { - int a, n; - - vsrand(0); - - n=strlen(alphabet); - A=realloc_alignment2 (A, A->nseq+1, A->len_aln+1); - - for ( a=0; a< A->len_aln; a++)A->seq_al[A->nseq][a]=alphabet[rand()%n]; - if (! A->name[A->nseq][0]) - { - for ( a=0; a<10; a++)A->name[A->nseq][a]=alphabet[rand()%n]; - A->name[A->nseq][a]='\0'; - } - - A->nseq++; - return A; - } - -Sequence *get_defined_residues( Alignment *A) - { - char *buf; - Sequence *S; - int a, b, s, l, r; - if ( !A || !A->S) return NULL; - - S=duplicate_sequence (A->S); - for ( a=0; a< S->nseq; a++) - for ( b=0; b< S->len[a]; b++)S->seq[a][b]=UNDEFINED_RESIDUE; - buf=vcalloc(A->len_aln+1,sizeof (char)); - for ( a=0; a< A->nseq; a++) - { - sprintf ( buf, "%s",A->seq_al[a]); - ungap(buf); - l=strlen (buf); - s=A->order[a][0]; - - for ( b=1; b<= l; b++) - { - r=A->seq_cache[s][b]; - - if ( r>=0)S->seq[s][r-1]=(A->S)->seq[s][r-1]; - } - } - vfree(buf); - return S; - } -Alignment *thread_defined_residues_on_aln ( Alignment *A, Sequence *S1) - { - int a, b; - int gap, r,s, r2; - for ( a=0; a< A->nseq; a++) - { - s=A->order[a][0]; - r=A->order[a][1]; - for (b=0;b< A->len_aln; b++) - { - gap=is_gap(A->seq_al[a][b]); - - if (!gap) - { - r+=!gap; - r2=A->seq_cache[s][r]-1; - - if (r2>=0 && S1->seq[s][r2]==UNDEFINED_RESIDUE) - A->seq_al[a][b]=UNDEFINED_RESIDUE; - } - } - } - return A; - } - -int ** trim_aln_borders (char **seq1, char **seq2, int nseq) - { - int a, b, c,l1,l2; - char *buf1; - char *buf2; - int max; - - - - - max=MAX(get_longest_string (seq1,-1, NULL, NULL),get_longest_string (seq2,-1, NULL, NULL))+1; - buf1=vcalloc ( max, sizeof(char)); - buf2=vcalloc ( max, sizeof(char)); - - for ( a=0; a< nseq; a++) - { - sprintf ( buf1, "%s", seq1[a]); - sprintf ( buf2, "%s", seq2[a]); - - - - ungap (buf1); - ungap (buf2); - - if (str_overlap ( buf1, buf2,'*')!=0) - { - l1=strlen ( seq1[a]); - l2=strlen ( seq2[a]); - for ( b=0,c=0; c< l1; c++) - if ( !is_gap(seq1[a][c]))seq1[a][c]=buf1[b++]; - seq1[a][c]='\0'; - for ( b=0,c=0; c< l2; c++) - if ( !is_gap(seq2[a][c]))seq2[a][c]=buf2[b++]; - seq2[a][c]='\0'; - } - } - vfree (buf1); - vfree (buf2); - return NULL; - - } -Sequence * merge_seq ( Sequence *IN, Sequence *OUT) - { - int a; - - if ( OUT==NULL)return duplicate_sequence (IN); - else - { - if ( IN && check_list_for_dup( IN->name, IN->nseq)) - { - fprintf ( stderr, "\nERROR: %s is duplicated in file %s[FATAL]\n", check_list_for_dup( IN->name, IN->nseq), IN->file[0]); - myexit (EXIT_FAILURE); - } - for ( a=0; a< IN->nseq; a++) - if ((OUT=add_sequence ( IN, OUT, a))==NULL)return NULL; - return OUT; - } - } - -Alignment *seq_name2removed_seq_name(Sequence *S, Alignment *NA, float **diff) -{ - int a, b, rb, s; - float min_diff; - for (a=0; a< S->nseq; a++) - { - if (name_is_in_list( S->name[a], NA->name, NA->nseq, 100)!=-1) continue; - for ( min_diff=100, s=0, b=0; b< NA->nseq; b++) - { - rb=name_is_in_list ( NA->name[b], S->name, S->nseq, 100); - if ( diff[a][rb]seq_comment[s], " "); - strcat ( NA->seq_comment[s], S->name[a]); - } - return NA; -} - - - - -int seq_name2index (char *name, Sequence *S) -{ - if ( !S) return -1; - else return name_is_in_list ( name, S->name, S->nseq, MAXNAMES+1); -} -char * seq_name2coor ( char *s, int *start, int *end, char sep) -{ - /*name|start|end */ - char n1[100], n2[100]; - int a=0, b=0, c=0; - - n1[0]=n2[0]='\0'; - start[0]=end[0]=0; - - while ( s[a]!=sep && s[a]!='\0')a++; - if ( s[a]=='\0')return s; - else - s[a++]='\0'; - - - - while ( s[a]!=sep && s[a]!='\0')n1[b++]=s[a++]; - - if ( s[a]=='\0'){n1[b]='\0';if ( n1[0])start[0]=atoi(n1);return s;} - else s[a++]=n1[b]='\0'; - - - while ( s[a]!=sep && s[a]!='\0')n2[c++]=s[a++]; - n2[c]='\0'; - - - if ( n1[0])start[0]=atoi(n1); - if ( n2[0])end[0]=atoi(n2); - - - return s; -} - -Sequence *extract_one_seq(char *n,int start, int end, Alignment *S, int keep_name) - { - - int seq, a; - FILE*fp; - char *name; - Sequence *OUT_S; - - - if ( n[0]=='#')seq=S->nseq; - else if ( (seq=name_is_in_list (n, S->name, S->nseq, 100)+1)!=0); - else if (is_number (n) && (seq=atoi(n))!=0) seq=atoi(n); - else - { - fprintf ( stderr, "\nCould not find Sequence %s [FATAL]", n); - myexit (EXIT_FAILURE); - } - seq--; - - name=vtmpnam ( NULL); - fp=vfopen ( name, "w"); - if ( start && end &&!keep_name)fprintf (fp, ">%s_%d_%d\n",S->name[seq],start, end); - else if ( start && end==0 && !keep_name)fprintf (fp, ">%s_%d_%d\n",S->name[seq],start,(int)strlen ( S->seq_al[seq])); - else fprintf (fp, ">%s\n", S->name[seq]); - - if ( start==0 && end==0){fprintf (fp, "%s\n", S->seq_al[seq]);} - else if (end==0){fprintf (fp, "%s\n", S->seq_al[seq]+start-1);} - else - { - for ( a=start-1; aseq_al[seq][a]);} - fprintf ( fp, "\n"); - } - - - vfclose (fp); - OUT_S=get_fasta_sequence_num (name, NULL); - - return OUT_S; - } - - - -Sequence * extract_sub_seq( Sequence *COOR, Sequence *S) - { - int a, b, c,s; - int start, end; - - for ( a=0; a< S->nseq; a++) - { - if ( (s=name_is_in_list ( S->name[a], COOR->name, COOR->nseq, 100))!=-1) - { - - sscanf ( COOR->seq_comment[s], "%d %d", &start, &end); - for (c=0,b=start-1; b< end; b++, c++)S->seq[a][c]=S->seq[a][b]; - S->seq[a][c]='\0'; - sprintf ( S->seq_comment[a], "%s",COOR->seq_comment[s]); - - } - } - S=reorder_seq ( S, COOR->name, COOR->nseq); - return S; - } - - - -char * aln_column2string (Alignment *A, int p) - { - char *s; - int a; - if (p>=A->len_aln) - { - HERE ("ERROR: index (p=%d) loger than aln (l=%d) [FATAL]", p, A->len_aln); - exit (EXIT_FAILURE); - } - else - { - s=vcalloc (A->nseq+1, sizeof (char)); - for (a=0; a< A->nseq; a++)s[a]=A->seq_al[a][p]; - } - return s; - } -Alignment * fix_aln_seq ( Alignment *A, Sequence *S) - { - int a, b, c; - char *buf1, *buf2; - int g0, g1, nr0, nr1; - int id, tot; - Alignment *B; - - - /*This function establishes the correspondance between every (1..N+1) residue of each aligned sequence - and its correspondance in S: - A->seq_cache[a][b]=x means that residue b of aligned sequence a corresponds to residue x of the sequence with tye same index in S - A->seq_cache[a][b]=0 means there is no correspondance. - a is the index of the sequence - Applying this function is needed for turning an alignment into a constraint list - */ - - - if ( S==NULL)return A; - - A->seq_cache=declare_int ( S->nseq, MAX((A->len_aln+1), S->max_len+1)); - - for (a=0; a< S->nseq; a++) - for ( b=0; b< A->len_aln; b++)A->seq_cache[a][b]=-1; - - buf1=buf2=NULL; - for ( a=0; a< S->nseq; a++) - { - for (b=0; b< A->nseq; b++) - { - if (strm ( S->name[a], A->name[b])) - { - A->order[b][0]=a; - - vfree (buf1); - buf1=vcalloc ( A->len_aln+1, sizeof (char)); - sprintf (buf1, "%s", A->seq_al[b]); - ungap (buf1); - upper_string (buf1); - - vfree(buf2); - buf2=vcalloc (strlen(S->seq[a])+1, sizeof (char)); - sprintf (buf2, "%s",S->seq[a]); - ungap (buf2); - upper_string (buf2); - - - - if ( strm (buf1,buf2)) - { - - for ( c=0; clen[a]; c++)A->seq_cache[a][c+1]=c+1; - } - else - { - - B=align_two_sequences (buf2,buf1,"blosum62mt",-4,-1, "myers_miller_pair_wise"); - if ( getenv ("DEBUG_RECONCILIATION")) - { - fprintf (stderr, "\n[DEBUG_RECONCILIATION:fix_aln_seq]\nReconciliation of %s\nA=Ref_sequence\nB=New_seq", S->name[a]); - print_aln (B); - } - - for (id=0, tot=0,nr0=0,nr1=0,c=0; clen_aln; c++) - { - g0=is_gap(B->seq_al[0][c]); - g1=is_gap(B->seq_al[1][c]); - nr0+=1-g0; - nr1+=1-g1; - if ( !g0 && !g1) - { - tot++; - id+=(B->seq_al[0][c]==B->seq_al[1][c])?1:0; - A->seq_cache[a][nr1]=nr0; - } - else if (g0 && !g1) - { - A->seq_cache[a][nr1]=0; - } - } - if ( ((id*100)/tot)<20) - { - print_aln (B); - fprintf ( stderr, "\nTwo different sequences have the same name: %s", S->name[a]); - fprintf ( stderr, "\nIf %s is a PDBID, Make sure it identifies the right chain (A, B, 1, 2...)", S->name[a]); - fprintf ( stderr, "\nChain number or index must be added to the PDB id (i.e. 1gowA)"); - fprintf ( stderr, "\nIf You want to use %s anyway, rename it with a non-PDB identifier such as seq_%s\n",S->name[a],S->name[a]); - myexit (EXIT_FAILURE); - } - - free_sequence ( B->S, -1); - free_aln (B); - } - - } - } - } - vfree(buf1);vfree(buf2); - return A; - } - -Sequence * add_prf2seq ( char *file, Sequence *S) - { - char **new_seq; - Sequence *NS; - - if ( !is_aln (file)&& !is_seq (file))return S; - else - { - X_template *R; - Alignment *A; - - - R=fill_R_template(file,file, S); - - A=(R->VR)->A; - ((R->VR)->A)->expand=1; - new_seq=declare_char (1,A->len_aln+1); - sprintf ( new_seq[0], "%s",aln2cons_seq_mat(A, "blosum62mt")); - - NS=fill_sequence_struc(1, new_seq,A->file); - S=add_sequence (NS, S, 0); - (S->T[S->nseq-1])->R=R; - - free_sequence (NS, NS->nseq); - free_char( new_seq, -1); - - return S; - } - } -int prf_in_seq ( Sequence *S) -{ - int a; - - if ( !S) return 0; - else - { - for ( a=0; a< S->nseq; a++) - if (seq2R_template_profile(S, a)) return 1; - } - return 0; -} -Sequence * add_sequence ( Sequence *IN, Sequence *OUT, int i) - { - int s, a; - - char *buf; - if (OUT==NULL) - { - - OUT=duplicate_sequence (IN); - return OUT; - } - for (a=0; anseq; a++) - { - Alignment *P; - P=seq2R_template_profile (OUT, a); - if (!P) continue; - else if (name_is_in_list (IN->name[i], P->name, P->nseq, 100)!=-1) return OUT; - } - - /*Adds sequence i of IN at the end of OUT*/ - - if ((s=name_is_in_list ( IN->name[i], OUT->name, OUT->nseq,STRING))==-1 ) - { - OUT=realloc_sequence (OUT, OUT->nseq+1, IN->len[i]); - sprintf ( OUT->name[OUT->nseq],"%s",IN->name[i]); - sprintf ( OUT->file[OUT->nseq],"%s",IN->file[i]); - sprintf ( OUT->seq_comment[OUT->nseq],"%s",IN->seq_comment[i]); - sprintf ( OUT->aln_comment[OUT->nseq],"%s",IN->aln_comment[i]); - - sprintf ( OUT->seq[OUT->nseq],"%s",IN->seq[i]); - OUT->len[OUT->nseq]=IN->len[i]; - OUT->T[OUT->nseq][0]=IN->T[i][0]; - OUT->nseq++; - return OUT; - } - else if ( s!=-1 && !case_insensitive_strcmp ( IN->seq[i], OUT->seq[s])) - { - - if ( getenv4debug("DEBUG_RECONCILIATION"))fprintf ( stderr,"[DEBUG_RECONCILIATION:add_sequence]\n%s\n%s\n", IN->seq[i], OUT->seq[s]); - - add_warning (stderr, "WARNING: DISCREPANCY:%s in [%s] and [%s]\n", IN->name[i], IN->file[i], OUT->file[s]); - - - if (((buf=build_consensus(IN->seq[i], OUT->seq[s],"cfasta_pair_wise" ))!=NULL)||((buf=build_consensus(IN->seq[i], OUT->seq[s],"myers_miller_pair_wise" ))!=NULL)) - { - - OUT->max_len=MAX(OUT->max_len, strlen(buf)); - OUT->min_len=MIN(OUT->min_len, strlen(buf)); - OUT->seq =realloc_char ( OUT->seq, -1, -1,OUT->nseq,OUT->max_len+1); - - sprintf ( OUT->seq[s],"%s",buf); - OUT->len[s]=strlen (buf); - vfree (buf); - return OUT; - } - else - { - fprintf ( stderr, "IMPOSSIBLE TO RECONCILIATE SOME SEQUENCES[FATAL:%s]\n", PROGRAM); - print_aln ( align_two_sequences (IN->seq[i], OUT->seq[s], "idmat", 0, 0, "fasta_pair_wise")); - myexit (EXIT_FAILURE); - return NULL; - } - - } - else - { - return OUT; - } - } - - -Sequence * trim_seq ( Sequence *A, Sequence *B) - { - int a; - Sequence *R; - - if (A->nseq>B->nseq) - { - Sequence *I; - I=A;A=B;B=I; - } - - R=declare_sequence (MIN(A->min_len,B->min_len), MAX(A->max_len, B->max_len), MIN(A->nseq, B->nseq)); - R->nseq=0; - - for (a=0; a< A->nseq; a++) - { - if ( name_is_in_list ( A->name[a], B->name, B->nseq,STRING+1)!=-1) - { - sprintf ( R->name[R->nseq], "%s", A->name[a]); - sprintf ( R->seq[R->nseq], "%s", A->seq[a]); - sprintf ( R->file[R->nseq], "%s", A->file[a]); - sprintf ( R->aln_comment[R->nseq], "%s", A->aln_comment[a]); - sprintf ( R->seq_comment[R->nseq], "%s", A->seq_comment[a]); - - R->len[R->nseq]=A->len[a]; - R->nseq++; - } - } - return R; - } - -Sequence * trim_aln_seq ( Alignment *A, Alignment *B) - { - int a; - static char **name_list; - int n=0; - Sequence *SA, *SB; - int **cache_A=NULL; - int **cache_B=NULL; - int * p; - - /*This function inputs two alignments A and B - It removes sequences that are not common to both of them - It rearange the sequences so that they are in the same order - A decides on the order - The Sequences (A->S) and (B->S) are treated the same way - Sequences are also merged in order to detects discrepencies. - A pointer to S is returned - */ - if (name_list)free_char (name_list, -1); - name_list=declare_char (MAX(A->nseq, B->nseq), STRING+1); - - for ( a=0; a< A->nseq; a++) - { - if ( name_is_in_list ( A->name[a], B->name, B->nseq,STRING)!=-1) - { - sprintf ( name_list[n++], "%s", A->name[a]); - } - } - - - - reorder_aln ( A, name_list, n); - if (A->seq_cache)cache_A=duplicate_int (A->seq_cache, -1, -1); - if (B->seq_cache)cache_B=duplicate_int (B->seq_cache, -1, -1); - reorder_aln ( B, name_list, n); - for ( a=0; a< n; a++) - { - if ( cache_A) - { - p=A->seq_cache[A->order[a][0]]; - A->seq_cache[A->order[a][0]]=cache_A[a]; - cache_A[a]=p; - } - if ( cache_B) - { - p=B->seq_cache[B->order[a][0]]; - B->seq_cache[B->order[a][0]]=cache_B[a]; - cache_B[a]=p; - } - A->order[a][0]=B->order[a][0]=a; - } - free_int(A->seq_cache, -1); - free_int(B->seq_cache, -1); - - A->seq_cache=cache_A; - B->seq_cache=cache_B; - - - - SA=aln2seq(A); - SB=aln2seq(B); - - A->S=B->S=merge_seq (SA, SB); - return A->S; - } -Sequence * trim_aln_seq_name ( Alignment *A, Alignment *B) - { - int a; - Sequence *S; - - /*This function inputs two alignments A and B - It removes sequences that are not common to both of them - It rearange the sequences so that they are in the same order - A decides on the order - */ - S=declare_sequence ( 1, 1, A->nseq+B->nseq); - S->nseq=0; - for ( a=0; a< A->nseq; a++) - { - if ( name_is_in_list ( A->name[a], B->name, B->nseq,STRING)!=-1) - { - sprintf ( S->name[S->nseq++], "%s", A->name[a]); - } - } - return S; - } - - - -char ** rm_name_tag (char **name, int nseq, char *tag) -{ - int a , b, ntag; - char **tag_list; - char *s; - char **template_list; - if ( !name )return NULL; - - tag_list=declare_char (10, 4); - - if ( tag) - { - ntag=1; sprintf ( tag_list[0], "%s", tag); - } - else - { - ntag=0; - sprintf ( tag_list[ntag++], "_S_"); - sprintf ( tag_list[ntag++], "_G_"); - } - template_list=declare_char (nseq, 100); - for ( a=0; a%s _%s_ %s", name[a], s+1, s+3); - break; - } - } - } - - free_char (tag_list, -1); - return template_list; -} -Sequence * swap_header ( Sequence *S, Sequence *H) -{ - int a, b, n; - - for ( a=0; a< S->nseq; a++) - { - if ( (n=name_is_in_list (S->name[a],H->name, H->nseq, 1000))!=-1) - { - char **list; - - - list=string2list (H->seq_comment[n]); - if ( list==NULL || atoi(list[0])==1)continue; - S->seq_comment[a]='\0'; - sprintf (S->name[a], "%s%s%s",H->name[n], list[1], list[2]); - vfree ( S->seq_comment[a]);S->seq_comment[a]=vcalloc ( strlen (H->seq_comment[n])+1, sizeof (char)); - for (b=3; b< atoi(list[0]); b++)S->seq_comment[a]=strcat (S->seq_comment[a], list[b]); - free_char (list, -1); - } - } - return S; -} - - -Sequence * profile_seq2template_seq ( Sequence *S, char *template_file, Fname *F) -{ - /*This function fetches potential templates associated with sequences within a profile*/ - int i; - Alignment *A; - char *tmp; - - if ( !check_file_exists (template_file)) return S; - tmp=vtmpnam (NULL); - for ( i=0; i< S->nseq; i++) - { - if ( (A=seq2R_template_profile (S, i))) - { - printf_system ("cp %s %s", template_file, tmp);//seq2template over-writes the temnplate file with a list of the templates effectively encounter - A->S=aln2seq (A); - A->S=seq2template_seq (A->S, tmp, F); - if (!A->S)return NULL; - } - } - - return S; -} - -Sequence * seq2template_type(Sequence *Seq) -{ - //add template - int a, e; - int s; - struct X_template *S=NULL; - struct X_template *P=NULL; - struct X_template *R=NULL; - struct X_template *G=NULL; - struct X_template *F=NULL; - struct X_template *T=NULL; - struct X_template *E=NULL; - struct X_template *U=NULL; - Alignment *A; - - - e=' '; - for (a=0; a< Seq->nseq; a++) - { - if (!Seq->T[a])continue; - //HERE ADD a Template - P=seq_has_template (Seq, a, "_P_"); - S=seq_has_template (Seq, a, "_S_"); - R=seq_has_template (Seq, a, "_R_"); - G=seq_has_template (Seq, a, "_G_"); - F=seq_has_template (Seq, a, "_F_"); - T=seq_has_template (Seq, a, "_T_"); - E=seq_has_template (Seq, a, "_E_"); - U=seq_has_template (Seq, a, "_U_"); - - s=(!P)?1:0; - sprintf ( (Seq->T[a])->seq_type, "%c%c%c%c%c%c%c%c", (P)?'P':e, (S)?'S':e, (S &&!P)?'s':e,(R)?'R':e, (G)?'G':e,(T)?'T':e,(E)?'E':e,(U)?'U':e); - - if (R && (A=seq2R_template_profile (Seq,a))) - { - - A->S=seq2template_type ( A->S); - } - } - return Seq; -} - -char * string_contains_template_tag (char *string_in) -{ - char string[100]; - - if ( strstr (string, "_P_"))return "_P_"; - if ( strstr (string, "_S_"))return "_S_"; - if ( strstr (string, "_R_"))return "_R_"; - if ( strstr (string, "_G_"))return "_G_"; - if ( strstr (string, "_F_"))return "_F_"; - if ( strstr (string, "_T_"))return "_T_"; - if ( strstr (string, "_E_"))return "_E_"; - if ( strstr (string, "_U_"))return "_U_"; - - return NULL; -} -static int check_blast_is_installed (char *server); - - - -static int check_blast_is_installed (char *server) -{ - if (strm (server, "EBI")); - else if ( strm (server, "NCBI")) - return check_program_is_installed (NCBIWEBBLAST_4_TCOFFEE,NULL, NULL,NCBIWEBBLAST_ADDRESS, INSTALL_OR_DIE); - else if ( strm (server, "LOCAL")) - return check_program_is_installed (NCBIBLAST_4_TCOFFEE,NULL, NULL,NCBIBLAST_ADDRESS, INSTALL_OR_DIE); - return 1; -} - - -Sequence * vremove_seq_template_files(Sequence *S) -{ - return handle_seq_template_file (S, "remove"); -} -Sequence * display_seq_template_files(Sequence *S) -{ - return handle_seq_template_file (S, "display"); -} -Sequence * handle_seq_template_file (Sequence *S, char *mode) -{ - int a; - Template *T; - - for (a=0; a< S->nseq; a++) - { - T=S->T[a]; - if (T) - { - handle_X_template_files (T->P, mode); - handle_X_template_files (T->F, mode); - handle_X_template_files (T->R, mode); - handle_X_template_files (T->T, mode); - handle_X_template_files (T->E, mode); - } - } - - return S; -} -int handle_X_template_files ( X_template *T, char *mode) - { - if (!T)return 0; - - if ( strm (mode, "remove")) - { - vremove (T->template_file); - vremove (T->template_name); - } - else if (strm (mode, "display")) - { - char buf[100]; - sprintf ( buf, "Template %s", template_type2type_name (T->template_type)); - if (check_file_exists (T->template_name))display_output_filename ( stdout,buf,T->template_format,T->template_name, STORE); - } - else - { - printf_exit (EXIT_FAILURE, stderr, "\nUnkonwn mode %s for template handling [FATAL:%s]", mode, PROGRAM); - } - return 1; - } -Sequence * seq2template_seq ( Sequence *S, char *template_list, Fname *F) -{ - /*Expected format for the template file: - >seq_name _X_ Target_template - X: S for Structures - G for genomes (Exoset) - When alternative templates are given for a sequence, the first one superseeds all the others - */ - - /*Fill the sequences*/ - /*1: No template*/ - char buf[1000]; - - int PmC,PmI,PMI; - int BmC,BmI,BMI; - char *server; - char *pdb_db,*prot_db; - - int remove_template_file=0; - - remove_template_file=get_int_variable ("remove_template_file"); - server=get_string_variable ("blast_server"); - pdb_db=get_string_variable ("pdb_db"); - prot_db=get_string_variable ("prot_db"); - - PmI=get_int_variable ("pdb_min_sim"); - PMI=get_int_variable ("pdb_max_sim"); - PmC=get_int_variable ("pdb_min_cov"); - - BmI=get_int_variable ("prot_min_sim"); - BMI=get_int_variable ("prot_max_sim"); - BmC=get_int_variable ("prot_min_cov"); - - if ( (template_list && template_list[0]=='\0') || strm ( template_list, "no_template")) - { - return S; - } - else if ( strstr (template_list, "MODE_"))//pre_set mode - { - return seq2template_seq ( S,template_list+strlen ("MODE_"),F); - } - else if ( strm ( template_list, "SSP")|| strm ( template_list, "GOR")) - { - - /*use GOR to Predict the secondary structure*/ - check_program_is_installed (GOR4_4_TCOFFEE,NULL, NULL,GOR4_ADDRESS, INSTALL_OR_DIE); - sprintf ( buf, "SCRIPT_tc_generic_method.pl@mode#ssp_template@seq#%s/%s@obs#%s/%s@cache#%s@type#_E_",get_mcoffee_4_tcoffee(), "New_KS.267.seq", get_mcoffee_4_tcoffee(), "New_KS.267.obs", get_cache_dir()); - S=seq2template_seq (S,buf, F); - return S; - } - else if ( strm ( template_list, "PSISSP") || strm (template_list, "PSIGOR")) - { - - /*Computes a GOR consensus on a psi-blast output*/ - check_program_is_installed (GOR4_4_TCOFFEE,NULL, NULL,GOR4_ADDRESS, INSTALL_OR_DIE); - check_blast_is_installed(server); - - sprintf ( buf, "SCRIPT_tc_generic_method.pl@mode#psissp_template@seq#%s/%s@obs#%s/%s@cache#%s@minid#%d@maxid#%d@mincov#%d@server#%s@type#_E_",get_mcoffee_4_tcoffee(), "New_KS.267.seq", get_mcoffee_4_tcoffee(), "New_KS.267.obs", get_cache_dir(), BmI,BMI,BmC,server); - S=seq2template_seq (S,buf, F); - return S; - } - else if ( strm ( template_list, "TM")) - { - - /*predict transmembrane structure*/ - check_program_is_installed (HMMTOP_4_TCOFFEE,NULL, NULL,HMMTOP_ADDRESS, INSTALL_OR_DIE); - sprintf ( buf, "SCRIPT_tc_generic_method.pl@mode#tm_template@arch#%s/%s@psv#%s/%s@type#_T_",get_mcoffee_4_tcoffee(), "hmmtop.arch", get_mcoffee_4_tcoffee(), "hmmtop.psv"); - S=seq2template_seq (S,buf, F); - return S; - } - else if ( strm ( template_list, "PSITM")) - { - - /*predict transmembrane structure*/ - check_program_is_installed (HMMTOP_4_TCOFFEE,NULL, NULL,HMMTOP_ADDRESS, INSTALL_OR_DIE); - check_blast_is_installed(server); - - sprintf ( buf, "SCRIPT_tc_generic_method.pl@mode#psitm_template@arch#%s/%s@psv#%s/%s@cache#%s@minid#%d@maxid#%d@mincov#%d@server#%s@type#_T_",get_mcoffee_4_tcoffee(), "hmmtop.arch", get_mcoffee_4_tcoffee(), "hmmtop.psv",get_cache_dir(), BmI,BMI,BmC,server); - S=seq2template_seq (S,buf, F); - return S; - } - - else if (strm ( template_list, "PSIBLAST")) - { - check_blast_is_installed(server); - sprintf ( buf, "SCRIPT_tc_generic_method.pl@mode#psiprofile_template@database#%s@method#psiblast@cache#%s@minid#%d@maxid#%d@mincov#%d@server#%s@type#_R_", prot_db,get_cache_dir(),BmI,BMI,BmC,server); - S=seq2template_seq (S,buf, F); - - return S; - } - else if (strm ( template_list, "BLAST") ) - { - check_blast_is_installed(server); - sprintf ( buf, "SCRIPT_tc_generic_method.pl@mode#profile_template@database#%s@method#blastp@cache#%s@minid#%d@maxid#%d@mincov#%d@server#%s@type#_R_", prot_db,get_cache_dir(),BmI,BMI,BmC,server); - S=seq2template_seq (S,buf, F); - - return S; - } - else if ( strm ( template_list, "EXPRESSO") || strm (template_list, "PDB")) - { - check_blast_is_installed(server); - - int isRNA = 0; - int i; - for (i= 0; i < S->len[0]; ++i) - { - isRNA = (isRNA || is_rna(S->seq[0][i])); - } - - if (isRNA) - { - sprintf ( buf, "SCRIPT_tc_generic_method.pl@mode#pdb_template@database#%s@method#blastn@cache#%s@minid#%d@maxid#%d@mincov#%d@server#%s@type#_P_",pdb_db, get_cache_dir(),PmI,PMI,PmC, server); - } - else - { - sprintf ( buf, "SCRIPT_tc_generic_method.pl@mode#pdb_template@database#%s@method#blastp@cache#%s@minid#%d@maxid#%d@mincov#%d@server#%s@type#_P_",pdb_db, get_cache_dir(),PmI,PMI,PmC, server); - } - return seq2template_seq (S,buf, F); - } - - else if ( strm (template_list, "RCOFFEE") || strm (template_list, "RNA")) - { - char *file_struc_clac = vtmpnam (NULL); - FILE* struc_calc_f =vfopen(file_struc_clac,"w"); - int i; - int j = 0; - for (i = 0; i< S->nseq; ++i) - { - if (S->T[i]->P) - { - ++j; - fprintf(struc_calc_f,"%s %s\n",S->name[i],S->T[i]->P->template_file); - } - } - - vfclose(struc_calc_f); - check_program_is_installed (RNAPLFOLD_4_TCOFFEE,NULL, NULL,RNAPLFOLD_ADDRESS, IS_FATAL); - sprintf ( buf, "SCRIPT_tc_generic_method.pl@mode#RNA_template@type#_F_"); - if (j > 0) - { - S = seq2template_seq (S,buf,F); - sprintf ( buf, "SCRIPT_tc_generic_method.pl@mode#calc_rna_template@pdbfile#%s@cache#%s@type#_F_", file_struc_clac,get_cache_dir()); - } -// printf("IN T_\n"); - return seq2template_seq (S,buf,F); - } - - /*2: Templates from seqnames (SELF) or named like the sequences (SEQFILE)*/ - else if ( strstr (template_list, "SELF_") ||strstr (template_list, "SEQFILE_") ) - { - int a; - char *p; - - //add template - for (a=0; a< S->nseq; a++) - { - - if ( (p=strstr (template_list,"SELF_")))p=S->name[a]; - else if ( strstr (template_list, "SEQFILE_"))p=template_list; - else - { - fprintf ( stderr, "\nUnkown mode for Template [FATAL:%s]\n", PROGRAM); - myexit (EXIT_FAILURE); - } - - if ( strstr (template_list, "_P_") && !(S->T[a])->P) - { - (S->T[a])->P =fill_P_template ( S->name[a], p,S);//PDB - - } - else if ( strstr (template_list, "_S_") && !(S->T[a])->S)(S->T[a])->S =fill_S_template ( S->name[a], p,S);//Sequence - else if ( strstr (template_list, "_R_" )&& !(S->T[a])->R)(S->T[a])->R =fill_R_template ( S->name[a], p,S);//pRofile - else if ( strstr (template_list, "_G_" )&& !(S->T[a])->G)(S->T[a])->G =fill_G_template ( S->name[a], p,S);//Genomic - else if ( strstr (template_list, "_F_" )&& !(S->T[a])->F)(S->T[a])->F =fill_F_template ( S->name[a], p,S);//Fold - else if ( strstr (template_list, "_T_" )&& !(S->T[a])->T)(S->T[a])->T =fill_T_template ( S->name[a], p,S);//Trans Membrane - else if ( strstr (template_list, "_E_" )&& !(S->T[a])->E)(S->T[a])->E =fill_E_template ( S->name[a], p,S);//Secondary Structure - else if ( strstr (template_list, "_U_" )&& !(S->T[a])->U)(S->T[a])->U =fill_U_template ( S->name[a], p,S);//unicode, list template - - } - return S; - } - - /*2: Templates comes in a template_file*/ - else if ( template_list==NULL || format_is_fasta (template_list)) - { - Sequence *T; - int a, i; - int ntemp=0; - T=(template_list!=NULL)?get_fasta_sequence (template_list, NULL):S; - for (a=0; a< T->nseq; a++) - { - - char *p; - if ((i=name_is_in_list(T->name[a], S->name, S->nseq, MAXNAMES))!=-1) - { - if ( (p=strstr (T->seq_comment[a], " _P_ ")) && !(S->T[i])->P &&( (S->T[i])->P=fill_P_template (S->name[i],p,S))) - { - ntemp++; - } - else if ( (p=strstr (T->seq_comment[a], " _F_ ")) && !(S->T[i])->F &&( (S->T[i])->F=fill_F_template (S->name[i],p,S)))ntemp++; - else if ( (p=strstr (T->seq_comment[a], " _S_ ")) && !(S->T[i])->S &&( (S->T[i])->S=fill_S_template (S->name[i],p,S)))ntemp++; - - else if ( (p=strstr (T->seq_comment[a], " _R_ ")) && !(S->T[i])->R &&( (S->T[i])->R=fill_R_template (S->name[i],p,S)))ntemp++; - else if ( (p=strstr (T->seq_comment[a], " _G_ ")) && !(S->T[i])->G &&( (S->T[i])->G=fill_G_template (S->name[i],p,S)))ntemp++; - else if ( (p=strstr (T->seq_comment[a], " _T_ ")) && !(S->T[i])->T &&( (S->T[i])->T=fill_T_template (S->name[i],p,S)))ntemp++; - else if ( (p=strstr (T->seq_comment[a], " _E_ ")) && !(S->T[i])->E &&( (S->T[i])->E=fill_E_template (S->name[i],p,S)))ntemp++; - else if ( (p=strstr (T->seq_comment[a], " _U_ ")) && !(S->T[i])->U &&( (S->T[i])->E=fill_U_template (S->name[i],p,S)))ntemp++; - - if (T!=S)strcat (S->seq_comment[i], T->seq_comment[a]); - } - } - - if (T!=S)free_sequence (T, -1); - - if ( remove_template_file==2 || ntemp==0) - { - vremove (template_list); - } - else - if (template_list)display_output_filename ( stdout, "Template_List","fasta_seq", template_list, STORE); - return S; - } - - /*3 Templates are generated with a script*/ - else if (strstr (template_list, "SCRIPT_") && get_string_variable ("multi_core") && strstr (get_string_variable ("multi_core"), "templates") && get_nproc()>1) - { - char *tmp1,*command; - Alignment *A; - char **temp_file,**seq_file; - int * pid_list, pid, npid, submited; - int nproc, max_nproc; - - char outfile[1000]; - static char *script; - static int ntemp; - char *p; - int z, i; - if (!script)script=vcalloc ( 1000, sizeof(char)); - - ntemp++; - - command=vcalloc ( 1000, sizeof (char)); - tmp1=vtmpnam (NULL); - - A=seq2aln (S,NULL, 0); - string_array_upper(A->seq_al, A->nseq); - output_fasta_seq (tmp1, A); - sprintf ( script, "%s", after_strstr (template_list, "SCRIPT_")); - - if ((p=strstr (template_list, "@type#"))) - p+=strlen ("@type#"); - if (F) - { - sprintf (outfile, "%s%s_%s%d.template_list", F->path,F->name,template_type2short_type_name(p),ntemp); - } - else - { - F=parse_fname (S->file[0]); - sprintf (outfile, "%s%s_%s%d.template_list",F->path, F->name,template_type2short_type_name(p),ntemp); - free_fname (F); - } - - nproc=get_nproc(); - max_nproc=2*nproc; - - script=substitute(script, "@", " -"); - script=substitute(script, "#", "="); - - temp_file=vcalloc ( A->nseq, sizeof (char*)); - seq_file =vcalloc (A->nseq, sizeof (char*)); - pid_list =vcalloc (MAX_N_PID, sizeof (int *)); - - fprintf ( stderr, "\n\t------ Fetch %Templates [Multi Core Mode %d CPUs]\n",get_nproc()); - for (npid=0, submited=0,i=0; inseq; i++) - { - FILE *fp2; - seq_file[i]=vtmpnam (NULL); - temp_file[i]=vtmpnam (NULL); - fp2=vfopen (seq_file[i], "w"); - fprintf ( fp2, ">%s\n%s\n", S->name[i], S->seq[i]); - vfclose (fp2); - - pid=vfork(); - if (pid==0) - { - initiate_vtmpnam (NULL); - if ( strstr (script, "tc_generic_method")) - { - //sprintf ( command, "%s -other_pg %s -infile=%s -outfile=%s -tmpdir=%s",get_string_variable ("t_coffee"),script,seq_file[i],temp_file[i],get_tmp_4_tcoffee()); - sprintf ( command, "%s -infile=%s -outfile=%s -tmpdir=%s",script,seq_file[i],temp_file[i],get_tmp_4_tcoffee()); - if (strstr (command, "EBI"))get_email (); - } - else - //sprintf ( command, "%s -other_pg %s -infile=%s -outfile=%s",get_string_variable("t_coffee"),script,seq_file[i],temp_file[i]); - sprintf ( command, "%s -infile=%s -outfile=%s",script,seq_file[i],temp_file[i]); - command=substitute(command, "@", " "); - my_system ( command); - exit (EXIT_SUCCESS); - } - else - { - pid_list[pid]=npid; - set_pid(pid); - npid++; - submited++; - submited=vwait_npid(submited,max_nproc,nproc); - } - } - - submited=vwait_npid(submited,0,0); - //Concatenate all the files - vremove (outfile); - for (i=0; iseq_al, A->nseq); - output_fasta_seq (tmp1, A); - sprintf ( script, "%s", after_strstr (template_list, "SCRIPT_")); - fprintf ( stderr, "\n"); - if ((p=strstr (template_list, "@type#"))) - p+=strlen ("@type#"); - if (F) - { - sprintf (outfile, "%s%s_%s%d.template_list", F->path,F->name,template_type2short_type_name(p),ntemp); - } - else - { - F=parse_fname (S->file[0]); - sprintf (outfile, "%s%s_%s%d.template_list",F->path, F->name,template_type2short_type_name(p),ntemp); - free_fname (F); - } - - script=substitute(script, "@", " -"); - script=substitute(script, "#", "="); - - if ( strstr (script, "tc_generic_method")) - { - sprintf ( command, "%s -other_pg %s -infile=%s -outfile=%s -tmpdir=%s",get_string_variable ("t_coffee"),script, tmp1,outfile,get_tmp_4_tcoffee()); - if (strstr (command, "EBI"))get_email (); - } - else sprintf ( command, "%s -other_pg %s -infile=%s -outfile=%s",get_string_variable("t_coffee"),script, tmp1, outfile); - - vremove (outfile); - command=substitute(command, "@", " "); - - my_system ( command); - - free_aln (A); - - if ( check_file_exists (outfile) && format_is_fasta(outfile)) - { - S=seq2template_seq (S, outfile, F); - } - else if (strstr (command, "webblast.pl"))return S; - else - { - - add_warning (stderr, "\nWARNING: Could not Run %s to find templates[%s]\n",command, PROGRAM); - return NULL; - } - - vfree (command); - return S; - } - - return S; -} - -char* seq2template_file (Sequence *S, char *file) -{ - Alignment *A; - int i; - - if (file==NULL)file=vtmpnam (NULL); - - seq2template_file2 (S, file, "w"); - for (i=0; inseq; i++) - if ( (A=seq2R_template_profile (S, i))) - { - seq2template_file2 (A->S, file, "a"); - } - return file; -} - -int seq2template_file2 (Sequence *S, char *file, char *mode) -{ - FILE *fp; - int i; - char buf1[10000]; - char buf2[10000]; - struct X_template *X; - - fp=vfopen ( file, mode); - for ( i=0; i< S-> nseq; i++) - { - buf1[0]=0; - if ( (X=(S->T[i])->P)){sprintf (buf2, " %s %s ", X->template_type, X->template_file);strcat (buf1, buf2);} - /*if ( (X=(S->T[i])->S)){sprintf (buf2, " %s %s ", X->template_type, X->template_file);strcat (buf1, buf2);}*/ - if ( (X=(S->T[i])->R)){sprintf (buf2, " %s %s ", X->template_type, X->template_file);strcat (buf1, buf2);} - if ( (X=(S->T[i])->G)){sprintf (buf2, " %s %s ", X->template_type, X->template_file);strcat (buf1, buf2);} - if (buf1[0])fprintf ( fp, ">%s %s\n", S->name[i], buf1); - } - vfclose (fp); - return EXIT_SUCCESS; -} - - - - -int seq2n_X_template ( Sequence *S, char *type) -{ - int a, n; - - for (n=0,a=0; a< S->nseq; a++) - { - if ( strm2 (type, "_P_","_*_") && (S->T[a])->P)n++; - if ( strm2 (type, "_F_","_*_") && (S->T[a])->F)n++; - if ( strm2 (type, "_S_","_*_") && (S->T[a])->S)n++; - if ( strm2 (type, "_R_","_*_") && (S->T[a])->R)n++; - if ( strm2 (type, "_G_","_*_") && (S->T[a])->G)n++; - } - return n; -} -struct X_template *fill_X_template ( char *name, char *p, char *token) -{ - struct X_template *X; - - - - - char *k; - - X=vcalloc (1, sizeof (X_template)); - sprintf ( X->seq_name, "%s", name); - if ( (k=strstr (p, token)))sscanf (k+strlen(token), "%s",X->template_name); - else sprintf (X->template_name, "%s", p); - - - /*Add a Structure HERE*/ - sprintf ( X->template_type, "%s", token); - if ( strm (token, "_P_"))X->VP=vcalloc (1, sizeof (P_template)); - if ( strm (token, "_F_"))X->VF=vcalloc (1, sizeof (F_template)); - - if ( strm (token, "_S_"))X->VS=vcalloc (1, sizeof (S_template)); - if ( strm (token, "_R_"))X->VR=vcalloc (1, sizeof (R_template)); - if ( strm (token, "_G_"))X->VG=vcalloc (1, sizeof (G_template)); - if ( strm (token, "_T_"))X->VT=vcalloc (1, sizeof (T_template)); - if ( strm (token, "_E_"))X->VE=vcalloc (1, sizeof (E_template)); - if ( strm (token, "_U_"))X->VU=vcalloc (1, sizeof (U_template)); - - return X; -} - -struct X_template* free_X_template ( struct X_template *X) -{ - if (X->VP) - { - vfree (X->VP); - } - if (X->VF) - { - vfree (X->VF); - } - if ( X->VS) - { - free_sequence ((X->VS)->S, -1); - vfree (X->VS); - } - if ( X->VR) - { - free_aln ((X->VR)->A); - vfree (X->VR); - } - if ( X->VG) - { - free_sequence ((X->VG)->S, -1); - vfree (X->VG); - } - - vfree (X); - return NULL; -} - -FILE * display_sequence_templates (Sequence *S,int i, FILE *io) -{ - - - io=display_X_template ( (S->T[i])->P, io); - - io=display_X_template ( (S->T[i])->F, io); - - io=display_X_template ( (S->T[i])->S, io); - - io=display_X_template ( (S->T[i])->R, io); - io=display_X_template ( (S->T[i])->G, io); - io=display_X_template ( (S->T[i])->T, io); - io=display_X_template ( (S->T[i])->E, io); - - return io; -} - -FILE * display_X_template (struct X_template *X, FILE *io) -{ - - if ( !X) return io; - if ( !strm (X->template_type, "_S_"))fprintf (io, "\n\t%s: Template=%s, File=%s",template_type2type_name (X->template_type), X->template_name,X->template_file); - return io; -} -char *template_type2short_type_name (char *type) -{ - //add_template - if (!type)return ""; - else if ( strstr (type, "_P_")) return "pdb"; - else if ( strstr (type, "_F_")) return "rfold"; - else if ( strstr (type, "_S_")) return "seq"; - else if ( strstr (type, "_R_")) return "prf"; - else if ( strstr (type, "_G_")) return "genome"; - else if ( strstr (type, "_E_")) return "ssp"; - else if ( strstr (type, "_T_")) return "tmp"; - else if ( strstr (type, "_U_")) return "unicode"; - else return type; -} -char *template_type2type_name (char *type) -{ - //add_template - if ( strstr (type, "_P_")) return "PDB struc"; - else if ( strstr (type, "_F_")) return "RNA Fold"; - else if ( strstr (type, "_S_")) return "Sequeence"; - else if ( strstr (type, "_R_")) return "Profile"; - else if ( strstr (type, "_G_")) return "Genomic"; - else if ( strstr (type, "_E_")) return "Protein Secondary Structure"; - else if ( strstr (type, "_T_")) return "Protein Trans Membrane Structure "; - else if ( strstr (type, "_U_")) return "Unicode and strings"; - - else return type; -} -struct X_template *fill_F_template ( char *name,char *p, Sequence *S) -{ - /*Profile template*/ - struct X_template *F; - - F=fill_X_template ( name, p, "_F_"); - sprintf (F->template_format , "TCOFFEE_LIBRARY"); - if (!F || !check_file_exists (F->template_name)) - { - fprintf ( stderr, "\nWARNING: Could Not Fill _F_ (Fold) template for sequence |%s|", name); - free_X_template (F); - return NULL; - } - else if ( check_file_exists (F->template_name)) - { - sprintf ( F->template_file, "%s", F->template_name); - } - - return F; - -} - - -struct X_template *fill_P_template ( char *name,char *p, Sequence *S) -{ - struct X_template *P; - Sequence *PS; - Alignment *A; - int sim, cov, i; - char *buf; - - - P=fill_X_template ( name, p, "_P_"); - sprintf (P->template_format , "pdb"); - - if (!P) - { - //fprintf ( stderr, "\nWARNING: Could Not Fill _P_ template for sequence |%s|", name); - free_X_template (P); - return NULL; - } - else if ( check_file_exists (P->template_name)) - { - - sprintf ( P->template_file, "%s", P->template_name); - buf=path2filename (P->template_name); - if (P->template_name!=buf) - { - sprintf ( P->template_name, "%s",buf ); - vfree (buf); - } - } - else - { - char *st; - - - st=is_pdb_struc (P->template_name); - if (st) - { - if (st!=P->template_file)sprintf ( P->template_file, "%s", st); - } - } - - /*Make a first run to fix relaxed PDB files*/ - buf=fix_pdb_file (P->template_file); - - if ( buf!=P->template_file) - { - - sprintf ( P->template_file, "%s",buf); - vfree (buf); - } - - /*Check the PDB FILE EXISTS*/ - if (!is_pdb_file (P->template_file)) - { - - add_warning(stderr, "\nWARNING: _P_ Template |%s| Could Not Be Found\n",p); - free_X_template (P); - return NULL; - } - else - { - buf= get_pdb_id (P->template_file); - if (buf!=(P->VP)->pdb_id) - { - sprintf ((P->VP)->pdb_id, "%s", buf); - vfree (buf); - } - } - - /*Check the target sequence is similar enough*/ - - PS=get_pdb_sequence (P->template_file); - - - if ( PS==NULL) - { - add_warning( stderr, "\nWARNING: _P_ Template |%s| Could Not be Used for Sequence |%s|: Structure Not Found", P->template_name, name); - free_X_template (P);P=NULL; - } - else - { - int minsim=get_int_variable ("pdb_min_sim"); - int mincov=get_int_variable ("pdb_min_cov"); - - - i=name_is_in_list (name, S->name, S->nseq, 100); - - A=align_two_sequences (S->seq[i], PS->seq[0],"idmat",-3,0, "fasta_pair_wise"); - cov=aln2coverage (A, 0); - sim=aln2sim (A, "idmat"); - - if (simtemplate_name,name, sim, minsim); - free_X_template (P); - P=NULL; - } - else if ( covtemplate_name,name, cov, mincov); - free_X_template (P);P=NULL; - } - free_aln(A); - free_sequence (PS, -1); - } - - return P; -} - -struct X_template *fill_S_template ( char *name,char *p, Sequence *Seq) -{ - struct X_template *S; - S=fill_X_template ( name, p, "_S_"); - if ( strm (name, p))sprintf ( S->template_file, "%s",output_fasta_seqX (NULL,"w",Seq,NULL, seq_name2index (name, Seq))); - (S->VS)->S=get_fasta_sequence (S->template_file, NULL); - return S; -} -struct X_template *fill_R_template ( char *name,char *p, Sequence *S) -{ - /*Profile template*/ - struct X_template *R; - - - R=fill_X_template ( name, p, "_R_"); - sprintf (R->template_format , "fasta_aln"); - - - if (!is_aln(R->template_name) && !is_seq (R->template_name)) - { - - add_warning ( stderr, "\nWARNING: _R_ Template %s Could Not Be Found\n",R->template_name); - free_X_template (R); - return NULL; - } - else - { - int s; - Sequence *S1; - Alignment *A1; - - (R->VR)->A=main_read_aln (R->template_name, NULL); - - if ( !S) - sprintf ( R->template_file, "%s", R->template_name); - else - { - s=name_is_in_list(name, S->name, S->nseq, 100); - if ( s!=-1) - { - S1=fill_sequence_struc (1, &S->seq[s], &S->name[s]); - A1=seq2aln (S1,NULL, RM_GAP); - - (R->VR)->A=trim_aln_with_seq (A1, (R->VR)->A); - - sprintf ( R->template_file, "%s", vtmpnam (NULL)); - output_clustal_aln (R->template_file, (R->VR)->A); - } - else - sprintf ( R->template_file, "%s", R->template_name); - } - (R->VR)->A=aln2profile ((R->VR)->A); - } - return R; -} - -struct X_template *fill_T_template ( char *name,char *p, Sequence *S) -{ - /*Profile template*/ - struct X_template *T; - - T=fill_X_template ( name, p, "_T_"); - sprintf (T->template_format , "fasta_seq"); - - if (!is_aln(T->template_name) && !is_seq (T->template_name)) - { - - add_warning ( stderr, "\nWARNING: _T_ Template %s Could Not Be Found\n",T->template_name); - free_X_template (T); - return NULL; - } - else - { - - (T->VT)->S=main_read_seq(T->template_name); - sprintf ( T->template_file, "%s", T->template_name); - } - return T; -} -//add template -struct X_template *fill_U_template ( char *name,char *p, Sequence *S) -{ - /*Profile template*/ - struct X_template *U; - - U=fill_X_template ( name, p, "_U_"); - sprintf (U->template_format , "string list"); - - if (!check_file_exists(U->template_name)) - { - add_warning ( stderr, "\nWARNING: _U_ Template %s Could Not Be Found\n",U->template_name); - free_X_template (U); - return NULL; - } - else - { - //(U->VU)->list=file2string(U->template_name); - sprintf ( U->template_file, "%s", U->template_name); - } - return U; -} -struct X_template *fill_E_template ( char *name,char *p, Sequence *S) -{ - /*Profile template*/ - struct X_template *E; - - - E=fill_X_template ( name, p, "_E_"); - sprintf (E->template_format , "fasta_seq"); - - if (!is_aln(E->template_name) && !is_seq (E->template_name)) - { - - add_warning ( stderr, "\nWARNING: _E_ Template %s Could Not Be Found\n",E->template_name); - free_X_template (E); - return NULL; - } - else - { - (E->VE)->S=main_read_seq (E->template_name); - sprintf ( E->template_file, "%s", E->template_name); - } - return E; -} -struct X_template *fill_G_template ( char *name,char *p, Sequence *S) -{ - struct X_template *G; - G=fill_X_template ( name, p, "_G_"); - sprintf (G->template_format , "fasta_seq"); - - /*1: Get the sequence from another file if needed*/ - if ( strm (name, p))sprintf ( G->template_file, "%s",output_fasta_seqX (NULL,"w",S,NULL, seq_name2index (name, S))); - else if ( strstr (p, "SEQFILE_")) - { - Sequence *ST; - int i2; - - - ST=main_read_seq (after_strstr ( p,"SEQFILE_G_")); - - i2=seq_name2index (name, ST); - if ( i2!=-1) - { - sprintf ( G->template_file, "%s",output_fasta_seqX (NULL,"w",ST,NULL, i2)); - sprintf ( G->template_name, "%s", name); - } - free_sequence (ST, -1); - } - else sprintf (G->template_file, "%s", G->template_name); - - - /*2: Put the template in VG->S*/ - if (!is_seq (G->template_file)) - { - add_warning ( stderr, "\nWARNING: _G_ Template %s Could Not Be Found \n",p); - - free_X_template (G); - return NULL; - } - else - { - (G->VG)->S=get_fasta_sequence (G->template_file, NULL); - } - return G; -} - - -char *seq2T_value ( Sequence *S, int n, char *value, char *type) -{ - static char *rv_buf; - X_template *X; - - if ( !rv_buf)rv_buf=vcalloc (100, sizeof(char)); - if (!(X=seq_has_template (S, n, type)))return NULL; - else - { - if (strm (value, "template_file"))return X->template_file; - else if ( strm (value, "template_name"))return X->template_name; - else if ( strm (value, "seq_name"))return X->seq_name; - else if (strm (type, "_P_")) - { - if ( strm (value, "pdb_id"))return (X->VP)->pdb_id; - } - else if ( strm (type, "_R_")) - { - if ( strm (value, "A")) - { - if ((X->VR)->A){sprintf ( rv_buf, "%d", (int)(X->VR)->A);return rv_buf;} - else return NULL; - } - } - - } - return NULL; -} -char *seq2P_pdb_id (Sequence *S, int n) -{ - if (!S->T || !S->T[n] || !(S->T[n])->P ) return NULL; - else return ((S->T[n])->P)->template_name; -} - - -char *seq2P_template_file(Sequence *S, int n) -{ - - return seq2T_value (S, n, "template_file", "_P_"); -} - -char *profile2P_template_file (Sequence *S, int n) -{ - Alignment *A; - int a; - char *p; - - if ( !(A=seq2R_template_profile (S, n)))return NULL; - for (a=0; anseq; a++) - { - if ((p=seq2P_template_file (A->S, a))!=NULL)return p; - } - return NULL; -} -Alignment * seq2R_template_profile (Sequence *S, int n) -{ - - - return (Alignment *)atop(seq2T_value (S, n, "A", "_R_")); -} -char * seq2E_template_string (Sequence *S, int n) -{ - struct X_template *T; - - if ( (T=seq_has_template (S, n, "_E_"))!=NULL) - return ((T->VE)->S)->seq[0]; - else - return NULL; -} -//add template -int* seq2U_template (Sequence *S, int n) -{ - struct X_template *T; - - if ( (T=seq_has_template (S, n, "_U_"))!=NULL) - return (T->VU)->list; - else - return NULL; -} -char * seq2T_template_string (Sequence *S, int n) -{ - struct X_template *T; - - if ( (T=seq_has_template (S, n, "_T_"))!=NULL) - return ((T->VT)->S)->seq[0]; - else - return NULL; -} - -struct X_template* seq_has_template ( Sequence *S, int n, char *mode) -{ - Template *T; - - if ( !S || !mode) return NULL; - else if ( n<0 || n>=S->nseq)return NULL; - else if ( !(S->T)) return NULL; - else if ( !(S->T[n]))return NULL; - - T=S->T[n]; - //ADD STRUCTURE - //add template - if ( strm (mode, "_P_"))return T->P; - else if ( strm (mode, "_F_"))return T->F; - else if ( strm (mode, "_S_"))return T->S; - else if ( strm (mode, "_R_"))return T->R; - else if ( strm (mode, "_T_"))return T->T; - else if ( strm (mode, "_E_"))return T->E; - else if ( strm (mode, "_U_"))return T->U; - else if ( strm (mode, "_G_"))return T->G; - else return NULL; -} - -char ** name2random_subset (char **in_name, int n_in, int n_out) -{ - char **out_name; - - int **list; - int a,max; - - - vsrand (0); - max=n_in*10000; - out_name=declare_char (n_out,MAXNAMES+1 ); - list=declare_int (n_in, 2); - - for (a=0; aname, A->nseq, A->nseq); - A=reorder_aln (A, name_list, A->nseq); - free_char (name_list, -1); - return A; -} -Alignment *aln2jacknife (Alignment *A, int nseq, int len) -{ - int a, b; - - if (nseq!=0 && nseqnseq) - { - char **name; - - name=name2random_subset (A->name, A->nseq, nseq); - A=reorder_aln (A, name, nseq); - free_char (name, -1); - } - - if (len!=0 && lenlen_aln) - { - int **l; - Alignment *B; - - l=declare_int (A->len_aln, 2); - for (a=0; a< A->len_aln; a++) - { - l[a][0]=a; - l[a][1]=rand()%(A->len_aln*1000); - } - sort_int ( l,2, 1, 0, A->len_aln-1); - B=copy_aln (A, NULL); - for ( a=0; a< len; a++) - { - for ( b=0; bnseq; b++) - { - A->seq_al[b][a]=B->seq_al[b][l[a][0]]; - } - } - for (b=0; bnseq; b++)A->seq_al[b][len]='\0'; - free_aln (B); - free_int (l, -1); - } - return A; -} -Alignment * aln2scramble_seq (Alignment *A) -{ - int **list; - char **name_list; - int a,max; - - max=100*A->nseq; - vsrand (0); - - list=declare_int (A->nseq, 2); - name_list=vcalloc (A->nseq, sizeof (char*)); - - - for (a=0; anseq; a++) - { - list[a][0]=a; - list[a][1]=rand ()%max; - } - sort_int ( list,2, 1, 0, A->nseq-1); - - for ( a=0; a< A->nseq; a++) - name_list[a]=A->seq_al[a]; - for (a=0; anseq; a++) - { - A->seq_al[a]=name_list[list[a][0]]; - } - vfree (name_list); - free_int (list, -1); - return aln2random_order (A); -} - - - -Alignment * reorder_aln ( Alignment *A, char **name, int nseq) - { - int a,sn; - Alignment *BUF; - int n=0; - int *tpp_int; - - if ( name==NULL)return aln2random_order(A); - - - BUF=copy_aln ( A,NULL); - for ( a=0; aname, A->nseq,STRING); - if ( sn==-1) - { - ; - } - else - { - - - SWAPP(A->order[n], BUF->order[sn], tpp_int); - sprintf ( A->name[n], "%s", BUF->name[sn]); - sprintf ( A->seq_al[n], "%s",BUF->seq_al[sn]); - sprintf ( A->seq_comment[n], "%s", BUF->seq_comment[sn]); - - n++; - - } - } - - for ( a=n; a< A->nseq; a++)A->name[a][0]=A->seq_al[a][0]='\0'; - A->nseq=n; - - if ( A->A)A->A=reorder_aln(A->A, name, nseq); - free_aln (BUF); - return A; - } -Sequence * reorder_seq_2 ( Sequence *A, int **order,int field, int nseq) - { - char **name; - int a; - - if (!A || !order) return A; - name=declare_char (A->nseq, 100); - for (a=0; aname[order[a][field]]); - A=reorder_seq (A, name,nseq); - free_char (name, -1); - return A; - } -Sequence * reorder_seq ( Sequence *A, char **name, int nseq) - { - int a,sn; - Sequence *nA; - - - nA=duplicate_sequence (A); - - - for ( a=0; a< nseq; a++) - { - sn=name_is_in_list (name[a] ,nA->name, nA->nseq, 100); - if (sn==-1)continue; - - if ( nA->file) sprintf ( A->file[a], "%s", nA->file[sn]); - - if ( nA->seq_comment)sprintf ( A->seq_comment[a], "%s", nA->seq_comment[sn]); - if ( nA->aln_comment)sprintf ( A->aln_comment[a], "%s", nA->aln_comment[sn]); - sprintf ( A->seq[a], "%s", nA->seq[sn]); - A->len[a]=nA->len[sn]; - sprintf ( A->name[a], "%s", nA->name[sn]); - A->T[a][0]=nA->T[sn][0]; - } - A->nseq=nseq; - free_sequence (nA, nA->nseq); - - return A; -} - -char * concatenate_seq ( Sequence *S, char *conc, int *order) - { - int a; - - vfree (conc); - conc=vcalloc ( S->nseq*S->max_len, sizeof (char)); - - for ( a=0; a< S->nseq; a++) - { - conc=strcat ( conc, S->seq[order[a]]); - } - return conc; - - } - - - - -Alignment * rotate_aln ( Alignment *A, char *name) -{ - Alignment *B; - int a, b; - - B=declare_aln2 (A->len_aln, A->nseq+1); - for ( a=0; a< A->nseq; a++) - for ( b=0; b< A->len_aln; b++) - { - B->seq_al[b][a]=A->seq_al[a][b]; - } - for (a=0; a< A->len_aln; a++) - if (name && name[0])sprintf ( B->name[a], "%s_%s%d", name, (a<9)?"0":"",a+1); - else - sprintf ( B->name[a], "%d", a+1); - - - for (a=0; a< A->len_aln; a++)B->seq_al[a][A->nseq]='\0'; - B->len_aln=A->nseq; - B->nseq=A->len_aln; - /*free_aln (A);*/ - return B; -} - -Alignment * invert_aln ( Alignment *A) -{ - char *buf; - int l, a, b, c; - - for ( a=0; a< A->nseq; a++) - { - l=strlen ( A->seq_al[a]); - buf=vcalloc ( l+1,sizeof (char) ); - - for ( c=l-1,b=0; b< l; b++, c--) - { - buf[c]=A->seq_al[a][b]; - } - buf[l]='\0'; - sprintf ( A->seq_al[a], "%s", buf); - } - vfree(buf); - return A; -} -char * complement_string (char *s) -{ - char *buf; - int l, a, b, c; - - l=strlen (s); - for ( b=0; b< l; b++) - { - char r; - r=s[b]; - if ( r=='a')r='t'; - else if (r=='A')r='T'; - else if (r=='t')r='a'; - else if (r=='T')r='A'; - else if (r=='g')r='c'; - else if (r=='G')r='C'; - else if (r=='c')r='g'; - else if (r=='C')r='G'; - s[b]=r; - } - - return invert_string (s); -} -Alignment * complement_aln ( Alignment *A) -{ - char *buf; - int l, a, b, c; - - for ( a=0; a< A->nseq; a++) - { - A->seq_al[a]=complement_string (A->seq_al[a]); - } - - return A; -} - -Alignment * extract_nol_local_aln(Alignment *A, int start, int max_end) - { - A=extract_aln ( A, start, max_end); - A=trunkate_local_aln (A); - return A; - } - -Alignment * alnpos_list2block (Alignment *A, int n, char **in_list) -{ - int *pos; - int a; - char **list; - int list_declared=0; - Alignment *B; - - if (check_file_exists (in_list[0])) - { - int mn; - char ***tmp_list; - - mn=count_n_line_in_file (in_list[0]); - list=declare_char (mn, 100); - list_declared=1; - tmp_list=file2list (in_list[0], " "); - a=0; - n=0; - while (tmp_list[a]) - { - if (tmp_list[a][1][0]!='!') - { - sprintf (list[n++], "%s", tmp_list[a][1]); - } - a++; - } - free_arrayN ((void **)tmp_list, 3); - } - else - { - list=in_list; - } - - - pos=vcalloc (A->len_aln, sizeof (int)); - for (a=0; a=end || end>A->len_aln+1) - { - add_warning ( stderr, "\nWARNING: Illegal coordinates in extract_pos_list [%s]", list[a]); - return A; - } - start--; end--; - for (a=start; aA->len_aln) - { - add_warning ( stderr, "\nWARNING: Illegal coordinates in extract_pos_list [%s]", list[a]); - } - p--; - pos[p]=1; - } - } - B=alnpos2block(A, pos, NULL); - vfree (pos); - if ( list_declared)free_char (list, -1); - - return B; -} -Alignment * aln2block (Alignment *A, int start, int end, Alignment *B) -{ - if ( !A || start<=0 || start>=end || end>A->len_aln+1) - { - add_warning ( stderr, "\nWARNING: Illegal coordinates in extract_block start=%d end=%d len=%d [Note : [start-end[, with [1...n]", start, end, A->len_aln); - return A; - } - else - { - int *pos, p; - start--; - end--; - pos=vcalloc (A->len_aln, sizeof (int)); - for (p=start;plen_aln=0; - for (a=0; a<=A->len_aln; a++) - { - if ( pos[a]!=0 || a==A->len_aln) - { - for ( b=0; bnseq; b++) - B->seq_al[b][B->len_aln]=A->seq_al[b][a]; - if ( a!=A->len_aln)B->len_aln++; - } - } - - return B; -} -Alignment * extract_aln ( Alignment *A, int start, int end) -{ - return extract_aln2 ( A, start, end, "cons"); -} - -Alignment * extract_aln2 ( Alignment *A, int in_start, int in_end, char *seq) - { - char *tmp; - FILE *fp; - - - tmp=vtmpnam (NULL); - fp=vfopen (tmp, "w"); - fprintf ( fp, "%s %d %d\n", seq, in_start, in_end); - vfclose (fp); - return extract_aln3 (A,tmp); - } -Alignment * extract_aln3 ( Alignment *B, char *file) - { - int a, b, c; - int start, end; - int n, i, s, nline=0; - FILE *fp; - Alignment *A=NULL; - int *col; - char name[MAXNAMES]; - char line[VERY_LONG_STRING]; - int *offset; - - /*Reads in a file - #comment - ! seq_name offset - seqname pos - OR - seqname start end[ - modifies the incoming alignment - */ - - offset=vcalloc ( B->nseq+1, sizeof (int)); - fp=vfopen (file,"r"); - while ( (c=fgetc(fp))!=EOF) - { - s=-1; - fgets ( line, VERY_LONG_STRING,fp); - if ( c=='!') - { - sscanf (line, "%s %d", name, &start); - s=name_is_in_list (name,B->name,B->nseq,MAXNAMES); - } - if (s!=-1) - offset[s]=start; - } - - vfclose (fp); - - A=copy_aln (B, A); - col=vcalloc ( A->len_aln, sizeof (int)); - - fp=vfopen ( file, "r"); - while ( (c=fgetc(fp))!=EOF) - { - nline++; - if ( c=='#' || c=='!')fgets ( line, VERY_LONG_STRING,fp); - else - { - ungetc(c, fp); - fgets ( line, VERY_LONG_STRING,fp); - - if (sscanf (line, "%s %d %d", name, &start, &end)==3); - else if (sscanf (line, "%s %d", name, &start)==2) - { - end=start+1; - } - else - { - add_warning ( stderr, "\nWARNING: wrong format in coordinate file (line=%d)\n", nline); - continue; - } - if ( end==0)end=A->len_aln+1; - - s=name_is_in_list (name,A->name,A->nseq,MAXNAMES); - - - if ( s==-1 && !strm (name, "cons")) - { - add_warning ( stderr, "\nWARNING: Seq %s does not belong to the alignment (line %d)\n", name,nline); - continue; - } - else if ( start>end) - { - add_warning ( stderr, "\nWARNING: Illegal coordinates [%s %d %d] (line %d)\n", name,start, end,nline); - continue; - } - else - { - int done=0; - if ( s!=-1) - { - start-=offset[s]-1; - end-=offset[s]-1; - } - for (n=0, a=0; done!=1 && a< A->len_aln; a++) - { - i=(strm (name, "cons"))?1:!is_gap(A->seq_al[s][a]); - - n+=i; - if (n>=start && n=end)done=1; - //if (n>=start && n=end)a=A->len_aln; - } - if ( done==0) - { - HERE ("Warning Missing positions in File %s",file ); - } - } - } - } - vfclose ( fp); - - - - /*Extract [start-end[*/ - for ( b=0,a=0; a< A->len_aln; a++) - { - if ( col[a]) - { - for (c=0; c< A->nseq; c++)A->seq_al[c][b]=A->seq_al[c][a]; - b++; - } - } - A->len_aln=b; - - for (c=0; c< A->nseq; c++)A->seq_al[c][A->len_aln]='\0'; - vfree (col); - - return A; - - } -Alignment * trunkate_local_aln ( Alignment *A) - { - int a, b; - int **pos; - int **cache; - int seq; - - - cache=declare_int (return_max_int (A->order,read_size_int ( A->order,sizeof (int*)),0)+1,return_max_int (A->order,read_size_int ( A->order,sizeof (int*)),1)+A->len_aln+1); - pos=aln2pos_simple(A,A->nseq); - - for ( b=0; blen_aln; b++) - for ( a=0; a< A->nseq; a++) - { - seq=A->order[a][0]; - if ( pos[a][b]<=0); - else if ( pos[a][b]>0) - { - - if (cache[seq][pos[a][b]]==0)cache[seq][pos[a][b]]++; - else if ( cache[seq][pos[a][b]]>=1) - { - cache[seq][pos[a][b]]++; - A->seq_al[a][b]='\0'; - } - } - } - - A->len_aln=get_shortest_string ( A->seq_al, A->nseq, NULL, NULL); - pad_string_array ( A->seq_al, A->nseq, A->len_aln, '-'); - - free_int (pos, -1); - free_int ( cache,-1); - - - return A; - } - -int get_nol_aln_border ( Alignment *A, int start, int direction) - { - int a, b; - int **pos; - int **cache; - int seq,end; - - /*This Function Returns the limit position for a non overlaping alignment*/ - - cache=declare_int (return_max_int (A->order,read_size_int ( A->order,sizeof (int*)),0)+1,return_max_int (A->order,read_size_int ( A->order,sizeof (int)),1)+A->len_aln+1); - pos=aln2pos_simple(A,A->nseq); - end=(direction==GO_RIGHT)?A->len_aln:-1; - - - for ( b=start; b!=end;b+=direction) - for ( a=0; a< A->nseq; a++) - { - seq=A->order[a][0]; - if ( pos[a][b]<=0); - else if ( pos[a][b]>0) - { - - if (cache[seq][pos[a][b]]==0)cache[seq][pos[a][b]]++; - else if ( cache[seq][pos[a][b]]>=1) - { - cache[seq][pos[a][b]]++; - free_int(cache, -1); - return b-direction; - } - } - } - - free_int ( cache,-1); - free_int (pos, -1); - return end-direction; - } - - - - - -char * extract_defined_seq ( char *in, int in_of, int in_start, int *aa_def, int dir, int *out_start, char *out) - { - int start=0, end,l; - int b, c, d; - - - - if ( dir==GO_LEFT){start=in_start-1;} - else if ( dir==GO_RIGHT){start=in_start+1;} - - end=start; - while (aa_def[end]!=UNDEFINED) - { - end+=dir; - } - end-=dir; - - if (end=start && c<=end) - { - if ( out_start[0]==-1)out_start[0]=c-!is_gap(in[b]); - out[d++]=in[b]; - } - } - out[d]='\0'; - - - return out; - } -Alignment * concatenate_aln ( Alignment *A1, Alignment *A2, char *spacer) -{ - Alignment *A; - int a, i; - - A=declare_aln2( A1->nseq+A2->nseq , A1->len_aln+A2->len_aln+1); - for ( a=0; a< A1->nseq; a++) - { - if ((i=name_is_in_list ( A1->name[a], A2->name, A2->nseq, 100))!=-1) - { - sprintf ( A->name[A->nseq], "%s", A1->name[a]); - sprintf (A->seq_al[A->nseq], "%s%s%s", A1->seq_al[a],(spacer)?spacer:"", A2->seq_al[i]); - A->nseq++; - } - else - { - char *buf; - buf=generate_string (A2->len_aln, '-'); - sprintf ( A->name[A->nseq], "%s", A1->name[a]); - sprintf (A->seq_al[A->nseq], "%s%s", A1->seq_al[a], buf); - A->nseq++; - vfree (buf); - } - } - for ( a=0; a< A2->nseq; a++) - { - if ((i=name_is_in_list ( A2->name[a], A1->name, A1->nseq, 100))==-1) - { - char *buf; - buf=generate_string (A1->len_aln, '-'); - sprintf ( A->name[A->nseq], "%s", A2->name[a]); - sprintf (A->seq_al[A->nseq], "%s%s", buf, A2->seq_al[a]); - A->nseq++; - vfree (buf); - } - } - A->len_aln=A1->len_aln+A2->len_aln; - return A; -} -Alignment * aln_cat ( Alignment *A, Alignment *B) - { - int a; - - if ( A->nseq!=B->nseq) - { - fprintf ( stderr, "\nERROR IN ALN CAT: DIFFERENT NSEQ\n"); - myexit(EXIT_FAILURE); - } - - A=realloc_alignment2(A, A->nseq,A->len_aln+B->len_aln+1); - - for ( a=0;a< A->nseq; a++) - { - strcat ( A->seq_al[a], B->seq_al[a]); - } - A->len_aln+=B->len_aln; - return A; - } -int verify_aln ( Alignment *A, Sequence *S, char *message) - { - int a, b, c,s,r; - - - for ( a=0;a< A->nseq; a++) - { - s=A->order[a][0]; - r=A->order[a][1]; - for ( b=0, c=0; b< A->len_aln; b++) - { - if ( !is_gap(A->seq_al[a][b])) - { - if (tolower(A->seq_al[a][b])!=tolower(S->seq[s][c+r])) - { - fprintf ( stderr, "\n%s\nResidue [%c %d, %c %d] line %d seq %d",message,A->seq_al[a][b], b,S->seq[s][c+r], c+r,a,s); - output_Alignment_with_res_number(A, stderr); - myexit(EXIT_FAILURE); - return 0; - } - c++; - } - } - } - return 1; - } - -Alignment *adjust_est_aln ( Alignment *PW, Alignment *M, int s) -{ - /*This function reajusts M, threading M onto PW - two seqences in PW - s+1 seq in M - - seq 0 PW ----> 0->s-1 in M - seq 1 PW ----> 1->s in M; - - */ - int a, b; - static char **array; - - - int top_M=0; - int bottom_M=0; - - - if ( array==NULL) - { - array=declare_char (500, 100000); - } - - for ( a=0; a< PW->len_aln; a++) - { - if ( is_gap(PW->seq_al[0][a])) - { - for ( b=0; b< s; b++) - array[b][a]='-'; - } - else - { - for ( b=0; b< s; b++) - array[b][a]=M->seq_al[b][top_M]; - top_M++; - } - - if ( is_gap(PW->seq_al[1][a])) - { - array[s][a]='-'; - } - else - { - - array[s][a]=M->seq_al[s][bottom_M]; - bottom_M++; - } - } - - M->len_aln=PW->len_aln; - for (a=0; alen_aln; b++) - M->seq_al[a][b]=array[a][b]; - M->seq_al[a][b]='\0'; - } - - - M->nseq=s+1; - - return M; -} - - -Alignment * rename_seq_in_aln (Alignment *A, char ***list) -{ - int n, i; - if ( !A)return A; - - - - n=0; - while ( list[n][0][0]) - { - if ( (i=name_is_in_list (list[n][0], A->name, A->nseq, 100))!=-1) - { - sprintf ( A->name[i], "%s", list[n][1]); - } - n++; - } - - A->S=rename_seq_in_seq (A->S, list); - return A; -} -Sequence * rename_seq_in_seq (Sequence *A, char ***list) -{ - int n, i; - if ( !A || !list)return A; - - n=0; - while ( list[n][0][0]) - { - if ( (i=name_is_in_list (list[n][0], A->name, A->nseq, 100))!=-1) - { - sprintf ( A->name[i], "%s", list[n][1]); - } - n++; - } - return A; -} -/********************************************************************/ -/* */ -/* FLOAT SIMILARITIES */ -/* */ -/* */ -/* */ -/********************************************************************/ -float get_seq_fsim ( char *string1, char *string2, char *ignore, char *similarity_set,int **matrix, int MODE ) - { - int len, a, r1, r2, nr1=0, nr2=0; - float pos=0, sim=0; - - - len=MIN((strlen (string1)),(strlen (string2))); - if ( len==0)return 0; - - for ( a=0; a< len; a++) - { - - r1=string1[a]; - r2=string2[a]; - nr1+=!is_gap(r1); - nr2+=!is_gap(r2); - - if ( !is_in_set (r1, ignore) && !is_in_set (r2, ignore)) - { - pos++; - if ( matrix)sim+=matrix[r1-'A'][r2-'A']; - else if (is_in_same_group_aa(r1,r2,0, NULL,similarity_set)) - { - sim++; - } - } - } - if ( MODE==UNGAPED_POSITIONS)return ( sim*100)/pos; - else if ( MODE==ALIGNED_POSITIONS)return (sim*100)/len; - else if ( MODE==AVERAGE_POSITIONS)return (sim*200)/(nr1+nr2); - else - { - return 0; - } - - } -float get_seq_fsim2 ( char *string1, char *string2, char *ignore, char *in_mode) - { - int len1; - int a; - int p1, p2; - int r1=0,r2=0; - char *p; - char mode[1000]; - float r=0, pos1, pos2, pos0, gap, sim; - - - sprintf ( mode, "%s", in_mode); - - /*mode: __ - mat: idscore to get the alignment done - any legal cw matrix - sim_mode: sim1->identities/matches - sim2->identities/min len - */ - - - if ( (p=strstr (mode, "_"))!=NULL) - { - p[0]='\0'; - p++; - } - - - if (strstr (mode, "idscore")) - { - static int **mat; - if (!mat) mat=read_matrice ("blosum62mt"); - return idscore_pairseq (string1, string2, -12, -1, mat,mode); - - } - - len1=strlen (string1); - for ( sim=pos1=pos2=pos0=gap=0,a=0; a< len1; a++) - { - r1=string1[a]; - r2=string2[a]; - p1=1-is_in_set (r1, ignore); - p2=1-is_in_set (r2, ignore); - pos1+=p1; pos2+=p2; - if (p1 && p2) - { - pos0++; - if (is_in_same_group_aa(r1,r2,0, NULL, mode)) - { - sim++; - } - } - else if (p1+p2==1) - { - gap++; - } - } - - if ( p==NULL || strm (p, "sim1") || strm (p, "sim")) - { - r=(pos0==0)?0:(sim*MAXID)/pos0; - } - else if ( strm (p, "sim2")) - { - r=(pos1==0 || pos2==0)?0:(sim*MAXID)/MIN(pos1,pos2); - } - else if ( strm (p, "sim3")) - { - r=(pos1==0 || pos2==0)?0:(sim*MAXID)/MAX(pos1,pos2); - } - else if ( strm (p, "gap1")) - { - r=(len1==0)?MAXID:(gap*MAXID)/len1; - r=MAXID-r; - } - else if ( strm (p, "logid")) - { - r=logid_score (pos0, sim); - } - - return r; - - } - -/********************************************************************/ -/* */ -/* ALIGNMENT ANALYSES */ -/* */ -/* */ -/* */ -/********************************************************************/ -int **dist_array2sim_array ( int **p, int max) -{ - int s1, s2, a, b; - s1=read_array_size ((void *)p, sizeof (void *)); - s2=read_array_size ((void*)p[0],sizeof (int)); - /* s2=read_array_size ((void*)p[0],sizeof (void *)); OLD before 64 BITS*/ - for ( a=0; a< s1; a++) - for ( b=0; b< s2; b++) - { - p[a][b]=max-p[a][b]; - } - return p; -} - -int **sim_array2dist_array ( int **p, int max) -{ - int s1, s2, a, b; - s1=read_array_size ((void *)p, sizeof (void *)); - s2=read_array_size ((void*)p[0],sizeof (int)); - - /*s2=read_array_size ((void*)p[0],sizeof (void *)); OLD before 64 Bits stuff*/ - for ( a=0; a< s1; a++) - for ( b=0; b< s2; b++) - { - p[a][b]=max-(int)p[a][b]; - } - return p; -} - -int **normalize_array (int **p, int max, int norm) -{ -int s1, s2, a, b; - s1=read_array_size ((void *)p, sizeof (void *)); - s2=read_array_size ((void*)p[0],sizeof (int)); - - /*s2=read_array_size ((void*)p[0],sizeof (void *)); OLD before 64 Bits stuff*/ - for ( a=0; a< s1; a++) - for ( b=0; b< s2; b++) - { - p[a][b]=(p[a][b]*norm)/max; - } - return p; -} - -int aln2most_similar_sequence ( Alignment *A, char *mode) -{ - int **w; - int a, b; - int avg, best_avg=0, best_seq=0; - char *buf; - int coverage; - - - if ( !A) return -1; - else if ( A->nseq==1)return 0; - else - { - buf=vcalloc ( A->len_aln+1, sizeof (char)); - w=get_sim_aln_array ( A, mode); - - for ( a=0; a< A->nseq; a++) - { - sprintf ( buf, "%s", A->seq_al[a]); - ungap(buf); - coverage=(strlen(buf)*MAXID)/A->len_aln; - - for ( avg=0,b=0; b< A->nseq; b++)avg+=w[a][b]*coverage; - if ( avg>best_avg){best_avg=avg; best_seq=a;} - } - free_int (w, -1); - vfree (buf); - return best_seq; - } - -} - - - -int aln2coverage ( Alignment *A, int ref_seq) -{ - int a,b; - int cov_pos=0, npos=0; - - for ( a=0; a< A->len_aln; a++) - { - if ( !is_gap ( A->seq_al[ref_seq][a])) - { - npos++; - for ( b=0; b< A->nseq; b++) - { - if ( b!=ref_seq && !is_gap ( A->seq_al[b][a])){cov_pos++;break;} - } - } - } - return (int) (npos==0)?0:(( MAXID*cov_pos)/A->len_aln); -} - - -int sub_aln2sim ( Alignment *A, int *ns, int **ls, char *mode) -{ - int a, b, n; - float avg; - - n=0; avg=0; - if (!A || (ns==NULL && A->nseq<2))return -1; - else if (ns==NULL) - { - for (a=0; a< A->nseq-1; a++) - for ( b=a+1; b< A->nseq;b++, n++) - avg+=generic_get_seq_sim (A->seq_al[a], A->seq_al[b], NULL, mode); - } - else - { - for (a=0; aseq_al[ls[0][a]], A->seq_al[ls[1][b]], NULL, mode); - } - } - return (int)(n==0)?0:((float)avg/(float)n); -} -int sub_aln2max_sim ( Alignment *A, int *ns, int **ls, char *mode) -{ - int a, b, n; - float avg; - - n=0; avg=0; - if (!A || (ns==NULL && A->nseq<2))return -1; - else if (ns==NULL) - { - for (a=0; a< A->nseq-1; a++) - for ( b=a+1; b< A->nseq;b++, n++) - avg=MAX(avg,generic_get_seq_sim (A->seq_al[a], A->seq_al[b], NULL, mode)); - } - else - { - for (a=0; aseq_al[ls[0][a]], A->seq_al[ls[1][b]], NULL, mode)); - } - } - return avg; -} - - -double aln2entropy (Alignment *A, int *in_ls, int in_ns, float gap_threshold) -{ - int ns, a, s, col, r,ncol; - int *ls; - double *count; - double entropy=0; - float ng; - - ls=vcalloc ( A->nseq, sizeof (int)); - count=vcalloc ( 26, sizeof (double)); - - - if ( in_ls) - { - ns=in_ns; - for ( a=0; a< ns; a++)ls[a]=in_ls[a]; - } - else - { - ns=A->nseq; - for ( a=0; a< ns; a++)ls[a]=a; - } - - if ( ns==0) - { - vfree(ls);vfree(count);return 0; - } - for (ncol=0,col=0; collen_aln; col++) - { - for (ng=0,a=0; a< ns; a++) - { - s=ls[a]; - ng+=is_gap(A->seq_al[s][col]); - } - ng/=ns; - if ( ng>gap_threshold)continue; - - ncol++; - - for ( a=0; aseq_al[s][col]); - if (!is_gap(r))count[r-'a']++; - } - for (a=0; a<26; a++) - { - if ( count[a]==0); - else - { - count[a]/=(double)ns; - - entropy+=count[a]*log(count[a]); - count[a]=0; - } - } - } - entropy/=-ncol; - vfree (ls); vfree(count); - - return entropy; -} -int aln2sim ( Alignment *A, char *mode) -{ - return sub_aln2sim ( A, NULL, NULL, mode); - /* - if ( !A || A->nseq<2) return -1; - w=get_sim_aln_array ( A, mode); - - for (c=0, a=0; a< A->nseq-1; a++) - for ( b=a+1; b< A->nseq; b++, c++) - { - avg+=(float)w[a][b]; - } - free_int (w, -1); - return (int)((float)avg/(float)c); - */ -} - -int aln_is_aligned ( Alignment *A) -{ - int a, b; - - if ( !A)return 0; - for (a=0; a< A->nseq; a++) - for ( b=A->len_aln-1; b>0; b--) - { - if (!is_gap(A->seq_al[a][b]) && is_gap(A->seq_al[a][b-1]))return 1; - } - return 0; -} - - -int seq2aln2sim_old ( char *seq1, char *seq2, char *mode_aln, char *mode_id) -{ - Alignment *A; - int sim; - - A=align_two_sequences (seq1, seq2, "pam250mt", -10, -1, mode_aln); - sim=aln2sim (A, mode_id); - free_aln (A); - return sim; -} -int seq2aln2sim ( char *seq1, char *seq2, char *mode_aln, char *mode_id) -{ - Alignment *A; - int sim; - static int gop; - - if (!gop) - { - int **m; - m=read_matrice ("blosum62mt"); - gop=get_avg_matrix_mm(m, AA_ALPHABET)*10; - free_int (m, -1); - } - - A=align_two_sequences (seq1, seq2, "blosum62mt",gop,-1, mode_aln); - sim=aln2sim (A, mode_id); - free_aln (A); - return sim; -} -int* get_cdna_seq_winsim ( int *cache, char *string1, char *string2, char *ignore, char *mode,int *w ) - { - int len1, len2; - int a, x; - - - len1=strlen (string1); - len2=strlen (string2); - - if ( len1!=len2) - { - fatal_exit( stderr,EXIT_FAILURE, "\nTHE TWO cDNAs DO NOT HAVE THE SAME LENGTH [FATAL:get_cdna_seq_sim:%s", PROGRAM); - } - - x=get_cdna_seq_sim(cache, string1, string2, ignore, ""); - for ( a=0; a< len1; a++) - w[a]=x; - - add_warning (stderr, "\nWARNING: winsim not implemented for cDNA"); - return w; - } - -int get_cdna_seq_sim ( int *cache, char *string1, char *string2, char *ignore, char *mode) - { - int len1; - int len2; - int a; - int pos=0; - int sim=0; - char r1=0, r2=0; - - len1=strlen (string1); - len2=strlen (string2); - - - - if ( len1!=len2) - { - fprintf ( stderr, "\nTHE TWO cDNAs DO NOT HAVE THE SAME LENGTH [FATAL:get_cdna_seq_sim:%s", PROGRAM); - crash(""); - } - - for ( a=0; a< len1;) - { - - if ( cache[a]==0){a++;continue;} - else if ( cache[a]==1) - { - - r1=translate_dna_codon (string1+a, 'x'); - r2=translate_dna_codon (string2+a, 'x'); - a+=3; - } - - if ( !is_in_set (r1, ignore) && !is_in_set (r2, ignore)) - { - pos++; - if (is_in_same_group_aa(r1,r2,0, NULL,mode+4)) - { - sim++; - } - } - } - - - - if (pos==0) - return 0; - else - return (int) (sim*MAXID)/pos; - - } - -int* get_seq_winsim ( char *string1, char *string2, char *ignore, char *mode, int*w) - { - int len1, len2, len; - int left, right; - int a,b; - int sim=0; - int window; - int r1, r2; - - len1=strlen (string1); - len2=strlen (string2); - window=atoi(mode); - len=2*window+1; - - if ( len1!=len2)return 0; - if (window==0 || (window*2+1)>=len1) - { - sim=get_seq_sim (string1, string2, ignore, ""); - for (a=0; a__ - mat: idscore to get the alignment done - any legal cw matrix - sim_mode: sim1->identities/matches - sim2->identities/min len - */ - - - if ( (p=strstr (mode, "_"))!=NULL) - { - p[0]='\0'; - p++; - } - - - if (strstr (mode, "idscore")) - { - static int **mat; - if (!mat) mat=read_matrice ("blosum62mt"); - return idscore_pairseq (string1, string2, -12, -1, mat,mode); - - } - len1=strlen (string1); - for ( sim=pos1=pos2=pos0=0,a=0; a< len1; a++) - { - r1=string1[a]; - r2=string2[a]; - p1=1-is_in_set (r1, ignore); - p2=1-is_in_set (r2, ignore); - - pos1+=p1; pos2+=p2; - if (p1 && p2) - { - pos0++; - if (is_in_same_group_aa(r1,r2,0, NULL, mode)) - { - sim++; - } - } - else if (p1+p2==1) - { - gap++; - } - } - - if ( strstr (mode, "cov")) - { - r=(pos0+gap==0)?0:(pos0*MAXID)/(pos0+gap); - } - else if ( p==NULL || strm (p, "sim1") || strm (p, "sim")) - { - r=(pos0==0)?0:(sim*MAXID)/pos0; - } - else if ( strm (p, "sim2")) - { - r=(pos1==0 || pos2==0)?0:(sim*MAXID)/MIN(pos1,pos2); - } - else if ( strm (p, "sim3")) - { - r=(pos1==0 || pos2==0)?0:(sim*MAXID)/MAX(pos1,pos2); - } - else if ( strm (p, "gap1")) - { - r=(len1==0)?MAXID:(gap*MAXID)/len1; - r=MAXID-r; - } - else if ( strm (p, "logid")) - { - r=logid_score (pos0, sim); - } - else if ( strstr (mode, "sim")) - { - r=(pos0==0)?0:(sim*MAXID)/pos0; - } - - - return r; - - } -int get_seq_sim_2 ( char *string1, char *string2, char *ignore, char **gr, int ng) - { - int len1; - int len2; - int a; - int pos=0; - int sim=0; - char r1, r2; - - - len1=strlen (string1); - len2=strlen (string2); - - if ( len1!=len2)return 0; - - for ( a=0; a< len1; a++) - { - r1=string1[a]; - r2=string2[a]; - if ( !is_in_set (r1, ignore) && !is_in_set (r2, ignore)) - { - pos++; - if (is_in_same_group_aa(r1,r2,ng, gr, NULL)) - { - sim++; - } - } - } - - if (pos==0) - return 0; - else - return (int) (sim*MAXID)/pos; - - } - -int get_seq_sim_3 ( char *string1, char *string2, char *ignore, int **mat) - { - int len1; - int len2; - int a; - - int sim=0; - char r1, r2; - - - len1=strlen (string1); - len2=strlen (string2); - - if ( len1!=len2)return 0; - - for ( a=0; a< len1; a++) - { - r1=string1[a]; - r2=string2[a]; - if ( !is_in_set (r1, ignore) && !is_in_set (r2, ignore)) - { - sim+=mat[r1-'A'][r2-'A']; - } - } - return sim; - - } -int * get_aln_col_weight ( Alignment *A, char *mode) - { - int a, b; - char *col; - int *weight; - - col=vcalloc ( A->nseq, sizeof (int)); - weight=vcalloc (A->len_aln, sizeof (int)); - - for (a=0; a< A->len_aln; a++) - { - for ( b=0; b< A->nseq; b++) - col[b]=A->seq_al[b][a]; - weight[a]=(find_group_aa_distribution (col, A->nseq,0,NULL,NULL, mode )*MAXID)/A->nseq; - } - vfree (col); - return weight; - - } - -int analyse_aln_column ( Alignment *B, int col) - { - - char r=' '; - int a, b, c=0; - static char *mat; - static int ng_cw_star; - static char **cw_star; - int *cw_star_count; - - static int ng_cw_col; - static char **cw_col; - int *cw_col_count; - - static int ng_cw_dot; - static char **cw_dot; - int *cw_dot_count; - - - - - - - if ( !B->S || !(B->S)->type)B= get_aln_type (B); - - if ( !mat)mat=vcalloc ( STRING, sizeof (char)); - - if ( !ng_cw_star) - { - cw_star=make_group_aa ( &ng_cw_star, strcpy ( mat,"idmat")); - cw_col=make_group_aa ( &ng_cw_col, strcpy (mat,"clustalw_col")); - cw_dot=make_group_aa ( &ng_cw_dot, strcpy (mat, "clustalw_dot")); - } - - cw_star_count=vcalloc (ng_cw_star, sizeof (int)); - cw_col_count=vcalloc ( ng_cw_col, sizeof (int)); - cw_dot_count=vcalloc (ng_cw_dot, sizeof (int)); - - for ( a=0; a< B->nseq; a++) - { - c=tolower (B->seq_al[a][col]); - if (is_gap(c)){r=' ';break;} - - for ( b=0; b< ng_cw_star; b++) - cw_star_count[b]+=is_in_set (c, cw_star[b]); - for ( b=0; b< ng_cw_col; b++) - cw_col_count[b]+=is_in_set (c, cw_col[b]); - for ( b=0; b< ng_cw_dot; b++) - cw_dot_count[b]+=is_in_set (c, cw_dot[b]); - } - - - - - - if ( !is_gap(c) && r==' ') - for ( b=0; b< ng_cw_star; b++)if ( cw_star_count[b]==B->nseq){r='*'; break;} - if ( !is_gap(c) && r==' ' && !(strm((B->S)->type, "DNA")||strm ((B->S)->type,"RNA"))) - for ( b=0; b< ng_cw_col ; b++)if ( cw_col_count [b]==B->nseq){r=':'; break;} - if ( !is_gap(c) && r==' ' && !(strm((B->S)->type, "DNA")||strm ((B->S)->type,"RNA"))) - for ( b=0; b< ng_cw_dot ; b++)if ( cw_dot_count [b]==B->nseq){r='.'; break;} - - - - vfree(cw_star_count); - vfree(cw_col_count); - vfree(cw_dot_count); - - return r; - } - - -int ** get_cov_aln_array ( Alignment *A, char *mode) -{ - int **w; - int a, b, c, t; - - w=declare_int ( A->nseq, A->nseq); - - - for ( a=0; a< A->nseq-1; a++) - { - w[a][a]=100; - for ( t=0,b=a+1; b< A->nseq; b++) - { - for ( c=0; c< A->len_aln; c++) - { - t+=(!is_gap(A->seq_al[a][c]) &&!is_gap(A->seq_al[b][c])); - } - w[a][b]=w[b][a]=(t*100)/A->len_aln; - } - } - return w; -} - -int ** get_cov_master_aln_array ( Alignment *A,int n, char *mode) -{ - int **w; - int b, c, t; - - w=declare_int ( A->nseq, A->nseq); - - - for (b=0; b< A->nseq; b++) - { - - for (t=0, c=0; c< A->len_aln; c++) - { - t+=(!is_gap(A->seq_al[n][c]) &&!is_gap(A->seq_al[n][c])); - } - w[n][b]=w[b][n]=(t*100)/A->len_aln; - } - - return w; -} -int ** get_sim_master_aln_array ( Alignment *A,int n, char *mode) - { - int **w; - int a; - - w=declare_int ( A->nseq, A->nseq); - - - for ( a=0; a< A->nseq; a++) - { - if ( strm (mode, "cdna")) - w[n][a]=w[a][n]=get_cdna_seq_sim ( A->cdna_cache[0], A->seq_al[a], A->seq_al[n],GAP_LIST, mode); - else - w[n][a]=w[a][n]=get_seq_sim ( A->seq_al[n], A->seq_al[a],GAP_LIST, mode); - } - return w; - } -int ** get_dist_aln_array ( Alignment *A, char *mode) -{ - - int **w; - - w=get_sim_aln_array ( A, mode); - return sim_array2dist_array(w,MAXID); -} -Sequence * seq2filter (Sequence *Sin, int min, int max) -{ - int *keep; - char *tmpfile; - Sequence *S, *Sout; - int a, b, sim; - int **M; - FILE *fp; - int n; - - S=duplicate_sequence (Sin); - for (a=0; anseq; a++)ungap(S->seq[a]); - keep=vcalloc (S->nseq, sizeof (int)); - M=read_matrice ("blossum62mt"); - for (a=0; anseq; a++) - { - output_completion ( stderr, a, S->nseq, 100, "Distance Matrix Computation: "); - for ( b=a+1; bnseq; b++) - { - - sim=idscore_pairseq(S->seq[a], S->seq[b],-10, -2,M, "sim"); - if ( sim>min && simnseq; a++) - if ( keep[a]) - { - fprintf ( fp, ">%s %s\n%s", S->name[a], S->seq_comment[a], S->seq[a]); - n++; - } - vfclose (fp); - if (n==0) return NULL; - Sout=main_read_seq(tmpfile); - free_int (M, -1); vfree (keep); free_sequence (S, -1); - return Sout; -} - -Alignment * grep_seq (Alignment *S,char *field, char *mode, char *string) -{ - int a; - FILE *fp; - char *tmp; - int n=0; - - tmp=vtmpnam (NULL); - fp=vfopen (tmp, "w"); - - if ( !strm(mode, "KEEP") && ! strm (mode, "REMOVE")) - { - add_warning ( stderr, "\nERROR: +grep [FATAL: %s]", PROGRAM); - myexit (EXIT_FAILURE); - } - else if ( !strm(field, "SEQ") && ! strm (field, "COMMENT") && ! strm(field, "NAME")) - { - add_warning ( stderr, "\nERROR: +grep [FATAL: %s]", PROGRAM); - myexit (EXIT_FAILURE); - } - - - for (n=0, a=0; a< S->nseq; a++) - { - int found=0; - - if (strm(field, "NAME") && perl_strstr (S->name[a], string))found=1; - else if (strm(field, "COMMENT") && S->seq_comment[a][0] && perl_strstr (S->seq_comment[a], string) )found=1; - else if (strm(field, "SEQ") && perl_strstr (S->seq_al[a], string))found=1; - - if ( (strm (mode, "KEEP") && found) || (strm (mode, "REMOVE") && !found)) - { - n++; - fprintf (fp, ">%s", S->name[a]); - if (S->seq_comment[a][0])fprintf (fp, " %s", S->seq_comment[a]); - fprintf (fp, "\n%s\n", S->seq_al[a]); - } - } - - vfclose (fp); - - free_aln (S); - if ( n==0) return NULL; - else - return main_read_aln (tmp, NULL); -} - -Alignment * modify_seq (Alignment *S, char *field, char *string1, char *string2) -{ - int a; - FILE *fp; - char *tmp; - - tmp=vtmpnam (NULL); - fp=vfopen (tmp, "w"); - for ( a=0; a< S->nseq; a++) - { - if (strm(field, "NAME"))S->name[a]=substitute ( S->name[a], string1, string2); - else if (strm(field, "COMMENT"))S->seq_comment[a]=substitute ( S->seq_comment[a], string1, string2); - else if (strm(field, "SEQ"))S->seq_al[a]=substitute ( S->seq_al[a], string1, string2); - fprintf (fp, ">%s", S->name[a]); - if (S->aln_comment[a][0])fprintf (fp, " %s", S->aln_comment[a]); - fprintf (fp, "\n%s\n", S->seq_al[a]); - } - vfclose (fp); - free_aln (S); - S=main_read_aln (tmp, NULL); - return S; -} - -int ** seq2sim_mat (Sequence *S, char *mode) -{ - return seq2comp_mat ( S,mode, "sim"); -} -int ** seq2cov_mat (Sequence *S, char *mode) -{ - return seq2comp_mat ( S,mode, "cov"); -} - -int ** seq2comp_mat (Sequence *S, char *mode, char *comp_mode) -{ - int a, b; - int **sim; - char file[1000]; - Alignment *A; - char *name; - - - /*Use pre_computed value if available in the current dir*/ - - name=path2filename(S->file[0]); - sprintf ( file, "%s%s.%s.%s_file", get_cache_dir(),name, mode, comp_mode); - A=seq2aln(S,NULL, RM_GAP); - if ( check_file_exists (file) && is_distance_matrix_file (file) && (sim=input_similarities(file, A, NULL))!=NULL) - { - display_input_filename (stderr, "SIMILARITY_MATRIX", "SIMILARITY_MATRIX_FORMAT_01", file, CHECK); - fprintf ( stderr, "\n"); - } - else - { - char mode2[1000]; - int **M; - - M=read_matrice (mode); - sim=declare_int ( S->nseq, S->nseq); - for ( a=0; a< S->nseq; a++) - { - ungap (S->seq[a]); - sim[a][a]=100; - } - - for ( a=0; anseq-1; a++) - { - - output_completion4halfmat ( stderr, a, S->nseq, 100, "Similarity Matrix Computation: "); - for ( b=a+1; b< S->nseq; b++) - { - sim[a][b]=sim[b][a]=idscore_pairseq(S->seq[a], S->seq[b],-12, -1,M, comp_mode); - } - } - free_int (M,-1); - sprintf ( mode2, "_memory_%ld", (long int)sim); - output_similarities( file, A, mode2); - display_output_filename (stderr, "SIMILARITY_MATRIX", "SIMILARITY_MATRIX_FORMAT_01", file, CHECK); - fprintf ( stderr, "\n"); - } - free_aln (A); - return sim; -} - -int ** fast_aln2sim_list (Alignment *A, char *mode, int *ns, int **ls) -{ - int **simm; - int p1, p2, p3, r1, r2; - int gap,pos0,pos1,pos2,len,sim; - int a, b, c, m, s=0,s1, s2, n; - int free_ns=0; - - if (ns==NULL) - { - free_ns=1; - ns=vcalloc (2, sizeof (int)); - ns[0]=ns[1]=A->nseq; - ls=declare_int (2, A->nseq); - for ( a=0; a< 2; a++) - for (b=0; bnseq; b++) - ls[a][b]=b; - } - - - simm=declare_int (ns[0]*ns[1]+1, 3); - - if (strstr (mode, "sim1"))m=0; - else if (strstr (mode, "sim2"))m=1; - else if (strstr (mode, "sim3"))m=2; - else if (strstr (mode, "gap1"))m=3; - else if (strstr (mode, "cov1"))m=4; - else if (strstr (mode, "logid"))m=5; - else m=0; - - - - for (n=0,a=0; alen_aln; c++) - { - r1=tolower (A->seq_al[s1][c]); - r2=tolower (A->seq_al[s2][c]); - p1=(r1!='-')?1:0; - p2=(r2!='-')?1:0; - p3=p1+p2; - if ( p3==0)continue; - if ( p3==1)gap++; - if ( r1==r2)sim++; - pos1+=p1; - pos2+=p2; - pos0+=(p3==2)?1:0; - len++; - } - - if (m==0)s=(pos0==0)?0:(sim*MAXID)/pos0; //sim1 - else if (m==1) s=(MIN(pos1,pos2)==0)?0:(sim*MAXID)/MIN(pos1,pos2);//sim2 - else if (m==2) s=(MAX(pos1,pos2)==0)?0:(sim*MAXID)/MAX(pos1,pos2);//sim3 - else if (m==3) s=(len==0) ?0:((len-gap)*MAXID)/len;//gap1 - else if (m==4) s=(len==0) ?0:((pos0)*MAXID)/len; //cov - else if (m==5) - { - s=logid_score ( sim, len); - } - simm[n][0]=s1; - simm[n][1]=s2; - simm[n][2]=s; - } - } - - if ( free_ns) {vfree(ns); free_int (ls, -1);} - simm[n][0]=-1; - return simm; -} - -int ** fast_aln2sim_mat (Alignment *A, char *mode) -{ - int **simm; - int p1, p2, p3, r1, r2; - int gap,pos0,pos1,pos2,len,sim; - int a, b, c, m; - - simm=declare_int (A->nseq, A->nseq); - - - - if (strstr (mode, "sim1"))m=0; - else if (strstr (mode, "sim2"))m=1; - else if (strstr (mode, "sim3"))m=2; - else if (strstr (mode, "gap1"))m=3; - else if (strstr (mode, "cov1"))m=4; - else if (strstr (mode, "logid"))m=5; - else m=0; - - - - for ( a=0; a< A->nseq-1; a++) - { - simm[a][a]=MAXID; - for ( b=a+1; b< A->nseq; b++) - { - gap=pos0=pos1=pos2=len=sim=0; - - for ( c=0; c< A->len_aln; c++) - { - r1=tolower (A->seq_al[a][c]); - r2=tolower (A->seq_al[b][c]); - p1=(r1!='-')?1:0; - p2=(r2!='-')?1:0; - p3=p1+p2; - if ( p3==0)continue; - if ( p3==1)gap++; - if ( r1==r2)sim++; - pos1+=p1; - pos2+=p2; - pos0+=(p3==2)?1:0; - len++; - } - - if (m==0)simm[a][b]=simm[b][a]=(pos0==0)?0:(sim*MAXID)/pos0; //sim1 - else if (m==1) simm[a][b]=simm[b][a]=(MIN(pos1,pos2)==0)?0:(sim*MAXID)/MIN(pos1,pos2);//sim2 - else if (m==2) simm[a][b]=simm[b][a]=(MAX(pos1,pos2)==0)?0:(sim*MAXID)/MAX(pos1,pos2);//sim3 - else if (m==3) simm[a][b]=simm[b][a]=(len==0) ?0:((len-gap)*MAXID)/len;//gap1 - else if (m==4) simm[a][b]=simm[b][a]=(len==0) ?0:((pos0)*MAXID)/len; //cov - else if (m==5) - { - - //Inspired from Muscle +mafft 5 - simm[a][b]=simm[b][a]=logid_score ( sim, len); - } - } - } - return simm; -} -int logid_score ( int sim, int len) -{ - float score; - - if ( len==0)return (int)(0.33*(float)MAXID); - - score=(float)sim/(float)len; - if (score>0.9) score=1.0; - else score=-log10 (1.0-score); - - score=(score*MAXID); - return score; -} -int ** aln2sim_mat (Alignment *A, char*mode) -{ - - - if ( strstr (mode, "idmat"))return fast_aln2sim_mat(A, mode); - return get_sim_aln_array(A, mode); -} -int ** aln2cov (Alignment *A) -{ - int a, b, c; - int r1, r2, gr1, gr2, pos0, gap; - int **cov; - cov=declare_int (A->nseq, A->nseq); - - for (a=0; a< A->nseq-1; a++) - { - cov[a][a]=100; - for ( b=a+1; bnseq; b++) - { - for (gap=0,pos0=0,c=0;clen_aln; c++) - { - r1=A->seq_al[a][c]; - r2=A->seq_al[b][c]; - gr1=is_gap(r1); gr2=is_gap(r2); - if ( gr1+gr2==0)pos0++; - else if ( gr1+gr2<2)gap++; - } - cov[a][b]=cov[b][a]=((gap+pos0)==0)?0:((pos0*100)/(gap+pos0)); - } - } - return cov; -} -int ** get_raw_sim_aln_array (Alignment *A, char *mode) -{ - int **w; - int **M; - int a, b, c, r1, r2, set, max, min; - - w=declare_int (A->nseq, A->nseq); - if (strstr(mode, "sar"))M=NULL; - else M=read_matrice (mode); - - HERE ("RAW STUFF"); - - for ( set=0,a=0; a< A->nseq; a++) - for (b=a; bnseq; b++) - { - if (M) - { - for (c=0; clen_aln; c++) - { - r1=A->seq_al[a][c]; - r2=A->seq_al[b][c]; - - if ( !is_gap(r1) && !is_gap(r2)) - w[a][b]+=M[r1-'A'][r2-'A']; - } - } - else if ( strm (mode, "sarmat2")) - { - w[a][b]=get_sar_sim2 (A->seq_al[a], A->seq_al[b]); - } - else - { - HERE ("ERROR: %s is an unknown mode of raw_sim\n", mode); exit (0); - } - - w[b][a]=w[a][b]; - if (!set){min=max=w[a][b];set=1;} - min=MIN(min,w[a][b]); - max=MAX(max,w[a][b]); - } - for (a=0; anseq; a++) - for (b=a; bnseq; b++) - { - w[b][a]=((max-min)==0)?0:((w[b][a]-min)*100)/(max-min); - w[a][b]=w[b][a]; - } - free_int (M, -1); - return w; -} -int ** get_sim_aln_array ( Alignment *A, char *mode) - { - int **w; - int a, b; - - - w=declare_int ( A->nseq, A->nseq); - - for ( a=0; a< A->nseq-1; a++) - { - for ( b=a+1; b< A->nseq; b++) - { - - w[a][b]=w[b][a]=generic_get_seq_sim ( A->seq_al[a], A->seq_al[b], (A->cdna_cache)?A->cdna_cache[0]:NULL, mode); - } - } - return w; - } -int generic_get_seq_sim ( char *seq1, char *seq2, int*cache, char *mode) -{ - - - if ( strm (mode, "cdna")) - return get_cdna_seq_sim ( cache, seq1, seq2,GAP_LIST, mode); - else if ( strnm (mode, "ktup",4)) - return ktup_comparison (seq1, seq2,atoi(mode+4)); - else if ( strstr (mode, "sarmat2")) - { - - return get_sar_sim2 (seq1, seq2); - } - else if ( strstr (mode, "sarmat")) - return (int) get_sar_sim (seq1,seq2); - else - { - return get_seq_sim ( seq1,seq2,GAP_LIST, mode); - } -} -int *** get_winsim_aln_array ( Alignment *A,char *mode, int ***w) - { - int a, b; - for ( a=0; a< A->nseq; a++) - for ( b=0; b< A->nseq; b++) - { - if ( strm (mode, "cdna")) - w[a][b]=get_cdna_seq_winsim ( A->cdna_cache[0], A->seq_al[a], A->seq_al[b],GAP_LIST, mode, w[a][b]); - else - w[a][b]=get_seq_winsim ( A->seq_al[a], A->seq_al[b],GAP_LIST, mode, w[a][b]); - } - return w; - } - -Alignment * seq2profile (Sequence *S, int i) -{ - Alignment *A; - - if ((A=seq2R_template_profile (S, i))) - { - return A; - } - else - { - char *tmp; - FILE *fp; - tmp=vtmpnam (NULL); - fp=vfopen ( tmp, "w"); - fprintf (fp, ">%s\n%s\n", S->name[i], S->seq[i]); - vfclose (fp); - - (S->T[i])->R=fill_R_template (S->name[i], tmp, S); - - return seq2R_template_profile (S, i); - } -} - -Alignment* aln2sub_aln_file (Alignment *A, int n, char **string) -{ - char ***list; - int a; - - list=vcalloc (A->nseq, sizeof (char***)); - if ( n==0)return A; - else if (n>1) - { - int l; - char *buf; - - for (l=0,a=0; a< n; a++)l+=strlen (string[a]); - buf=vcalloc ( 2*n+l+1, sizeof (char)); - for (a=0; a< n; a++){buf=strcat (buf,string[a]), buf=strcat ( buf, " ");} - list[0]=string2list (buf); - vfree (buf); - } - else if ( file_exists (NULL,string[0])) - { - list=read_group (string[0]); - - } - else - { - fprintf (stderr, "\nERROR: file <%s> does not exist [FATAL:%s]\n",string[0], PROGRAM); - myexit (EXIT_FAILURE); - } - - - a=0; - while (list[a]) - { - int i, b; - FILE *fp; - n=atoi (list[a][0]); - fp=vfopen (list[a][1], "w"); - for (b=2; bname, A->nseq, MAXNAMES); - if (n==3)ungap (A->seq_al[i]); - fprintf (fp, ">%s\n%s\n", A->name[i], A->seq_al[i]); - } - vfclose (fp); - free_char (list[a], -1); - a++; - } - vfree(list); - return A; -} -Sequence *remove_empty_sequence (Sequence *S) -{ - int a, b; - char *c; - Sequence *NS; - - c=vcalloc ( S->max_len+1, sizeof (char)); - - for (a=0, b=0; a< S->nseq; a++) - { - sprintf ( c, "%s",S->seq[a]); - ungap (c); - if ( strlen (c)==0) - { - //vfree (S->seq[a]); - S->seq[a]=NULL; - add_warning ( stderr, "WARNING: Sequence %s does not contain any residue: automatically removed from the set [WARNING:%s]",S->name[a], PROGRAM); - } - } - NS=duplicate_sequence (S); - free_sequence (S, S->nseq); - vfree (c); - return NS; -} -Alignment* aln2sub_seq (Alignment *A, int n, char **string) -{ - char ***list; - int a; - Sequence *S=NULL; - - list=vcalloc (A->nseq, sizeof (char***)); - if ( n==0)return A; - else if (n>1) - { - int l; - char *buf; - - for (l=0,a=0; a< n; a++)l+=strlen (string[a]); - buf=vcalloc ( 2*n+l+1, sizeof (char)); - for (a=0; a< n; a++){buf=strcat (buf,string[a]), buf=strcat ( buf, " ");} - list[0]=string2list (buf); - vfree (buf); - } - else if ( file_exists (NULL,string[0])) - { - list=read_group (string[0]); - - } - else - { - fprintf (stderr, "\nERROR: file <%s> does not exist [FATAL:%s]\n",string[0], PROGRAM); - myexit (EXIT_FAILURE); - } - - - - a=0; - while (list[a]) - { - int t; - Alignment *B; - Sequence *subS; - - - B=main_read_aln (list[a][1], NULL); - t=aln2most_similar_sequence(B, "idmat"); - subS=extract_one_seq(B->name[t],0,0,B,KEEP_NAME); - S=add_sequence (subS,S,0); - free_aln (B);free_sequence (subS, -1); - vremove (list[a][1]); - a++; - } - vfree(list); - return seq2aln (S, NULL, RM_GAP); -} - -Alignment * aln2collapsed_aln (Alignment * A, int n, char **string) -{ - Alignment *B; - char ***list; - char **list2; - char *buf=NULL; - FILE *fp; - int a, b,c, ns, m, l; - int *collapsed; - - list=vcalloc (A->nseq, sizeof (char***)); - ns=0; - if ( n==0)return A; - else if (n>1) - { - for (l=0,a=0; a< n; a++)l+=strlen (string[a]); - buf=vcalloc ( 2*n+l+1, sizeof (char)); - for (a=0; a< n; a++){buf=strcat (buf,string[a]), buf=strcat ( buf, " ");} - - list[0]=string2list (buf);ns=1; - - } - else if ( file_exists (NULL,string[0])) - { - /*Format: Fasta like, the name fo the group followed with the name of the sequences - > .... - Groups must NOT be overlaping - */ - l=measure_longest_line_in_file (string[0])+1; - buf=vcalloc (l, sizeof (char)); - ns=0; - fp=vfopen (string[0], "r"); - while ((c=fgetc(fp))!=EOF) - { - buf=fgets (buf,l-1, fp); - if ( c=='>')list[ns++]=string2list (buf); - } - vfclose (fp); - } - else - { - fprintf (stderr, "\nERROR: file <%s> does not exist [FATAL:%s]\n",string[0], PROGRAM); - myexit (EXIT_FAILURE); - } - - vfree (buf); buf=NULL; - - /*Identify lost sequences*/ - collapsed=vcalloc (A->nseq, sizeof (int)); - for ( a=0; a< ns; a++) - { - m=atoi (list[a][0]); - for (b=2; bname, A->nseq, MAXNAMES); - if ( c>=0)collapsed[c]=1; - } - } - for ( a=0; a< A->nseq; a++) - { - if ( collapsed[a]==0) - { - list[ns]=declare_char (3, MAXNAMES); - sprintf ( list[ns][0], "3"); - sprintf ( list[ns][1], "%s", A->name[a]); - sprintf ( list[ns][2], "%s", A->name[a]); - ns++; - } - } - vfree (collapsed); - - - - - - list2=declare_char (A->nseq, 100); - /*1 Collapse the alignment*/ - for ( a=0; a< ns; a++) - { - sprintf ( list2[a], "%s", list[a][2]); - } - B=extract_sub_aln2 ( A, ns, list2); - /*2 Rename the sequences*/ - for ( a=0; a< ns; a++) - { - sprintf ( B->name[a], "%s", list[a][1]); - } - /*replace sequence with consensus*/ - - for ( a=0; a< ns; a++) - { - m=atoi (list[a][0]); - for (c=0, b=2; bseq_al[a], "%s", buf); - } - vfree (buf); - - free_aln (A); - B->S=aln2seq(B); - return B; -} -Alignment * aln2profile (Alignment * A) - { - Alignment *B=NULL; - char *cons; - - if (!A->P) - { - A->P=declare_profile (AA_ALPHABET,A->len_aln+1); - } - B=copy_aln (A, B); - free_int ((A->P)->count, -1); - free_int ((A->P)->count2, -1); - free_int ((A->P)->count3, -1); - (A->P)->count=aln2count_mat (A); - (A->P)->count2=aln2count_mat2 (A); - - cons=aln2cons_seq_mat (A, "blosum62mt"); - - sprintf (B->seq_al[0], "%s", cons); - B->nseq=1; - (A->P)->count3=aln2count_mat2 (B); - vfree (cons); - free_aln (B); - - - - return A; - - } - -int** aln2count_mat2 ( Alignment *A) -{ - return sub_aln2count_mat2 (A, 0, NULL); -} - -int sub_aln2nseq_prf ( Alignment *A, int ns, int *ls) -{ - - - int a, c, s; - Alignment *R; - int n; - int free_ls=0; - - - if ( ns==0) - { - n=ns=A->nseq; - ls=vcalloc (n, sizeof (int)); - for ( a=0; anseq; a++)ls[a]=a; - free_ls=1; - } - else - { - n=ns; - } - - for (c=0,a=0; aS && (R=seq2R_template_profile (A->S, A->order[s][0]))!=NULL) - { - n+=R->nseq; - } - else - { - ; - } - } - - if ( free_ls) vfree (ls); - return n; -} - -int** sub_aln2count_mat2 ( Alignment *A, int ns, int *ls) -{ - char **p; - int **count; - int a, b, c, s; - Alignment *R; - int n; - int free_ls=0; - - if ( ns==0) - { - n=ns=A->nseq; - p=vcalloc ( n, sizeof (char*)); - ls=vcalloc (n, sizeof (int)); - for ( a=0; anseq; a++)ls[a]=a; - free_ls=1; - } - else - { - n=ns; - p=vcalloc (n, sizeof (char*)); - } - - for (c=0,a=0; aS && (R=seq2R_template_profile (A->S, A->order[s][0]))!=NULL) - { - n+=R->nseq; - p=vrealloc (p, n*sizeof (char*)); - for (b=0; bnseq; b++) - { - p[c++]=R->seq_al[b]; - } - } - else - { - int w; - w=A->order[s][4]+1; - - for (b=0; bseq_al[s]; - } - } - count=sub_aln2count_mat3 (p,c); - vfree (p); - if ( free_ls) vfree (ls); - return count; -} -int** sub_aln2count_mat3 (char **al, int ns) -{ - int **count; - int used[1000]; - int a, b; - int r; - - int len; - int us; - - - /*count[x][0]=n symbols in column - count[x][1]=total_size of line - count[x][2]=Gap frequency - - count[x][n]=symbol n - count[x][n+1]=N occurence symbol n; - count[x][n+2]=N frequence symbol n*100; - - special multi-channeling - count[x][count[x][1]]=Nseq - count[x][count[x][1]+s]=residue col x, sequence s - */ - - - for (a=0; a< 1000; a++)used[a]=0; - len=strlen (al[0]); - - count=declare_int (len+2,100+ns+2); - count[len][0]=END_ARRAY; - count[len][1]=ns; - count[len][2]=len; - - - - for (a=0; anseq-us)*100/A->nseq;*/ - count[a][2]=ns-us; - - for (b=3; bnseq; - for (b=1; b<=A->nseq; b++) - count [a][count[a][1]+b]=(is_gap(A->seq_al[b-1][a]))?0:A->seq_al[b-1][a]; - */ - } -#ifdef XXXXXX - HERE ("Display "); - for (a=0; a< 5; a++) - { - fprintf ( stderr, "\n"); - for ( b=3; b< count[a][1]; b+=3) - { - fprintf ( stderr, "[%c %d]", count[a][b], count[a][b+1]); - } - fprintf ( stderr, "\n"); - for ( b=0; blen_aln); - - - for ( a=0; alen_aln; a++) - { - for ( b=0; b< A->nseq; b++) - { - if ( is_gap ( A->seq_al[b][a]))freq_mat[alp_size][a]++; - else - { - x=tolower(A->seq_al[b][a]); - freq_mat[x-'a'][a]++; - freq_mat[alp_size+1][a]++; - - } - } - } - - return freq_mat; - } -char *aln2random_seq (Alignment *A, int pn1, int pn2, int pn3, int gn) - { - - /* - - - Given the frequencies in A ( read as total counts of each Residue in - freq[A->nseq][A->len_aln], and pn1, pn2 and pn3: - - 1-Generate a new amino-acid at each position - 2-Insert Gaps, using a HMM. - - - pn3=Weight of the noise induced with sub mat. - - pn1=% noise type 1 ( Varies with entropi) - n1=Ratio noise type 1 - - T =Nseq - t1=Noise 1 expressed in Nseq - al=alphabet size; - ncat=number of non 0 cat for a given position - ICi initial count for residue i - - Ci=freq[seq][AA] - t1=T*n1*(1-1/ncat); - t2=T*n2; - - Ci= ICi*(T-(t1+t2))/T +(t1)/al+(t2)/al - - */ - - int **freq; - int **count; - float T, tot_t1, tot_t2,tot_t3, n1, n2, n3; - float ncat; - - double gf; - double *init_freq; - double *blur_freq; - double *t1, *t2,*t3; - int a, b, c, x; - char *seq; - int tot; - /*Viterbi Parameters */ - - int p; - int AL=0; /*Allowed Transition*/ - int F=-100000; /*Forbiden Transition*/ - - int GAP_TRANSITION; - int IGAP=0, IAA=1; - - int state,best_state=0, score, best_score=0; - int p_state; - int e=0; - int **score_tab; - int **state_tab; - int nstate=2; - int **transitions; - - int max; - - seq=vcalloc ( A->len_aln+1, sizeof (char)); - count=aln2count_mat(A); - freq=aln2count_mat(A); - - T=100; - - n1=(float)pn1/100; - n2=(float)pn2/100; - n3=(float)pn3/100; - - for ( a=0; a< A->len_aln; a++) - { - for ( b=0; b<26; b++) - freq[b][a]=freq[b][a]*((T)/(A->nseq-freq[26][a])); - freq[26][a]= (freq[26][a]*T)/A->nseq; - } - - - init_freq=vcalloc ( 26, sizeof (double)); - blur_freq=vcalloc ( 26, sizeof (double)); - - tot_t1=tot_t2=tot_t3=0; - - t1=vcalloc ( 27, sizeof (double)); - t2=vcalloc ( 27, sizeof (double)); - t3=vcalloc ( 27, sizeof (double)); - for (a=0; a< A->len_aln; a++) - { - - /*Compute Frequencies*/ - for (tot=0, b=0; b<26; b++) - { - if ( is_aa(b+'A')) - { - init_freq[b]=freq[b][a]; - tot+=freq[b][a]; - } - } - /*Count the number of different amino acids*/ - for ( ncat=0, b=0; b<=26; b++) - { - ncat+=(freq[b][a]!=0)?1:0; - } - /*Blurr the distribution using */ - blur_freq=compute_matrix_p (init_freq,tot); - - - /*compute noise 1: biased with blurred content * enthropy--> keeps prosite motifs*/ - tot_t1=T*n1*(1-1/ncat); - for ( b=0; b< 26; b++)if ( is_aa(b+'A')){t1[b]=blur_freq[b]*(1-1/ncat)*n1;} - - /*Compute noise 2: completely random*/ - tot_t2=T*n2; - for ( b=0; b< 26; b++)if ( is_aa(b+'A')){t2[b]=tot_t2/21;} - - /*compute noise 3: biased with the sole content(pam250mt)*/ - tot_t3=T*n3; - for ( b=0; b<26; b++)if ( is_aa(b+'A')){t3[b]=blur_freq[b]*n3;} - - for ( b=0; b<26; b++) - { - if ( is_aa('A'+b)) - freq[b][a]=freq[b][a]*(T-(tot_t1+tot_t2+(tot_t3)))/T+t1[b]+t2[b]+t3[b]; - } - - /*end of the loop that mutates position a*/ - } - - vfree (blur_freq); - vfree (init_freq); - vfree ( t3); - - /*1-Generate the amino acids of the new sequence new*/ - - - vsrand (0); - - for ( a=0; a< A->len_aln; a++) - { - - for (T=0,b=0; b<26; b++)T+=freq[b][a]; - x=rand ()%((int)T); - for (c=0,b=0; b<26; b++) - { - c+=freq[b][a]; - if ( c>=x) - { - seq[a]='A'+b; - c=-1; - break; - } - } - if ( c!=-1)seq[a]='-'; - } - seq[a]='\0'; - - - /*2 Generate the gaps in the new sequence*/ - - - - if ( gn<0); - else - { - - transitions=declare_int ( nstate, nstate); - score_tab=declare_int ( A->len_aln+2, nstate ); - state_tab=declare_int ( A->len_aln+2, nstate ); - - - - for (a=0; alen_aln; p++){for (state=0; state< nstate; state++){score_tab[p][state]=F;state_tab[p][state]=-1;} } - - for (p=1; p<= A->len_aln; p++) - { - for (max=0,a=0; a<26; a++)max=MAX(max, freq[a][p-1]); - max=(max*(A->nseq-count[26][p-1]))/A->nseq; - - for (state=0; state< nstate; state++) - { - - - gf=freq[26][p-1]; - if ( state==IGAP) e=gf-50; - else if ( state==IAA ) e=max-50; - for (p_state=0; p_statebest_score){ best_score=score;best_state=p_state;} - } - score_tab[p][state]=best_score; - state_tab[p][state]=best_state; - } - } - - for (state=0; statebest_score){best_score=score_tab[p-1][state]; best_state=state;} - } - - for (p=A->len_aln; p>0;) - { - if ( best_state==IGAP) - { - seq[p-1]='-'; - } - else if ( best_state==IAA) - { - seq[p-1]=seq[p-1]; - } - best_state=state_tab[p][best_state]; - p--; - } - } - - free_int (freq, -1); - return seq; - } - -/********************************************************************/ -/* */ -/* Weighting functions */ -/* */ -/* */ -/* */ -/********************************************************************/ -Alignment * master_trimseq( Alignment *A, Sequence *S,char *mode) - { - Alignment *NA; - char *p; - int a, b; - int use_aln=0, upper_sim=0, min_nseq=0, lower_sim=0; - float f_upper_sim, f_lower_sim; - char weight_mode[1000]; - char method[1000]; - int statistics=0; - int trim_direction=TOP; - float **sim_weight; - int *seq_list; - int table=0; - - - - - /* - mode: - (trim)__%_n_w - */ - - - - seq_list=vcalloc ( S->nseq, sizeof (int)); - for ( a=0; a< A->nseq; a++) - { - seq_list[a]=1; - } - - - use_aln=aln_is_aligned(A); - - if ( mode[0]=='\0') - { - - upper_sim=50; - lower_sim=0; - min_nseq=0; - sprintf (weight_mode, "pwsim"); - sprintf ( method, "clustering2"); - } - else - { - - upper_sim=lower_sim=min_nseq; - sprintf (weight_mode, "pwsim"); - sprintf ( method, "clustering2"); - } - - /* - U or % (deprecated) Upper bound for pairwise similarity - L or m (depercated) Lower bound for pairwise similarity - n max number of sequences - N max number of sequences as a fraction of thet total - S print Statistics - T print Table of distances - */ - - - - while ( (p=strtok(mode, "_"))) - { - mode=NULL; - if (strm (p, "seq"))use_aln=0; - else if ( strm(p,"aln"))use_aln=1; - else if (p[0]=='s')statistics=1; - else if (p[0]=='t')table=1; - else if (p[0]=='U')upper_sim=atoi(p+1); - else if (p[0]=='L')lower_sim=atoi(p+1); - else if (p[0]=='n')min_nseq=atoi(p+1); - else if (p[0]=='N')min_nseq=atoi(p+1)*-1; - else if (p[0]=='B')trim_direction=BOTTOM; - else if (p[0]=='T')trim_direction=TOP; - else if (p[0]=='W')sprintf (weight_mode, "%s", p+1); - else if (p[0]=='M')sprintf (method, "%s", p+1); - else if (p[0]=='K') - { - - while ((p=strtok(NULL, ":"))) - { - - if ( p[0]=='#') - { - seq_list[atoi(p+1)-1]=2; - } - else if ( (a=name_is_in_list (p, A->name, A->nseq, 100))!=-1) - - { - seq_list[a]=2; - } - } - } - } - - if ( !upper_sim && !min_nseq && !lower_sim)upper_sim=50; - - - - if (!S) - { - fprintf ( stderr, "\ntrimseq requires a set of sequences[FATAL:%s]\n", PROGRAM); - crash(""); - } - - else if ( min_nseq> S->nseq) - { - min_nseq=S->nseq; - } - else if ( min_nseq<0) - { - if ( min_nseq<-100) - { - add_warning ( stderr, "\nWARNING: trimseq: Nseq(N) max_val=100%% [Automatic reset]\n"); - min_nseq=-100; - } - - min_nseq=(int)((float)S->nseq*((float)min_nseq/100)*-1); - } - - - NA=seq2subseq3 (A, S,use_aln,lower_sim,upper_sim,min_nseq,trim_direction, weight_mode,&sim_weight, seq_list ); - - if ( table) - { - fprintf ( stderr, "\nSIMILARITY MATRIX\n"); - for ( a=0; a< A->nseq-1; a++) - for ( b=a+1; b< A->nseq; b++) - { - fprintf ( stderr, "%15s Vs %15s : %3.2f %% id\n", A->name[a], A->name[b], 100-sim_weight[a][b]); - } - } - if ( statistics) - { - f_upper_sim=(upper_sim>100)?((float)upper_sim/(float)100):upper_sim; - f_lower_sim=(upper_sim>100)?((float)lower_sim/(float)100):lower_sim; - - fprintf ( stderr, "\nTRIM Informations:\n"); - fprintf ( stderr, "\tUse...........: %s\n",(use_aln)?"multiple_aln":"pairwise_aln"); - fprintf ( stderr, "\tcluster_mode..: %s\n" ,method); - fprintf ( stderr, "\tsim_mode......: %s\n" ,weight_mode); - fprintf ( stderr, "\tlower_id_bound: %.2f%%\n" ,(f_lower_sim==0)?-1:f_lower_sim); - fprintf ( stderr, "\tupper_id_bound: %.2f%%\n",(f_upper_sim==0)?-1:f_upper_sim); - fprintf ( stderr, "\tnseq_kept.....: %d (out of %d)\n" ,NA->nseq, S->nseq); - fprintf ( stderr, "\treduction.....: %d%% of original set\n" ,(NA->nseq*100)/S->nseq); - fprintf ( stderr, "\tTrim_direction: From %s \n" ,(trim_direction==BOTTOM)?"Bottom":"Top"); - } - - return NA; - } - -Alignment *sim_filter (Alignment *A, char *in_mode, char *seq) -{ - int **sim, **cov; - int *list; - int *keep; - int maxnseq, nseq_ratio, nc; - int new_nseq; - int a, s, n, k; - Alignment *R; - char *mode; - int outlayers; - int direction=1;//remove the higher than - int coverage=0; //remove based on coverage - static char *field; - int maxsim, minsim, maxcov, mincov; - - if ( !field) field=vcalloc (1000, sizeof (char)); - - mode=vcalloc ( strlen (in_mode)+10, sizeof (char)); - sprintf ( mode, "_%s_", in_mode); - - strget_param ( mode, "_I", "100", "%d", &maxsim); - strget_param ( mode, "_i", "0", "%d", &minsim); - strget_param ( mode, "_C", "100", "%d", &maxcov); - strget_param ( mode, "_c", "0", "%d", &mincov); - - - - - - keep=vcalloc ( A->nseq, sizeof (int)); - list=vcalloc ( A->nseq, sizeof (int)); - - - - - - - if (!seq)s=0; - else s=name_is_in_list (seq, A->name, A->nseq, 100); - if (s==-1) - { - - if ( s==-1)printf_exit (EXIT_FAILURE, stderr, "ERROR: %s is not a valid sequence", seq); - } - else - keep[s]=1; - - //get the distances - if ( strstr (mode, "_seq_")) - { - char **seq; - int **M; - - M=read_matrice ("blosum62mt"); - seq=declare_char (A->nseq, A->len_aln+1); - for (a=0; anseq; a++) - { - sprintf ( seq[a], "%s", A->seq_al[a]); - ungap (seq[a]); - } - - sim=declare_int (A->nseq, A->nseq); - cov=declare_int (A->nseq, A->nseq); - - for (a=0; anseq; a++) - { - if ( s!=a) - { - sim[s][a]=sim[a][s]=idscore_pairseq(seq[s], seq[a],-12, -1,M,"sim"); - cov[s][a]=cov[a][s]=idscore_pairseq(seq[s], seq[a],-12, -1,M,"cov"); - - } - } - free_char (seq, -1); - free_int (M,-1); - } - else - { - sim=aln2sim_mat (A, "idmat"); - cov=aln2cov (A); - } - - for (a=0; a< A->nseq; a++) - { - if (a==s)continue; - else - { - if ( sim[s][a]>maxsim || sim[s][a]maxcov)keep[a]=-1; - else keep[a]=1; - } - } - - for ( n=0, a=0; a< A->nseq; a++) - { - if ( keep[a]!=-1) - { - list[n++]=a; - } - } - - R=extract_sub_aln (A, n, list); - free_int (sim, -1); free_int (cov, -1);vfree (list); - - return R; -} - - -static int find_worst_seq ( int **sim, int n, int *keep, int max, int direction); -Alignment *simple_trimseq (Alignment *A, Alignment *K, char *in_mode, char *seq_list) -{ - int **sim; - int *list; - int *keep; - int maxnseq, maxsim, nseq_ratio, nc; - int new_nseq; - int a,b, s, n, k; - Alignment *R; - char *mode; - int outlayers; - int direction=1;//remove the higher than - int coverage=0; //remove based on coverage - static char *field; - int *tot_avg; - - if ( !field) field=vcalloc (1000, sizeof (char)); - - mode=vcalloc ( strlen (in_mode)+10, sizeof (char)); - sprintf ( mode, "_%s_", in_mode); - - strget_param ( mode, "_%%", "0", "%d", &maxsim); - strget_param ( mode, "_n", "0", "%d", &maxnseq); - strget_param ( mode, "_N", "0", "%d", &nseq_ratio); - strget_param ( mode, "_F", "0", "%d", &nc); - strget_param ( mode, "_O", "0", "%d", &outlayers); - strget_param ( mode, "_f", "NAME", "%s", field); - - if ( strstr (mode, "_min"))direction=-1; - else direction=1; - - if ( strstr (mode, "_cov"))coverage=1; - else coverage=0; - - - if ( nseq_ratio) - { - maxnseq=(A->nseq*nseq_ratio)/100; - maxsim=0; - } - else if ( maxnseq) - { - maxsim=0; - } - else if ( !maxsim) - { - maxsim=100; - } - - - keep=vcalloc ( A->nseq, sizeof (int)); - list=vcalloc ( A->nseq, sizeof (int)); - - - - - /*Remove Sequences that do not have at least one residue in the first and last nc columns*/ - if ( nc) - { - int left, right, full_n,x, y; - int *full_list; - - Alignment *F; - - full_list=vcalloc ( A->nseq, sizeof (int)); - full_n=0; - for (x=0; x< A->nseq; x++) - { - for ( left=0,y=0; ylen_aln,nc); y++) - if (!is_gap(A->seq_al[x][y]))left=1; - - for ( right=0,y=MAX(0,(A->len_aln-nc)); ylen_aln; y++) - if (!is_gap(A->seq_al[x][y]))right=1; - - if ( left && right)full_list[full_n++]=x; - } - F=extract_sub_aln (A, full_n, full_list); - free_aln (A); - vfree (full_list); - A=F; - } - - /*Reorder the sequences according to the tree order: hopefully better phylogenetic coverage after trim*/ - if (strstr (mode, "_T")) - { - NT_node **T; - Sequence *O; - - sim=sim_array2dist_array ( sim, MAXID); - T=int_dist2nj_tree (sim, A->name, A->nseq, NULL); - O=tree2seq (T[3][0], NULL); - A=reorder_aln (A, O->name, O->nseq); - - free_int (sim, -1); - free_sequence (O, -1); - } - - if ( coverage==0) - { - if ( strstr (mode, "seq_"))sim=seq2comp_mat (aln2seq(A), "blosum62mt", "sim"); - else sim=aln2sim_mat (A, "idmat"); - } - else - { - int b; - if ( strstr (mode, "seq_"))sim=seq2comp_mat (aln2seq(A), "blosum62mt", "cov"); - else sim=aln2cov (A); - - } - - - if ( K && K->nseq>0) - { - for ( a=0; a< K->nseq; a++) - if ( (k=name_is_in_list (K->name[a], A->name, A->nseq, MAXNAMES+1))!=-1) - { - - keep[k]=1; - } - } - if ( seq_list) - { - for ( a=0; a< A->nseq; a++) - { - if (strstr (field, "NAME") && perl_strstr (A->name[a], seq_list)){keep[a]=1;} - else if (strstr (field, "COMMENT") && A->seq_comment && perl_strstr(A->seq_comment[a], seq_list)){keep[a]=1;} - else if (strstr (field, "SEQ") && perl_strstr((A->S)->seq[a], seq_list)){keep[a]=1;} - } - - - } - for ( a=0; a< A->nseq; a++) - if ( keep[a]) fprintf ( stderr, "\nFORCED KEEP %s", A->name[a]); - - new_nseq=A->nseq; - - - while ( (s=find_worst_seq (sim, A->nseq, keep, maxsim, direction))!=-1 && new_nseq>maxnseq) - { - for ( a=0; a< A->nseq; a++)sim[a][s]=sim[s][a]=-1; - keep[s]=-1; - new_nseq--; - } - - /*Trim Outlayers*/ - if (outlayers!=0) - { - int nn, b; - tot_avg=vcalloc ( A->nseq, sizeof (int)); - - for (a=0; anseq; a++) - { - if ( keep[a]==-1)tot_avg[a]=-1; - else - { - for (nn=0, b=0; b< A->nseq; b++) - { - if (a==b || keep[b]==-1)continue; - else - { - tot_avg[a]+=sim[a][b]; - nn++; - } - } - tot_avg[a]=(nn==0)?-1:(tot_avg[a])/nn; - } - } - for ( a=0; anseq; a++) - { - if (tot_avg[a]!=-1 && tot_avg[a]name[a]); - keep[a]=-1; - } - } - vfree ( tot_avg); - } - - for ( n=0, a=0; a< A->nseq; a++) - { - if ( keep[a]!=-1) - { - list[n++]=a; - } - } - - R=extract_sub_aln (A, n, list); - free_int (sim, -1); vfree (list); - - return R; -} - -int find_worst_seq ( int **sim, int n, int *keep,int max,int direction) -{ - int **sc; - int a, b, r=0; - int si; - - sc=declare_int (n, 2); - if (direction==-1)max=100-max; - - for ( a=0; a< n; a++) sc[a][0]=a; - for ( a=0; a< n-1; a++) - { - for ( b=a+1; b=0)si=(direction==-1)?100-sim[a][b]:sim[a][b]; - else si=sim[a][b]; - if ( si>max) - { - if ( keep[a]!=1)sc[a][1]+=si; - if ( keep[b]!=1)sc[b][1]+=si; - } - } - } - - sort_int_inv ( sc, 2, 1, 0, n-1); - if ( sc[0][1]>0)r=sc[0][0]; - else r=-1; - - free_int (sc, -1); - if (r!=-1 && keep && keep[r])return -1; - else return r; -} - -int find_worst_seq_old ( int **sim, int n, int *keep,int max,int direction) -{ - int **sc; - int a, b, r=0; - - sc=declare_int (n, 2); - - for ( a=0; a< n; a++) sc[a][0]=a; - for ( a=0; a< n-1; a++) - { - for ( b=a+1; bmax) - { - if ( keep[a]!=1)sc[a][1]+=sim[a][b]; - if ( keep[b]!=1)sc[b][1]+=sim[a][b]; - } - } - else if ( direction == -1) - { - if ( sim[a][b]=0) - { - if ( keep[a]!=1)sc[a][1]+=sim[a][b]; - if ( keep[b]!=1)sc[b][1]+=sim[a][b]; - } - } - } - } - - if ( direction ==1) //remove max - { - sort_int_inv ( sc, 2, 1, 0, n-1); - if ( sc[0][1]>0)r=sc[0][0]; - else r=-1; - - } - else if ( direction ==-1)//remove min - { - sort_int_inv ( sc, 2, 1, 0, n-1); - if ( sc[0][1]>=0)r=sc[0][0]; - else r=-1; - HERE ("** %d %d\n", r,sc[0][1]); - } - free_int (sc, -1); - if (r!=-1 && keep && keep[r])return -1; - else return r; -} - - -Alignment * trimseq( Alignment *A, Sequence *S,char *mode) - { - Alignment *NA; - char *p; - int a, b; - int use_aln=0, upper_sim=0, min_nseq=0, lower_sim=0; - char weight_mode[1000]; - char method[1000]; - int statistics=0; - int trim_direction=TOP; - float **sim_weight; - int *seq_list; - int table=0; - int print_name=0; - float f_lower_sim, f_upper_sim; - - - - /* - mode: - (trim)__%_n_w - */ - - - - seq_list=vcalloc ( S->nseq, sizeof (int)); - for ( a=0; a< A->nseq; a++) - { - seq_list[a]=1; - } - - - use_aln=aln_is_aligned(A); - - - if ( mode[0]=='\0') - { - - upper_sim=50; - lower_sim=0; - min_nseq=0; - sprintf (weight_mode, "pwsim_fragment"); - sprintf ( method, "clustering2"); - } - else - { - - upper_sim=lower_sim=min_nseq; - sprintf (weight_mode, "pwsim_fragment"); - sprintf ( method, "clustering2"); - } - - /* - U or % (deprecated) Upper bound for pairwise similarity - L or m (depercated) Lower bound for pairwise similarity - n max number of sequences - N max number of sequences as a fraction of thet total - S print Statistics - T print Table of distances - */ - - - - while ( (p=strtok(mode, "_"))) - { - mode=NULL; - if (strm (p, "seq"))use_aln=0; - else if ( strm(p,"aln"))use_aln=1; - else if (p[0]=='s')statistics=1; - else if (p[0]=='t')table=1; - else if (p[0]=='p')print_name=1; - else if (p[0]=='U')upper_sim=atoi(p+1); - else if (p[0]=='L')lower_sim=atoi(p+1); - else if (p[0]=='n')min_nseq=atoi(p+1); - else if (p[0]=='N')min_nseq=atoi(p+1)*-1; - else if (p[0]=='B')trim_direction=BOTTOM; - else if (p[0]=='T')trim_direction=TOP; - else if (p[0]=='W')sprintf (weight_mode, "%s", p+1); - else if (p[0]=='M')sprintf (method, "%s", p+1); - else if (p[0]=='K') - { - - while ((p=strtok(NULL, ":"))) - { - - if ( (a=name_is_in_list (p, A->name, A->nseq, 100))!=-1) - { - seq_list[a]=2; - } - } - } - } - - if ( !upper_sim && !min_nseq && !lower_sim)upper_sim=50; - - - - if (!S) - { - fprintf ( stderr, "\ntrimseq requires a set of sequences[FATAL:%s]\n", PROGRAM); - crash(""); - } - - else if ( min_nseq> S->nseq) - { - min_nseq=S->nseq; - } - else if ( min_nseq<0) - { - if ( min_nseq<-100) - { - add_warning ( stderr, "\nWARNING: trimseq: Nseq(N) max_val=100%% [Automatic reset]\n"); - min_nseq=-100; - } - - min_nseq=(int)((float)S->nseq*((float)min_nseq/100)*-1); - } - - - NA=seq2subseq2 (A, S,use_aln,lower_sim,upper_sim,min_nseq,trim_direction, weight_mode,&sim_weight, seq_list ); - - if ( table) - { - fprintf ( stderr, "\nSIMILARITY MATRIX\n"); - for ( a=0; a< A->nseq-1; a++) - for ( b=a+1; b< A->nseq; b++) - { - fprintf ( stderr, "%15s Vs %15s : %3.2f %% id\n", A->name[a], A->name[b], 100-sim_weight[a][b]); - } - } - - NA=seq_name2removed_seq_name(S, NA,sim_weight); - - if ( print_name) - { - fprintf ( stderr, "\nList of sequences with their closest removed neighbors\n"); - for ( a=0; a< NA->nseq; a++)fprintf ( stderr, "\n%s: %s\n", NA->name[a], NA->seq_comment[a]); - } - - if ( statistics) - { - f_lower_sim=(lower_sim>100)?(float)lower_sim/100:lower_sim; - f_upper_sim=(upper_sim>100)?(float)upper_sim/100:upper_sim; - - fprintf ( stderr, "\nTRIM seq Informations:\n"); - fprintf ( stderr, "\tUse...........: %s\n",(use_aln)?"multiple_aln":"pairwise_aln"); - fprintf ( stderr, "\tcluster_mode..: %s\n" ,method); - fprintf ( stderr, "\tsim_mode......: %s\n" ,weight_mode); - fprintf ( stderr, "\tlower_id_bound: %.2f%%\n" ,(f_lower_sim==0)?-1:f_lower_sim); - fprintf ( stderr, "\tupper_id_bound: %.2f%%\n",(f_upper_sim==0)?-1:f_upper_sim); - fprintf ( stderr, "\tnseq_kept.....: %d (out of %d)\n" ,NA->nseq, S->nseq); - fprintf ( stderr, "\treduction.....: %d%% of original set\n" ,(NA->nseq*100)/S->nseq); - fprintf ( stderr, "\tTrim_direction: From %s \n" ,(trim_direction==BOTTOM)?"Bottom":"Top"); - } - - return NA; - } - -Alignment * tc_trimseq( Alignment *A, Sequence *S,char *mode) - { - Alignment *NA; - Sequence *TS; - char *trimfile, *alnfile; - int *seq_list; - int a, nseq=0, sim=0; - char *p; - char command[100000]; - char keep_list[10000]; - - int top, bottom, middle, pmiddle; - - keep_list[0]='\0'; - - seq_list=vcalloc ( S->nseq, sizeof (int)); - for ( a=0; a< A->nseq; a++) - { - seq_list[a]=1; - } - - trimfile=vtmpnam (NULL); - alnfile=vtmpnam (NULL); - if ( !aln_is_aligned (A)) - { - fprintf ( stderr, "\ntrimTC: computation of an Approximate MSA ["); - A=compute_tcoffee_aln_quick ( A, NULL); - fprintf ( stderr, "DONE]\n"); - } - output_clustal_aln (alnfile, A); - - - while ( (p=strtok(mode, "#"))) - { - mode=NULL; - - - if (p[0]=='%' || p[0]=='S')sim=(p[1]=='%')?atoi(p+2):atoi(p+1); - else if (p[0]=='n' || p[0]=='N')nseq=atoi(p+1); - else if (p[0]=='K') - { - if ( (a=name_is_in_list (p+1, A->name, A->nseq, 100))!=-1) - { - seq_list[a]=2; - } - - } - } - if ( nseq ==0 && sim ==0) - { - fprintf ( stderr, "\nERROR: trimTC\nIndicate the maximum number of sequences Nnseq\nOR the maximum average similarity of the chosen sequencesSx\nEX: +trimTC S20 OR +trimTC N5"); - fprintf ( stderr, "\n[FATAL:%s]", PROGRAM); - myexit (EXIT_FAILURE); - } - - for ( a=0; anseq; a++)if (seq_list[a]==2){strcat ( keep_list, A->name[a]);strcat ( keep_list," ");} - - if ( sim) - { - sprintf ( command , "%s -infile %s -trim -trimfile=%s -split_score_thres %d -convert -iterate 0 ",get_string_variable("t_coffee"), alnfile, trimfile,sim); - if ( keep_list[0]){strcat ( command, " -seq_to_keep ");strcat ( command, keep_list);} - my_system ( command); - TS=read_sequences (trimfile); - } - else if ( nseq && A->nseq>nseq) - { - - top=100;bottom=0; - pmiddle=0;middle=50; - - sprintf ( command , "%s -infile %s -trim -trimfile=%s -split_score_thres %d -convert -iterate 0",get_string_variable("t_coffee"), alnfile, trimfile,middle); - if ( keep_list[0]){strcat ( command, " -seq_to_keep ");strcat ( command, keep_list);} - my_system ( command); - - TS=read_sequences (trimfile); - fprintf ( stderr, "\n\tTrimTC: Sim %d Nseq %d\t",middle, TS->nseq); - - if ( TS->nseq>nseq)top=middle; - else if ( TS->nseqnseq!=nseq && pmiddle!=middle) - { - - sprintf ( command , "%s -infile %s -trim -trimfile=%s -split_score_thres %d -convert -iterate 0 ",get_string_variable("t_coffee"), alnfile, trimfile,middle); - if ( keep_list[0]){strcat ( command, " -seq_to_keep ");strcat ( command, keep_list);} - my_system ( command); - free_sequence (TS, -1); - TS=read_sequences (trimfile); - fprintf ( stderr, "\n\tTrimTC: Sim %d Nseq %d\t", middle, TS->nseq); - - if ( TS->nseq>nseq)top=middle; - else if ( TS->nseq100)?(float)int_lower_sim/100:int_lower_sim; - upper_sim=(int_upper_sim>100)?(float)int_upper_sim/100:int_upper_sim; - - sim_weight[0]=get_weight ((use_aln)?A:NULL, S, weight_mode); - - name=declare_char (S->nseq, (MAXNAMES+1)); - seq= declare_char (S->nseq, S->max_len+1); - - /* - Remove every sequence that is more than upper_sim and less than lower_sim similar to the master sequences - the master sequence(s) are those for which seq_list[x]==2 - */ - - - - - new_nseq=A->nseq; - - - for (a=0; a< A->nseq; a++) - { - if ( seq_list[a]==2) - { - - for ( b=0; b< A->nseq;b++) - { - sim=100-sim_weight[0][a][b]; - if (seq_list[b]==1 && (sim>upper_sim || simnseq; a++) - { - if ( seq_list[a]) - { - sprintf ( name[b], "%s", S->name[a]); - sprintf ( seq[b] , "%s",(use_aln)?A->seq_al[a]: S->seq[a] ); - b++; - } - } - - - NS=fill_sequence_struc (new_nseq,seq,name); - NA=seq2aln(NS,NULL,1); - - if ( use_aln && A) - { - NA=realloc_aln2 ( NA,A->max_n_seq,A->len_aln+1); - - for (b=0, a=0; anseq; a++) - { - if ( seq_list[a]) - { - sprintf ( NA->seq_al[b] , "%s",A->seq_al[a]); - b++; - } - } - - NA->len_aln=A->len_aln; - ungap_aln(NA); - } - - - return NA; -} -Alignment* seq2subseq2( Alignment *A, Sequence *S,int use_aln, int int_lower_sim,int int_upper_sim, int min_nseq, int trim_direction, char *weight_mode, float ***sim_weight, int *seq_list) -{ - int a, b; - int new_nseq; - int seq_index=0; - /*OUTPUT*/ - char **seq, **name; - Sequence *NS; - Alignment *NA; - float lower_sim, upper_sim; - - lower_sim=(int_lower_sim>100)?(float)int_lower_sim/100:int_lower_sim; - upper_sim=(int_upper_sim>100)?(float)int_upper_sim/100:int_upper_sim; - - - sim_weight[0]=get_weight ((use_aln)?A:NULL, S, weight_mode); - - name=declare_char (S->nseq, (MAXNAMES+1)); - seq= declare_char (S->nseq, S->max_len+1); - - /* - 1 REMOVE OUTLAYERS - 2 REMOVE CLOSELY RELATED SEQUENCES - 3 IF STILL TOO MANY SEQUENCES: - REMOVE THE MOST CLOSELY RELATED ONES - */ - - - /*1 Remove outlayers*/ - - new_nseq=A->nseq; - - - /*1 Remove outlayers*/ - while ( lower_sim && (extreme_seq(BOTTOM,A,sim_weight[0],seq_list, &seq_index) min_nseq) && seq_index!=-1) - { - - if ( seq_list[seq_index]==1) - { - seq_list[seq_index]=0; - new_nseq--; - } - } - /*2 Remove close relative*/ - - - while ( upper_sim && (extreme_seq(TOP, A,sim_weight[0],seq_list, &seq_index)>upper_sim) && ((new_nseq)>min_nseq)&& seq_index!=-1) - { - - if ( seq_list[seq_index]==1) - { - seq_list[seq_index]=0; - new_nseq--; - } - } - - - /*Remove extra sequences*/ - - while ( min_nseq>0 && new_nseq>min_nseq && seq_index!=-1) - { - - extreme_seq(trim_direction, A,sim_weight[0],seq_list, &seq_index); - - if ( seq_index==-1)break; - if ( seq_list[seq_index]==1) - { - seq_list[seq_index]=0; - new_nseq--; - } - } - - - /*Prepare the new sequence List*/ - - for (b=0, a=0; anseq; a++) - { - if ( seq_list[a]) - { - sprintf ( name[b], "%s", S->name[a]); - sprintf ( seq[b] , "%s",(use_aln)?A->seq_al[a]: S->seq[a] ); - b++; - } - } - - - NS=fill_sequence_struc (new_nseq,seq,name); - NA=seq2aln(NS,NULL,1); - - if ( use_aln && A) - { - NA=realloc_aln2 ( NA,A->max_n_seq,A->len_aln+1); - - for (b=0, a=0; anseq; a++) - { - if ( seq_list[a]) - { - sprintf ( NA->seq_al[b],"%s",A->seq_al[a]); - b++; - } - } - - NA->len_aln=A->len_aln; - ungap_aln(NA); - } - - - return NA; -} - -float extreme_seq (int direction, Alignment *A,float **sim_weight,int *seq_list, int *seq_index) -{ - - /*find the closest relative of each sequence - Return: - Direction= BOTTOM: the sequence whose closest relative is the most distant - Direction= TOP: the sequence whose closest relative is the closest - weight: different sequences=100 - similar sequences =0 - */ - int a, b; - - float top_sim,bottom_sim, best_sim, sim; - int top_seq, bottom_seq; - - bottom_seq=top_seq=seq_index[0]=-1; - top_sim=-1; - bottom_sim=101; - - for (a=0; a< A->nseq; a++) - { - if (seq_list[a]!=1)continue; - - for ( best_sim=0, b=0; b< A->nseq; b++) - { - if ( a==b || !seq_list[b])continue; - - sim=100-sim_weight[a][b]; - if (sim>best_sim) - { - best_sim=sim; - } - } - - if ( best_sim>top_sim) - { - top_seq=a; - top_sim=best_sim; - } - - if ( best_sim_%_n_w - */ - - sim_weight=get_weight ((use_aln)?A:NULL, S, weight_mode); - pw_weight=declare_float (S->nseq, S->nseq); - seq_weight=declare_float ( S->nseq, 2); - - - for (best_score=0,a=0; anseq; a++) - { - for ( b=0; bnseq; b++) - { - if ( a==b)continue; - seq_weight[a][0]+=sim_weight[a][b]; - } - seq_weight[a][0]=seq_weight[a][0]/(S->nseq-1); - score=seq_weight[a][0]=100-seq_weight[a][0]; - - if ( score>best_score) - { - best_seq=a; - best_score=score; - } - - } - for (a=0; anseq; a++) - { - for ( b=0; bnseq; b++) - { - if ( a==b)continue; - pw_weight[a][b]=sim_weight[a][b]*seq_weight[a][0]*seq_weight[b][0]/(100*100); - - } - } - - - seq_list=vcalloc ( S->nseq, sizeof (int)); - used_seq_list=vcalloc ( S->nseq, sizeof (int)); - - - - name=declare_char (S->nseq, (MAXNAMES+1)); - seq= declare_char (S->nseq, S->max_len+1); - - /*compute the normalization factor*/ - for (sum=0,d=0; d< S->nseq; d++) - { - for (score=0,c=0; cnseq; c++) - { - if ( c!=d) - score=MAX(score, 100-sim_weight[c][d]); - } - sum+=score; - } - sum=sum/S->nseq; - /*chose the first sequence */ - for ( best_score=0,a=0; a< S->nseq; a++) - { - for (score=0, b=0; b< S->nseq; b++) - { - score+=100-sim_weight[a][b]; - } - if ( score>best_score) - { - best_seq=a; - best_score=score; - } - - } - - - last_chosen=chosen=((best_score/S->nseq)*100)/sum; - nchosen=last_nchosen=1; - seq_list[0]=best_seq; - used_seq_list[best_seq]=1; - - sprintf ( name[0],"%s", S->name[seq_list[0]]); - sprintf ( seq[0],"%s", S->seq[seq_list[0]]); - nchosen=last_nchosen=1; - - - fprintf ( stderr, "\nTRIM:\n"); - fprintf ( stderr, "\n1-Chosen Sequences\n"); - /*Assemble the list of sequences*/ - for (a=1; a< S->nseq; a++) - { - for (best_score=0,b=0; b< S->nseq; b++) - { - if (used_seq_list[b]); - else - { - score=pw_weight[seq_list[0]][b]+1; - for (c=0; c=best_score) - { - best_seq=b; - best_score=score; - } - - } - } - seq_list[a]=best_seq; - used_seq_list[best_seq]=1; - - - - for ( chosen=0,d=0; d< S->nseq; d++) - { - for (score=0, c=0; c<=a; c++) - { - if ( seq_list[c]!=d) - score=MAX(score, 100-sim_weight[seq_list[c]][d]); - } - chosen+=score; - - } - - chosen=((chosen/S->nseq)*100)/sum; - nchosen=a+1; - - condition1= (int)chosen<=(int)percent || !percent; - condition2=(nchosen)<=max_nseq || !max_nseq; - - if (condition1 && condition2) - { - fprintf ( stderr, "\tADD %s (set score: %.2f %%)\n", S->name[seq_list[a]], chosen); - sprintf ( name[a],"%s", S->name[seq_list[a]]); - sprintf ( seq[a],"%s", S->seq[seq_list[a]]); - - } - else - { - break; - } - last_chosen=chosen; - last_nchosen=nchosen; - } - - NS=fill_sequence_struc (last_nchosen,seq,name); - NA=seq2aln(NS,NULL,1); - fprintf ( stderr, "\n2-Informations:\n"); - fprintf ( stderr, "\tUse...........: %s\n",(use_aln)?"multiple_aln":"pairwise_aln"); - fprintf ( stderr, "\tweight_mode...: %s\n" ,weight_mode); - fprintf ( stderr, "\tpercent_weight: %.2f%% (max=%d%%)\n",last_chosen,percent); - fprintf ( stderr, "\tn_seq.........: %d\n" ,NS->nseq); - fprintf ( stderr, "\treduction.....: %d%% of original set\n" ,(NS->nseq*100)/S->nseq); - - return NA; - } -float ** get_weight ( Alignment *A, Sequence *S, char *mode) -{ - char *aln_name; - char *weight_name; - char *seq_name; - char command[LONG_STRING]; - char program[LONG_STRING]; - float **weight; - FILE *fp; - int c; - - if ( !mode || !mode[0] || strm (mode, "msa")) - { - if ( getenv ( "SEQ2MSA_WEIGHT")==NULL)sprintf (program, "%s",SEQ2MSA_WEIGHT); - else sprintf ( program, "%s", (getenv ( "SEQ2MSA_WEIGHT"))); - } - else if ( strm(mode, "pwsim") ||strm(mode, "pwsim_fragment") ) - { - return seq2pwsim (A, S, mode); - } - else - { - if (getenv (mode))sprintf ( program, "%s", (getenv (mode))); - else fprintf ( stderr, "\nERROR: %s is not a valid mode for weight computation [FATAL:%s]", mode, PROGRAM); - } - - /*MSA weights*/ - seq_name=vtmpnam(NULL); - aln_name=vtmpnam(NULL); - weight_name=vtmpnam(NULL); - weight=declare_float (S->nseq+1, 2); - - - - if (A) - { - output_clustal_aln (seq_name,A); - output_fasta_seq (aln_name,A); - sprintf ( command, "%s %s -i %s -w %s", program, seq_name, aln_name, weight_name); - } - else - { - A=seq2aln(S,A,1); - output_fasta_seq (seq_name,A); - sprintf ( command, "%s %s -w %s", program, seq_name, weight_name); - } - - - my_system ( command); - - fp=vfopen( weight_name, "r"); - while ( (c=fgetc(fp))!='$'); - c=fgetc(fp); - c=0; - while ( (fscanf (fp, "%*s %f\n",&(weight[c][1])))==1) - {weight[c][0]=c;c++;} - vfclose (fp); - - - return weight; -} - -float **seq2pwsim ( Alignment *A, Sequence *S, char *mode) -{ - int a, b, c; - float d,t; - float **W; - Alignment *B; - W=declare_float (S->nseq, S->nseq); - - - - for (a=0; a< S->nseq; a++) - for ( b=a; bnseq; b++) - { - if ( a==b){d=1;} - else if (!A) - { - - B=align_two_sequences ((S)->seq[a], (S)->seq[b],"pam250mt", -10, -1, "fasta_pair_wise"); - for (t=0,d=0,c=0; clen_aln; c++) - { - d+=(B->seq_al[0][c]==B->seq_al[1][c] && !is_gap(B->seq_al[0][c])); - t+=(!is_gap(B->seq_al[0][c]) && !is_gap(B->seq_al[1][c])); - } - t=(strm ( mode, "pwsim_fragment"))?B->len_aln:t; - - d=d/((t==0)?1:t); - free_aln(B); - } - else - { - for (t=0,d=0,c=0; clen_aln; c++) - { - d+=(A->seq_al[a][c]==A->seq_al[b][c] && !is_gap(A->seq_al[a][c])); - t+=(!is_gap(A->seq_al[a][c]) && !is_gap(A->seq_al[b][c])); - } - d=d/((t==0)?1:t); - } - - - W[a][b]=W[b][a]=(1-d)*100; - } - - - return W; - -} - -float **seq2pwsim_fragment ( Alignment *A, Sequence *S, char *mode) -{ - - - int a, b, c; - float d,t; - float **W; - Alignment *B; - W=declare_float (S->nseq, S->nseq); - - - - - for (a=0; a< S->nseq; a++) - for ( b=a; bnseq; b++) - { - if ( a==b){d=1;} - else if (!A) - { - - B=align_two_sequences ((S)->seq[a], (S)->seq[b],"pam250mt", -10, -1, "fasta_pair_wise"); - for (t=0,d=0,c=0; clen_aln; c++) - { - d+=(B->seq_al[0][c]==B->seq_al[1][c] && !is_gap(B->seq_al[0][c])); - t+=(!is_gap(B->seq_al[0][c]) && !is_gap(B->seq_al[1][c])); - } - - d=d/((t==0)?1:t); - free_aln(B); - } - else - { - for (t=0,d=0,c=0; clen_aln; c++) - { - d+=(A->seq_al[a][c]==A->seq_al[b][c] && !is_gap(A->seq_al[a][c])); - t+=(!is_gap(A->seq_al[a][c]) && !is_gap(A->seq_al[b][c])); - } - d=d/((t==0)?1:t); - } - - - W[a][b]=W[b][a]=(1-d)*100; - } - - - return W; - -} - -/********************************************************************/ -/* */ -/* AMINO ACID FUNCTIONS */ -/* */ -/* */ -/* */ -/********************************************************************/ -//Builds an extended alphabet from a string -char** string2alphabet (char *string, int depth, int *falp_size) -{ - int max_s; - int a, b,c, l, n; - char buf[1000]; - char **alp; - int alp_size; - - char ***alp2; - int *alp2_size; - - int *array; - char **falp; - - - l=strlen (string); - array=vcalloc ( 256, sizeof (int)); - - - max_s=l+1; - falp_size[0]=0; - falp=declare_char (l+1, 2); - - alp=declare_char(l,2); - alp_size=0; - - array=vcalloc ( 256, sizeof (int)); - for (a=0;a0) - { - for ( c=0,b=0;b< 26; b++) - { - - if ( matrix[a][b]>0 && matrix[b][b]>0) - { - buf[c++]=b+'A'; - buf[c++]=b+'a'; - } - } - buf[c]='\0'; - for ( is_in=0,b=0; b< ngroup[0]; b++)if ( strcmp (buf, group_list[b])==0)is_in=1; - if (is_in==0)sprintf ( group_list[ngroup[0]++], "%s", buf); - - } - } - free_int (matrix, -1); - vfree (matrix_name); - - return group_list; - } -char** make_group_aa_upgma (char*matrix, int max_n) - { - char **group_list; - int **mat; - int *used; - int a, b, ba, bb, best, set, l, n; - l=26; - - group_list=declare_char (l+1, l+1); - for (a=0; amax_n) - { - for (set=0,a=0; abest) - { - best=mat[a][b]; - ba=a; - bb=b; - set=1; - } - } - - for (a=0; ac)?d[a]:c; - return c; - } - - - -int is_in_same_group_aa ( char r1, char r2, int n_group, char **gl, char *mode) - { - int a; - static char **lgl; - static int ln_group; - - char **gl2; - int n_group2; - - /*use mode=idmat for similarity based on id*/ - - r1=toupper(r1); - r2=toupper(r2); - if (mode==NULL)return (r1==r2)?1:0; - - if ( strm (mode, "clean")) - { - free_char (lgl, -1); - lgl=NULL; - ln_group=0; - return 0; - } - else if ( strstr (mode, "cov")) - { - return 1; - } - - if ( lgl==NULL) - { - lgl=make_group_aa ( &ln_group, mode); - } - - if ( gl==NULL) - { - gl2=lgl; - n_group2=ln_group; - } - else - { - gl2=gl; - n_group2=n_group; - } - - for ( a=0; a< n_group2; a++) - if ( is_in_set ( r1, gl2[a]) && is_in_set ( r2, gl2[a]))return 1; - return 0; - } - - -Alignment * gene2prot (Alignment *A){return A; } -char * test_gene2prot (Constraint_list *CL, int s1) - { - int a, b,q, nal; - int F=-10000000; /*FORBIDEN STATE*/ - int AL=0; /*ALLOWED STATE*/ - int SPLICE_PENALTY=1000; - int FRAME_PENALTY=1000; - - - int START, ORF1, ORF2, ORF3, s5NC; - int s3NC,ORF3_G1, ORF3_T2, ORF3_NC, ORF3_A3, ORF3_T4; - int U1_G1, U1_T2, U1_NC, U1_A3, U1_T4; - int U2_G1, U2_T2, U2_NC, U2_A3, U2_T4; - int U1, U2, U3, U4, U5, END; - - int nstate=0; - int **transitions; - int **v_tab; - int **v_tab_p; - int **last_coding; - int **last_t4; - int *potential; - int v; - - int orf1, orf2, orf3, ncp, p, state, pstate, e, best_state_p=0, best_state_v=0, best_pstate_p=0, best_pstate_v; - char *seq, *seq2, *seq3; - int l; - int *is_coding; - int *is_t4; - char *codon; - - static int *entry; - int tot=0; - - seq=vcalloc ( strlen ((CL->S)->seq[s1])+1, sizeof (char)); - seq2=vcalloc ( strlen ((CL->S)->seq[s1])+1, sizeof (char)); - seq3=vcalloc ( strlen ((CL->S)->seq[s1])+1, sizeof (char)); - sprintf ( seq, "%s", (CL->S)->seq[s1]); - ungap (seq); - - l=strlen (seq); - for ( a=0; a< l; a++) seq[a]=tolower ( seq[a]); - for ( a=0; a< l; a++) seq[a]=(seq[a]=='t')?'u': seq[a]; - - - potential=vcalloc (l+1, sizeof (int)); - CL=index_constraint_list ( CL); - for (nal=0, a=0; a<(CL->S)->nseq; a++) - for ( b=CL->start_index[s1][a]; b< CL->end_index[s1][a];b++) - { - entry=extract_entry(entry, b, CL); - if ( entry[SEQ1]==s1)potential[entry[R1]-1]+=entry[WE]; - else if ( entry[SEQ2]==s1)potential[entry[R2]-1]+=entry[WE]; - tot+=entry[WE]; - nal++; - } - - - SPLICE_PENALTY=10000; - FRAME_PENALTY=1000; - - - nstate=0; - START=nstate++; ORF1=nstate++; ORF2=nstate++; ORF3=nstate++; s5NC=nstate++; - s3NC=nstate++; - ORF3_G1=nstate++;U1_G1=nstate++;U2_G1=nstate++; - ORF3_T2=nstate++;U1_T2=nstate++;U2_T2=nstate++; - ORF3_NC=nstate++;U1_NC=nstate++;U2_NC=nstate++; - ORF3_A3=nstate++;U1_A3=nstate++;U2_A3=nstate++; - ORF3_T4=nstate++;U1_T4=nstate++;U2_T4=nstate++; - - - U1=nstate++; U2=nstate++; U3=nstate++; U4=nstate++; U5=nstate++; - END=nstate++; - - is_coding=vcalloc ( nstate, sizeof (int)); - is_coding[ORF1]=is_coding[ORF2]=is_coding[ORF3]=is_coding[U1]=is_coding[U2]=1; - is_coding[U3]=is_coding[U4]=is_coding[U5]=1; - - is_t4=vcalloc ( nstate, sizeof (int)); - is_t4[ORF3_T4]=is_t4[U1_T4]=is_t4[U2_T4]=1; - transitions=declare_int ( nstate, nstate); - for (a=0; a< nstate; a++) - for ( b=0; b< nstate; b++)transitions[a][b]=F; - - transitions[START][ORF1]=AL; - transitions[START][s5NC]=AL-FRAME_PENALTY; - transitions[s5NC][s5NC]=AL; - - transitions[s5NC][ORF1]=AL-FRAME_PENALTY; - - transitions[ORF1][ORF2]=AL; - transitions[ORF2][ORF3]=AL; - transitions[ORF3][U1]=AL; - transitions[ORF3][ORF1]=AL; - transitions[ORF3][ORF3_G1]=AL-SPLICE_PENALTY; - - - transitions[ORF3_G1][ORF3_T2]=AL; - transitions[ORF3_T2][ORF3_NC]=AL; - transitions[ORF3_NC][ORF3_NC]=AL; - transitions[ORF3_NC][ORF3_A3]=AL; - transitions[ORF3_A3][ORF3_T4]=AL; - transitions[ORF3_T4][ORF1]=AL-SPLICE_PENALTY; - - transitions[U1][U2]=AL; - transitions[U1][U1_G1]=AL-SPLICE_PENALTY; - transitions[U1_G1][U1_T2]=AL; - transitions[U1_T2][U1_NC]=AL; - transitions[U1_NC][U1_NC]=AL; - transitions[U1_NC][U1_A3]=AL; - transitions[U1_A3][U1_T4]=AL; - transitions[U1_T4][U3]=AL-SPLICE_PENALTY; - transitions[U3][U4]=AL; - transitions[U4][ORF1]=AL; - - transitions[U2][U2_G1]=AL-SPLICE_PENALTY; - transitions[U2_G1][U2_T2]=AL; - transitions[U2_T2][U2_NC]=AL; - transitions[U2_NC][U2_NC]=AL; - transitions[U2_NC][U2_A3]=AL; - transitions[U2_A3][U2_T4]=AL; - transitions[U2_T4][U5]=AL-SPLICE_PENALTY; - transitions[U5][ORF1]=AL; - - transitions[ORF3][s3NC]=AL-FRAME_PENALTY; - transitions[ORF3][END]=AL; - transitions[s3NC][END]=AL; - - - v_tab=declare_int ( l+1,nstate); - v_tab_p=declare_int ( l+1,nstate); - last_coding=declare_int ( l+1,nstate); - last_t4=declare_int ( l+1,nstate); - - for (a=0; a< l; a++) potential[a]-=200; - - codon=vcalloc ( 4, sizeof (char)); - best_pstate_p=START; - best_pstate_v=0; - nal=0; - for ( p=1; p<=l; p++) - { - if (translate_dna_codon (seq+(p-1), 'x')=='x' || p>(l-2))orf1=F; - else orf1=potential[p-1]; - - if (p<2 || translate_dna_codon (seq+(p-2), 'x')=='x' || p>(l-1))orf2=F; - else orf2=potential[p-1]; - - - if (p<3 || translate_dna_codon (seq+(p-3), 'x')=='x' || p>l)orf3=F; - else orf3=potential[p-1]; - - if ( best_int (3, 1, &a, orf1, orf2, orf3)!=F)ncp=-best_int (3, 1, &a, orf1, orf2, orf3); - else ncp=1000; - - for ( state=0; state< nstate; state++) - { - - if ( state==ORF1)e=orf1; - else if ( state==ORF2)e=orf2; - else if ( state==ORF3)e=orf3; - else if ( state>=U1 && state<=U3) - { - e=0; - } - else if ( state==U4) - { - codon[2]=seq[p-1]; - codon[1]=seq[last_coding[p-1][U3]-1]; - codon[0]=seq[last_coding[p-2][U1_T4]-1]; - if ( translate_dna_codon (codon, 'x')=='x')e=F; - else e=0; - } - else if ( state==U5) - { - codon[2]=seq[p-1]; - codon[1]=seq[last_coding[p-1][U2_T4]-1]; - q=seq[last_coding[p-1][U2_T4]]; - codon[0]=seq[last_coding[q-1][U1]-1]; - if ( translate_dna_codon (codon, 'x')=='x')e=F; - else e=0; - } - - else if (state>=ORF3_G1 && state<=U2_G1)e=(p=ORF3_T2 && state<=U2_T2) - { - e=(p>1 && seq[p-2]=='g' && seq[p-1]=='u')?ncp:F; - } - else if ( state>=ORF3_A3 && state<=U2_A3)e=(seq[p-1]=='a')?ncp:F; - else if ( state>=ORF3_T4 && state<=U2_T4)e=(seq[p-1]=='u')?ncp:F; - else e=ncp; - - for ( pstate=0; pstatebest_pstate_v) - {best_pstate_v=v;best_pstate_p=pstate;} - } - v_tab[p][state]=best_pstate_v; - v_tab_p[p][state]=best_pstate_p; - - if (!is_coding[state])last_coding[p][state]=last_coding[p-1][best_pstate_p]; - else if (is_coding[state])last_coding[p][state]=p; - - if (!is_t4[state]) - { - if (is_coding[state] && last_t4[p-1][best_pstate_p]==0)last_t4[p][state]=p; - else last_t4[p][state]=last_t4[p-1][best_pstate_p]; - } - else if (is_t4[state])last_t4[p][state]=p; - - if (state==0 ||best_pstate_v>best_state_v ){best_state_p=state; best_state_v=best_pstate_v;} - } - } - tot=0; - for ( p=l; p>0; p--) - { - if ( best_state_p>=ORF1 && best_state_p<=ORF3){seq2[tot++]=tolower (seq[p-1]);} - else if ( best_state_p>=U1 && best_state_p<=U5){seq2[tot++]=tolower (seq[p-1]);} - if (best_state_p==ORF1)seq[p-1]=toupper (seq[p-1]); - else if (best_state_p==ORF2 || best_state_p==ORF3)seq[p-1]=tolower (seq[p-1]); - else if ( best_state_p==ORF3_NC || best_state_p==U1_NC || best_state_p==U2_NC) seq[p-1]='.'; - else if ( best_state_p==U1 || best_state_p==U2 || best_state_p==U3 || best_state_p==U4 || best_state_p==U5) seq[p-1]=best_state_p-U1+'1'; - else seq[p-1]=toupper (seq[p-1]); - best_state_p=v_tab_p[p][best_state_p]; - } - - for ( a=0, b=tot-1; b>=0; b--, a++) - seq3[a]=seq2[b]; - - fprintf ( stderr, "\n%s\n", seq); - fprintf ( stderr, "\nN coding=%d\n", tot); - for ( a=0; a< tot; a+=3) - { - b=translate_dna_codon (seq3+a, 'x'); - fprintf ( stderr, "%c",b); - if ( b=='x'){fprintf ( stderr, "\n");myexit (EXIT_SUCCESS);} - } - - fprintf ( stderr, "\n"); - myexit (EXIT_SUCCESS); - return 0; - - - - } -Alignment * dna_aln2_3frame_cdna_aln(Alignment *A,int *ns,int **l_s) -{ - Alignment *B; - int a; - B=realloc_aln2 (NULL,6,strlen(A->seq_al[l_s[0][0]])+strlen(A->seq_al[l_s[1][0]])); - for ( a=0; a< 3; a++) - { - B->seq_al[a]=translate_dna_seq (A->seq_al[l_s[0][0]]+a, 0, 'o',B->seq_al[a]); - B->seq_al[a+3]=translate_dna_seq (A->seq_al[l_s[1][0]]+a, 0, 'o',B->seq_al[a+3]); - } - for ( a=1; a<3; a++) - { - if ( strlen(B->seq_al[a])seq_al[0])) B->seq_al[a]=strcat ( B->seq_al[a], "x"); - if ( strlen(B->seq_al[a+3])seq_al[3])) B->seq_al[a+3]=strcat ( B->seq_al[a+3], "x"); - } - - B->nseq=6; - B->len_aln=strlen (B->seq_al[0]); - return B; -} - -//JM_ADD -//For normal distribution scan -#ifndef PI -#define PI 3.141592653589793238462643 -#endif - -double normal(double x, double mean, double std) -{ - return (1/(std*sqrt(2.0*PI)))*exp((-0.5*(x-mean)*(x-mean))/(std*std)); -} - -int ** get_sim_aln_array_normal_distribution ( Alignment *A, char *mode, int *STD, int *CENTER) - { - int **w; - int a, b; - - - w=declare_int ( A->nseq, A->nseq); - - for ( a=0; a< A->nseq-1; a++) - { - for ( b=a+1; b< A->nseq; b++) - { - - w[a][b]=w[b][a]=generic_get_seq_sim_normal_distribution ( A->seq_al[a], A->seq_al[b], (A->cdna_cache)?A->cdna_cache[0]:NULL, mode, STD, CENTER); - } - } - return w; - } -int generic_get_seq_sim_normal_distribution ( char *seq1, char *seq2, int*cache, char *mode, int *STD, int *CENTER) -{ - return get_seq_sim_distribution ( seq1,seq2,GAP_LIST, mode, STD, CENTER); -} - -int get_seq_sim_distribution ( char *string1, char *string2, char *ignore, char *in_mode, int *STD, int *CENTER) - { - int len1; - int a; - int pos0, gap=0; - int p1, p2; - int r=0,r1=0,r2=0; - char *p; - char mode[1000]; - - double sim; - - - sprintf ( mode, "%s", in_mode); - - /*mode: __ - mat: idscore to get the alignment done - any legal cw matrix - sim_mode: sim1->identities/matches - sim2->identities/min len - */ - - - if ( (p=strstr (mode, "_"))!=NULL) - { - p[0]='\0'; - p++; - } - - - if (strstr (mode, "idscore")) - { - static int **mat; - if (!mat) mat=read_matrice ("blosum62mt"); - return idscore_pairseq (string1, string2, -12, -1, mat,mode); - } - - len1=strlen (string1); - for ( sim=pos0=0,a=0; a< len1; a++) - { - r1=string1[a]; - r2=string2[a]; - p1=1-is_in_set (r1, ignore); - p2=1-is_in_set (r2, ignore); - if (p1 && p2) - { - pos0++; - if (is_in_same_group_aa(r1,r2,0, NULL, mode)) - { - sim += normal(a, *CENTER, *STD); - } - } - else if (p1+p2==1) - { - gap++; - } - } - - if ( p==NULL || strm (p, "sim1") || strm (p, "sim")) - { - r=(pos0==0)?0:(sim*MAXID); - } -/* else if ( strm (p, "sim2")) - { - r=(pos1==0 || pos2==0)?0:(sim*MAXID)/MIN(pos1,pos2); - } - else if ( strm (p, "sim3")) - { - r=(pos1==0 || pos2==0)?0:(sim*MAXID)/MAX(pos1,pos2); - } - else if ( strm (p, "gap1")) - { - r=(len1==0)?MAXID:(gap*MAXID)/len1; - r=MAXID-r; - } - else if ( strm (p, "logid")) - { - r=logid_score (pos0, sim); - }*/ - return r; - - } - - -Alignment *aln2clean_pw_aln (Alignment *A, OveralnP *F)// char *mode, int t, int f, int p1,int p2, int p3, char *fsa_mode) -{ - int **C, **T; - int a, b, c; - Alignment *B; - - - if (F->t==0)F->t=2; - - C=declare_int ( A->nseq, A->len_aln); - T=declare_int ( A->nseq, A->len_aln); - B=copy_aln (A, NULL); - - for (a=0; a< A->nseq;a++) - { - for (b=0; bnseq; b++) - { - int *w; - w=pw_aln2clean_aln_weight (A->seq_al[a], A->seq_al[b], 1,F);//f,p1, p2, p3, fsa_mode); - for (c=0; clen_aln; c++) - { - if (A->seq_al[a][c]=='-')continue; - C[a][c]+=w[c]; - T[a][c]++; - } - vfree (w); - } - } - - - - for (a=0; anseq; a++) - { - for (b=0; blen_aln; b++) - { - int c; - c=A->seq_al[a][b]; - if ( c=='-'); - else if (T[a][b]==0); - else - { - int r; - r=(C[a][b]*10)/T[a][b]; - r=(r==10)?9:r; - if (!F->mode || strm (F->mode, "number")) - B->seq_al[a][b]='0'+r; - else if ( F->mode && strm (F->mode, "unalign")) - B->seq_al[a][b]='0'+r; - else if ( F->mode && strm (F->mode, "lower") ) - { - if (r<=F->t)B->seq_al[a][b]=tolower (B->seq_al[a][b]); - else B->seq_al[a][b]=toupper (B->seq_al[a][b]); - } - } - } - } - - if ( F->mode && strm (F->mode, "unalign")) - { - A=unalign_aln (A, B, F->t); - free_aln (B); - B=copy_aln (A, NULL); - } - - free_int (C, -1); - free_int (T, -1); - - return B; -} - -char **pw_aln2clean_pw_aln_fsa1 (char ** aln, OveralnP *F); -char **pw_aln2clean_pw_aln_fsa2 (char ** aln, OveralnP *F); - -int * pw_aln2clean_aln_weight ( char *seq1, char *seq2, int w, OveralnP *F) -{ - char **aln; - int *weight; - int l, a; - - if ( (l=strlen (seq1)) !=strlen (seq2)) - { - HERE ("\n%s\n%s\n", seq1, seq2); - printf_exit ( EXIT_FAILURE, stderr, "\nERROR: Comparing unaligned sequences [FATAL:%s]", PROGRAM); - - } - - aln=declare_char (2, l+1); - sprintf ( aln[0], "%s", seq1); - sprintf ( aln[1], "%s", seq2); - - - aln=pw_aln2clean_pw_aln (aln, F); - - weight=vcalloc (l+1, sizeof (int)); - for (a=0; amodel, "fsa2"))return pw_aln2clean_pw_aln_fsa2 (aln,F); - else if ( strm (F->model, "fsa1"))return pw_aln2clean_pw_aln_fsa1 (aln,F); - else return pw_aln2clean_pw_aln_fsa1 (aln,F); -} - -char **pw_aln2clean_pw_aln_fsa2 (char ** aln, OveralnP *FO) -{ - int a, b, c, d, l, id; - int c1, c2, e0, e1,tb, obs; - int T0, T1,T2; - int **mat, **tran, **p, **t, *s, *ids; - int ns, ps, cs; - int S, M1, M2, m1, m2,B1, B2,G1,G2, K; - int F=-9999999; - int MID_EXON_FACTOR=50; - int best; - static int **smat; - int model_type=1; - int *translate; - - if ( getenv ("MID_EXON_FACTOR"))MID_EXON_FACTOR=atoi (getenv ("MID_EXON_FACTOR")); - - - - if (!smat)smat=read_matrice ( "blosum62mt"); - - l=strlen (aln[0]); - - if ( l!=strlen (aln[1])) - { - printf_exit ( EXIT_FAILURE, stderr, "\nERROR: unaligned strings"); - } - - - - s=vcalloc (l, sizeof (int)); - ids=vcalloc (l, sizeof (int)); - - //record the id level of each posotion - for (b=0; b=2){id++; s[a]=1;} - else {s[a]=0;} - b++; - } - } - - if (b==0) - { - vfree(s);vfree (ids); - return aln; - } - - - - FO->p1=(FO->p1==0)?5:FO->p1; - FO->p2=(FO->p2==0)?15:FO->p2; - FO->p3=(FO->p3==0)?0:FO->p3; - FO->p4=(FO->p4==0)?100:FO->p4; - - - T1=100*(float)id/(float)b; - T2=(FO->f==0)?30:T1*(float)((float)FO->f/(float)100); - T2=MAX(T2,20); - - //0: unaligned - //1: aligned - //2: gap - //3: exon boundary - - ns=0; - S=ns++; - M1=ns++;//1 matched aligned - m1=ns++;//2 mmatched aligned - M2=ns++;//3 matched unaligned - m2=ns++;//4 mmatched unaligned - B1=ns++;//5 transition aligned - B2=ns++;//6 transition unaligned - - mat=declare_int (ns, 4); - tran=declare_int (ns, ns); - p=declare_int (l+1, ns); - t=declare_int (l+1, ns); - - //emission Values - mat[M1][0]=F; //non id - mat[M1][1]=T1;//id - mat[M1][2]=0; //gap - mat[M1][3]=F; //transition - - mat[M2][0]=F; - mat[M2][1]=T2; - mat[M2][2]=0; - mat[M2][3]=F; - - mat[m1][0]=100-T1; - mat[m1][1]=F; - mat[m1][2]=0; - mat[m1][3]=F; - - mat[m2][0]=100-T2; - mat[m2][1]=F; - mat[m2][2]=0; - mat[m1][3]=F; - - mat[B1][0]=F; - mat[B1][1]=F; - mat[B1][2]=F; - mat[B1][3]=0; - - mat[B2][0]=F; - mat[B2][1]=F; - mat[B2][2]=F; - mat[B2][3]=0; - - //transition values - tran[S][m1]=0; - tran[S][m2]=0; - tran[S][M1]=0; - tran[S][M2]=0; - tran[S][B1]=0; - tran[S][B2]=0; - - - tran[M1][m1]= 0; - tran[M1][m2]=-FO->p4; - tran[M1][M1]=+FO->p2; - tran[M1][M2]= F; - tran[M1][S ]= F; - tran[M1][B1]= 0; - tran[M1][B2]=-FO->p1; - - tran[M2][m1]= F; - tran[M2][m2]=+FO->p3; - tran[M2][M1]= F; - tran[M2][M2]= 0; - tran[M2][S] = F; - tran[M2][B1]= F; - tran[M2][B2]= 0; - - - tran[m1][m1]= 0; - tran[m1][m2]= F; - tran[m1][M1]= 0; - tran[m1][M2]= F; - tran[m1][S] = F; - tran[m1][B1]= 0; - tran[m1][B2]=-FO->p1; - - tran[m2][m1]= F; - tran[m2][m2]= 0; - tran[m2][M1]= -FO->p4; - tran[m2][M2]= +FO->p3; - tran[m2][S] = F; - tran[m2][B1]= F; - tran[m2][B2]= 0; - - tran[B1][m1]= 0; - tran[B1][m2]= F; - tran[B1][M1]= 0; - tran[B1][M2]= F; - tran[B1][S]= F; - tran[B1][B1]= F; - tran[B1][B2]= F; - - tran[B2][m1]= -FO->p1; - tran[B2][m2]= 0; - tran[B2][M1]= -FO->p1; - tran[B2][M2]= 0; - tran[B2][S]= F; - tran[B2][B1]= F; - tran[B2][B2]= F; - - translate=vcalloc (ns, sizeof (int)); - translate[M1]=1; - translate[m1]=1; - translate[M2]=0; - translate[m2]=0; - translate[B1]=1; - translate[B2]=0; - - for (a=1;a<=l; a++) - { - obs=s[a-1]; - - for (cs=0; cs=best){t[a][cs]=ps;best=p[a][cs]=c;} - } - - } - } - - - for (a=0; a=best){tb=a;best=p[l][a];} - } - - for (a=l; a>0; a--) - { - int v; - int p2; - - p2=a-1; - aln[0][p2]=aln[1][p2]=translate[tb]; - tb=t[a][tb]; - - } - - free_int (p, -1); - vfree(s); - free_int (t, -1); - free_int (mat, -1); - free_int (tran, -1); - vfree (translate); - return aln; -} -char **pw_aln2clean_pw_aln_fsa1 (char ** aln, OveralnP *FO) -{ - int a, b, c, d, l, id; - int c1, c2, e0, e1,tb, obs; - int T0, T1,T2; - int **mat, **tran, **p, **t, **s; - int ns, ps, cs; - int S, M1, M2, m1, m2, K; - int F=-9999999; - int best; - static int **smat; - int *translate; - - - if (!smat)smat=read_matrice ( "blosum62mt"); - - l=strlen (aln[0]); - - if ( l!=strlen (aln[1])) - { - printf_exit ( EXIT_FAILURE, stderr, "\nERROR: unaligned strings"); - } - - - s=declare_int (l+1, 2); - for (id=0,b=0,a=0;a=2){id++; s[b][0]=1;} - else {s[b][0]=0;} - s[b][1]=a; - b++; - - } - } - if (b==0) - { - free_int (s, -1); - return aln; - } - FO->f=(FO->f==0)?30:FO->f; - FO->p1=(FO->p1==0)?90:FO->p1; - FO->p2=(FO->p2==0)?15:FO->p2; - FO->p3=(FO->p3==0)?0:FO->p3; - - l=b;//length of the ungapped aln - T1=100*(float)id/(float)b; - T2=FO->f;//T1*f; - - - - //0: unaligned - //1: aligned - - - ns=0; - S=ns++; - M1=ns++;//1 matched aligned - m1=ns++;//2 mmatched aligned - M2=ns++;//3 matched unaligned - m2=ns++;//4 mmatched unaligned - - mat=declare_int (ns, 2); - tran=declare_int (ns, ns); - p=declare_int (l+1, ns); - t=declare_int (l+1, ns); - - - mat[M1][0]=F; - mat[M1][1]=T1; - - mat[M2][0]=F; - mat[M2][1]=T2; - - mat[m1][0]=100-T1; - mat[m1][1]=F; - - mat[m2][0]=100-T2; - mat[m2][1]=F; - - - tran[S][m1]=0; - tran[S][m2]=0; - tran[S][M1]=0; - tran[S][M2]=0; - - - tran[M1][m1]= 0; - tran[M1][m2]=-FO->p1;// -P; - tran[M1][M1]=+FO->p2; - tran[M1][M2]= F; - tran[M1][S] = F; - - tran[M2][m1]= F; - tran[M2][m2]=+FO->p3; - tran[M2][M1]= F; - tran[M2][M2]= 0; - tran[M2][S]= F; - - tran[m1][m1]= 0; - tran[m1][m2]= F; - tran[m1][M1]= 0; - tran[m1][M2]= F; - tran[m1][S]= F; - - tran[m2][m1]= F; - tran[m2][m2]= 0; - tran[m2][M1]=-FO->p1; - tran[m2][M2]=+FO->p3; - tran[m2][S]= F; - - translate=vcalloc (ns, sizeof (int)); - translate[M1]=1; - translate[m1]=1; - translate[M2]=0; - translate[m2]=0; - translate[S]=1; - - - for (a=1;a<=l; a++) - { - obs=s[a-1][0]; - - for (cs=0; cs=best){t[a][cs]=ps;best=p[a][cs]=c;} - } - - } - } - - - for (a=0; a=best){tb=a;best=p[l][a];} - } - for (a=l; a>0; a--) - { - int p2=s[a-1][1]; - aln[0][p2]=aln[1][p2]=translate[tb]; - - tb=t[a][tb]; - } - - - free_int (p, -1); - free_int (s, -1); - free_int (t, -1); - free_int (mat, -1); - free_int (tran, -1); - vfree (translate); - return aln; -} -float* analyze_overaln ( Alignment *iA, Alignment *iB, char *mode, int filter, int f, int p1,int p2, int p3) -{ - Alignment *C, *D; - Alignment *A, *B; - OveralnP *F; - - F=vcalloc (1, sizeof (OveralnP)); - F->p1=p1; - F->p2=p2; - F->p3=p3; - F->f=f; - F->t=filter; - sprintf (F->mode, "%s", mode); - - - float *r; - A=copy_aln (iA, NULL); - B=copy_aln (iB, NULL); - - C=aln2gap_cache (A,0); - A=filter_aln_upper_lower (A, C, 0, 0); - D=aln2clean_pw_aln (B, F); - r=aln2pred (A,D,mode); - free_aln (C); - free_aln (D); - free_aln (A); - free_aln (B); - return r; -} -float* aln2pred ( Alignment *A, Alignment*B, char *mode) -{ - int a, b, c, d, i, l, salp, s, n; - static char **list, *buf1, *buf2, *alp, *alp_lu; - static int ***r; - int T, N; - int fp, fn, tn, tp; - int tfp, tfn, ttn, ttp; - float sp, sn, sen2, best, result; - int print=1; - float *fresult; - - fresult=vcalloc ( 3, sizeof (float)); - - if ( mode && strstr (mode, "case")) - { - A=aln2case_aln (A,"u","l"); - B=aln2case_aln (B,"u","l"); - } - - if (mode && strstr (mode, "printaln")) - { - Sequence *S; - Alignment *C; - S=aln2seq (A); - C=copy_aln (B, NULL); - for (a=0; anseq; a++) - { - i=name_is_in_list (C->name[a], S->name, S->nseq, 100); - if ( i==-1) - for (b=0; blen_aln; b++) C->seq_al[a][b]='-'; - else - for (d=0,b=0; blen_aln; b++) - { - if ( !is_gap (C->seq_al[a][b])) - { - if (C->seq_al[a][b]==S->seq[i][d])C->seq_al[a][b]=toupper(C->seq_al[a][b]); - d++; - } - } - } - print_aln (C); - } - - vfree (alp);vfree (alp_lu); - alp=vcalloc ( 256, sizeof (char)); - alp_lu=vcalloc ( 256, sizeof (char)); - - for (c=0; c<2; c++) - { - Alignment *AL; - AL=(c==0)?A:B; - for (salp=0,a=0; anseq; a++) - { - for (b=0; blen_aln; b++) - { - c=AL->seq_al[a][b]; - if (!is_gap(c) && !alp[c]) - { - salp++; - alp_lu[salp]=c; - alp[c]=salp; - } - } - } - } - - vfree (buf1); vfree(buf2); - buf1=vcalloc ( A->len_aln+1, sizeof (char)); - buf2=vcalloc ( B->len_aln+1, sizeof (char)); - - free_arrayN ((void **)r, 3); - r=declare_arrayN(3, sizeof (int),A->nseq,salp+1,salp+1); - free_char ( list, -1); - list=declare_char ( A->nseq, 100); - for (n=0,a=0; a< A->nseq; a++) - { - for ( b=0; bnseq; b++) - { - if ( strm (A->name[a], B->name[b])) - { - sprintf ( buf1, "%s", A->seq_al[a]); - sprintf ( buf2, "%s", B->seq_al[b]); - ungap (buf1); ungap (buf2); - if ((l=strlen (buf1))!=strlen (buf2))continue; - else - { - sprintf ( list[n], "%s", A->name[a]); - for (c=0; c%s S=%c sp=%6.2f sn=%6.2f sen2=%6.2f best=%6.2f\n", list[a],alp_lu[s],sp, sn, sen2, best); - } - - rates2sensitivity (ttp, ttn, tfp, tfn, &sp, &sn, &sen2, &best); - if (mode && strstr (mode, "printstat"))fprintf ( stdout, ">TOT S=%c sp=%6.2f sn=%6.2f re=%6.2f best=%6.2f\n", alp_lu[s],sp, sn, sen2, best); - - if ( mode && strstr (mode, type)) - { - fresult[0]=sn; - fresult[1]=sp; - fresult[2]=sen2; - } - } - return fresult; -} - -Alignment * mark_exon_boundaries (Alignment *A, Alignment *E) -{ - char *buf, *buf2; - int a, b, c, i, l; - - buf2=vcalloc ( E->len_aln+1, sizeof (char)); - buf =vcalloc ( E->len_aln+1, sizeof (char)); - - for (a=0; a< A->nseq; a++) - { - i=name_is_in_list (A->name[a], E->name, E->nseq, 100); - if ( i==-1) continue; - sprintf (buf, "%s", E->seq_al[i]); - ungap (buf); - l=strlen (buf); - //clean buf2 - for (c=0, b=0; b=1)buf2[c-1]=tolower(buf2[c-1]); - else if (buf[b]=='j' &&clen_aln; b++) - { - if (!is_gap(A->seq_al[a][b])) - { - A->seq_al[a][b]=buf2[c++]; - } - } - } - vfree (buf); - vfree (buf2); - return A; -} - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -/*********************************COPYRIGHT NOTICE**********************************/ -/*© Centro de Regulacio Genomica */ -/*and */ -/*Cedric Notredame */ -/*Tue Oct 27 10:12:26 WEST 2009. */ -/*All rights reserved.*/ -/*This file is part of T-COFFEE.*/ -/**/ -/* T-COFFEE is free software; you can redistribute it and/or modify*/ -/* it under the terms of the GNU General Public License as published by*/ -/* the Free Software Foundation; either version 2 of the License, or*/ -/* (at your option) any later version.*/ -/**/ -/* T-COFFEE is distributed in the hope that it will be useful,*/ -/* but WITHOUT ANY WARRANTY; without even the implied warranty of*/ -/* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the*/ -/* GNU General Public License for more details.*/ -/**/ -/* You should have received a copy of the GNU General Public License*/ -/* along with Foobar; if not, write to the Free Software*/ -/* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA*/ -/*............................................... |*/ -/* If you need some more information*/ -/* cedric.notredame@europe.com*/ -/*............................................... |*/ -/**/ -/**/ -/* */ -/*********************************COPYRIGHT NOTICE**********************************/ diff --git a/binaries/src/tcoffee/t_coffee_source/define_header.h b/binaries/src/tcoffee/t_coffee_source/define_header.h deleted file mode 100644 index 64fbbf8..0000000 --- a/binaries/src/tcoffee/t_coffee_source/define_header.h +++ /dev/null @@ -1,702 +0,0 @@ -/*DEBUGGING*/ -/*#include "mshell.h"*/ -/*MEMORY MANAGEMENT*/ -#include -#define MY_EPS 1000*DBL_EPSILON -//Maximum number of tries for interactibve things -#define MAX_N_TRIES 3 - -//Maximum CACHE and Temporary file size and age (Mb and days, 0: unlimited) -#define TMP_MAX_SIZE 0 -#define TMP_MAX_KEEP 10 -#define CACHE_MAX_SIZE 2000 -#define CACHE_MAX_KEEP 180 -#define MAX_N_PID 65536 -//Importnat Values Affecting the Program Behavior -#define SCORE_K 10 -#define NORM_F 1000 -#define PAVIE_MAT_FACTOR 1000 -#define MAXID 100 -#define CLEAN_FUNCTION NULL -#define MINSIM_4_TCOFFEE 25 //The minimum similarity between a sequence and its PDB template -#define MINCOV_4_TCOFFEE 25 //The minimum similarity between a sequence and its PDB template - - -#define TRACE_TYPE int -#define MAX_LEN_FOR_DP 600 - - -#define GIVE_MEMORY_BACK 0 -#define MEMSET0 1 -#define NO_MEMSET0 0 -/*OUTPUT DEFINITIONS*/ -#define NO_COLOR_RESIDUE 127 -#define NO_COLOR_GAP 126 -#define CLOSE_HTML_SPAN -1 -/*SPECIAL_CODES*/ -#define GAP_CODE 60 -/*TYPE DEFINITIONS*/ - -//Formats -#define BLAST_XML 100 -#define BLAST_TXT 101 - -/*SWITCHES*/ - - -#define USED 1 -#define UNUSED 2 - - -#define TEMPLATES 1 -#define NOTEMPLATES 0 - -#define EXTEND 1 -#define RESIZE 2 - -#define SEN 0 -#define SPE 1 -#define REC 2 -#define SEN2 2 - -#define ALL 1 -#define SEGMENTS 2 -#define DIAGONALS 3 - -#define START_STATE 0 -#define END_STATE 1 - -#define KEEP_CASE 2 /*Hard set in several places*/ -#define LOWER_CASE 0 -#define UPPER_CASE 1 -#define CHANGE_CASE 3 -#define KEEP_GAP 0 -#define RM_GAP 1 - -#define KEEP_NAME 1 - -#define CHECK 0 -#define NO_CHECK 1 -#define FORCE 2 -#define STORE 3 -#define FLUSH 4 - - -#define ON 8 -#define OFF 9 -#define LOCKED_ON 10 -#define LOCKED_OFF 11 - -#define YES 12 -#define NO 13 -#define MAYBE 14 - -#define NEVER 15 -#define ALWAYS 16 -#define SOMETIMES 17 - -#define UPPER 18 -#define LOWER 19 -#define DELETE 20 -#define SWITCHCASE 21 - -#define VECTOR 22 -#define NON_VECTOR 23 -#define NON_PROFILE 24 -#define BOOTSTRAP 25 - -#define HEADER 26 -#define NO_HEADER 27 - -#define VERY_VERBOSE 28 -#define VERBOSE 29 -#define SHORT 30 -#define VERY_SHORT 31 - -#define OVERLAP 32 -#define NO_OVERLAP 33 - -#define PRINT 34 -#define NO_PRINT 35 - -#define FREE_ALN 36 -#define DECLARE_ALN 37 -#define EXTRACT_ALN 38 -#define CLEAN 39 -#define INTERACTIVE 40 -#define NON_INTERACTIVE 41 -#define PAD 42 -#define NO_PAD 43 - -#define SET 44 -#define UNSET 45 -#define RESET 48 -#define ISSET 49 -#define GET 50 - -#define ENV 52 - -#define GOP 0 -#define GCP 1 -#define GEP 2 - -#define BOTTOM 0 -#define TOP 1 - -#define FORWARD -1 -#define BACKWARD 1 - -#define GO_LEFT -1 -#define GO_RIGHT 1 - -#define LOCAL 1 -#define GLOBAL 2 -#define LALIGN 3 -#define MOCCA 4 - -#define TRUE 1 -#define FALSE 0 - -#define NEW 1 -#define OLD 0 - -#define RANDOM 0 -#define DETERMINISTIC 1 - -#define GREEDY 1 -#define NON_GREEDY 0 - -#define IS_FATAL 1 -#define IS_NOT_FATAL 0 -#define NO_REPORT 2 -#define INSTALL 3 -#define INSTALL_OR_DIE 4 - -#define OPTIONAL 1 -#define NON_OPTIONAL 0 - -#define GV_MAXIMISE 1 -#define GV_MINIMISE 0 - -#define MAXIMISE 1 -#define MINIMISE 0 - -#define ALLOWED 0 -#define FORBIDEN -99999999 -#define END_ARRAY -99999990 -#define SOFT_COPY 1 -#define HARD_COPY 2 - -#define VERY_SLOW 0 -#define SLOW 1 -#define FAST 2 -#define VERY_FAST 3 -#define SUPER_FAST 4 -#define ULTRA_FAST 5 - -#define CODE 1 -#define DECODE 2 -#define CODELIST 3 - -/*Identity measure*/ -#define UNGAPED_POSITIONS 1 -#define ALIGNED_POSITIONS 2 -#define AVERAGE_POSITIONS 3 -#define NOMATRIX NULL -#define NOGROUP NULL -#define NOALN NULL - -/*SIZE DEFINITIONS*/ -#define SIZE_OF_INT 10 -#define UNDEFINED FORBIDEN -#define UNDEFINED_INT UNDEFINED -#define UNDEFINED_FLOAT UNDEFINED -#define UNDEFINED_DOUBLE UNDEFINED -#define UNDEFINED_CHAR 125 -#define UNDEFINED_SHORT -125 -#define UNDEFINED_2 0 -#define UNDEFINED_RESIDUE '>' - - - -#define FACTOR 1 -#define MAX_N_SEQ 1 -#define MAX_N_ALN 1 -#define MAX_LEN_ALN 1 -#define MAX_N_LIST 100 - -#define COMMENT_SIZE 1000 -#define MAXNAMES 100 -#define FILENAMELEN 500 /* Max. file name length */ -#define MAX_N_PARAM 2000 -#define MAX_PARAM_LEN 200 -#define MAX_LINE_LENGTH 10000 -#define ALN_LINE_LENGTH 60 -#define SHORT_STRING 10 -#define STRING 300 -#define LONG_STRING 1000 -#define VERY_LONG_STRING 10000 - -#define AA_ALPHABET "acdefghiklmnpqrstvwy-ACDEFGHIKLMNPQRSTVWY" -#define DNA_ALPHABET "AGCTUNRYMKSWHBVD-agctunrymkswhbvd" -#define RNAONLY_ALPHABET "Uu" -#define BLAST_AA_ALPHABET "arndcqeghilkmfpstwyvbzx*" -#define NAMES_ALPHABET "1234567890ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz_|�-!%@&#-+=." - -#define SIZEOF_AA_MAT 60 -#define GAP_LIST "-.#*~" -#define SSPACE " " - -#define MATCH 1 -#define UNALIGNED 2 -#define GAP 3 - -#define MNE 3 -#define CODE4PROTEINS 10 -#define CODE4DNA 20 - -#define STOCKHOLM_CHAR 'z' -#define STOCKHOLM_STRING "z" - - -/*CODE SHORT CUTS*/ - -/*1-COMMAND LINE PROCESSING*/ -#define GET_COMMAND_LINE_INFO ((strncmp ( argv[1], "-h",2)==0)||(strncmp ( argv[1], "-man",4)==0)||(strncmp ( argv[1], "-",1)!=0)) -#define NEXT_ARG_IS_FLAG ((argc<=(a+1)) ||(( argv[a+1][0]=='-') && !(is_number(argv[a+1])))) - - -/*UTIL MACROS*/ -#define BORDER(p1,l1,p2,l2) ((p1==0 || p2==0 || p1==l1 || p2==l2)?1:0) -#define GET_CASE(f,c) ((f==UPPER_CASE)?toupper(c):((f==LOWER_CASE)?tolower(c):c)) - -#define SWAP(x,y) {x=x+y;y=x+y; x=y-x; y=y-2*x;} -#define SWAPP(x,y,tp) {tp=y;y=x;x=tp;} - -#define MAX(x, y) (((x) >(y)) ? (x):(y)) -#define MAX2(x, y) (((x) >(y)) ? (x):(y)) -#define MAX3(x,y,z) (MAX(MAX(x,y),z)) -#define MAX4(a,b,c,d) (MAX(MAX(a,b),MAX(c,d))) -#define MAX5(a,b,c,d,e) (MAX2((MAX3(a,b,c)),(MAX2(d,e)))) -#define MAX6(a,b,c,d,e,f) (MAX2((MAX3(a,b,c)),(MAX3(c,d,e)))) - -#define MIN(x, y) (((x) <(y)) ? (x):(y)) -#define FABS(x) ((x<0)?(-x):(x)) -#define is_defined(x) ((x==UNDEFINED)?0:1) -#define a_better_than_b(x,y,m) ((m==1)?(((x)>(y))?1:0):(((x)<(y))?1:0)) -#define is_in_range(x,min,max) ((x>=min && x<=max)?1:0) -/*#define bod_a_b(x,y,m) ((m==1)?(MAX((x),(y))):(MIN((x),(y)))) -#define bo_a_b(x,y,m) ((x==UNEFINED)?y:((y==UNDEFINED)?x:bod_a_b(y,y,m))) -#define best_of_a_b(x,y,m) ((x==UNDEFINED && y==UNDEFINED)?(UNDEFINED):(bo_a_b(x,y,m))) -*/ - - -#define DIE(x) HERE(x);exit(0); -#define best_of_a_b(x,y,m) ((m==1)?(MAX((x),(y))):(MIN((x),(y)))) - -#define strm(x,y) ((vstrcmp((x),(y))==0)?1:0) -#define strnm(x,y,n) ((vstrncmp((x),(y),(n))==0)?1:0) -#define strm2(a,b,c) (strm(a,b) || strm(a,c)) -#define strm3(a,b,c,d) (strm2(a,b,c) || strm(a,d)) -#define strm4(a,b,c,d,e) (strm2(a,b,c) || strm2(a,d,e)) -#define strm5(a,b,c,d,e,f) (strm2(a,b,c) || strm3(a,d,e,f)) -#define strm6(a,b,c,d,e,f,g) (strm3(a,b,c,d) || strm3(a,e,f,g)) -#define declare_name(x) (x=vcalloc (MAX(FILENAMELEN,L_tmpnam)+1, sizeof (char))) -#define is_parameter(x) (x[0]=='-' && !isdigit(x[1])) - -/*Freing functions*/ -#define free_2(a, b) free(a);free(b) -#define free_1(a) free(a) -#define free_3(a, b, c) free_2(a,b);free_1(c) -#define free_4(a, b, c,d) free_2(a,b);free_2(c,d) -#define free_5(a, b, c,d,e) free_3(a,b,e);free_2(c,d) -#define free_6(a, b, c,d,e,f) free_3(a,b,e);free_3(c,d,f) -#define free_7(a, b, c,d,e,f,g) free_3(a,b,e);free_4(c,d,f,g) -/*2-FILE PARSING*/ -#define SEPARATORS "\n \t,;" -#define LINE_SEPARATOR "\n#TC_LINE_SEPARATOR\n" -#define TC_REC_SEPARATOR "#### TC REC SEPARATOR ###" - -/*END 1-*/ - - -/*WIDOWS/UNIX DISTINCTIONS -#if defined(_WIN32) || defined(__WIN32__) || defined(__WINDOWS__) || defined(__MSDOS__) || defined(__DOS__) || defined(__NT__) || defined(__WIN32__) -#define WIN32 -#define TO_NULL_DEVICE " >nul" -#define NULL_DEVICE "nul" -#define CWF "/" -#else -#define TO_NULL_DEVICE " >/dev/null 2>&1" -#define NULL_DEVICE "/dev/null" -*/ - -#if defined(_WIN32) || defined(__WIN32__) || defined(__WINDOWS__) || defined(__MSDOS__) || defined(__DOS__) || defined(__NT__) || defined(__WIN32__) -#define WIN32 -#define TO_NULL_DEVICE " >>t_coffee.log" -#define NULL_DEVICE "t_coffee.log" -#define CWF "/" /*ClustalW Flag*/ -#else -#define TO_NULL_DEVICE " >>/dev/null 2>&1" -#define NULL_DEVICE "/dev/null" - - -#define CWF "-" /*ClustaW Flag*/ -#endif - -/*Generic Data*/ -#define EMAIL "cedric.notredame@europe.com" -#define URL "http://www.tcoffee.org" - -#define PERL_HEADER "#!/usr/bin/env perl" - -//Optimize the Score Computation in DP -#define TC_SCORE_2(x,y) (SCORE_K*CL->M[Aln->seq_al[l_s[0][0]][x]-'A'][Aln->seq_al[l_s[1][0]][y]-'A']-SCORE_K*CL->nomatch) -#define TC_SCORE_N(x,y) ((CL->get_dp_cost)(Aln, pos, ns[0], l_s[0], x, pos, ns[1], l_s[1], y, CL)) -#define TC_SCORE(x,y) ((CL->get_dp_cost==slow_get_dp_cost && CL->evaluate_residue_pair==evaluate_matrix_score && ns[0]+ns[1]==2 && x>=0 && j>=0)? (TC_SCORE_2(x,y)):(TC_SCORE_N(x,y))) - -#define NULL_2 NULL,NULL -#define NULL_3 NULL_2,NULL -#define NULL_4 NULL_2,NULL_2 -#define NULL_5 NULL_3,NULL_2 -#define NULL_6 NULL_4,NULL_2 -#define NULL_7 NULL_5,NULL_2 -/* PROGRAM PATH */ - -#define ADDRESS_BUILT_IN "built_in" -#define PROGRAM_BUILT_IN "t_coffee" -#define TEST_WWWSITE_4_TCOFFEE "www.google.com" -#define TCOFFEE_4_TCOFFEE "t_coffee" -#define TCOFFEE_type "sequence_multiple_aligner" -#define TCOFFEE_ADDRESS "http://www.tcoffee.org" -#define TCOFFEE_language "C" -#define TCOFFEE_language2 "C" -#define TCOFFEE_source "http://www.tcoffee.org/Packages/T-COFFEE_distribution.tar.gz" -#define TCOFFEE_update_action "always" -#define TCOFFEE_mode "tcoffee,mcoffee,rcoffee,expresso,3dcoffee" -#define CLUSTALW2_4_TCOFFEE "clustalw2" -#define CLUSTALW2_type "sequence_multiple_aligner" -#define CLUSTALW2_ADDRESS "http://www.clustal.org" -#define CLUSTALW2_language "C++" -#define CLUSTALW2_language2 "CXX" -#define CLUSTALW2_source "http://www.clustal.org/download/2.0.10/clustalw-2.0.10-src.tar.gz" -#define CLUSTALW2_mode "mcoffee,rcoffee" -#define CLUSTALW_4_TCOFFEE "clustalw" -#define CLUSTALW_type "sequence_multiple_aligner" -#define CLUSTALW_ADDRESS "http://www.clustal.org" -#define CLUSTALW_language "C" -#define CLUSTALW_language2 "C" -#define CLUSTALW_source "http://www.clustal.org/download/1.X/ftp-igbmc.u-strasbg.fr/pub/ClustalW/clustalw1.82.UNIX.tar.gz" -#define CLUSTALW_mode "mcoffee,rcoffee" -#define DIALIGNT_4_TCOFFEE "dialign-t" -#define DIALIGNT_type "sequence_multiple_aligner" -#define DIALIGNT_ADDRESS "http://dialign-tx.gobics.de/" -#define DIALIGNT_DIR "/usr/share/dialign-tx/" -#define DIALIGNT_language "C" -#define DIALIGNT_language2 "C" -#define DIALIGNT_source "http://dialign-tx.gobics.de/DIALIGN-TX_1.0.1.tar.gz" -#define DIALIGNT_mode "mcoffee" -#define DIALIGNT_binary "dialign-t" -#define DIALIGNTX_4_TCOFFEE "dialign-tx" -#define DIALIGNTX_type "sequence_multiple_aligner" -#define DIALIGNTX_ADDRESS "http://dialign-tx.gobics.de/" -#define DIALIGNTX_DIR "/usr/share/dialign-tx/" -#define DIALIGNTX_language "C" -#define DIALIGNTX_language2 "C" -#define DIALIGNTX_source "http://dialign-tx.gobics.de/DIALIGN-TX_1.0.1.tar.gz" -#define DIALIGNTX_mode "mcoffee" -#define DIALIGNTX_binary "dialign-tx" -#define POA_4_TCOFFEE "poa" -#define POA_type "sequence_multiple_aligner" -#define POA_ADDRESS "http://www.bioinformatics.ucla.edu/poa/" -#define POA_language "C" -#define POA_language2 "C" -#define POA_source "http://downloads.sourceforge.net/poamsa/poaV2.tar.gz" -#define POA_DIR "/usr/share/" -#define POA_FILE1 "blosum80.mat" -#define POA_mode "mcoffee" -#define POA_binary "poa" -#define PROBCONS_4_TCOFFEE "probcons" -#define PROBCONS_type "sequence_multiple_aligner" -#define PROBCONS_ADDRESS "http://probcons.stanford.edu/" -#define PROBCONS_language2 "CXX" -#define PROBCONS_language "C++" -#define PROBCONS_source "http://probcons.stanford.edu/probcons_v1_12.tar.gz" -#define PROBCONS_mode "mcoffee" -#define PROBCONS_binary "probcons" -#define MAFFT_4_TCOFFEE "mafft" -#define MAFFT_type "sequence_multiple_aligner" -#define MAFFT_ADDRESS "http://align.bmr.kyushu-u.ac.jp/mafft/online/server/" -#define MAFFT_language "C" -#define MAFFT_language "C" -#define MAFFT_source "http://align.bmr.kyushu-u.ac.jp/mafft/software/mafft-6.603-with-extensions-src.tgz" -#define MAFFT_windows "http://align.bmr.kyushu-u.ac.jp/mafft/software/mafft-6.603-mingw.tar" -#define MAFFT_mode "mcoffee,rcoffee" -#define MAFFT_binary "mafft.tar.gz" -#define MUSCLE_4_TCOFFEE "muscle" -#define MUSCLE_type "sequence_multiple_aligner" -#define MUSCLE_ADDRESS "http://www.drive5.com/muscle/" -#define MUSCLE_language "C++" -#define MUSCLE_language2 "GPP" -#define MUSCLE_source "http://www.drive5.com/muscle/downloads3.6/muscle3.6_src.tar.gz" -#define MUSCLE_windows "http://www.drive5.com/muscle/downloads3.6/muscle3.6_win32.zip" -#define MUSCLE_linux "http://www.drive5.com/muscle/downloads3.6/muscle3.6_linux_ia32.tar.gz" -#define MUSCLE_mode "mcoffee,rcoffee" -#define PCMA_4_TCOFFEE "pcma" -#define PCMA_type "sequence_multiple_aligner" -#define PCMA_ADDRESS "ftp://iole.swmed.edu/pub/PCMA/" -#define PCMA_language "C" -#define PCMA_language2 "C" -#define PCMA_source "ftp://iole.swmed.edu/pub/PCMA/pcma.tar.gz" -#define PCMA_mode "mcoffee" -#define KALIGN_4_TCOFFEE "kalign" -#define KALIGN_type "sequence_multiple_aligner" -#define KALIGN_ADDRESS "http://msa.cgb.ki.se" -#define KALIGN_language "C" -#define KALIGN_language2 "C" -#define KALIGN_source "http://msa.cgb.ki.se/downloads/kalign/current.tar.gz" -#define KALIGN_mode "mcoffee" -#define AMAP_4_TCOFFEE "amap" -#define AMAP_type "sequence_multiple_aligner" -#define AMAP_ADDRESS "http://bio.math.berkeley.edu/amap/" -#define AMAP_language "C++" -#define AMAP_language2 "CXX" -#define AMAP_source "http://baboon.math.berkeley.edu/amap/download/amap.2.2.tar.gz" -#define AMAP_mode "mcoffee" -#define PRODA_4_TCOFFEE "proda" -#define PRODA_type "sequence_multiple_aligner" -#define PRODA_ADDRESS "http://proda.stanford.edu" -#define PRODA_language "C++" -#define PRODA_language2 "CXX" -#define PRODA_source "http://proda.stanford.edu/proda_1_0.tar.gz" -#define PRODA_mode "mcoffee" -#define PRANK_4_TCOFFEE "prank" -#define PRANK_type "sequence_multiple_aligner" -#define PRANK_ADDRESS "http://www.ebi.ac.uk/goldman-srv/prank/" -#define PRANK_language "C++" -#define PRANK_language2 "CXX" -#define PRANK_source "http://www.ebi.ac.uk/goldman-srv/prank/src/old/prank.src.081202.tgz" -#define PRANK_mode "mcoffee" -#define SAP_4_TCOFFEE "sap" -#define SAP_type "structure_pairwise_aligner" -#define SAP_ADDRESS "http://mathbio.nimr.mrc.ac.uk/wiki/Software" -#define SAP_language "C" -#define SAP_language2 "C" -#define SAP_source "http://www.tcoffee.org/Packages/sap_distribution_TCC_0.6.tar.gz" -#define SAP_mode "expresso,3dcoffee" -#define TMALIGN_4_TCOFFEE "TMalign" -#define TMALIGN_type "structure_pairwise_aligner" -#define TMALIGN_ADDRESS "http://zhang.bioinformatics.ku.edu/TM-align/TMalign.f" -#define TMALIGN_language "Fortran" -#define TMALIGN_language2 "Fortran" -#define TMALIGN_source "http://zhang.bioinformatics.ku.edu/TM-align/TMalign.f" -#define TMALIGN_linux "http://zhang.bioinformatics.ku.edu/TM-align/TMalign_32.gz" -#define TMALIGN_mode "expresso,3dcoffee" -#define MUSTANG_4_TCOFFEE "mustang" -#define MUSTANG_type "structure_pairwise_aligner" -#define MUSTANG_ADDRESS "http://www.cs.mu.oz.au/~arun/mustang" -#define MUSTANG_language "C++" -#define MUSTANG_language2 "CXX" -#define MUSTANG_source "http://www.cs.mu.oz.au/~arun/mustang/mustang_v.3.tgz" -#define MUSTANG_mode "expresso,3dcoffee" -#define LSQMAN_4_TCOFFEE "lsqman" -#define LSQMAN_type "structure_pairwise_aligner" -#define LSQMAN_ADDRESS "empty" -#define LSQMAN_language "empty" -#define LSQMAN_language2 "empty" -#define LSQMAN_source "empty" -#define LSQMAN_update_action "never" -#define LSQMAN_mode "expresso,3dcoffee" -#define ALIGN_PDB_4_TCOFFEE "align_pdb" -#define ALIGN_PDB_type "structure_pairwise_aligner" -#define ALIGN_PDB_ADDRESS "empty" -#define ALIGN_PDB_language "empty" -#define ALIGN_PDB_language2 "empty" -#define ALIGN_PDB_source "empty" -#define ALIGN_PDB_update_action "never" -#define ALIGN_PDB_mode "expresso,3dcoffee" -#define FUGUE_4_TCOFFEE "fugueali" -#define FUGUE_type "structure_pairwise_aligner" -#define FUGUE_ADDRESS "http://www-cryst.bioc.cam.ac.uk/fugue/download.html" -#define FUGUE_language "empty" -#define FUGUE_language2 "empty" -#define FUGUE_source "empty" -#define FUGUE_update_action "never" -#define FUGUE_mode "expresso,3dcoffee" -#define DALILITEc_4_TCOFFEE "dalilite.pl" -#define DALILITEc_type "structure_pairwise_aligner" -#define DALILITEc_ADDRESS "built_in" -#define DALILITEc_ADDRESS2 "http://www.ebi.ac.uk/Tools/webservices/services/dalilite" -#define DALILITEc_language "Perl" -#define DALILITEc_language2 "Perl" -#define DALILITEc_source "empty" -#define DALILITEc_update_action "never" -#define DALILITEc_mode "expresso,3dcoffee" -#define PROBCONSRNA_4_TCOFFEE "probconsRNA" -#define PROBCONSRNA_type "RNA_multiple_aligner" -#define PROBCONSRNA_ADDRESS "http://probcons.stanford.edu/" -#define PROBCONSRNA_language "C++" -#define PROBCONSRNA_language2 "CXX" -#define PROBCONSRNA_source "http://probcons.stanford.edu/probconsRNA.tar.gz" -#define PROBCONSRNA_mode "mcoffee,rcoffee" -#define CONSAN_4_TCOFFEE "sfold" -#define CONSAN_type "RNA_pairwise_aligner" -#define CONSAN_ADDRESS "http://selab.janelia.org/software/consan/" -#define CONSAN_language "empty" -#define CONSAN_language2 "empty" -#define CONSAN_source "empty" -#define CONSAN_update_action "never" -#define CONSAN_mode "rcoffee" -#define RNAPLFOLD_4_TCOFFEE "RNAplfold" -#define RNAPLFOLD_type "RNA_secondarystructure_predictor" -#define RNAPLFOLD_ADDRESS "http://www.tbi.univie.ac.at/~ivo/RNA/" -#define RNAPLFOLD_language "C" -#define RNAPLFOLD_language2 "C" -#define RNAPLFOLD_source "http://www.tbi.univie.ac.at/~ivo/RNA/ViennaRNA-1.7.2.tar.gz" -#define RNAPLFOLD_mode "rcoffee" -#define HMMTOP_4_TCOFFEE "hmmtop" -#define HMMTOP_type "protein_secondarystructure_predictor" -#define HMMTOP_ADDRESS "www.enzim.hu/hmmtop/" -#define HMMTOP_language "C" -#define HMMTOP_language2 "C" -#define HMMTOP_source "empty" -#define HMMTOP_update_action "never" -#define HMMTOP_mode "tcoffee" -#define GOR4_4_TCOFFEE "gorIV" -#define GOR4_type "protein_secondarystructure_predictor" -#define GOR4_ADDRESS "http://mig.jouy.inra.fr/logiciels/gorIV/" -#define GOR4_language "C" -#define GOR4_language2 "C" -#define GOR4_source "http://mig.jouy.inra.fr/logiciels/gorIV/GOR_IV.tar.gz" -#define GOR4_update_action "never" -#define GOR4_mode "tcoffee" -#define EBIWUBLASTc_4_TCOFFEE "wublast.pl" -#define EBIWUBLASTc_type "protein_homology_predictor" -#define EBIWUBLASTc_ADDRESS "built_in" -#define EBIWUBLASTc_ADDRESS2 "http://www.ebi.ac.uk/Tools/webservices/services/wublast" -#define EBIWUBLASTc_language "Perl" -#define EBIWUBLASTc_language2 "Perl" -#define EBIWUBLASTc_source "empty" -#define EBIWUBLASTc_update_action "never" -#define EBIWUBLASTc_mode "psicoffee,expresso,3dcoffee" -#define EBIBLASTPGPc_4_TCOFFEE "blastpgp.pl" -#define EBIBLASTPGPc_type "protein_homology_predictor" -#define EBIBLASTPGPc_ADDRESS "built_in" -#define EBIBLASTPGPc_ADDRESS2 "http://www.ebi.ac.uk/Tools/webservices/services/blastpgp" -#define EBIBLASTPGPc_language "Perl" -#define EBIBLASTPGPc_language2 "Perl" -#define EBIBLASTPGPc_source "empty" -#define EBIBLASTPGPc_update_action "never" -#define EBIBLASTPGPc_mode "psicoffee,expresso,3dcoffee" -#define NCBIWEBBLAST_4_TCOFFEE "blastcl3" -#define NCBIWEBBLAST_type "protein_homology_predictor" -#define NCBIWEBBLAST_ADDRESS "ftp://ftp.ncbi.nih.gov/blast/executables/LATEST" -#define NCBIWEBBLAST_language "C" -#define NCBIWEBBLAST_language2 "C" -#define NCBIWEBBLAST_source "empty" -#define NCBIWEBBLAST_update_action "never" -#define NCBIWEBBLAST_mode "psicoffee,expresso,3dcoffee" -#define NCBIBLAST_4_TCOFFEE "blastpgp" -#define NCBIBLAST_type "protein_homology_predictor" -#define NCBIBLAST_ADDRESS "ftp://ftp.ncbi.nih.gov/blast/executables/LATEST" -#define NCBIBLAST_language "C" -#define NCBIBLAST_language2 "C" -#define NCBIBLAST_source "empty" -#define NCBIBLAST_update_action "never" -#define NCBIBLAST_mode "psicoffee,expresso,3dcoffee" -#define SOAPLITE_4_TCOFFEE "SOAP::Lite" -#define SOAPLITE_type "library" -#define SOAPLITE_ADDRESS "http://cpansearch.perl.org/src/MKUTTER/SOAP-Lite-0.710.08/Makefile.PL" -#define SOAPLITE_language "Perl" -#define SOAPLITE_language2 "Perl" -#define SOAPLITE_source "empty" -#define SOAPLITE_mode "psicoffee,expresso,3dcoffee" -//TclinkdbEnd -/*New Methods*/ -/********************************************/ -/* Various Methoids */ -/********************************************/ -#define METHODS_4_TCOFFEE "~/.t_coffee/methods/" -#define METHOD_4_MSA_WEIGHTS "petra_weight" -/********************************************/ -/* SEQAN LIBRARY */ -/********************************************/ -#define SEQAN_TCOFFEE_4_TCOFFEE "seqan_tcoffee" -/********************************************/ -/* REFORMATING AND UTILITIES */ -/********************************************/ -#define WGET_4_TCOFFEE "wget" -#define WGET_ADDRESS "http://www.gnu.org/software/wget/" - -#define CURL_4_TCOFFEE "curl" -#define CURL_ADDRESS "http://curl.haxx.se/" - -#define SEQ_REFORMAT_4_TCOFFEE "seq_reformat" -#define PS2PDF "ps2pdf" -#define EXTRACT_FROM_PDB_4_TCOFFEE "extract_from_pdb" -#define BLAST_ALN2FASTA_ALN "blast_aln2fasta_aln.pl" -#define FASTA_ALN2FASTA_ALN_UNIQUE_NAME "fasta_aln2fasta_aln_unique_name.pl" -#define MSF_ALN2FASTA_ALN "msf_aln2fasta_aln.pl" -#define SEQ2MSA_WEIGHT "seq2msa_weight" -/********************************************/ -/* DEPRECATED DEF */ -/********************************************/ -//Deprecated definitions -#define SIB_BLAST_4_TCOFFEE "blastall.remote" -#define LOCAL_BLAST_4_TCOFFEE "blastall" -#define BLAST_DB_4_TCOFFEE "nr" -#define NCBI_BLAST_4_TCOFFEE "" -/********************************************/ -/* PARAMETER_FILE */ -/********************************************/ - - - - -/* PARAMETER FILES */ -#define COLOR_FILE "seq_reformat.color" -/*This file specifies the 10 colors available to seq_reformat. -If the file is not on the system, hard coded defaults will be used. -The format is as follow: - -------------------------------------------------------------------------------------------- - -* - -------------------------------------------------------------------------------------------- -the RGB values are used for the post-script generation, the html code is used in html documents. -*/ -#define DATE "Tue Oct 27 10:10:30 WEST 2009" -#define PROGRAM "T-COFFEE" -#define VERSION "Version_8.14" -#define AUTHOR "Cedric Notredame " -#define DISTRIBUTION_ADDRESS "www.tcoffee.org/Packages/" -/*********************************COPYRIGHT NOTICE**********************************/ -/*© Centro de Regulacio Genomica */ -/*and */ -/*Cedric Notredame */ -/*Tue Oct 27 10:12:26 WEST 2009. */ -/*All rights reserved.*/ -/*This file is part of T-COFFEE.*/ -/**/ -/* T-COFFEE is free software; you can redistribute it and/or modify*/ -/* it under the terms of the GNU General Public License as published by*/ -/* the Free Software Foundation; either version 2 of the License, or*/ -/* (at your option) any later version.*/ -/**/ -/* T-COFFEE is distributed in the hope that it will be useful,*/ -/* but WITHOUT ANY WARRANTY; without even the implied warranty of*/ -/* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the*/ -/* GNU General Public License for more details.*/ -/**/ -/* You should have received a copy of the GNU General Public License*/ -/* along with Foobar; if not, write to the Free Software*/ -/* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA*/ -/*............................................... |*/ -/* If you need some more information*/ -/* cedric.notredame@europe.com*/ -/*............................................... |*/ -/**/ -/**/ -/* */ -/*********************************COPYRIGHT NOTICE**********************************/ diff --git a/binaries/src/tcoffee/t_coffee_source/dev1.c b/binaries/src/tcoffee/t_coffee_source/dev1.c deleted file mode 100644 index 31d70b5..0000000 --- a/binaries/src/tcoffee/t_coffee_source/dev1.c +++ /dev/null @@ -1,133 +0,0 @@ -#include -#include -#include -#include -#include - -#include "io_lib_header.h" -#include "util_lib_header.h" -#include "define_header.h" -#include "dp_lib_header.h" -#include "dev1_lib_header.h" - -//Insert functions here -void aln2hitMat_help() -{ - fprintf ( stdout, "\n+aln2hitMat| _MODE_ : how to compare the two positions of the alignment (default:id)"); - fprintf ( stdout, "\n.................id : the sequence identity of those two positions"); - fprintf ( stdout, "\n.............pairscore : the pairwise score of the residues of those two positions"); - fprintf ( stdout, "\n+aln2hitMat| _MATRIX_ : matrix used for the comparison (idmat, blosum62mt, pam250mt.. default:blosum62mt)\n"); - exit (EXIT_SUCCESS); -} - -void aln2hitMat (Alignment *A, char *phitmat) -{ - float **ffpHitScoreMatrix; - int i, j, k, l, s; - int nl = A->len_aln; - int inseq = A->nseq; - int itmpScore; - char matrix[100]; - char mode[100]; - int isim_count, itotal_count, r1, r2; - -//Initialization for files - char *pcFileName = A->file[0]; - char prefix[200] ={0}; - char *hit_matrix_file = vcalloc(200, sizeof (char)); - char *hit_html_file = vcalloc(200, sizeof (char)); - int len = (strrchr(pcFileName,'.')?strrchr(pcFileName,'.')-pcFileName:strlen(pcFileName)); - - strncpy(prefix, pcFileName, len); - sprintf(hit_matrix_file, "%s%s", prefix, "_aln.hit_matrix"); - sprintf(hit_html_file, "%s%s", prefix, ".alnhit_html"); - - if ( phitmat && strstr ( phitmat, "help")) - aln2hitMat_help(); - - if(phitmat == NULL) phitmat = vcalloc(1, sizeof(char)); //such that program could get default value - - strget_param (phitmat, "_MODE_", "id", "%s", mode); - strget_param (phitmat, "_MATRIX_", "blosum62mt", "%s", matrix); - - fprintf ( stdout, "[START] aln to hit matrix\n"); - fprintf ( stdout, " Mode:%s\n", mode); - fprintf ( stdout, " Matrix:%s\n", matrix); - - int **mat = read_matrice(matrix); - - ffpHitScoreMatrix=vcalloc (nl, sizeof (float*)); - for(i = 0; i < nl; i++) - ffpHitScoreMatrix[i]=vcalloc (nl-i, sizeof (float)); - - fprintf (stdout, "Process positions\n", i); - for(i = 0; i < nl; i++) - { - fprintf (stdout, "%d, ", i); - for(j = i; j < nl; j++) - { - if(strm (mode, "id")) - ffpHitScoreMatrix[i][j-i]=generic_get_seq_sim (aln_column2string(A, i), aln_column2string(A, j), (A->cdna_cache)?A->cdna_cache[0]:NULL, matrix); - else if(strm (mode, "pairscore")) - { - isim_count = itotal_count = 0; - for (k=0; k< inseq; k++) - { - r1=tolower(A->seq_al[k][i]); - if (is_gap(r1))continue; - for (l=0; l< inseq; l++) - { - r2=tolower(A->seq_al[l][j]); - if (is_gap (r2))continue; - s=mat[r2-'A'][r1-'A']; - s=(s<=0)?0:1; - isim_count += s; - itotal_count++; - } - } - r1=(isim_count*100)/itotal_count; - ffpHitScoreMatrix[i][j-i] = r1; - } - else - aln2hitMat_help(); - } - } - fprintf (stdout, "\n"); - output_hit_matrix(hit_matrix_file, ffpHitScoreMatrix, nl); - -//Output Hit Score into color html - output_hit_color_html (A, ffpHitScoreMatrix, nl, hit_html_file); - vfree(ffpHitScoreMatrix); - vfree(hit_matrix_file); - vfree(hit_html_file); - fprintf ( stdout, "[END] aln to hit matrix\n"); -} -/*********************************COPYRIGHT NOTICE**********************************/ -/*© Centro de Regulacio Genomica */ -/*and */ -/*Cedric Notredame */ -/*Tue Oct 27 10:12:26 WEST 2009. */ -/*All rights reserved.*/ -/*This file is part of T-COFFEE.*/ -/**/ -/* T-COFFEE is free software; you can redistribute it and/or modify*/ -/* it under the terms of the GNU General Public License as published by*/ -/* the Free Software Foundation; either version 2 of the License, or*/ -/* (at your option) any later version.*/ -/**/ -/* T-COFFEE is distributed in the hope that it will be useful,*/ -/* but WITHOUT ANY WARRANTY; without even the implied warranty of*/ -/* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the*/ -/* GNU General Public License for more details.*/ -/**/ -/* You should have received a copy of the GNU General Public License*/ -/* along with Foobar; if not, write to the Free Software*/ -/* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA*/ -/*............................................... |*/ -/* If you need some more information*/ -/* cedric.notredame@europe.com*/ -/*............................................... |*/ -/**/ -/**/ -/* */ -/*********************************COPYRIGHT NOTICE**********************************/ diff --git a/binaries/src/tcoffee/t_coffee_source/dev1_lib_header.h b/binaries/src/tcoffee/t_coffee_source/dev1_lib_header.h deleted file mode 100644 index d0bffa0..0000000 --- a/binaries/src/tcoffee/t_coffee_source/dev1_lib_header.h +++ /dev/null @@ -1,31 +0,0 @@ -//Insert function prototypes here -void aln2hitMat (Alignment *S, char *arg_list); -/*********************************COPYRIGHT NOTICE**********************************/ -/*© Centro de Regulacio Genomica */ -/*and */ -/*Cedric Notredame */ -/*Tue Oct 27 10:12:26 WEST 2009. */ -/*All rights reserved.*/ -/*This file is part of T-COFFEE.*/ -/**/ -/* T-COFFEE is free software; you can redistribute it and/or modify*/ -/* it under the terms of the GNU General Public License as published by*/ -/* the Free Software Foundation; either version 2 of the License, or*/ -/* (at your option) any later version.*/ -/**/ -/* T-COFFEE is distributed in the hope that it will be useful,*/ -/* but WITHOUT ANY WARRANTY; without even the implied warranty of*/ -/* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the*/ -/* GNU General Public License for more details.*/ -/**/ -/* You should have received a copy of the GNU General Public License*/ -/* along with Foobar; if not, write to the Free Software*/ -/* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA*/ -/*............................................... |*/ -/* If you need some more information*/ -/* cedric.notredame@europe.com*/ -/*............................................... |*/ -/**/ -/**/ -/* */ -/*********************************COPYRIGHT NOTICE**********************************/ diff --git a/binaries/src/tcoffee/t_coffee_source/dev2.c b/binaries/src/tcoffee/t_coffee_source/dev2.c deleted file mode 100644 index 4bffd54..0000000 --- a/binaries/src/tcoffee/t_coffee_source/dev2.c +++ /dev/null @@ -1,43 +0,0 @@ -#include -#include -#include -#include -#include - -#include "io_lib_header.h" -#include "util_lib_header.h" -#include "define_header.h" -#include "dp_lib_header.h" -#include "dev2_lib_header.h" - - -//Insert functions here -/*********************************COPYRIGHT NOTICE**********************************/ -/*© Centro de Regulacio Genomica */ -/*and */ -/*Cedric Notredame */ -/*Tue Oct 27 10:12:26 WEST 2009. */ -/*All rights reserved.*/ -/*This file is part of T-COFFEE.*/ -/**/ -/* T-COFFEE is free software; you can redistribute it and/or modify*/ -/* it under the terms of the GNU General Public License as published by*/ -/* the Free Software Foundation; either version 2 of the License, or*/ -/* (at your option) any later version.*/ -/**/ -/* T-COFFEE is distributed in the hope that it will be useful,*/ -/* but WITHOUT ANY WARRANTY; without even the implied warranty of*/ -/* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the*/ -/* GNU General Public License for more details.*/ -/**/ -/* You should have received a copy of the GNU General Public License*/ -/* along with Foobar; if not, write to the Free Software*/ -/* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA*/ -/*............................................... |*/ -/* If you need some more information*/ -/* cedric.notredame@europe.com*/ -/*............................................... |*/ -/**/ -/**/ -/* */ -/*********************************COPYRIGHT NOTICE**********************************/ diff --git a/binaries/src/tcoffee/t_coffee_source/dev2_lib_header.h b/binaries/src/tcoffee/t_coffee_source/dev2_lib_header.h deleted file mode 100644 index 9ce384b..0000000 --- a/binaries/src/tcoffee/t_coffee_source/dev2_lib_header.h +++ /dev/null @@ -1,30 +0,0 @@ -//Insert function prototypes here -/*********************************COPYRIGHT NOTICE**********************************/ -/*© Centro de Regulacio Genomica */ -/*and */ -/*Cedric Notredame */ -/*Tue Oct 27 10:12:26 WEST 2009. */ -/*All rights reserved.*/ -/*This file is part of T-COFFEE.*/ -/**/ -/* T-COFFEE is free software; you can redistribute it and/or modify*/ -/* it under the terms of the GNU General Public License as published by*/ -/* the Free Software Foundation; either version 2 of the License, or*/ -/* (at your option) any later version.*/ -/**/ -/* T-COFFEE is distributed in the hope that it will be useful,*/ -/* but WITHOUT ANY WARRANTY; without even the implied warranty of*/ -/* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the*/ -/* GNU General Public License for more details.*/ -/**/ -/* You should have received a copy of the GNU General Public License*/ -/* along with Foobar; if not, write to the Free Software*/ -/* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA*/ -/*............................................... |*/ -/* If you need some more information*/ -/* cedric.notredame@europe.com*/ -/*............................................... |*/ -/**/ -/**/ -/* */ -/*********************************COPYRIGHT NOTICE**********************************/ diff --git a/binaries/src/tcoffee/t_coffee_source/dev3.c b/binaries/src/tcoffee/t_coffee_source/dev3.c deleted file mode 100644 index b8c9887..0000000 --- a/binaries/src/tcoffee/t_coffee_source/dev3.c +++ /dev/null @@ -1,42 +0,0 @@ -#include -#include -#include -#include -#include - -#include "io_lib_header.h" -#include "util_lib_header.h" -#include "define_header.h" -#include "dp_lib_header.h" -#include "dev3_lib_header.h" - -//Insert functions here -/*********************************COPYRIGHT NOTICE**********************************/ -/*© Centro de Regulacio Genomica */ -/*and */ -/*Cedric Notredame */ -/*Tue Oct 27 10:12:26 WEST 2009. */ -/*All rights reserved.*/ -/*This file is part of T-COFFEE.*/ -/**/ -/* T-COFFEE is free software; you can redistribute it and/or modify*/ -/* it under the terms of the GNU General Public License as published by*/ -/* the Free Software Foundation; either version 2 of the License, or*/ -/* (at your option) any later version.*/ -/**/ -/* T-COFFEE is distributed in the hope that it will be useful,*/ -/* but WITHOUT ANY WARRANTY; without even the implied warranty of*/ -/* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the*/ -/* GNU General Public License for more details.*/ -/**/ -/* You should have received a copy of the GNU General Public License*/ -/* along with Foobar; if not, write to the Free Software*/ -/* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA*/ -/*............................................... |*/ -/* If you need some more information*/ -/* cedric.notredame@europe.com*/ -/*............................................... |*/ -/**/ -/**/ -/* */ -/*********************************COPYRIGHT NOTICE**********************************/ diff --git a/binaries/src/tcoffee/t_coffee_source/dev3_lib_header.h b/binaries/src/tcoffee/t_coffee_source/dev3_lib_header.h deleted file mode 100644 index 9ce384b..0000000 --- a/binaries/src/tcoffee/t_coffee_source/dev3_lib_header.h +++ /dev/null @@ -1,30 +0,0 @@ -//Insert function prototypes here -/*********************************COPYRIGHT NOTICE**********************************/ -/*© Centro de Regulacio Genomica */ -/*and */ -/*Cedric Notredame */ -/*Tue Oct 27 10:12:26 WEST 2009. */ -/*All rights reserved.*/ -/*This file is part of T-COFFEE.*/ -/**/ -/* T-COFFEE is free software; you can redistribute it and/or modify*/ -/* it under the terms of the GNU General Public License as published by*/ -/* the Free Software Foundation; either version 2 of the License, or*/ -/* (at your option) any later version.*/ -/**/ -/* T-COFFEE is distributed in the hope that it will be useful,*/ -/* but WITHOUT ANY WARRANTY; without even the implied warranty of*/ -/* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the*/ -/* GNU General Public License for more details.*/ -/**/ -/* You should have received a copy of the GNU General Public License*/ -/* along with Foobar; if not, write to the Free Software*/ -/* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA*/ -/*............................................... |*/ -/* If you need some more information*/ -/* cedric.notredame@europe.com*/ -/*............................................... |*/ -/**/ -/**/ -/* */ -/*********************************COPYRIGHT NOTICE**********************************/ diff --git a/binaries/src/tcoffee/t_coffee_source/dev4.c b/binaries/src/tcoffee/t_coffee_source/dev4.c deleted file mode 100644 index 6be9724..0000000 --- a/binaries/src/tcoffee/t_coffee_source/dev4.c +++ /dev/null @@ -1,42 +0,0 @@ -#include -#include -#include -#include -#include - -#include "io_lib_header.h" -#include "util_lib_header.h" -#include "define_header.h" -#include "dp_lib_header.h" -#include "dev4_lib_header.h" - -//Insert functions here -/*********************************COPYRIGHT NOTICE**********************************/ -/*© Centro de Regulacio Genomica */ -/*and */ -/*Cedric Notredame */ -/*Tue Oct 27 10:12:26 WEST 2009. */ -/*All rights reserved.*/ -/*This file is part of T-COFFEE.*/ -/**/ -/* T-COFFEE is free software; you can redistribute it and/or modify*/ -/* it under the terms of the GNU General Public License as published by*/ -/* the Free Software Foundation; either version 2 of the License, or*/ -/* (at your option) any later version.*/ -/**/ -/* T-COFFEE is distributed in the hope that it will be useful,*/ -/* but WITHOUT ANY WARRANTY; without even the implied warranty of*/ -/* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the*/ -/* GNU General Public License for more details.*/ -/**/ -/* You should have received a copy of the GNU General Public License*/ -/* along with Foobar; if not, write to the Free Software*/ -/* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA*/ -/*............................................... |*/ -/* If you need some more information*/ -/* cedric.notredame@europe.com*/ -/*............................................... |*/ -/**/ -/**/ -/* */ -/*********************************COPYRIGHT NOTICE**********************************/ diff --git a/binaries/src/tcoffee/t_coffee_source/dev4_lib_header.h b/binaries/src/tcoffee/t_coffee_source/dev4_lib_header.h deleted file mode 100644 index 9ce384b..0000000 --- a/binaries/src/tcoffee/t_coffee_source/dev4_lib_header.h +++ /dev/null @@ -1,30 +0,0 @@ -//Insert function prototypes here -/*********************************COPYRIGHT NOTICE**********************************/ -/*© Centro de Regulacio Genomica */ -/*and */ -/*Cedric Notredame */ -/*Tue Oct 27 10:12:26 WEST 2009. */ -/*All rights reserved.*/ -/*This file is part of T-COFFEE.*/ -/**/ -/* T-COFFEE is free software; you can redistribute it and/or modify*/ -/* it under the terms of the GNU General Public License as published by*/ -/* the Free Software Foundation; either version 2 of the License, or*/ -/* (at your option) any later version.*/ -/**/ -/* T-COFFEE is distributed in the hope that it will be useful,*/ -/* but WITHOUT ANY WARRANTY; without even the implied warranty of*/ -/* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the*/ -/* GNU General Public License for more details.*/ -/**/ -/* You should have received a copy of the GNU General Public License*/ -/* along with Foobar; if not, write to the Free Software*/ -/* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA*/ -/*............................................... |*/ -/* If you need some more information*/ -/* cedric.notredame@europe.com*/ -/*............................................... |*/ -/**/ -/**/ -/* */ -/*********************************COPYRIGHT NOTICE**********************************/ diff --git a/binaries/src/tcoffee/t_coffee_source/dp_lib_header.h b/binaries/src/tcoffee/t_coffee_source/dp_lib_header.h deleted file mode 100644 index 5196710..0000000 --- a/binaries/src/tcoffee/t_coffee_source/dp_lib_header.h +++ /dev/null @@ -1,839 +0,0 @@ -struct CL_node - { - - int copy_mode; - struct CL_node *c; - struct CL_node *p; - struct CL_node *l; - struct CL_node *r; - int seq; - int res; - int free; -}; - -typedef struct CL_node CL_node; -Alignment * add_constraint2aln ( Alignment *A, int s1, int r1, int s2, int r2); -Alignment * graph_aln (Alignment *A, Constraint_list *CL, Sequence *S); -Alignment* graph2aln (Alignment *A, CL_node *G, Sequence *S); -CL_node ***add_constraint2graph_aln (CL_node ***G, int s1, int r1, int s2, int r2); -CL_node * shift_segment ( CL_node *S, int segL, int shiftL); - -int is_graph_gap_column(CL_node *S); -CL_node * remove_graph_gap_column (CL_node *S); -CL_node * swap_gap_in_graph ( CL_node*S, CL_node *E); - -CL_node * declare_cl_nodes ( int len, int seq); -CL_node * insert_gap_columns (CL_node *S, int d); -int get_node_distance ( CL_node *S, CL_node *E); -CL_node ***aln2graph (Alignment *A); -CL_node *vfree_graph (CL_node *S); -CL_node *vfree_cl_node ( CL_node *N); - -int gotoh_pair_wise_lalign ( Alignment *A, int*ns, int **l_s,Constraint_list *CL); -Constraint_list * undefine_sw_aln ( Alignment *A, Constraint_list *CL); -Constraint_list * undefine_sw_pair ( Constraint_list *CL, int s1, int r1, int s2, int r2); -int sw_pair_is_defined ( Constraint_list *CL, int s1, int r1, int s2, int r2); - - -int gotoh_pair_wise_sw (Alignment *A,int*ns, int **l_s,Constraint_list *CL); - -Alignment * get_best_local_aln ( Alignment *IN,Constraint_list *CL,int gop, int gep, int sw_t, int sw_l, int sw_z, int greedy); -Alignment * get_best_nol_local_aln ( Alignment *IN, Constraint_list *CL, int gop, int gep,int sw_t,int sw_l, int sw_z, int mode); -double compute_penalty (Constraint_list *CL, char *mode, int len); -double compute_scale ( Constraint_list *CL,char *mode, int len); -int evaluate_penalty (Alignment *A, Constraint_list *CL, int *scale,char *scale_mode, int *penalty, char *penalty_mode, int len_seq); -Alignment ** t_coffee_lalign (Constraint_list *CL, int scale, int penalty,int maximise,Sequence *S, int sw_t, int sw_l, int sw_z,int *sw_n, int sw_io); -Alignment * add_seq2aln (Constraint_list *CL, Alignment *IN,Sequence *S); - - - - - - - -struct Dp_Model -{ - int *diag; - - int TG_MODE; - int F_TG_MODE; - int gop; - int gep; - int f_gop; - int f_gep; - int nstate; - int START; - int END; - - char**model_comments; - int **model; - int **model_properties; - int **bounded_model; - int (***model_emission_function)(Alignment*, int **, int, int*, int, int **, int, int*, int, struct Constraint_list *); - - int LEN_I; - int LEN_J; - int DELTA_I; - int DELTA_J; - int EMISSION; - int START_EMISSION; - int TERM_EMISSION; - - int ALN_TYPE; - Constraint_list *CL; - /*Associated Functions*/ - - /*To Deprecate*/ - int UM; - - int TYPE; - int F0; - int F1; - - - int NON_CODING; - int INSERTION; - int DELETION; - int CODING0; - int CODING1; - int CODING2; - - -}; -typedef struct Dp_Model Dp_Model; - -struct Dp_Result -{ - int *traceback; - int len; - int score; - Dp_Model *Dp_model; -}; -typedef struct Dp_Result Dp_Result; - -Dp_Result * make_fast_generic_dp_pair_wise (Alignment *A, int*ns, int **l_s,Dp_Model *M); - -Constraint_list* free_dp_model (Dp_Model *D); -Dp_Result * free_dp_result (Dp_Result *D ); - -typedef struct hseq* SeqHasch; - -typedef struct hseq -{ - SeqHasch hl[256]; - int n; - int *l; -} hseq; - -int ** ktup_dist_mat ( char **seq, int nseq, int ktup, char *type); -int ** evaluate_diagonals_for_two_sequences ( char *seq1, char *seq2,int maximise,Constraint_list *CL,int ktup); -int ** evaluate_diagonals ( Alignment *A, int *ns, int **l_s, Constraint_list *CL,int maximise,int n_groups, char **group_list,int ktup); -int ** evaluate_segments_with_ktup ( Alignment *A, int *ns, int **l_s, Constraint_list *CL,int maximise,int n_groups, char **group_list,int ktup); -int ** evaluate_diagonals_with_ktup ( Alignment *A, int *ns, int **l_s, Constraint_list *CL,int maximise,int n_groups, char **group_list,int ktup); - -int ** evaluate_diagonals_with_clist ( Alignment *A, int *ns, int **l_s, Constraint_list *CL,int maximise,int n_groups, char **group_list,int ktup); - -int * flag_diagonals (int l1, int l2, int **sorted_diag,float T, int window); -int * extract_N_diag (int l1, int l2, int **sorted_diag, int n_chosen_diag, int window); - -int hasch_seq(char *seq1, int **hs, int **lu,int ktup, char *alph); -int fasta_gotoh_pair_wise (Alignment *A,int*ns, int **l_s,Constraint_list *CL); -int cfasta_gotoh_pair_wise (Alignment *A,int*ns, int **l_s,Constraint_list *CL); -int very_fast_gotoh_pair_wise (Alignment *A,int*ns, int **l_s,Constraint_list *CL); - -int make_fasta_gotoh_pair_wise (Alignment *A,int*ns, int **l_s,Constraint_list *CL, int *diag); -/*********************************************************************/ -/* */ -/* KTUP_DP */ -/* */ -/* */ -/*********************************************************************/ - -int precomputed_pair_wise (Alignment *A,int*ns, int **l_s,Constraint_list *CL); -int ktup_pair_wise (Alignment *A,int*ns, int **l_s,Constraint_list *CL); -int ktup_comparison ( char *seq1, char *seq2, int ktup); -HaschT* hasch_sequence ( char *seq1, int ktup); - -SeqHasch * seq2hasch (int i,char *seq, int ktup, SeqHasch *H); -Constraint_list * hasch2constraint_list (Sequence*S, Constraint_list *CL); -SeqHasch *cleanhasch (SeqHasch *H); -int hasch2sim (SeqHasch *H, int nseq); -int cfasta_gotoh_pair_wise_sw (Alignment *A,int*ns, int **l_s,Constraint_list *CL); -int fasta_gotoh_pair_wise_sw (Alignment *A,int*ns, int **l_s,Constraint_list *CL); -int make_fasta_gotoh_pair_wise_sw (Alignment *A,int*ns, int **l_s,Constraint_list *CL, int *diag); - -/*pair wise aln implementations*/ - -int idscore_pairseq (char *s1, char *s2, int gop, int gep, int **m, char *mode); -int idscore_pair_wise (Alignment *A,int*ns, int **l_s,Constraint_list *CL); -int gotoh_pair_wise (Alignment *A,int*ns, int **l_s,Constraint_list *CL); -int glocal_pair_wise ( Alignment *A, int *ns, int **l_s, Constraint_list *CL); -int gotoh_pair_wise_lgp ( Alignment *A, int *ns, int **l_s, Constraint_list *CL); -int test_pair_wise (Alignment *A, int *ns, int **l_s, Constraint_list *CL); - -int glocal2_pair_wise (Alignment *A,int*ns, int **ls,Constraint_list *CL); -int gotoh_pair_wise_lgp_sticky ( Alignment *A, int *ns, int **l_s, Constraint_list *CL); -int linked_pair_wise ( Alignment *A, int *ns, int **l_s, Constraint_list *CL); -int clinked_pair_wise ( Alignment *A, int *ns, int **l_s, Constraint_list *CL); -void free_proba_pair_wise(); - -int subop1_pair_wise ( Alignment *A, int *ns, int **ls, Constraint_list *CL); -int subop2_pair_wise ( Alignment *A, int *ns, int **ls, Constraint_list *CL); -int proba_pair_wise ( Alignment *A, int *ns, int **ls, Constraint_list *CL); -int viterbi_pair_wise ( Alignment *A, int *ns, int **ls, Constraint_list *CL); -int biphasic_pair_wise ( Alignment *A, int *ns, int **l_s, Constraint_list *CL); - - - - - - - - -int cfasta_cdna_pair_wise (Alignment *A,int*ns, int **l_s,Constraint_list *CL); -int fasta_cdna_pair_wise (Alignment *A,int*ns, int **l_s,Constraint_list *CL); -Dp_Model* initialize_dna_dp_model (Constraint_list *CL); -Dp_Result * make_fast_dp_pair_wise (Alignment *A,int*ns, int **l_s, Constraint_list *CL,Dp_Model *M); -int make_fasta_cdna_pair_wise (Alignment *B,Alignment *A,int*ns, int **l_s,Constraint_list *CL, int *diag); - - - -int ** evaluate_diagonals_cdna ( Alignment *A, int *ns, int **l_s, Constraint_list *CL,int maximise,int n_groups, char **group_list, int ktup); -int cfasta_cdna_pair_wise (Alignment *A,int*ns, int **l_s,Constraint_list *CL); -Alignment *clean_maln ( Alignment *A, Alignment *I, int T, int n_it); -Alignment *realign_segment (int seq, int start, int len,Alignment *A, Alignment *C); -Dp_Model * initialize_seg2prf_model(int left_tg_mode, int right_tg_mode, Constraint_list *CL); - -int get_gep_cost (Alignment *A, int**pos1, int ns1, int*list1, int col1, int**pos2, int ns2, int*list2, int col2, Constraint_list *CL); -int get_start_gep_cost (Alignment *A, int**pos1, int ns1, int*list1, int col1, int**pos2, int ns2, int*list2, int col2, Constraint_list *CL); -int get_term_gep_cost (Alignment *A, int**pos1, int ns1, int*list1, int col1, int**pos2, int ns2, int*list2, int col2, Constraint_list *CL); - -Dp_Model * initialize_sseq_model(int left_tg_mode, int right_tg_mode, Constraint_list *CL); -int ssec_pwaln_maln (Alignment *A, int *ns, int **ls, Constraint_list *CL); - - -int get_turn_gep_cost (Alignment *A, int**pos1, int ns1, int*list1, int col1, int**pos2, int ns2, int*list2, int col2, Constraint_list *CL); -int get_turn_start_gep_cost (Alignment *A, int**pos1, int ns1, int*list1, int col1, int**pos2, int ns2, int*list2, int col2, Constraint_list *CL); -int get_turn_term_gep_cost (Alignment *A, int**pos1, int ns1, int*list1, int col1, int**pos2, int ns2, int*list2, int col2, Constraint_list *CL); - -int get_alpha_gep_cost (Alignment *A, int**pos1, int ns1, int*list1, int col1, int**pos2, int ns2, int*list2, int col2, Constraint_list *CL); -int get_alpha_start_gep_cost(Alignment *A, int**pos1, int ns1, int*list1, int col1, int**pos2, int ns2, int*list2, int col2, Constraint_list *CL); -int get_alpha_term_gep_cost (Alignment *A, int**pos1, int ns1, int*list1, int col1, int**pos2, int ns2, int*list2, int col2, Constraint_list *CL); - -int get_beta_gep_cost (Alignment *A, int**pos1, int ns1, int*list1, int col1, int**pos2, int ns2, int*list2, int col2, Constraint_list *CL); -int get_beta_start_gep_cost (Alignment *A, int**pos1, int ns1, int*list1, int col1, int**pos2, int ns2, int*list2, int col2, Constraint_list *CL); -int get_beta_term_gep_cost (Alignment *A, int**pos1, int ns1, int*list1, int col1, int**pos2, int ns2, int*list2, int col2, Constraint_list *CL); - -int get_alpha_sub_cost (Alignment *A, int**pos1, int ns1, int*list1, int col1, int**pos2, int ns2, int*list2, int col2, Constraint_list *CL); -int get_beta_sub_cost (Alignment *A, int**pos1, int ns1, int*list1, int col1, int**pos2, int ns2, int*list2, int col2, Constraint_list *CL); -int get_turn_sub_cost (Alignment *A, int**pos1, int ns1, int*list1, int col1, int**pos2, int ns2, int*list2, int col2, Constraint_list *CL); - -int get_ssec_no_cost (Alignment *A, int**pos1, int ns1, int*list1, int col1, int**pos2, int ns2, int*list2, int col2, Constraint_list *CL); -int sim_pair_wise_lalign (Alignment *in_A, int *in_ns, int **in_l_s, Constraint_list *in_CL); - -/*pair wise aln implementations*/ -int myers_miller_pair_wise (Alignment *A, int *ns, int **l_s,Constraint_list *CL); -int diff (Alignment *A, int *ns, int **ls, int s1, int M,int s2, int N , int tb, int te, Constraint_list *CL, int **pos); -int evaluate_est_order (Sequence *S, char *concat, Constraint_list *CL, int ktuple); - -Constraint_list *prepare_cl_for_moca ( Constraint_list *CL); -Alignment ** moca_aln ( Constraint_list *CL); -Alignment * extract_domain ( Constraint_list *CL); -Alignment * interactive_domain_extraction ( Constraint_list *CL); -int print_moca_interactive_choices (); - -Alignment * approximate_domain ( int min_start, int max_start, int step_start,int min_len, int max_len, int step_len, int *best_start, int *best_len, int *best_score, Constraint_list *CL); - -int measure_domain_length ( Constraint_list *CL,Alignment *IN, int start, int min_size, int max_size, int step); -Alignment *extract_domain_with_coordinates ( Alignment *RESULT,int start, int len, Constraint_list *CL); -int get_starting_point ( Constraint_list *CL); - -Alignment * find_domain_coordinates (Constraint_list *CL, int *start, int *len); -Alignment * extend_domain ( Constraint_list *CL, int *start, int *len, int dstart, int dlen); -Alignment * modify_domain ( Constraint_list *CL, Alignment *IN, int *start, int *len, int dstart, int dlen); - -int * analyse_sequence ( Constraint_list *CL); - -/****************************************************************************/ -/* */ -/* */ -/* Alignment Methods */ -/* */ -/* */ -/****************************************************************************/ -Alignment * sorted_aln (Alignment *A, Constraint_list *CL); -Alignment * sorted_aln_seq (int seq, Alignment *A, Constraint_list *CL); -Alignment * full_sorted_aln (Alignment *A, Constraint_list *CL); - -/******************************************************************/ -/* MAIN DRIVER */ -/* */ -/* */ -/******************************************************************/ - -Constraint_list *profile2list ( Job_TC *job,int nprf); -Constraint_list *seq2list (Job_TC *Job); -Constraint_list *method2pw_cl (TC_method *M, Constraint_list *CL); -int method_uses_structure(TC_method *M); -int method_uses_profile(TC_method *M); - -/******************************************************************/ -/* MULTIPLE ALIGNMENTS */ -/* */ -/* */ -/******************************************************************/ -Alignment * compute_prrp_aln (Alignment *A, Constraint_list *CL); -Alignment * compute_clustalw_aln (Alignment *A, Constraint_list *CL); -Alignment * compute_tcoffee_aln_quick (Alignment *A, Constraint_list *CL); -Alignment * seq2clustalw_aln (Sequence *S); -Alignment * aln2clustalw_aln (Alignment *A, Constraint_list *CL); -Alignment * realign_block ( Alignment *A, int col1, int col2, char *pg); -/******************************************************************/ -/* DNA */ -/* */ -/* */ -/******************************************************************/ -Constraint_list * align_coding_nucleotides (char *seq, char *method, char *weight, char *mem_mode, Constraint_list *CL); -/******************************************************************/ -/* STRUCTURES */ -/* */ -/* */ -/******************************************************************/ -Constraint_list * seq_msa (TC_method *M , char *in_seq, Constraint_list *CL); - -Constraint_list *align_pdb_pair (char *seq_in, char *dp_mode,char *evaluate_mode, char *file, Constraint_list *CL, Job_TC *job); -Constraint_list * align_pdb_pair_2 (char *seq, Constraint_list *CL); - -Constraint_list * pdb_pair ( TC_method*M,char *seq, Constraint_list *CL); -Constraint_list * pdbid_pair ( TC_method*M,char *seq, Constraint_list *CL); -Constraint_list * profile_pair ( TC_method*M,char *seq, Constraint_list *CL); -Constraint_list * thread_pair ( TC_method*M,char *seq, Constraint_list *CL); -Constraint_list * thread_pair2 ( TC_method *M,int s1, int s2, Constraint_list *CL); -Constraint_list * sap_pair (char *seq, char *weight, Constraint_list *CL); -Constraint_list * lsqman_pair (char *seq, Constraint_list *CL); -Constraint_list * rna_pair (TC_method *M , char *in_seq, Constraint_list *CL); - -/******************************************************************/ -/* GENERIC PAIRWISE METHODS */ -/* */ -/* */ -/******************************************************************/ -Constraint_list *best_pair4prot (Job_TC *job); -Constraint_list *best_pair4rna (Job_TC *job); -Alignment * fast_pair (Job_TC *job); - -void toggle_case_in_align_two_sequences(int value); -Alignment * align_two_sequences ( char *seq1, char *seq2, char *matrix, int gop, int gep, char *align_mode); -Alignment * align_two_aln ( Alignment *A1, Alignment *A2, char *in_matrix, int gop, int gep, char *in_align_mode); -NT_node make_root_tree ( Alignment *A,Constraint_list *CL,int gop, int gep,Sequence *S, char *tree_file,int maximise); -NT_node ** make_tree ( Alignment *A,Constraint_list *CL,int gop, int gep,Sequence *S, char *tree_file, int maximise); -int ** get_pw_distances ( Alignment *A,Constraint_list *CL,int gop, int gep, char **out_seq, char **out_seq_name, int out_nseq, char *tree_file, char *tree_mode, int maximise); -Alignment *stack_progressive_nol_aln_with_seq_coor(Constraint_list *CL,int gop, int gep,Sequence *S, int **seq_coor, int nseq); -Alignment *stack_progressive_aln_with_seq_coor (Constraint_list*CL,int gop, int gep, Sequence *S, int **coor, int nseq); -Alignment *stack_progressive_aln(Alignment *A, Constraint_list *CL, int gop, int gep); -Alignment *est_progressive_aln(Alignment *A, Constraint_list *CL, int gop, int gep); -void analyse_seq ( Alignment *A, int s); - -char ** list_file2dpa_list_file (char **list_file, int *len,int maxnseq, Sequence *S); -Alignment * seq2aln_group (Alignment *A, int T, Constraint_list *CL); - -Alignment *profile_aln (Alignment *A, Constraint_list *CL); -Alignment * iterative_tree_aln (Alignment *A,int n, Constraint_list *CL); -Alignment * iterative_aln ( Alignment*A, int nseq, Constraint_list *CL); -Alignment * seq_aln ( Alignment*A, int nseq, Constraint_list *CL); -Alignment *tsp_aln (Alignment *A, Constraint_list *iCL, Sequence *S); -Alignment *iterate_aln ( Alignment*A, int nit, Constraint_list *CL); -Alignment *realign_aln ( Alignment*A, Constraint_list *CL); -Alignment *very_fast_aln (Alignment*A, int nseq, Constraint_list *CL); -Alignment *simple_progressive_aln (Sequence *S, NT_node **T, Constraint_list *CL, char *mat); -Alignment *frame_aln (Alignment *A, int n,Constraint_list *CL); -Alignment *dpa_aln (Alignment *A, Constraint_list *CL); -Alignment *new_dpa_aln (Alignment *A, Constraint_list *CL); -Alignment * make_delayed_tree_aln (Alignment *A,int n, Constraint_list *CL); - -NT_node* delayed_tree_aln ( NT_node LT, NT_node RT, Alignment*A, int nseq, Constraint_list *CL); -int node2seq_list (NT_node P, int *ns, int *ls); -Alignment* delayed_tree_aln1 ( NT_node P,Alignment*A,Constraint_list *CL, int threshold); -Alignment* delayed_tree_aln2 ( NT_node P,Alignment*A,Constraint_list *CL, int threshold); - -NT_node* tree2ao (NT_node LT, NT_node RT,Alignment *A, int nseq,Constraint_list *CL);//tree2align_order -NT_node* tree_aln ( NT_node LT, NT_node RT, Alignment*A, int nseq, Constraint_list *CL); -NT_node* local_tree_aln ( NT_node LT, NT_node RT, Alignment*A, int nseq, Constraint_list *CL); -NT_node* seqan_tree_aln ( NT_node LT, NT_node RT, Alignment*A, int nseq, Constraint_list *CL); - - -NT_node* tree_realn ( NT_node LT, NT_node RT, Alignment*A, int nseq, Constraint_list *CL); - -int split_condition (int nseq, int score, Constraint_list *CL); - -int profile_pair_wise (Alignment *A, int n1, int *l1, int n2, int *l2, Constraint_list *CL); -int pair_wise (Alignment *A, int*ns, int **l_s,Constraint_list *CL ); - -int empty_pair_wise ( Alignment *A, int *ns, int **l_s, Constraint_list *CL, int glocal); - - -Pwfunc get_pair_wise_function (Pwfunc func, char *dp_mode, int *glocal); - - - -char *build_consensus ( char *seq1, char *seq2, char *dp_mode); -int fastal (int argv, char **arg); - -int domain_pair_wise (Alignment *A,int*ns, int **l_s,Constraint_list *CL ); -Alignment *domain_match_list2aln ( Alignment *A,int *ns,int **l_s,int **ml, int nseq, int len); -Alignment * domain_seq2domain (Constraint_list *CL,int scale,int gop,int gep,Alignment *SEQ_DOMAIN, Alignment *TARGET); - - -int custom_pair_score_function1 (Constraint_list *CL, int s1, int r1, int s2, int r2); -int custom_pair_score_function2 (Constraint_list *CL, int s1, int r1, int s2, int r2); -int custom_pair_score_function3 (Constraint_list *CL, int s1, int r1, int s2, int r2); -int custom_pair_score_function4 (Constraint_list *CL, int s1, int r1, int s2, int r2); -int custom_pair_score_function5 (Constraint_list *CL, int s1, int r1, int s2, int r2); -int custom_pair_score_function6 (Constraint_list *CL, int s1, int r1, int s2, int r2); -int custom_pair_score_function7 (Constraint_list *CL, int s1, int r1, int s2, int r2); -int custom_pair_score_function8 (Constraint_list *CL, int s1, int r1, int s2, int r2); -int custom_pair_score_function9 (Constraint_list *CL, int s1, int r1, int s2, int r2); -int custom_pair_score_function10 (Constraint_list *CL, int s1, int r1, int s2, int r2); -int apdb (int argc, char *argv[]); - -Constraint_list * set_constraint_list4align_pdb (Constraint_list *inCL,int seq, char *dp_mode, char *hasch_mode, char *param_file); -int evaluate_ca_trace_sap2_bubble (Constraint_list *CL, int s1, int r1, int s2, int r2); -int evaluate_ca_trace_nb (Constraint_list *CL, int s1, int s2, int r1, int r2); -int evaluate_ca_trace_bubble (Constraint_list *CL, int s1, int s2, int r1, int r2); -int evaluate_ca_trace_sap1_bubble (Constraint_list *CL, int s1, int s2, int r1, int r2); -int evaluate_ca_trace_3D_bubble (Constraint_list *CL, int s1, int s2, int r1, int r2); -int evaluate_ca_trace_transversal (Constraint_list *CL, int s1, int s2, int r1, int r2); -int evaluate_ca_trace_bubble_2 (Constraint_list *CL, int s1, int s2, int r1, int r2); -int evaluate_ca_trace_bubble_3 (Constraint_list *CL, int s1, int s2, int r1, int r2); - - -/*********************************************************************************************/ -/* */ -/* FUNCTIONS FOR COMPARING TWO NEIGHBORHOODS:START */ -/* */ -/*********************************************************************************************/ -float matrix_match (Constraint_list *CL, int s1, int s2, int r1, int r2, Struct_nb *nbs1, Struct_nb *nbs2); -float transversal_match (Constraint_list *CL, int s1, int s2, int r1, int r2, Struct_nb *nbs1, Struct_nb *nbs2); -float neighborhood_match (Constraint_list *CL, int s1, int s2, int r1, int r2,Struct_nb *nbs1, Struct_nb *nbs2); -float sap1_neighborhood_match (Constraint_list *CL, int s1, int s2, int r1, int r2, Struct_nb *nbs1, Struct_nb *nbs2); -float sap2_neighborhood_match (Constraint_list *CL, int s1, int s2, int r1, int r2, Struct_nb *nbs1, Struct_nb *nbs2); - - -/*********************************************************************************************/ -/* */ -/* FUNCTIONS FOR COMPARING TWO NEIGHBORHOODS:END */ -/* */ -/*********************************************************************************************/ -Alignment * analyse_pdb ( Alignment *A, Alignment *ST, char *filename); -Alignment * msa2struc_dist ( Alignment *A, Alignment *ST, char *filename); -float **** analyse_pdb_residues ( Alignment *A, Constraint_list *CL, Pdb_param *pdb_param); - -float square_atom ( Atom *X); -Atom* reframe_atom ( Atom *X, Atom*Y, Atom *Z, Atom *IN, Atom *R); -Atom* add_atom ( Atom *A, Atom*B, Atom *R); -Atom* diff_atom ( Atom *A, Atom*B, Atom *R); -Atom * copy_atom ( Atom *A, Atom*R); -/************************************************************************/ -/* */ -/* NUSSINOV */ -/* */ -/************************************************************************/ -char *nussinov (char *S, int min_dist); -char * rna_struc2rna_lib ( char *seq_name, char *seq, char *name); -int display_rna_ss ( int pos, char *seq, char *struc, FILE *fp); -int evaluate_aln_gibbs ( Alignment *A, Constraint_list *CL); -int evaluate_moca_domain ( Alignment *A, Constraint_list *CL); -int moca_residue_pair_extended_list ( Constraint_list *CL, int s1, int r1, int s2, int r2); -int moca_evaluate_matrix_score ( Constraint_list *CL, int s1, int r1, int s2, int r2); -int moca_slow_get_dp_cost ( Alignment *A, int**pos1, int ns1, int*list1, int col1, int**pos2, int ns2, int*list2, int col2, Constraint_list *CL); - -int **cache_cl_with_moca_domain (Alignment *A, Constraint_list *CL); -Alignment *make_moca_nol_aln ( Alignment *A, Constraint_list *CL); -/*********************************************************************************************/ -/* */ -/* DOMAIN Z SCORE EVALUATION */ -/* */ -/*********************************************************************************************/ - -int evaluate_domain_aln_z_score (Alignment *A, int start, int end,Constraint_list *CL, char *alphabet); -int evaluate_domain_aln ( Alignment *A, int start, int end,Constraint_list *CL); - - - -int unpack_seq_residues ( int *s1, int *r1, int *s2, int *r2, int **packed_seq_lu); -Alignment * unpack_seq_aln ( Alignment *A,Constraint_list *C); -typedef struct -{ - int N_COMPONENT; - double *double_logB_alpha; - double *exponant_list; - double **ALPHA; - double *DM_Q; - double *alpha_tot; - int n_aa; - int tot_n; -} -Mixture; - -double int_logB (int *i, int n); -double float_logB (float *i, int n); -double double_logB (double *x, int n); -double *** make_lup_table ( Mixture *D); -double double_logB2(int j, double *n,Mixture *D); -double compute_exponant ( double *n, int j, Mixture *D); - -double *compute_matrix_p ( double *n,int Nseq); -double* compute_dirichlet_p ( double *n,int Nseq); -void precompute_log_B ( double *table,Mixture *D); -double compute_X (double *n,int i,Mixture *D); -Mixture *read_dirichlet ( char *name); -int dirichlet_code( char aa); - -double lgamma2 ( double x); -double lgamma_r(double x, int *signgamp); -/*********************************************************************************************/ -/* */ -/* FUNCTIONS FOR EVALUATING THE CONSISTENCY BETWEEN ALN AND CL */ -/* */ -/*********************************************************************************************/ -Alignment * overlay_alignment_evaluation ( Alignment *I, Alignment *O); -Alignment * main_coffee_evaluate_output ( Alignment *IN,Constraint_list *CL, const char *mode ); -int aln2ecl_raw_score (Alignment *A, Constraint_list *C); -int sub_aln2ecl_raw_score (Alignment *A, Constraint_list *CL, int ns, int *ls); -int sub_aln2sub_aln_raw_score ( Alignment *IN,Constraint_list *CL, const char *mode, int *ns, int **ls); -int node2sub_aln_score(Alignment *A,Constraint_list *CL, char *mode, NT_node T); -int sub_aln2sub_aln_score ( Alignment *IN,Constraint_list *CL, const char *mode, int *ns, int **ls); -Alignment * main_coffee_evaluate_output_sub_aln ( Alignment *IN,Constraint_list *CL, const char *mode, int *ns, int **ls); - -Alignment * categories_evaluate_output ( Alignment *IN,Constraint_list *CL); -Alignment * matrix_evaluate_output ( Alignment *IN,Constraint_list *CL); -Alignment * sar_evaluate_output ( Alignment *IN,Constraint_list *CL); -Alignment * boxshade_evaluate_output ( Alignment *IN,Constraint_list *CL, int T); - -Alignment * fast_coffee_evaluate_output ( Alignment *IN,Constraint_list *CL); - -int slow_coffee_evaluate_sub_aln ( Alignment *IN,int *ns, int **ls, Constraint_list *CL); -Alignment * slow_coffee_evaluate_output ( Alignment *IN,Constraint_list *CL); -Alignment * non_extended_t_coffee_evaluate_output( Alignment *IN,Constraint_list *CL); -Alignment * heuristic_coffee_evaluate_output ( Alignment *IN,Constraint_list *CL); -/*Old Function: To deprecate*/ -Alignment * coffee_evaluate_output ( Alignment *IN,Constraint_list *CL); - -/*********************************************************************************************/ -/* */ -/* PROFILE/PRofile Functions */ -/* */ -/*********************************************************************************************/ -Profile_cost_func get_profile_mode_function (char *name, Profile_cost_func func); -int generic_evaluate_profile_score (Constraint_list *CL,Alignment *Prf1, int s1, int r1, Alignment *Prf2, int s2, int r2, Profile_cost_func prf_prf); -int cw_profile_profile (int *prf1, int *prf2, Constraint_list *CL); -int muscle_profile_profile (int *prf1, int *prf2, Constraint_list *CL); - -/*********************************************************************************************/ -/* */ -/* FUNCTIONS FOR GETING THE COST : (Sequences) ->evaluate_residue_pair */ -/* */ -/*********************************************************************************************/ -int evaluate_blast_profile_score (Constraint_list *CL, int s1, int r1, int s2, int r2); -int evaluate_aln_profile_score (Constraint_list *CL, int s1, int r1, int s2, int r2); - -int evaluate_profile_score ( Constraint_list *CL,Alignment *Prf1, int s1, int r1, Alignment *Prf2, int s2, int r2); -int evaluate_cdna_matrix_score ( Constraint_list *CL, int s1, int r1, int s2, int r2); -int evaluate_diaa_matrix_score ( Constraint_list *CL, int s1, int r1, int s2, int r2); -int evaluate_monoaa_matrix_score ( Constraint_list *CL, int s1, int r1, int s2, int r2); -int evaluate_matrix_score ( Constraint_list *CL, int s1, int r1, int s2, int r2); -int evaluate_tm_matrix_score ( Constraint_list *CL, int s1, int r1, int s2, int r2); -int evaluate_ssp_matrix_score ( Constraint_list *CL, int s1, int r1, int s2, int r2); -int evaluate_curvature_score ( Constraint_list *CL, int s1, int r1, int s2, int r2); - -int evaluate_combined_matrix_score ( Constraint_list *CL, int s1, int r1, int s2, int r2); -int evaluate_physico_score ( Constraint_list *CL, int s1, int r1, int s2, int r2); -int residue_pair_non_extended_list ( Constraint_list *CL, int s1, int r1, int s2, int r2); - -int residue_pair_extended_list4rna1 ( Constraint_list *CL, int s1, int r1, int s2, int r2); -int residue_pair_extended_list4rna2 ( Constraint_list *CL, int s1, int r1, int s2, int r2); -int residue_pair_extended_list4rna3 ( Constraint_list *CL, int s1, int r1, int s2, int r2); -int residue_pair_extended_list4rna4 ( Constraint_list *CL, int s1, int r1, int s2, int r2); -int residue_pair_extended_list4rna ( Constraint_list *CL, Constraint_list *R, int s1, int r1, int s2, int r2); - -int residue_pair_extended_list_raw ( Constraint_list *CL, int s1, int r1, int s2, int r2); -int residue_pair_extended_list_pc ( Constraint_list *CL, int s1, int r1, int s2, int r2); -int residue_pair_extended_list ( Constraint_list *CL, int s1, int r1, int s2, int r2); - - - - -int residue_pair_extended_list_g_coffee_quadruplet ( Constraint_list *CL, int s1, int r1, int s2, int r2); -int residue_pair_extended_list_g_coffee ( Constraint_list *CL, int s1, int r1, int s2, int r2); -int residue_pair_extended_list_quadruplet ( Constraint_list *CL, int s1, int r1, int s2, int r2); -int residue_pair_extended_list_mixt ( Constraint_list *CL, int s1, int r1, int s2, int r2); -int residue_pair_test_function ( Constraint_list *CL, int s1, int r1, int s2, int r2); -int extend_residue_pair ( Constraint_list *CL, int s1, int r1, int s2, int r2); - -int residue_pair_relative_extended_list ( Constraint_list *CL, int s1, int r1, int s2, int r2 ); -/*********************************************************************************************/ -/* */ -/* FUNCTIONS FOR GETTING THE PW COST : CL->get_dp_cost */ -/* */ -/*********************************************************************************************/ -int get_dp_cost_blosum_matrix (Alignment *A, int**pos1, int ns1, int*list1, int col1, int**pos2, int ns2, int*list2, int col2, Constraint_list *CL); -int get_dp_cost_pam_matrix (Alignment *A, int**pos1, int ns1, int*list1, int col1, int**pos2, int ns2, int*list2, int col2, Constraint_list *CL); -int get_dp_cost_pw_matrix (Alignment *A, int**pos1, int ns1, int*list1, int col1, int**pos2, int ns2, int*list2, int col2, Constraint_list *CL); - -int get_cdna_best_frame_dp_cost ( Alignment *A, int**pos1, int ns1, int*list1, int col1, int**pos2, int ns2, int*list2, int col2, Constraint_list *CL); -int get_dp_cost ( Alignment *A, int**pos1, int ns1, int*list1, int col1, int**pos2, int ns2, int*list2, int col2, Constraint_list *CL); -int get_dp_cost_quadruplet ( Alignment *A, int**pos1, int ns1, int*list1, int col1, int**pos2, int ns2, int*list2, int col2, Constraint_list *CL); - - -int very_fast_get_dp_cost ( Alignment *A, int**pos1, int ns1, int*list1, int col1, int**pos2, int ns2, int*list2, int col2, Constraint_list *CL); - -int cw_profile_get_dp_cost ( Alignment *A, int**pos1, int ns1, int*list1, int col1, int**pos2, int ns2, int*list2, int col2, Constraint_list *CL); -int cw_profile_get_dp_cost_window ( Alignment *A, int**pos1, int ns1, int*list1, int col1, int**pos2, int ns2, int*list2, int col2, Constraint_list *CL); - -int consensus_get_dp_cost ( Alignment *A, int**pos1, int ns1, int*list1, int col1, int**pos2, int ns2, int*list2, int col2, Constraint_list *CL); - -int fast_get_dp_cost ( Alignment *A, int**pos1, int ns1, int*list1, int col1, int**pos2, int ns2, int*list2, int col2, Constraint_list *CL); -int fast_get_dp_cost_2 ( Alignment *A, int**pos1, int ns1, int*list1, int col1, int**pos2, int ns2, int*list2, int col2, Constraint_list *CL); -int fast_get_dp_cost_3 ( Alignment *A, int**pos1, int ns1, int*list1, int col1, int**pos2, int ns2, int*list2, int col2, Constraint_list *CL); - -int fast_get_dp_cost_quadruplet ( Alignment *A, int**pos1, int ns1, int*list1, int col1, int**pos2, int ns2, int*list2, int col2, Constraint_list *CL); - -int check_fast_profile_mode (Alignment *A, int ns1,int *list1,int ns2, int *list2, Constraint_list *CL); -int check_fast_mode (Alignment *A, int ns1,int *list1,int ns2, int *list2, Constraint_list *CL); - - -int slow_get_dp_cost ( Alignment *A, int**pos1, int ns1, int*list1, int col1, int**pos2, int ns2, int*list2, int col2, Constraint_list *CL); -int slow_get_dp_cost_pc ( Alignment *A, int**pos1, int ns1, int*list1, int col1, int**pos2, int ns2, int*list2, int col2, Constraint_list *CL); -int slow_get_dp_cost_test ( Alignment *A, int**pos1, int ns1, int*list1, int col1, int**pos2, int ns2, int*list2, int col2, Constraint_list *CL); - -int sw_get_dp_cost ( Alignment *A, int**pos1, int ns1, int*list1, int col1, int**pos2, int ns2, int*list2, int col2, Constraint_list *CL); -int get_domain_dp_cost ( Alignment *A, int**pos1, int ns1, int*list1, int col1, int**pos2, int ns2, int*list2, int col2,Constraint_list *CL , int scale , int gop, int gep); - -/*********************************************************************************************/ -/* */ -/* FUNCTIONS FOR ANALYSING AL OR MATRIX */ -/* */ -/*********************************************************************************************/ -float ** initialise_aa_physico_chemical_property_table (int *n); -int aln2n_res ( Alignment *A, int start, int end); -float get_gop_scaling_factor ( int **matrix,float id, int l1, int l2); -float get_avg_matrix_mm ( int **matrix, char *alphabet); -float get_avg_matrix_match ( int **matrix, char *alphabet); -float get_avg_matrix_diff ( int **matrix1,int **matrix2, char *alphabet); -float measure_matrix_enthropy (int **matrix,char *alphabet); -float measure_matrix_pos_avg (int **matrix,char *alphabet); -float evaluate_random_match (char *matrix, int n, int len,char *alp); -float compare_two_mat (char *mat1,char*mat2, int n, int len,char *alp); -float compare_two_mat_array (int **matrix1,int **matrix2, int n, int len,char *alp); -int ** rescale_two_mat (char *mat1,char*mat2, int n, int len,char *alp); -int ** rescale_matrix ( int **mat, float lambda, char *alp); -int **mat2inverted_mat (int **matrix, char *alp); -void output_matrix_header ( char *name, int **matrix, char *alp); -float evaluate_random_match2 (int **matrix, int n, int len,char *alp); -float measure_lambda2(char *mat1,char*mat2, int n, int len,char *alp); -float measure_lambda(char *mat1,char*mat2, int n, int len,char *alp); -Constraint_list * choose_extension_mode ( char *extend_mode, Constraint_list *CL); -int ** combine_two_matrices ( int **mat1, int **mat2); -/*********************************************************************************************/ -/* */ -/* OFF THE SHELVES EVALUATION */ -/* */ -/*********************************************************************************************/ -float sum_pair ( Alignment*A,char *mat_name, int gap_op, int gap_ext); -int lat_sum_pair (Alignment *A, char *mat); -int ** show_pair(int istart, int iend, int jstart, int jend, int *seqlen_array, char **seq_array, int dna_ktup, int dna_window, int dna_wind_gap, int dna_signif,int prot_ktup, int prot_window,int prot_wind_gap,int prot_signif, int nseqs,int dnaflag, int max_aa, int max_aln_length ); -/*By convention, 0: START, 1: END*/ - -struct Hmm -{ - - double freeT; /*Free transition*/ - double forbiden; /*Forbiden transition*/ - int start; /*start, by convention: 0*/ - int end; /*end, by convention: 1*/ - - int nS; /*Number of states*/ - int order; - struct HmmState **S; /*State List*/ - - /*Bounded HMM*/ - double **T; /*Transition matrix*/ - int **fromM; /*For any sate s, fromM[0]->number of states leading to s*/ - int **toM; /*For any sate s, toM[0]->number of s can go to*/ - /*End and Start are NOT included in toM and FromM*/ - - -}; -typedef struct Hmm Hmm; - -struct HmmAln -{ - Hmm *H; - int *state_list; -}; -typedef struct HmmAln HmmAln; - -typedef double (*Generic_em_func)(struct Hmm*, struct HmmState*, int); - -struct HmmState -{ -char name[100]; -int state; -int DJ; -int DI; - - /*Pair HMM*/ -double em; -Col_cost_func em_func; - - /*Linear HMM*/ -double *em2; -Generic_em_func em_func2; -int nT; -struct StateTrans **T; -}; -typedef struct HmmState HmmState; - -struct StateTrans -{ - char name[101]; - double tr; -}; -typedef struct StateTrans StateTrans; - -struct MatState -{ - int i; - int j; - int st; - int pst; - double sc; - struct MatState *n; - struct MatState *p; - struct MatState *m; /*memory*/ - struct MatState *s; /*memory of the start point*/ - /*Heap Mamagement: Never copy*/ - struct MatState *Hn;/*Heap Next*/ - struct MatState *Hp;/*Heap Previous*/ - - struct MatState *Mn;/*Marked Heap section*/ - struct MatState *Mp;/*Marked Heap Section*/ - int free; -}; -typedef struct MatState MatState; - - -/*********************************************************************************/ -/* */ -/* */ -/* simple HMM: Viterbi */ -/* */ -/* */ -/*********************************************************************************/ -int seq_viterbi_pair_wise (Alignment *A,int*ns, int **ls,Constraint_list *CL); - -/*********************************************************************************/ -/* */ -/* */ -/* HMM: Viterbi */ -/* */ -/* */ -/*********************************************************************************/ - -int viterbi_pair_wise_OLD (Alignment *A,int*ns, int **ls,Constraint_list *CL); -Alignment * viterbipath2aln (Alignment *A, int *ns,int **ls,int *tb, Hmm *H); -double*** viterbi_hmm (Alignment *A,int *ns, int **ls, Hmm *H, Constraint_list *CL); -int * viterbi2path (int l1,int l2, Hmm *H, double ***M); -/*********************************************************************************/ -/* */ -/* */ -/* HMM modeling */ -/* */ -/* */ -/*********************************************************************************/ -int viterbiL_pair_wise (Alignment *A,int*ns, int **ls,Constraint_list *CL); -MatState* RviterbiL_hmm (Alignment *A,int *ns, int **ls, Hmm *H, Constraint_list *CL,MatState *S, MatState *E); -MatState* viterbiL_hmm (Alignment *A,int *ns, int **ls, Hmm *H, Constraint_list *CL, MatState *S, MatState *E); - -int viterbiD_pair_wise (Alignment *A,int*ns, int **ls,Constraint_list *CL); -double lu_RviterbiD_hmm (Alignment *A,int *ns, int **ls, Hmm *H, Constraint_list *CL,MatState *S, MatState *E, int **seg_list); -MatState* RviterbiD_hmm (Alignment *A,int *ns, int **ls, Hmm *H, Constraint_list *CL,MatState *S, MatState *E, int **seg_list); -MatState* viterbiD_hmm (Alignment *A,int *ns, int **ls, Hmm *H, Constraint_list *CL, MatState *S, MatState *E, int **seg_list); -int **seglist2table ( int **seglist,int l1, int l2); -int *seglist2line ( int **seglist, int line, int start, int end); -int * traceback (Alignment *A,int *ns, int **ls, Hmm *H, Constraint_list *CL,MatState *S, MatState *E, int **seg_list); - -int viterbiDGL_pair_wise (Alignment *A,int*ns, int **ls,Constraint_list *CL); -double lu_RviterbiDGL_hmm (Alignment *A,int *ns, int **ls, Hmm *H, Constraint_list *CL,MatState *S, MatState *E, int **seg_list); -MatState* RviterbiDGL_hmm (Alignment *A,int *ns, int **ls, Hmm *H, Constraint_list *CL,MatState *S, MatState *E, int **seg_list); -MatState* viterbiDGL_hmm (Alignment *A,int *ns, int **ls, Hmm *H, Constraint_list *CL,MatState *S, MatState *E, int **seg_list); - - -/*********************************************************************************/ -/* */ -/* */ -/* HMM modeling */ -/* */ -/* */ -/*********************************************************************************/ -int MatStateAreIdentical (MatState*I, MatState*O); -int MaxDeltaMatState (MatState*I, MatState*O); -int MinDeltaMatState (MatState*I, MatState*O); - -MatState * ManageMatState(int Mode, MatState *C); -MatState* CopyMatState ( MatState*I, MatState*O); - -Hmm* read_hmm(char *file); -Hmm* declare_hmm(int n); -Hmm* free_Hmm(Hmm*H); -void DisplayHmm ( Hmm *H); - -/*********************************************************************************/ -/* */ -/* */ -/* HMM Models */ -/* */ -/* */ -/*********************************************************************************/ -Hmm* define_two_mat_model(Constraint_list *CL); -Hmm* define_probcons_model(Constraint_list *CL); -Hmm* define_simple_model(Constraint_list *CL); - -Hmm * bound_hmm ( Hmm *H); -Sequence *pavie_seq2random_seq (Sequence *S, char *subst); -double **pavie_seq2pavie_aln(Sequence *S,char *mat, char *mode); -Alignment *pavie_seq2pavie_sort ( Sequence *S, char *mat, char *mode); -Alignment* pavie_seq2pavie_msa ( Sequence *S, char *mat, char *mode); -NT_node pavie_seq2pavie_tree ( Sequence *S, char *mat, char *mode); -int **pavie_seq2trained_pavie_mat(Sequence *S, char *param); -int pavie_pair_wise (Alignment *A,int *ns, int **l_s,Constraint_list *CL ); -Sequence *pavie_seq2noisy_seq (Sequence *S, int val,char *subst); -/*********************************COPYRIGHT NOTICE**********************************/ -/*© Centro de Regulacio Genomica */ -/*and */ -/*Cedric Notredame */ -/*Tue Oct 27 10:12:26 WEST 2009. */ -/*All rights reserved.*/ -/*This file is part of T-COFFEE.*/ -/**/ -/* T-COFFEE is free software; you can redistribute it and/or modify*/ -/* it under the terms of the GNU General Public License as published by*/ -/* the Free Software Foundation; either version 2 of the License, or*/ -/* (at your option) any later version.*/ -/**/ -/* T-COFFEE is distributed in the hope that it will be useful,*/ -/* but WITHOUT ANY WARRANTY; without even the implied warranty of*/ -/* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the*/ -/* GNU General Public License for more details.*/ -/**/ -/* You should have received a copy of the GNU General Public License*/ -/* along with Foobar; if not, write to the Free Software*/ -/* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA*/ -/*............................................... |*/ -/* If you need some more information*/ -/* cedric.notredame@europe.com*/ -/*............................................... |*/ -/**/ -/**/ -/* */ -/*********************************COPYRIGHT NOTICE**********************************/ diff --git a/binaries/src/tcoffee/t_coffee_source/evaluate.c b/binaries/src/tcoffee/t_coffee_source/evaluate.c deleted file mode 100644 index df57c51..0000000 --- a/binaries/src/tcoffee/t_coffee_source/evaluate.c +++ /dev/null @@ -1,5053 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include "io_lib_header.h" -#include "util_lib_header.h" -#include "define_header.h" - -#include "dp_lib_header.h" -float compute_lambda (int **matrix,char *alphabet); -/*********************************************************************************************/ -/* */ -/* FUNCTIONS FOR EVALUATING THE CONSISTENCY BETWEEN ALN AND CL */ -/* */ -/*********************************************************************************************/ - -/*Fast: score= extended_res/max_extended_residue_for the whole aln - slow: score= extended_res/sum all extended score for that residue - non_extended score= non_ext /sum all non extended score for that residue - heuristic score= extended /sum of extended score of all pairs in the library - (i.e. Not ALL the possible pairs) -*/ -Alignment * main_coffee_evaluate_output2 ( Alignment *IN,Constraint_list *CL, const char *mode ); -int sub_aln2ecl_raw_score (Alignment *A, Constraint_list *CL, int ns, int *ls) -{ - int **pos; - int p1,r1, r2, s1, s2; - int score=0; - - if ( !A) return 0; - A=reorder_aln (A, (CL->S)->name,(CL->S)->nseq); - pos=aln2pos_simple ( A,A->nseq); - - CL=index_res_constraint_list (CL, CL->weight_field); - - for (p1=0; p1len_aln; p1++) - { - for (s1=0; s10 && r2>0) - { - score+= residue_pair_extended_list_pc (CL,ls[s1], r1, ls[s2], r2)*SCORE_K; - } - } - } - } - free_int (pos, -1); - return score; - return (score/(((ns*ns)-ns)/2))/A->len_aln; -} -int aln2ecl_raw_score (Alignment *A, Constraint_list *CL) -{ - int **pos; - int p1,r1, r2, s1, s2; - int score=0; - if ( !A) return 0; - A=reorder_aln (A, (CL->S)->name,(CL->S)->nseq); - pos=aln2pos_simple ( A,A->nseq); - - CL=index_res_constraint_list (CL, CL->weight_field); - - for (p1=0; p1len_aln; p1++) - { - for (s1=0; s1nseq-1; s1++) - { - for (s2=s1+1; s2nseq; s2++) - { - - r1=pos[s1][p1]; - r2=pos[s2][p1]; - if (r1>0 && r2>0)score+= residue_pair_extended_list_pc (CL,s1, r1, s2, r2); - } - } - } - free_int (pos, -1); - return score; - return (score/(((A->nseq*A->nseq)-A->nseq)/2))/A->len_aln; -} -int node2sub_aln_score (Alignment *A,Constraint_list *CL, char *mode, NT_node T) -{ - if ( !T || !T->right ||!T->left)return 0; - else - { - int *ns; - int **ls; - ns=vcalloc (2, sizeof (int)); - ls=vcalloc (2, sizeof (int*)); - ns[0]= (T->left)->nseq; - ns[1]=(T->right)->nseq; - ls[0]= (T->left)->lseq; - ls[1]=(T->right)->lseq; - - return sub_aln2sub_aln_score (A, CL, mode, ns, ls); - } - return -1; -} -int sub_aln2sub_aln_score ( Alignment *A,Constraint_list *CL, const char *mode, int *ns, int **ls) -{ - /*Warning: Does Not Take Gaps into account*/ - - int **pos; - int a; - float score=0; - - /*Make sure evaluation functions update their cache if needed*/ - A=update_aln_random_tag (A); - pos=aln2pos_simple ( A, -1, ns, ls); - for (a=0; a< A->len_aln; a++) - score+=CL->get_dp_cost (A, pos, ns[0], ls[0], a, pos, ns[1],ls[1], a, CL); - free_int (pos, -1); - - score=(int)(((float)score)/(A->len_aln*SCORE_K)); - score=(int)(CL->L && CL->normalise)?((score*MAXID)/(CL->normalise)):(score); - return (int)score; -} -int sub_aln2sub_aln_raw_score ( Alignment *A,Constraint_list *CL, const char *mode, int *ns, int **ls) -{ - /*Warning: Does Not Take Gaps into account*/ - - int **pos; - int a; - float score=0; - - /*Make sure evaluation functions update their cache if needed*/ - A=update_aln_random_tag (A); - pos=aln2pos_simple ( A, -1, ns, ls); - for (a=0; a< A->len_aln; a++) - score+=CL->get_dp_cost (A, pos, ns[0], ls[0], a, pos, ns[1],ls[1], a, CL); - free_int (pos, -1); - return (int) score; -} - -Alignment* main_coffee_evaluate_output_sub_aln ( Alignment *A,Constraint_list *CL, const char *mode, int *n_s, int **l_s) -{ - Alignment *SUB1, *SUB2, *SUB3; - int a, b, c,*list_seq; - - - if (strm ( CL->evaluate_mode, "no"))return NULL; - else - { - list_seq=vcalloc (n_s[0]+n_s[1], sizeof (int)); - for (b=0, a=0; a< 2; a++){for (c=0;c< n_s[a]; c++)list_seq[b++]=l_s[a][c];} - - - SUB1=copy_aln (A, NULL); - SUB2=extract_sub_aln (SUB1,n_s[0]+n_s[1],list_seq); - SUB3=main_coffee_evaluate_output (SUB2,CL,CL->evaluate_mode); - free_aln (SUB1); - free_aln (SUB2); - vfree (list_seq); - - return SUB3; - } -} -Alignment * overlay_alignment_evaluation ( Alignment *I, Alignment *O) -{ - int a, b, c, r, i; - int *buf; - - if ( !I || !O) return O; - if ( I->len_aln!=O->len_aln)printf_exit (EXIT_FAILURE, stderr, "ERROR: Incompatible alignments in overlay_alignment_evaluation"); - - buf=vcalloc ( MAX(I->len_aln, O->len_aln), sizeof (int)); - - for (a=0; anseq; a++) - { - if (!strm (I->name[a], O->name[a]))printf_exit (EXIT_FAILURE, stderr, "ERROR: Incompatible alignments in overlay_alignment_evaluation"); - for (b=0; blen_aln; b++) - { - r=I->seq_al[a][b]; - if ( islower(r))O->seq_al[a][b]=0; - else if (r<=9 || (r>='0' && r<='9'))O->seq_al[a][b]=I->seq_al[a][b]; - } - } - return O; -} - -Alignment * main_coffee_evaluate_output ( Alignment *IN,Constraint_list *CL, const char *mode ) -{ - - Alignment *TopS=NULL, *LastS=NULL, *CurrentS=NULL; - - - if ( IN->A)IN=IN->A; - while (IN) - { - - CurrentS= main_coffee_evaluate_output2(IN, CL, mode); - if (!TopS)LastS=TopS=CurrentS; - else - { - LastS->A=CurrentS; - LastS=CurrentS; - } - IN=IN->A; - } - return TopS; -} - -Alignment * main_coffee_evaluate_output2 ( Alignment *IN,Constraint_list *CL, const char *mode ) -{ - - /*Make sure evaluation functions update their cache if needed*/ - IN=update_aln_random_tag (IN); - - if ( CL->evaluate_residue_pair==evaluate_matrix_score || CL->ne==0 ||strm ( mode , "categories") || strm ( mode , "matrix")|| strm(mode, "sar")|| strstr (mode, "boxshade") ) - { - - if ( strm ( mode , "categories")) return categories_evaluate_output (IN, CL); - else if ( strm ( mode , "matrix"))return matrix_evaluate_output (IN, CL); - else if ( strm ( mode, "sar"))return sar_evaluate_output (IN, CL); - else if ( strstr ( mode, "boxshade"))return boxshade_evaluate_output (IN, CL, atoi (strstr(mode, "_")+1)); - - else if ( CL->evaluate_residue_pair==evaluate_matrix_score) return matrix_evaluate_output (IN, CL); - else if ( CL->ne==0) return matrix_evaluate_output (IN, CL); - } - else if ( strm (mode, "no"))return NULL; - else if ( strm4 ( mode, "t_coffee_fast","tcoffee_fast","fast_tcoffee", "fast_t_coffee")) - { - return fast_coffee_evaluate_output ( IN,CL); - } - else if ( strm4 ( mode, "t_coffee_slow","tcoffee_slow","slow_tcoffee","slow_t_coffee" )) - { - return slow_coffee_evaluate_output ( IN,CL); - } - - else if ( strm4 ( mode, "tcoffee_non_extended","t_coffee_non_extended","non_extended_tcoffee","non_extended_t_coffee")) - { - return non_extended_t_coffee_evaluate_output ( IN,CL); - } - else if ( strm5 ( mode, "tcoffee_heuristic","t_coffee_heuristic","heuristic_tcoffee","heuristic_t_coffee", "dali")) - { - return heuristic_coffee_evaluate_output ( IN,CL); - } - else - { - fprintf ( stderr, "\nUNKNOWN MODE FOR ALIGNMENT EVALUATION: *%s* [FATAL:%s]",mode, PROGRAM); - crash (""); - return NULL; - } - return IN; -} - - - -Alignment * coffee_evaluate_output ( Alignment *IN,Constraint_list *CL) - { - fprintf ( stderr, "\n[WARNING:%s]THE FUNCTION coffee_evaluate_output IS NOT ANYMORE SUPPORTED\n", PROGRAM); - fprintf ( stderr, "\n[WARNING]fast_coffee_evaluate_output WILL BE USED INSTEAD\n"); - - return fast_coffee_evaluate_output (IN,CL); - } -Alignment * matrix_evaluate_output ( Alignment *IN,Constraint_list *CL) - { - int a,b, c,r, s, r1, r2; - Alignment *OUT=NULL; - - double **tot_res; - double **max_res; - - double **tot_seq; - double **max_seq; - - double **tot_col; - double **max_col; - - double max_aln=0; - double tot_aln=0; - - - /* - Residue x: sum of observed extended X.. /sum of possible X.. - */ - - - if ( !CL->M)CL->M=read_matrice ("blosum62mt"); - - OUT=copy_aln (IN, OUT); - - - tot_res=declare_double ( IN->nseq, IN->len_aln); - max_res=declare_double ( IN->nseq, IN->len_aln); - - tot_seq=declare_double ( IN->nseq, 1); - max_seq=declare_double ( IN->nseq, 1); - tot_col=declare_double ( IN->len_aln,1); - max_col=declare_double ( IN->len_aln,1); - - max_aln=tot_aln=0; - - for (a=0; a< IN->len_aln; a++) - { - for ( b=0; b< IN->nseq; b++) - { - r1=tolower(IN->seq_al[b][a]); - if ( is_gap(r1))continue; - r= CL->M[r1-'A'][r1-'A']; - r= 1; - for ( c=0; cnseq; c++) - { - r2=tolower(IN->seq_al[c][a]); - if (b==c || is_gap (r2))continue; - - s=CL->M[r2-'A'][r1-'A']; - s=(s<=0)?0:1; - - tot_res[b][a]+=s; - max_res[b][a]+=r; - - tot_col[a][0]+=s; - max_col[a][0]+=r; - - tot_seq[b][0]+=s; - max_seq[b][0]+=r; - - tot_aln+=s; - max_aln+=r; - } - } - } - - - for ( a=0; a< IN->nseq; a++) - { - if ( !max_seq[a][0])continue; - - OUT->score_seq[a]=(tot_seq[a][0]*100)/max_seq[a][0]; - for (b=0; b< IN->len_aln; b++) - { - r1=IN->seq_al[a][b]; - if ( is_gap(r1) || !max_res[a][b])continue; - - r1=(tot_res[a][b]*10)/max_res[a][b]; - r1=(r1>=10)?9:r1; - r1=r1<0?0:r1; - (OUT)->seq_al[a][b]=r1+'0'; - } - } - - for ( a=0; a< IN->len_aln; a++) - { - r1=(max_col[a][0]==0)?0:((tot_col[a][0]*10)/max_col[a][0]); - r1=(r1>=10)?9:r1; - (OUT)->seq_al[OUT->nseq][a]=r1+'0'; - } - sprintf ( OUT->name[IN->nseq], "cons"); - if (max_aln)OUT->score_seq[OUT->nseq]=OUT->score_aln=(100*tot_aln)/max_aln; - - - free_double (tot_res,-1); - free_double (max_res,-1); - - free_double (tot_seq,-1); - free_double (max_seq,-1); - - return OUT; - } - -Alignment * sar_evaluate_output ( Alignment *IN,Constraint_list *CL) - { - int a,b, c,r, s, r1, r2; - Alignment *OUT=NULL; - - double **tot_res; - double **max_res; - - double **tot_seq; - double **max_seq; - - double **tot_col; - double **max_col; - - double max_aln=0; - double tot_aln=0; - - - /* - Residue x: sum of observed extended X.. /sum of possible X.. - */ - - - if ( !CL->M)CL->M=read_matrice ("blosum62mt"); - - OUT=copy_aln (IN, OUT); - - - tot_res=declare_double ( IN->nseq, IN->len_aln); - max_res=declare_double ( IN->nseq, IN->len_aln); - - tot_seq=declare_double ( IN->nseq, 1); - max_seq=declare_double ( IN->nseq, 1); - tot_col=declare_double ( IN->len_aln,1); - max_col=declare_double ( IN->len_aln,1); - - max_aln=tot_aln=0; - - for (a=0; a< IN->len_aln; a++) - { - for (b=0; b< IN->nseq; b++) - { - r1=tolower(IN->seq_al[b][a]); - for (c=0; cnseq; c++) - { - r2=tolower(IN->seq_al[c][a]); - if (b==c)continue; - - if ( is_gap(r1) && is_gap(r2))s=0; - else s=(r1==r2)?1:0; - - r=1; - - - tot_res[b][a]+=s; - max_res[b][a]+=r; - - tot_col[a][0]+=s; - max_col[a][0]+=r; - - tot_seq[b][0]+=s; - max_seq[b][0]+=r; - - tot_aln+=s; - max_aln+=r; - } - } - } - - for ( a=0; a< IN->nseq; a++) - { - if ( !max_seq[a][0])continue; - OUT->score_seq[a]=(max_seq[a][0]*100)/max_seq[a][0]; - for (b=0; b< IN->len_aln; b++) - { - r1=IN->seq_al[a][b]; - if ( is_gap(r1) || !max_res[a][b])continue; - r1=(tot_res[a][b]*10)/max_res[a][b]; - r1=(r1>=10)?9:r1; - r1=r1<0?0:r1; - (OUT)->seq_al[a][b]=r1+'0'; - } - } - - for ( a=0; a< IN->len_aln; a++) - { - r1=(max_col[a][0]==0)?0:((tot_col[a][0]*10)/max_col[a][0]); - r1=(r1>=10)?9:r1; - (OUT)->seq_al[OUT->nseq][a]=r1+'0'; - } - sprintf ( OUT->name[IN->nseq], "cons"); - if (max_aln)OUT->score_aln=(100*tot_aln)/max_aln; - - - free_double (tot_res,-1); - free_double (max_res,-1); - - free_double (tot_seq,-1); - free_double (max_seq,-1); - - return OUT; - } -Alignment * boxshade_evaluate_output ( Alignment *IN,Constraint_list *CL, int T) - { - Alignment *OUT=NULL; - int **aa; - int r,br, bs, a, b; - float f; - - - /* - Residue x: sum of observed extended X.. /sum of possible X.. - */ - - - OUT=copy_aln (IN, OUT); - aa=declare_int (26, 2); - - for ( a=0; a< OUT->len_aln; a++) - { - for ( b=0; b< 26; b++){aa[b][1]=0;aa[b][0]='a'+b;} - for ( b=0; b< OUT->nseq; b++) - { - r=tolower(OUT->seq_al[b][a]); - if ( !is_gap(r))aa[r-'a'][1]++; - } - sort_int ( aa, 2, 1, 0,25); - f=(aa[25][1]*100)/OUT->nseq; - - if (fnseq; b++) - { - r=tolower(OUT->seq_al[b][a]); - if (r==br && bs>'1')OUT->seq_al[b][a]=bs; - } - OUT->seq_al[b][a]=bs; - } - } - sprintf ( OUT->name[IN->nseq], "cons"); - - return OUT; - } - -Alignment * categories_evaluate_output ( Alignment *IN,Constraint_list *CL) - { - - Alignment *OUT=NULL; - int a, b, r; - int *aa; - float score, nseq2, tot_aln; - float n; - /* - Residue x: sum of observed extended X.. /sum of possible X.. - */ - OUT=copy_aln (IN, OUT); - aa=vcalloc ( 26, sizeof (int)); - nseq2=IN->nseq*IN->nseq; - - for (tot_aln=0, a=0; a< IN->len_aln; a++) - { - for (n=0,b=0; b< IN->nseq; b++) - { - r=IN->seq_al[b][a]; - - if ( is_gap(r))n++; - else - { - aa[tolower(r)-'a']++; - n++; - } - } - n=n*n; - for ( score=0,b=0; b< 26; b++){score+=aa[b]*aa[b];aa[b]=0;} - /*score/=nseq2;*/ - score=(n==0)?0:score/n; - tot_aln+=score; - r=score*10; - r=(r>=10)?9:r; - (OUT)->seq_al[OUT->nseq][a]='0'+r; - } - OUT->score_aln=(tot_aln/OUT->len_aln)*100; - sprintf ( OUT->name[IN->nseq], "cons"); - vfree(aa); - return OUT; - } - -Alignment * categories_evaluate_output_old ( Alignment *IN,Constraint_list *CL) - { - - Alignment *OUT=NULL; - int nc,a, b, r; - int *aa, ng; - float score, nseq2, tot_aln, min=0; - - /* - Residue x: sum of observed extended X.. /sum of possible X.. - */ - OUT=copy_aln (IN, OUT); - aa=vcalloc ( 26, sizeof (int)); - nseq2=IN->nseq*IN->nseq; - - for (tot_aln=0, a=0; a< IN->len_aln; a++) - { - for (ng=0,b=0; b< IN->nseq; b++) - { - r=IN->seq_al[b][a]; - - if ( is_gap(r))ng++; - else - { - aa[tolower(r)-'a']++; - } - } - for (nc=0, b=0; b<26; b++) - { - if ( aa[b])nc++; - aa[b]=0; - } - if (nc>9)score=0; - else score=9-nc; - - score=(2*min)/IN->nseq; - - tot_aln+=score; - r=score*10; - r=(r>=10)?9:r; - (OUT)->seq_al[OUT->nseq][a]='0'+r; - } - - OUT->score_aln=(tot_aln/OUT->len_aln)*100; - sprintf ( OUT->name[IN->nseq], "cons"); - vfree(aa); - return OUT; - } - -Alignment * fast_coffee_evaluate_output ( Alignment *IN,Constraint_list *CL) - { - int a,b, c, m,res, s, s1, s2, r1, r2; - Alignment *OUT=NULL; - int **pos, **pos2; - - double score_col=0, score_aln=0, score_res=0; - double max_col, max_aln; - double *max_seq, *score_seq; - int local_m; - int local_nseq; - - - /*NORMALIZE: with the highest scoring pair found in the multiple alignment*/ - - - if ( !CL->evaluate_residue_pair){fprintf ( stderr, "\nWARNING: CL->evaluate_residue_pair Not set\nSet to: extend_residue_pair\n");CL->evaluate_residue_pair= extend_residue_pair; } - - OUT=copy_aln (IN, OUT); - pos=aln2pos_simple(IN, IN->nseq); - pos2=aln2defined_residues (IN, CL); - - max_seq=vcalloc ( IN->nseq, sizeof (double)); - score_seq=vcalloc ( IN->nseq, sizeof (double)); - - - - /*1: Identify the highest scoring pair within the alignment*/ - - for ( m=0, a=0; a< IN->len_aln; a++) - { - for ( b=0; b< IN->nseq; b++) - { - s1=IN->order[b][0]; - r1=pos[b][a]; - - - for ( c=0; c< IN->nseq; c++) - { - s2=IN->order[c][0]; - r2=pos[c][a]; - if ( s1==s2 && !CL->do_self)continue; - - if ( s1< s2)s=(CL->evaluate_residue_pair)( CL, s1, r1, s2, r2); - else s=(CL->evaluate_residue_pair)( CL, s2, r2, s1, r1); - - s=(s!=UNDEFINED)?s:0; - m=MAX(m, s); - } - } - } - - local_m=m; - - sprintf ( OUT->name[IN->nseq], "cons"); - for ( max_aln=0,score_aln=0,a=0; a< IN->len_aln; a++) - { - OUT->seq_al[IN->nseq][a]=NO_COLOR_RESIDUE; - for ( local_nseq=0,b=0; bnseq; b++){local_nseq+=(pos[b][a]>0 && pos2[b][a])?1:0;} - local_m=m*(local_nseq-1); - - for ( max_col=0, score_col=0,b=0; b< IN->nseq; b++) - { - OUT->seq_al[b][a]=NO_COLOR_RESIDUE; - s1=IN->order[b][0]; - r1=pos[b][a]; - - if (r1<=0 || !pos2[b][a]) - { - continue; - } - - for ( score_res=0,c=0; c< IN->nseq; c++) - { - s2=IN->order[c][0]; - r2=pos[c][a]; - - if ((s1==s2 && !CL->do_self) || r2<=0 || !pos2[c][a]){continue;} - max_col +=m; - max_seq[b]+=m; - max_aln +=m; - - if ( s1< s2)s=(CL->evaluate_residue_pair)( CL, s1, r1, s2, r2); - else s=(CL->evaluate_residue_pair)( CL, s2, r2, s1, r1); - s=(s!=UNDEFINED)?s:0; - - score_res+=s; - score_col+=s; - score_seq[b]+=s; - score_aln+=s; - } - - res=(local_m==0)?NO_COLOR_RESIDUE:((score_res*10)/local_m); - (OUT)->seq_al[b][a]=(res==NO_COLOR_RESIDUE)?res:(MIN(res,9)); - - - } - - res=(max_col==0)?NO_COLOR_RESIDUE:((score_col*10)/max_col); - OUT->seq_al[IN->nseq][a]=(res==NO_COLOR_RESIDUE)?res:(MIN(res,9)); - - } - - IN->score_aln=OUT->score_aln=(max_aln==0)?0:((score_aln*100)/max_aln); - for ( a=0; a< OUT->nseq; a++) - { - OUT->score_seq[a]=(max_seq[a]==0)?0:((score_seq[a]*100)/max_seq[a]); - } - - free_int (pos , -1); - free_int (pos2, -1); - - vfree ( score_seq); - vfree ( max_seq); - return OUT; - } - - - -Alignment * slow_coffee_evaluate_output ( Alignment *IN,Constraint_list *CL) - { - int a,b, c,res, s, s1, s2, r1, r2; - Alignment *OUT=NULL; - int **pos, **pos2; - double max_score_r, score_r, max; - double score_col=0, score_aln=0; - double max_score_col, max_score_aln; - double *max_score_seq, *score_seq; - int ***res_extended_weight; - int n_res_in_col; - - - /* - Residue x: sum of observed extended X.. /sum of possible X.. - */ - - - - - if ( !CL->evaluate_residue_pair){fprintf ( stderr, "\nWARNING: CL->evaluate_residue_pair Not set\nSet to: extend_residue_pair\n");CL->evaluate_residue_pair= extend_residue_pair; } - - - OUT=copy_aln (IN, OUT); - pos=aln2pos_simple(IN, IN->nseq); - pos2=aln2defined_residues (IN, CL); - - max_score_seq=vcalloc ( IN->nseq, sizeof (double)); - score_seq=vcalloc ( IN->nseq, sizeof (double)); - res_extended_weight=declare_arrayN(3,sizeof(int), (CL->S)->nseq, (CL->S)->max_len+1, 2); - max=(CL->normalise)?(100*CL->normalise)*SCORE_K:100; - - for (a=0; a< IN->len_aln; a++) - { - for ( b=0; b< IN->nseq-1; b++) - { - s1=IN->order[b][0]; - r1=pos[b][a]; - for ( c=b+1; c< IN->nseq; c++) - { - s2=IN->order[c][0]; - r2=pos[c][a]; - if ( s1==s2 && !CL->do_self)continue; - else if ( r1<=0 || r2<=0) continue; - else - { - s=(CL->evaluate_residue_pair)( CL, s1, r1, s2, r2); - res_extended_weight[s1][r1][0]+=s*100; - res_extended_weight[s2][r2][0]+=s*100; - res_extended_weight[s1][r1][1]+=max; - res_extended_weight[s2][r2][1]+=max; - } - } - } - } - - - sprintf ( OUT->name[IN->nseq], "cons"); - for ( max_score_aln=0,score_aln=0,a=0; a< IN->len_aln; a++) - { - OUT->seq_al[IN->nseq][a]=NO_COLOR_RESIDUE; - for ( n_res_in_col=0,b=0; bnseq; b++){n_res_in_col+=(pos[b][a]>0 && pos2[b][a]>0)?1:0;} - for ( max_score_col=0, score_col=0,b=0; b< IN->nseq; b++) - { - OUT->seq_al[b][a]=NO_COLOR_RESIDUE; - s1=IN->order[b][0]; - r1=pos[b][a]; - if (r1<=0 || pos2[b][a]<1)continue; - else - { - max_score_r =res_extended_weight[s1][r1][1]; - score_r =res_extended_weight[s1][r1][0]; - if ( max_score_r==0 && n_res_in_col>1)res=0; - else if ( n_res_in_col==1)res=NO_COLOR_RESIDUE; - else res=((score_r*10)/max_score_r); - - - (OUT)->seq_al[b][a]=(res==NO_COLOR_RESIDUE)?res:(MIN(res, 9)); - max_score_col+=max_score_r; - score_col+=score_r; - max_score_seq[b]+=max_score_r; - score_seq[b]+=score_r; - max_score_aln+=max_score_r; - score_aln+=score_r; - } - if ( max_score_col==0 && n_res_in_col>1)res=0; - else if ( n_res_in_col<2)res=NO_COLOR_RESIDUE; - else res=((score_col*10)/max_score_col); - - OUT->seq_al[IN->nseq][a]=(res==NO_COLOR_RESIDUE)?res:(MIN(res,9)); - } - } - IN->score_aln=OUT->score_aln=(max_score_aln==0)?0:((score_aln*100)/max_score_aln); - for ( a=0; a< OUT->nseq; a++) - { - OUT->score_seq[a]=(max_score_seq[a]==0)?0:((score_seq[a]*100)/max_score_seq[a]); - } - - - vfree ( score_seq); - vfree ( max_score_seq); - free_arrayN((void*)res_extended_weight, 3); - - - free_int (pos, -1); - free_int (pos2, -1); - return OUT; - } - - -Alignment * heuristic_coffee_evaluate_output ( Alignment *IN,Constraint_list *CL) - { - int a,b, c,res, s, s1, s2, r1, r2; - Alignment *OUT=NULL; - int **pos; - int max_score_r, score_r; - double score_col=0, score_aln=0; - int max_score_col, max_score_aln; - double *max_score_seq, *score_seq; - int **tot_extended_weight; - int **res_extended_weight; - int n_res_in_col; - - /* - Residue x: sum of observed extended X.. /sum of possible X.. - */ - - if ( !CL->evaluate_residue_pair){fprintf ( stderr, "\nWARNING: CL->evaluate_residue_pair Not set\nSet to: extend_residue_pair\n");CL->evaluate_residue_pair= extend_residue_pair; } - - OUT=copy_aln (IN, OUT); - pos=aln2pos_simple(IN, IN->nseq); - - - max_score_seq=vcalloc ( IN->nseq, sizeof (double)); - score_seq=vcalloc ( IN->nseq, sizeof (double)); - - tot_extended_weight=list2residue_partial_extended_weight(CL); - res_extended_weight=declare_int ((CL->S)->nseq, (CL->S)->max_len+1); - - for (a=0; a< IN->len_aln; a++) - { - for ( b=0; b< IN->nseq-1; b++) - { - s1=IN->order[b][0]; - r1=pos[b][a]; - for ( c=b+1; c< IN->nseq; c++) - { - s2=IN->order[c][0]; - r2=pos[c][a]; - if ( s1==s2 && !CL->do_self)continue; - else if ( r1<=0 || r2<=0) continue; - else - { - if ( s1< s2)s=(CL->evaluate_residue_pair)( CL, s1, r1, s2, r2); - else s=(CL->evaluate_residue_pair)( CL, s2, r2, s1, r1); - res_extended_weight[s1][r1]+=s; - res_extended_weight[s2][r2]+=s; - } - } - } - } - - - sprintf ( OUT->name[IN->nseq], "cons"); - for ( max_score_aln=0,score_aln=0,a=0; a< IN->len_aln; a++) - { - OUT->seq_al[IN->nseq][a]=NO_COLOR_RESIDUE; - for ( n_res_in_col=0,b=0; bnseq; b++){n_res_in_col+=(pos[b][a]>0)?1:0;} - for ( max_score_col=0, score_col=0,b=0; b< IN->nseq; b++) - { - OUT->seq_al[b][a]=NO_COLOR_RESIDUE; - s1=IN->order[b][0]; - r1=pos[b][a]; - if (r1<=0)continue; - else - { - max_score_r =tot_extended_weight[s1][r1]; - score_r=res_extended_weight[s1][r1]; - res=(max_score_r==0 ||n_res_in_col<2 )?NO_COLOR_RESIDUE:((score_r*10)/max_score_r); - (OUT)->seq_al[b][a]=(res==NO_COLOR_RESIDUE)?res:(MIN(res, 9)); - max_score_col+=max_score_r; - score_col+=score_r; - max_score_seq[b]+=max_score_r; - score_seq[b]+=score_r; - max_score_aln+=max_score_r; - score_aln+=score_r; - } - res=(max_score_col==0 || n_res_in_col<2)?NO_COLOR_RESIDUE:((score_col*10)/max_score_col); - OUT->seq_al[IN->nseq][a]=(res==NO_COLOR_RESIDUE)?res:(MIN(res,9)); - } - } - IN->score_aln=OUT->score_aln=MIN(100,((max_score_aln==0)?0:((score_aln*100)/max_score_aln))); - for ( a=0; a< OUT->nseq; a++) - { - OUT->score_seq[a]=MIN(100,((max_score_seq[a]==0)?0:((score_seq[a]*100)/max_score_seq[a]))); - } - - - vfree ( score_seq); - vfree ( max_score_seq); - - free_int (tot_extended_weight, -1); - free_int (res_extended_weight, -1); - free_int (pos, -1); - - return OUT; - } -Alignment * non_extended_t_coffee_evaluate_output ( Alignment *IN,Constraint_list *CL) - { - int a,b, c,res, s1, s2, r1, r2; - Alignment *OUT=NULL; - int **pos; - int max_score_r, score_r; - double score_col=0, score_aln=0; - int max_score_col, max_score_aln; - double *max_score_seq, *score_seq; - int local_nseq; - int **tot_non_extended_weight; - int **res_non_extended_weight; - int *l; - CLIST_TYPE *entry=NULL; - int p; - int max_score=0; - - entry=vcalloc (CL->entry_len, CL->el_size); - if ( !CL->evaluate_residue_pair){fprintf ( stderr, "\nWARNING: CL->evaluate_residue_pair Not set\nSet to: extend_residue_pair\n");CL->evaluate_residue_pair= extend_residue_pair; } - - OUT=copy_aln (IN, OUT); - pos=aln2pos_simple(IN, IN->nseq); - - - max_score_seq=vcalloc ( IN->nseq, sizeof (double)); - score_seq=vcalloc ( IN->nseq, sizeof (double)); - - tot_non_extended_weight=list2residue_total_weight(CL); - res_non_extended_weight=declare_int ((CL->S)->nseq, (CL->S)->max_len+1); - - for (a=0; a< IN->len_aln; a++) - { - for ( b=0; b< IN->nseq-1; b++) - { - s1=IN->order[b][0]; - r1=pos[b][a]; - for ( c=b+1; c< IN->nseq; c++) - { - s2=IN->order[c][0]; - r2=pos[c][a]; - if ( s1==s2 && !CL->do_self)continue; - else if ( r1<=0 || r2<=0) continue; - else - { - entry[SEQ1]=s1; - entry[SEQ2]=s2; - entry[R1]=r1; - entry[R2]=r2; - if ((l=main_search_in_list_constraint (entry,&p,4,CL))!=NULL) - { - res_non_extended_weight[s1][r1]+=l[WE]; - res_non_extended_weight[s2][r2]+=l[WE]; - } - entry[SEQ1]=s2; - entry[SEQ2]=s1; - entry[R1]=r2; - entry[R2]=r1; - if ((l=main_search_in_list_constraint (entry,&p,4,CL))!=NULL) - { - res_non_extended_weight[s1][r1]+=l[WE]; - res_non_extended_weight[s2][r2]+=l[WE]; - } - max_score=MAX(max_score,res_non_extended_weight[s1][r1]); - max_score=MAX(max_score,res_non_extended_weight[s2][r2]); - - } - } - } - } - - sprintf ( OUT->name[IN->nseq], "cons"); - for ( max_score_aln=0,score_aln=0,a=0; a< IN->len_aln; a++) - { - OUT->seq_al[IN->nseq][a]=NO_COLOR_RESIDUE; - for ( local_nseq=0,b=0; bnseq; b++){local_nseq+=(pos[b][a]>0)?1:0;} - - for ( max_score_col=0, score_col=0,b=0; b< IN->nseq; b++) - { - OUT->seq_al[b][a]=NO_COLOR_RESIDUE; - s1=IN->order[b][0]; - r1=pos[b][a]; - if (r1<=0)continue; - else - { - max_score_r =max_score;/*tot_non_extended_weight[s1][r1];*/ - score_r=res_non_extended_weight[s1][r1]; - res=(max_score_r==0 || local_nseq<2 )?NO_COLOR_RESIDUE:((score_r*10)/max_score_r); - - (OUT)->seq_al[b][a]=(res==NO_COLOR_RESIDUE)?res:(MIN(res, 9)); - max_score_col+=max_score_r; - score_col+=score_r; - max_score_seq[b]+=max_score_r; - score_seq[b]+=score_r; - max_score_aln+=max_score_r; - score_aln+=score_r; - } - res=(max_score_col==0 || local_nseq<2)?NO_COLOR_RESIDUE:((score_col*10)/max_score_col); - OUT->seq_al[IN->nseq][a]=(res==NO_COLOR_RESIDUE)?res:(MIN(res,9)); - } - } - IN->score_aln=OUT->score_aln=(max_score_aln==0)?0:((score_aln*100)/max_score_aln); - for ( a=0; a< OUT->nseq; a++) - { - OUT->score_seq[a]=(max_score_seq[a]==0)?0:((score_seq[a]*100)/max_score_seq[a]); - OUT->score_seq[a]=(OUT->score_seq[a]>100)?100:OUT->score_seq[a]; - } - OUT->score_aln=(OUT->score_aln>100)?100:OUT->score_aln; - - vfree ( score_seq); - vfree ( max_score_seq); - - free_int (tot_non_extended_weight, -1); - free_int (res_non_extended_weight, -1); - vfree(entry); - free_int (pos, -1); - - return OUT; - } - - -/*********************************************************************************************/ -/* */ -/* PROFILE/PRofile Functions */ -/* */ -/*********************************************************************************************/ -int channel_profile_profile (int *prf1, int *prf2, Constraint_list *CL); - -Profile_cost_func get_profile_mode_function (char *name, Profile_cost_func func) -{ - int a; - static int nfunc; - static Profile_cost_func *flist; - static char **nlist; - - - - /*The first time: initialize the list of pairwse functions*/ - /*If func==NULL:REturns a pointer to the function associated with a name*/ - /*If name is empty:Prints the name of the function associated with name*/ - - if ( nfunc==0) - { - flist=vcalloc ( 100, sizeof (Pwfunc)); - nlist=declare_char (100, 100); - - flist[nfunc]=cw_profile_profile; - sprintf (nlist[nfunc], "cw_profile_profile"); - nfunc++; - - flist[nfunc]=muscle_profile_profile; - sprintf (nlist[nfunc], "muscle_profile_profile"); - nfunc++; - - flist[nfunc]=channel_profile_profile; - sprintf (nlist[nfunc], "channel_profile_profile"); - nfunc++; - } - - for ( a=0; a0 && r2>0) - { - r1--; - r2--; - - prf1=(Profile1)?(Profile1->P)->count2[r1]:NULL; - prf2=(Profile2)?(Profile2->P)->count2[r2]:NULL; - - if (!prf1) {prf1=dummy; prf1[3]=(CL->S)->seq[s1][r1];} - else if (!prf2){prf2=dummy; prf2[3]=(CL->S)->seq[s2][r2];} - - score=((prf_prf==NULL)?cw_profile_profile:prf_prf) (prf1, prf2, CL); - return score; - } - else - return 0; - } - -int cw_profile_profile_count (int *prf1, int *prf2, Constraint_list *CL) - { - /*see function aln2count2 for prf structure*/ - int a, b, n; - int res1, res2; - double score=0; - - - for ( n=0,a=3; aM[res1-'A'][res2-'A']; - n+=prf1[a+1]*prf2[b+1]; - } - - - score=(score*SCORE_K)/n; - return score; - } -int muscle_profile_profile (int *prf1, int *prf2, Constraint_list *CL) - { - /*see function aln2count2 for prf structure*/ - int a, b; - int res1, res2; - double score=0, fg1, fg2, fi, fj, m; - static double *exp_lu; - if (exp_lu==NULL) - { - exp_lu=vcalloc ( 10000, sizeof (double)); - exp_lu+=2000; - for ( a=-1000; a<1000; a++) - exp_lu[a]=exp((double)a); - } - - - - for (a=3; aM[res1-'A'][res2-'A']);*/ - m=exp_lu[CL->M[res1-'A'][res2-'A']]; - score+=m*fi*fj; - } - } - - fg1=(double)prf1[2]/100; - fg2=(double)prf2[2]/100; - score=(score==0)?0:log(score)*(1-fg1)*(1-fg2); - score=(score*SCORE_K); - /*if ( score<-100)fprintf ( stderr, "\nSCORE %d %d", (int)score, cw_profile_profile(prf1, prf2, CL));*/ - - return (int)score; - } -int cw_profile_profile (int *prf1, int *prf2, Constraint_list *CL) - { - /*see function aln2count2 for prf structure*/ - int a, b, n,p; - int res1, res2; - double score=0; - - - for ( n=0,a=3; aM[res1-'A'][res2-'A']; - } - score=(score*SCORE_K)/((double)(n==0)?1:n); - return score; - } -int cw_profile_profile_old (int *prf1, int *prf2, Constraint_list *CL) - { - /*see function aln2count2 for prf structure*/ - int a, b, n,p; - int res1, res2; - double score=0; - - - - for ( n=0,a=3; aM[res1-'A'][res2-'A']; - } - score=(score*SCORE_K)/((double)(n==0)?1:n); - return score; - } -int channel_profile_profile ( int *prf1, int *prf2, Constraint_list *CL) -{ - - int score=0; - - prf1+=prf1[1]; - prf2+=prf2[1]; - - - if (prf1[0]!=prf1[0]){fprintf ( stderr, "\nERROR: Inconsistent number of channels [channel_profile_profile::FATAL%s]", PROGRAM);} - else - { - int a, n; - for (a=1, n=0; a<=prf1[0]; a++) - { - if (prf1[a]>0 && prf2[a]>0) - { - n++;score+=CL->M[prf1[a]-'A'][prf2[a]-'A']; - - } - } - - if ( n==0)return 0; - - score=(n==0)?0:(score*SCORE_K)/n; - - } - return score; -} - -/*********************************************************************************************/ -/* */ -/* FUNCTIONS FOR GETING THE COST : (Sequences) ->evaluate_residue_pair */ -/* */ -/*********************************************************************************************/ -int evaluate_blast_profile_score (Constraint_list *CL, int s1, int r1, int s2, int r2) -{ - Alignment *PRF1; - Alignment *PRF2; - - - PRF1=(Alignment*)atop(seq2T_value (CL->S, s1, "A", "_RB_")); - PRF2=(Alignment*)atop(seq2T_value (CL->S, s2, "A", "_RB_")); - - return generic_evaluate_profile_score (CL,PRF1,s1, r1, PRF2,s2, r2, CL->profile_mode); -} - -int evaluate_aln_profile_score (Constraint_list *CL, int s1, int r1, int s2, int r2) -{ - - return generic_evaluate_profile_score (CL,seq2R_template_profile((CL->S),s1),s1, r1, seq2R_template_profile(CL->S,s2),s2, r2, CL->profile_mode); -} - - -int evaluate_profile_score (Constraint_list *CL,Alignment *Prf1, int s1, int r1, Alignment *Prf2,int s2, int r2) -{ - - return generic_evaluate_profile_score (CL, Prf1, s1,r1,Prf2, s2,r2,CL->profile_mode); -} - -int evaluate_cdna_matrix_score (Constraint_list *CL, int s1, int r1, int s2, int r2) - { - char a1, a2; - - if (r1>0 && r2>0) - { - r1--; - r2--; - - a1=translate_dna_codon((CL->S)->seq[s1]+r1,'x'); - a2=translate_dna_codon((CL->S)->seq[s2]+r2,'x'); - - - - if (a1=='x' || a2=='x')return 0; - else return CL->M[a1-'A'][a2-'A']*SCORE_K; - } - else - { - return 0; - } - } -int evaluate_physico_score ( Constraint_list *CL, int s1, int r1, int s2, int r2) -{ - int a, b, p; - double tot; - static float **prop_table; - static int n_prop; - static double max; - if (r1<0 || r2<0)return 0; - if ( !prop_table) - { - prop_table= initialise_aa_physico_chemical_property_table(&n_prop); - for (p=0; p< n_prop; p++)max+=100; - max=sqrt(max); - } - a=tolower (( CL->S)->seq[s1][r1]); - b=tolower (( CL->S)->seq[s2][r2]); - - for (tot=0,p=0; p< n_prop; p++) - { - tot+=(double)(prop_table[p][a]-prop_table[p][b])*(prop_table[p][a]-prop_table[p][b]); - } - - tot=(sqrt(tot)/max)*10; - - return (int) tot*SCORE_K; -} - - - -int evaluate_diaa_matrix_score ( Constraint_list *CL, int s1, int r1, int s2, int r2) - { - - static int ****m; - static int *alp; - - if (m==NULL) - { - FILE *fp; - char k1[2], k2[2]; - int v1, v2, c; - char *buf=NULL; - int a; - - m=declare_arrayN(4, sizeof (int), 26, 26, 26, 26); - fp=vfopen ("diaa_mat.mat", "r"); - while ((c=fgetc (fp))!=EOF) - { - - ungetc (c, fp); - buf=vfgets(buf, fp); - - if (c=='#'); - else - { - sscanf (buf, "%s %s %d %d", k1, k2, &v1, &v2); - - m[k1[0]-'a'][k1[1]-'a'][k2[0]-'a'][k2[1]-'a']=v1; - m[k2[0]-'a'][k2[1]-'a'][k1[0]-'a'][k1[1]-'a']=v1; - } - } - vfclose (fp); - alp=vcalloc (256, sizeof (int)); - for (a=0; a<26; a++)alp[a+'a']=1; - alp['b']=0; - alp['j']=0; - alp['o']=0; - alp['u']=0; - alp['x']=0; - alp['z']=0; - } - - - if (r1>0 && r2>0) - { - int s=0, n=0; - char aa1, aa2, aa3, aa4, u; - - r1--; - r2--; - - if (r1>0 && r2>0) - { - aa1=tolower((CL->S)->seq[s1][r1-1]); - aa2=tolower((CL->S)->seq[s1][r1]); - aa3=tolower((CL->S)->seq[s2][r2-1]); - aa4=tolower((CL->S)->seq[s2][r2]); - u=alp[(int)aa1];u+=alp[(int)aa2];u+=alp[(int)aa3];u+=alp[(int)aa4]; - if (u==4) - { - s+=m[aa1-'a'][aa2-'a'][aa3-'a'][aa4-'a']; - n++; - } - } - - aa1=tolower((CL->S)->seq[s1][r1]); - aa2=tolower((CL->S)->seq[s1][r1+1]); - aa3=tolower((CL->S)->seq[s2][r2]); - aa4=tolower((CL->S)->seq[s2][r2+1]); - u=alp[(int)aa1];u+=alp[(int)aa2];u+=alp[(int)aa3];u+=alp[(int)aa4]; - if (u==4) - { - s+=m[aa1-'a'][aa2-'a'][aa3-'a'][aa4-'a']; - n++; - } - if (n)return (s*SCORE_K)/n; - else return 0; - } - return 0;} -int evaluate_monoaa_matrix_score ( Constraint_list *CL, int s1, int r1, int s2, int r2) - { - - static int **m; - static int *alp; - - if (m==NULL) - { - FILE *fp; - char k1[2], k2[2]; - int v1, v2, c; - char *buf=NULL; - int a; - - m=declare_arrayN(2, sizeof (int), 26, 26); - fp=vfopen ("monoaa_mat.mat", "r"); - while ((c=fgetc (fp))!=EOF) - { - - ungetc (c, fp); - buf=vfgets(buf, fp); - - if (c=='#'); - else - { - sscanf (buf, "%s %s %d %d", k1, k2, &v1, &v2); - - m[k1[0]-'a'][k2[0]-'a']=v1; - m[k2[0]-'a'][k1[0]-'a']=v1; - } - } - vfclose (fp); - alp=vcalloc (256, sizeof (int)); - for (a=0; a<26; a++)alp[a+'a']=1; - alp['b']=0; - alp['j']=0; - alp['o']=0; - alp['u']=0; - alp['x']=0; - alp['z']=0; - } - - - if (r1>0 && r2>0) - { - int s=0, n=0; - char aa1, aa3, u; - - r1--; - r2--; - - if (r1>0 && r2>0) - { - aa1=tolower((CL->S)->seq[s1][r1]); - aa3=tolower((CL->S)->seq[s2][r2]); - u=alp[(int)aa1];u+=alp[(int)aa3]; - if (u==2) - { - s+=m[aa1-'a'][aa3-'a']; - n++; - } - } - - if (n)return (s*SCORE_K)/n; - else return 0; - } - return 0;} -int evaluate_matrix_score ( Constraint_list *CL, int s1, int r1, int s2, int r2) - { - - if ( seq2R_template_profile (CL->S,s1) ||seq2R_template_profile (CL->S,s2)) - { - return evaluate_aln_profile_score ( CL, s1,r1, s2, r2); - } - - if (r1>0 && r2>0) - { - r1--; - r2--; - - return CL->M[(CL->S)->seq[s1][r1]-'A'][(CL->S)->seq[s2][r2]-'A']*SCORE_K; - } - else - return 0; - } -int *get_curvature ( int s1, Constraint_list *CL); -int evaluate_curvature_score( Constraint_list *CL, int s1, int r1, int s2, int r2) -{ - static int **st; - int score; - CL->gop=0; - CL->gep=0; - - if (!st) st= vcalloc ((CL->S)->nseq, sizeof (char*)); - if (!st[s1]) - { - st[s1]=get_curvature (s1, CL); - } - if (!st[s2]) - { - st[s2]=get_curvature (s2, CL); - } - - if (r1>0 && r2>0) - { - char p1, p2; - - r1--; - r2--; - - p1=st[s1][r1]; - p2=st[s2][r2]; - - score=p1-p2; - score=FABS(score); - score=20-score; - //HERE ("%d", score); - //if (score<0)HERE ("%d", score); - return score; - } - else - { - return 0; - } - -} -int *get_curvature ( int s1, Constraint_list *CL) -{ - int *array, n=0, a; - char c; - FILE *fp; - char name [1000], b1[100], b2[100]; - float f; - sprintf ( name, "%s.curvature", (CL->S)->name[s1]); - array=vcalloc (strlen ((CL->S)->seq[s1]), sizeof (int)); - fp=vfopen ( name, "r"); - while ( fscanf (fp, "%s %d %c %f\n",b1, &a, &c,&f )==4) - { - if ( c!=(CL->S)->seq[s1][n]){HERE ("ERROR: %c %c", c,(CL->S)->seq[s1][n] );exit (0);} - else array[n++]=(int)(float)100*(float)f; - } - vfclose (fp); - return array; -} -int evaluate_tm_matrix_score ( Constraint_list *CL, int s1, int r1, int s2, int r2) -{ - static char **st; - static int **tmat; - - - if (!tmat) - { - tmat=read_matrice ("blosum62mt"); - } - - if (!st) st= vcalloc ((CL->S)->nseq, sizeof (char*)); - if (!st[s1])st[s1]=seq2T_template_string((CL->S),s1); - if (!st[s2])st[s2]=seq2T_template_string((CL->S),s2); - - - - if (r1>0 && r2>0) - { - int p1, p2; - - r1--; - r2--; - p1=p2=-1; - - if (st[s1])p1=tolower (st[s1][r1]); - if (st[s2])p2=tolower (st[s2][r2]); - - if ( p1=='h' && p2=='h')return tmat[(CL->S)->seq[s1][r1]-'A'][(CL->S)->seq[s2][r2]-'A']*SCORE_K; - //else if (p1=='h' || p2=='h' ) return -100*SCORE_K; - else return CL->M[(CL->S)->seq[s1][r1]-'A'][(CL->S)->seq[s2][r2]-'A']*SCORE_K; - - } - else - { - return 0; - } -} -int evaluate_ssp_matrix_score ( Constraint_list *CL, int s1, int r1, int s2, int r2) -{ - static char **st; - static int **alpha, **beta, **coil; - - - if (!alpha) - { - beta=read_matrice ("beta_mat"); - alpha=read_matrice ("alpha_mat"); - coil=read_matrice ("coil_mat"); - } - - if (!st) st= vcalloc ((CL->S)->nseq, sizeof (char*)); - if (!st[s1])st[s1]=seq2E_template_string((CL->S),s1); - if (!st[s2])st[s2]=seq2E_template_string((CL->S),s2); - - - if ( !st[s1])HERE ("1******"); - if ( !st[s2])HERE ("2******"); - - if (r1>0 && r2>0) - { - char p1, p2; - float F; - - int score=0; - p1=p2=-1; - r1--; - r2--; - - if (st[s1])p1=tolower (st[s1][r1]); - if (st[s2])p2=tolower (st[s2][r2]); - - if (p1!=-1 && p1==p2)F=1.3; - else F=1; - - - score= CL->M[(CL->S)->seq[s1][r1]-'A'][(CL->S)->seq[s2][r2]-'A']*F*SCORE_K; - - - return score; - } - - else - { - return 0; - } -} - - -int evaluate_combined_matrix_score ( Constraint_list *CL, int s1, int r1, int s2, int r2) - { - /* - function documentation: start - - int evaluate_matrix_score ( Constraint_list *CL, int s1, int s2, int r1, int r2) - - this function evaluates the score for matching residue S1(r1) wit residue S2(r2) - using Matrix CL->M; - - function documentation: end - */ - - if ( seq2R_template_profile (CL->S,s1) ||seq2R_template_profile (CL->S,s2)) - { - return evaluate_aln_profile_score ( CL, s1,r1, s2, r2); - } - - if (r1>0 && r2>0) - { - r1--; - r2--; - if (r1==0 || r2==0)return CL->M[(CL->S)->seq[s1][r1]-'A'][(CL->S)->seq[s2][r2]-'A']*SCORE_K; - else - { - int A1, A2, B1, B2, a2, b2; - int score; - - A1=toupper((CL->S)->seq[s1][r1-1]); - A2=toupper((CL->S)->seq[s1][r1]); - B1=toupper((CL->S)->seq[s2][r2-1]); - B2=toupper((CL->S)->seq[s2][r2]); - - a2=tolower(A2); - b2=tolower(B2); - A1-='A';A2-='A';B1-='A'; B2-='A';a2-='A';b2-='A'; - - score=CL->M[a2][b2]-FABS((CL->M[A1][A2])-(CL->M[B1][B2])); - score*=SCORE_K; - return score; - } - } - else - return 0; - } - -int residue_pair_non_extended_list ( Constraint_list *CL, int s1, int r1, int s2, int r2 ) - { - - /* - This is the generic Function->works with everything - - int residue_pair_non_extended_list ( Constraint_list *CL, int s1, int r1, int s2, int r2, int field ); - - Computes the non extended score for aligning residue seq1(r1) Vs seq2(r2) - - This function can compare a sequence with itself. - - Associated functions: See util constraint list, list extention functions. - function documentation: end - */ - - int p; - - - static int *entry; - int *r; - int field; - - field = CL->weight_field; - - - if ( r1<=0 || r2<=0)return 0; - else if ( !CL->extend_jit) - { - if ( !entry) entry=vcalloc (LIST_N_FIELDS , sizeof (int)); - entry[SEQ1]=s1; - entry[SEQ2]=s2; - entry[R1]=r1; - entry[R2]=r2; - if ( r1==r2 && s1==s2) return UNDEFINED; - r=main_search_in_list_constraint( entry,&p,4,CL); - if (r==NULL)return 0; - else return r[field]*SCORE_K; - } - else - return UNDEFINED;/*ERROR*/ - - - } - - - -int residue_pair_extended_list_mixt (Constraint_list *CL, int s1, int r1, int s2, int r2 ) - { - int score=0; - - score+= residue_pair_extended_list_quadruplet(CL, s1, r1, s2, r2); - score+= residue_pair_extended_list (CL, s1, r1, s2, r2); - - return score*SCORE_K; - } - -int residue_pair_extended_list_quadruplet (Constraint_list *CL, int s1, int r1, int s2, int r2 ) - { - double score=0; - - int t_s, t_r, t_w, q_s, q_r, q_w; - int a, b; - static int **hasch; - - int field; - /* This measure the quadruplets cost on a pair of residues*/ - - - - field=CL->weight_field; - - if ( r1<=0 || r2<=0)return 0; - if ( !hasch) - { - hasch=vcalloc ( (CL->S)->nseq, sizeof (int*)); - for ( a=0; a< (CL->S)->nseq; a++)hasch[a]=vcalloc ( (CL->S)->len[a]+1, sizeof (int)); - } - - CL=index_res_constraint_list ( CL, field); - hasch[s1][r1]=100000; - for (a=1; a< CL->residue_index[s1][r1][0]; a+=3) - { - t_s=CL->residue_index[s1][r1][a]; - t_r=CL->residue_index[s1][r1][a+1]; - t_w=CL->residue_index[s1][r1][a+2]; - if ( CL->seq_for_quadruplet[t_s]) - { - for ( b=1; bresidue_index[t_s][t_r][0]; b+=3) - { - q_s=CL->residue_index[t_s][t_r][b]; - q_r=CL->residue_index[t_s][t_r][b+1]; - q_w=CL->residue_index[t_s][t_r][b+2]; - if (CL-> seq_for_quadruplet[q_s]) - hasch[q_s][q_r]=MIN(q_w,t_w); - - } - } - } - - - for (a=1; a< CL->residue_index[s2][r2][0]; a+=3) - { - t_s=CL->residue_index[s2][r2][a]; - t_r=CL->residue_index[s2][r2][a+1]; - t_w=CL->residue_index[s2][r2][a+2]; - if ( CL->seq_for_quadruplet[t_s]) - { - for ( b=1; bresidue_index[t_s][t_r][0]; b+=3) - { - q_s=CL->residue_index[t_s][t_r][b]; - q_r=CL->residue_index[t_s][t_r][b+1]; - q_w=CL->residue_index[t_s][t_r][b+2]; - if (hasch[q_s][q_r] && CL->seq_for_quadruplet[q_s]) - score+=MIN(hasch[q_s][q_r],MIN(q_w,t_w)); - } - } - } - - score=(CL->normalise)?((score*CL->normalise)/CL->max_ext_value):score; - - for (a=1; a< CL->residue_index[s1][r1][0]; a+=3) - { - t_s=CL->residue_index[s1][r1][a]; - t_r=CL->residue_index[s1][r1][a+1]; - t_w=CL->residue_index[s1][r1][a+2]; - if ( CL->seq_for_quadruplet[t_s]) - { - for ( b=1; bresidue_index[t_s][t_r][0]; b+=3) - { - q_s=CL->residue_index[t_s][t_r][b]; - q_r=CL->residue_index[t_s][t_r][b+1]; - hasch[q_s][q_r]=0; - } - } - } - - return (int)(score*SCORE_K); - } - - -Constraint_list * R_extension ( Constraint_list *CL, Constraint_list *R); -int residue_pair_extended_list4rna4 (Constraint_list *CL,int s1, int r1, int s2, int r2 ) -{ - static int rna_lib; - - if (!rna_lib) - { - sprintf ( CL->rna_lib, "%s", seq2rna_lib (CL->S, NULL)); - rna_lib=1; - } - return residue_pair_extended_list4rna2 (CL, s1, r1, s2,r2); -} -int residue_pair_extended_list4rna1 (Constraint_list *CL, int s1, int r1, int s2, int r2 ) -{ - static Constraint_list *R; - if (!R)R=read_rna_lib (CL->S, CL->rna_lib); - return residue_pair_extended_list4rna (CL, R, s1, r1, s2, r2); -} - -int residue_pair_extended_list4rna3 (Constraint_list *CL,int s1, int r1, int s2, int r2 ) -{ - static Constraint_list *R; - if (!R) - { - R=read_rna_lib (CL->S, CL->rna_lib); - rna_lib_extension (CL,R); - } - return residue_pair_extended_list (CL, s1,r1, s2,r2); -} - -int residue_pair_extended_list4rna2 (Constraint_list *CL,int s1, int r1, int s2, int r2 ) -{ - static Constraint_list *R; - - - if (!R) - { - - R=read_rna_lib (CL->S, CL->rna_lib); - rna_lib_extension (CL,R); - - } - - return residue_pair_extended_list4rna (CL, R, s1, r1, s2, r2); -} -int residue_pair_extended_list4rna ( Constraint_list *CL,Constraint_list *R, int s1, int r1, int s2, int r2 ) -{ - - int a, b, n1, n2; - int list1[100]; - int list2[100]; - int score=0, score2; - - - - if ( r1<0 || r2<0)return 0; - n1=n2=0; - - list1[n1++]=r1; - for (a=1; aresidue_index[s1][r1][0]; a+=3) - { - list1[n1++]=R->residue_index[s1][r1][a+1]; - } - - - list2[n2++]=r2; - for (a=1; aresidue_index[s2][r2][0]; a+=3) - { - list2[n2++]=R->residue_index[s2][r2][a+1]; - } - - - score=residue_pair_extended_list ( CL, s1,list1[0], s2,list2[0]); - - for (score2=0,a=1; arna_lib, &n); - - R=declare_constraint_list ( CL->S,NULL, NULL, 0,NULL, NULL); - - for (a=0; a< n; a++) - { - R=read_constraint_list_file (R, list[a]); - } - R=index_res_constraint_list (R, CL->weight_field); - - } - - if ( r1<0 || r2<0)return 0; - n1=n2=0; - - list1[n1++]=r1; - for (a=1; aresidue_index[s1][r1][0]; a+=3) - { - list1[n1++]=R->residue_index[s1][r1][a+1]; - } - - - list2[n2++]=r2; - for (a=1; aresidue_index[s2][r2][0]; a+=3) - { - list2[n2++]=R->residue_index[s2][r2][a+1]; - } - - - score=residue_pair_extended_list ( CL, s1,list1[0], s2,list2[0]); - - for (score2=0,a=1; aweight_field; - - if ( r1<=0 || r2<=0)return 0; - if ( !hasch || max_len!=(CL->S)->max_len) - { - max_len=(CL->S)->max_len; - if ( hasch) free_int ( hasch, -1); - hasch=declare_int ( (CL->S)->nseq, (CL->S)->max_len+1); - } - - CL=index_res_constraint_list ( CL, field); - - /* Check matches for R1 in the indexed lib*/ - hasch[s1][r1]=FORBIDEN; - for (a=1; a< CL->residue_index[s1][r1][0]; a+=3) - { - t_s=CL->residue_index[s1][r1][a]; - t_r=CL->residue_index[s1][r1][a+1]; - hasch[t_s][t_r]=CL->residue_index[s1][r1][a+2]; - } - - /*Check Matches for r1 <-> r2 in the indexed lib */ - for (a=1; a< CL->residue_index[s2][r2][0]; a+=3) - { - t_s=CL->residue_index[s2][r2][a]; - t_r=CL->residue_index[s2][r2][a+1]; - - - if (hasch[t_s][t_r]) - { - if (hasch[t_s][t_r]==FORBIDEN) - { - score+=CL->residue_index[s2][r2][a+2]; - } - else - { - delta=MIN(hasch[t_s][t_r],CL->residue_index[s2][r2][a+2]); - score+=delta; - } - } - } - - clean_residue_pair_hasch ( s1, r1,s2, r2, hasch, CL); - return score; - } -int residue_pair_extended_list_pc ( Constraint_list *CL, int s1, int r1, int s2, int r2 ) - { - double score=0; - - - - int a, t_s, t_r; - static int **hasch; - static int max_len; - int field; - double delta; - - /* - - function documentation: start - - int residue_pair_extended_list ( Constraint_list *CL, int s1, int r1, int s2, int r2); - - Computes the extended score for aligning residue seq1(r1) Vs seq2(r2) - Computes: matrix_score - non extended score - extended score - - The extended score depends on the function index_res_constraint_list. - This function can compare a sequence with itself. - - Associated functions: See util constraint list, list extention functions. - - function documentation: end - */ - - field=CL->weight_field; - - if ( r1<=0 || r2<=0)return 0; - if ( !hasch || max_len!=(CL->S)->max_len) - { - max_len=(CL->S)->max_len; - if ( hasch) free_int ( hasch, -1); - hasch=declare_int ( (CL->S)->nseq, (CL->S)->max_len+1); - } - - CL=index_res_constraint_list ( CL, field); - - /* Check matches for R1 in the indexed lib*/ - hasch[s1][r1]=FORBIDEN; - for (a=1; a< CL->residue_index[s1][r1][0]; a+=3) - { - t_s=CL->residue_index[s1][r1][a]; - t_r=CL->residue_index[s1][r1][a+1]; - hasch[t_s][t_r]=CL->residue_index[s1][r1][a+2]; - } - - /*Check Matches for r1 <-> r2 in the indexed lib */ - for (a=1; a< CL->residue_index[s2][r2][0]; a+=3) - { - t_s=CL->residue_index[s2][r2][a]; - t_r=CL->residue_index[s2][r2][a+1]; - - - if (hasch[t_s][t_r]) - { - if (hasch[t_s][t_r]==FORBIDEN) - { - score+=((float)CL->residue_index[s2][r2][a+2]/NORM_F); - } - else - { - //delta=((float)hasch[t_s][t_r]/NORM_F)*((float)CL->residue_index[s2][r2][a+2]/NORM_F); - delta=MIN((((float)hasch[t_s][t_r]/NORM_F)),(((float)CL->residue_index[s2][r2][a+2]/NORM_F))); - score+=delta; - } - } - } - - clean_residue_pair_hasch ( s1, r1,s2, r2, hasch, CL); - score/=(CL->S)->nseq; - return score*NORM_F; - } - - -int residue_pair_extended_list ( Constraint_list *CL, int s1, int r1, int s2, int r2 ) - { - double score=0; - double max_score=0; - double max_val=0; - - int a, t_s, t_r; - static int **hasch; - static int max_len; - int field; - - /* - new function: self normalized - function documentation: start - - int residue_pair_extended_list ( Constraint_list *CL, int s1, int r1, int s2, int r2); - - Computes the extended score for aligning residue seq1(r1) Vs seq2(r2) - Computes: matrix_score - non extended score - extended score - - The extended score depends on the function index_res_constraint_list. - This function can compare a sequence with itself. - - Associated functions: See util constraint list, list extention functions. - - function documentation: end - */ - - field=CL->weight_field; - - if ( r1<=0 || r2<=0)return 0; - if ( !hasch || max_len!=(CL->S)->max_len) - { - max_len=(CL->S)->max_len; - if ( hasch) free_int ( hasch, -1); - hasch=declare_int ( (CL->S)->nseq, (CL->S)->max_len+1); - } - - CL=index_res_constraint_list ( CL, field); - - /* Check matches for R1 in the indexed lib*/ - hasch[s1][r1]=FORBIDEN; - for (a=1; a< CL->residue_index[s1][r1][0]; a+=3) - { - t_s=CL->residue_index[s1][r1][a]; - t_r=CL->residue_index[s1][r1][a+1]; - hasch[t_s][t_r]=CL->residue_index[s1][r1][a+2]; - max_score+=CL->residue_index[s1][r1][a+2]; - } - - /*Check Matches for r1 <-> r2 in the indexed lib */ - for (a=1; a< CL->residue_index[s2][r2][0]; a+=3) - { - t_s=CL->residue_index[s2][r2][a]; - t_r=CL->residue_index[s2][r2][a+1]; - - - if (hasch[t_s][t_r]) - { - if (hasch[t_s][t_r]==FORBIDEN) - { - score+=CL->residue_index[s2][r2][a+2]; - max_score+=CL->residue_index[s2][r2][a+2]; - } - else - { - double delta; - delta=MIN(hasch[t_s][t_r],CL->residue_index[s2][r2][a+2]); - - score+=delta; - max_score-=hasch[t_s][t_r]; - max_score+=delta; - max_val=MAX(max_val,delta); - } - } - else - { - max_score+=CL->residue_index[s2][r2][a+2]; - } - } - - max_score-=hasch[s2][r2]; - clean_residue_pair_hasch ( s1, r1,s2, r2, hasch, CL); - - - if ( max_score==0)score=0; - else if ( CL->normalise) - { - score=((score*CL->normalise)/max_score)*SCORE_K; - if (max_val> CL->normalise) - { - score*=max_val/(double)CL->normalise; - } - } - return (int) score; - } -int ** clean_residue_pair_hasch (int s1, int r1, int s2, int r2,int **hasch, Constraint_list *CL) - { - int a, t_s, t_r; - if ( !hasch) return hasch; - - for (a=1; a< CL->residue_index[s1][r1][0]; a+=3) - { - t_s=CL->residue_index[s1][r1][a]; - t_r=CL->residue_index[s1][r1][a+1]; - hasch[t_s][t_r]=0; - } - hasch[s1][r1]=hasch[s2][r2]=0; - return hasch; - } -int residue_pair_extended_list_old ( Constraint_list *CL, int s1, int r1, int s2, int r2 ) - { - double score=0; - - int a, t_s, t_r; - static int **hasch; - static int max_len; - - int field; - /* - function documentation: start - - int residue_pair_extended_list ( Constraint_list *CL, int s1, int r1, int s2, int r2); - - Computes the extended score for aligning residue seq1(r1) Vs seq2(r2) - Computes: matrix_score - non extended score - extended score - - The extended score depends on the function index_res_constraint_list. - This function can compare a sequence with itself. - - Associated functions: See util constraint list, list extention functions. - - function documentation: end - */ - - - - field=CL->weight_field; - - if ( r1<=0 || r2<=0)return 0; - if ( !hasch || max_len!=(CL->S)->max_len) - { - max_len=(CL->S)->max_len; - if ( hasch) free_int ( hasch, -1); - hasch=declare_int ( (CL->S)->nseq, (CL->S)->max_len+1); - } - - CL=index_res_constraint_list ( CL, field); - - hasch[s1][r1]=100000; - for (a=1; a< CL->residue_index[s1][r1][0]; a+=3) - { - t_s=CL->residue_index[s1][r1][a]; - t_r=CL->residue_index[s1][r1][a+1]; - hasch[t_s][t_r]=CL->residue_index[s1][r1][a+2]; - } - - - for (a=1; a< CL->residue_index[s2][r2][0]; a+=3) - { - t_s=CL->residue_index[s2][r2][a]; - t_r=CL->residue_index[s2][r2][a+1]; - if (hasch[t_s][t_r]) - { - if (field==WE) - { - - score+=MIN(hasch[t_s][t_r],CL->residue_index[s2][r2][a+2]); - } - } - } - - - score=(CL->normalise)?((score*CL->normalise)/CL->max_ext_value):score; - - - for (a=1; a< CL->residue_index[s1][r1][0]; a+=3) - { - t_s=CL->residue_index[s1][r1][a]; - t_r=CL->residue_index[s1][r1][a+1]; - hasch[t_s][t_r]=0; - } - hasch[s1][r1]=hasch[s2][r2]=0; - - - return (int)(score*SCORE_K); - } -int residue_pair_test_function ( Constraint_list *CL, int s1, int r1, int s2, int r2 ) - { - double score=0; - - int a, t_s, t_r; - static int **hasch; - static int max_len; - int cons1; - int cons2; - - int field; - /* - function documentation: start - - int residue_pair_extended_list ( Constraint_list *CL, int s1, int r1, int s2, int r2); - - Computes the extended score for aligning residue seq1(r1) Vs seq2(r2) - Computes: matrix_score - non extended score - extended score - - The extended score depends on the function index_res_constraint_list. - This function can compare a sequence with itself. - - Associated functions: See util constraint list, list extention functions. - - function documentation: end - */ - - - CL->weight_field=WE; - field=CL->weight_field; - - - if ( r1<=0 || r2<=0)return 0; - if ( !hasch || max_len!=(CL->S)->max_len) - { - max_len=(CL->S)->max_len; - if ( hasch) free_int ( hasch, -1); - hasch=declare_int ( (CL->S)->nseq, (CL->S)->max_len+1); - } - - CL=index_res_constraint_list ( CL, field); - - hasch[s1][r1]=1000; - for (a=1; a< CL->residue_index[s1][r1][0]; a+=3) - { - t_s=CL->residue_index[s1][r1][a]; - t_r=CL->residue_index[s1][r1][a+1]; - hasch[t_s][t_r]=CL->residue_index[s1][r1][a+2]; - } - - - for (a=1; a< CL->residue_index[s2][r2][0]; a+=3) - { - t_s=CL->residue_index[s2][r2][a]; - t_r=CL->residue_index[s2][r2][a+1]; - if (hasch[t_s][t_r]) - { - cons1=hasch[t_s][t_r]; - cons2=CL->residue_index[s2][r2][a+2]; - score +=MIN(cons1,cons2); - } - } - - - score=(CL->normalise)?((score*CL->normalise)/CL->max_ext_value):score; - - for (a=1; a< CL->residue_index[s1][r1][0]; a+=3) - { - t_s=CL->residue_index[s1][r1][a]; - t_r=CL->residue_index[s1][r1][a+1]; - hasch[t_s][t_r]=0; - } - hasch[s1][r1]=hasch[s2][r2]=0; - - - return (int)(score*SCORE_K); - } - -int residue_pair_relative_extended_list ( Constraint_list *CL, int s1, int r1, int s2, int r2 ) - { - int a, t_s, t_r; - static int **hasch; - static int max_len; - int score=0; - int total_score=0; - int field; - /* - function documentation: start - - int residue_pair_extended_list ( Constraint_list *CL, int s1, int r1, int s2, int r2); - - Computes the extended score for aligning residue seq1(r1) Vs seq2(r2) - Computes: matrix_score - non extended score - extended score - - The extended score depends on the function index_res_constraint_list. - This function can compare a sequence with itself. - - Associated functions: See util constraint list, list extention functions. - - function documentation: end - */ - - - - field=CL->weight_field; - - if ( r1<=0 || r2<=0)return 0; - if ( !hasch || max_len!=(CL->S)->max_len) - { - max_len=(CL->S)->max_len; - if ( hasch) free_int ( hasch, -1); - hasch=declare_int ( (CL->S)->nseq, (CL->S)->max_len+1); - } - - CL=index_res_constraint_list ( CL, field); - - hasch[s1][r1]=100000; - for (a=1; a< CL->residue_index[s1][r1][0]; a+=3) - { - t_s=CL->residue_index[s1][r1][a]; - t_r=CL->residue_index[s1][r1][a+1]; - hasch[t_s][t_r]=CL->residue_index[s1][r1][a+2]; - total_score+=CL->residue_index[s1][r1][a+2]; - } - - - for (a=1; a< CL->residue_index[s2][r2][0]; a+=3) - { - t_s=CL->residue_index[s2][r2][a]; - t_r=CL->residue_index[s2][r2][a+1]; - total_score+=CL->residue_index[s1][r1][a+2]; - if (hasch[t_s][t_r]) - { - if (field==WE){score+=2*MIN(hasch[t_s][t_r],CL->residue_index[s2][r2][a+2]);} - } - } - - score=((CL->normalise*score)/total_score); - - - for (a=1; a< CL->residue_index[s1][r1][0]; a+=3) - { - t_s=CL->residue_index[s1][r1][a]; - t_r=CL->residue_index[s1][r1][a+1]; - hasch[t_s][t_r]=0; - } - hasch[s1][r1]=hasch[s2][r2]=0; - - return score*SCORE_K; - } -int residue_pair_extended_list_g_coffee_quadruplet ( Constraint_list *CL, int s1, int r1, int s2, int r2 ) -{ - int t_s, t_r, t_w, q_s, q_r, q_w; - int a, b; - static int **hasch; - int score=0, s=0; - - int field; - /* This measure the quadruplets cost on a pair of residues*/ - - field=CL->weight_field; - - if ( r1<=0 || r2<=0)return 0; - if ( !hasch) - { - hasch=vcalloc ( (CL->S)->nseq, sizeof (int*)); - for ( a=0; a< (CL->S)->nseq; a++)hasch[a]=vcalloc ( (CL->S)->len[a]+1, sizeof (int)); - } - - CL=index_res_constraint_list ( CL, field); - hasch[s1][r1]=100000; - for (a=1; a< CL->residue_index[s1][r1][0]; a+=3) - { - t_s=CL->residue_index[s1][r1][a]; - t_r=CL->residue_index[s1][r1][a+1]; - t_w=CL->residue_index[s1][r1][a+2]; - if ( CL->seq_for_quadruplet[t_s]) - { - for ( b=1; bresidue_index[t_s][t_r][0]; b+=3) - { - q_s=CL->residue_index[t_s][t_r][b]; - q_r=CL->residue_index[t_s][t_r][b+1]; - if (CL-> seq_for_quadruplet[q_s]) - { - - hasch[q_s][q_r]=MIN(CL->residue_index[t_s][t_r][b+2],t_w); - } - } - } - } - - - for (s=0,score=0,a=1; a< CL->residue_index[s2][r2][0]; a+=3) - { - t_s=CL->residue_index[s2][r2][a]; - t_r=CL->residue_index[s2][r2][a+1]; - t_w=CL->residue_index[s2][r2][a+2]; - if ( CL->seq_for_quadruplet[t_s]) - { - for ( b=1; bresidue_index[t_s][t_r][0]; b+=3) - { - q_s=CL->residue_index[t_s][t_r][b]; - q_r=CL->residue_index[t_s][t_r][b+1]; - q_w=CL->residue_index[t_s][t_r][b+2]; - if (hasch[q_s][q_r] && CL->seq_for_quadruplet[q_s]) - s=MIN(hasch[q_s][q_r],MIN(CL->residue_index[t_s][t_r][b+2],q_w)); - score=MAX(score, s); - } - } - } - - for (a=1; a< CL->residue_index[s1][r1][0]; a+=3) - { - t_s=CL->residue_index[s1][r1][a]; - t_r=CL->residue_index[s1][r1][a+1]; - t_w=CL->residue_index[s1][r1][a+2]; - if ( CL->seq_for_quadruplet[t_s]) - { - for ( b=1; bresidue_index[t_s][t_r][0]; b+=3) - { - q_s=CL->residue_index[t_s][t_r][b]; - q_r=CL->residue_index[t_s][t_r][b+1]; - hasch[q_s][q_r]=0; - } - } - } - - return score*SCORE_K; - } -int residue_pair_extended_list_g_coffee ( Constraint_list *CL, int s1, int r1, int s2, int r2 ) - { - int a, t_s, t_r; - static int **hasch; - int score=0,s; - - int field; - /* - function documentation: start - - int residue_pair_extended_list ( Constraint_list *CL, int s1, int r1, int s2, int r2); - - Computes the extended score for aligning residue seq1(r1) Vs seq2(r2) - Computes: matrix_score - non extended score - extended score - - The extended score depends on the function index_res_constraint_list. - This function can compare a sequence with itself. - - Associated functions: See util constraint list, list extention functions. - - function documentation: end - */ - - field=CL->weight_field; - - if ( r1<=0 || r2<=0)return 0; - if ( !hasch) - { - hasch=vcalloc ( (CL->S)->nseq, sizeof (int*)); - for ( a=0; a< (CL->S)->nseq; a++)hasch[a]=vcalloc ( (CL->S)->len[a]+1, sizeof (int)); - } - - CL=index_res_constraint_list ( CL, field); - - hasch[s1][r1]=100000; - for (a=1; a< CL->residue_index[s1][r1][0]; a+=3) - { - t_s=CL->residue_index[s1][r1][a]; - t_r=CL->residue_index[s1][r1][a+1]; - hasch[t_s][t_r]=CL->residue_index[s1][r1][a+2]; - } - - - for (s=0, score=0,a=1; a< CL->residue_index[s2][r2][0]; a+=3) - { - t_s=CL->residue_index[s2][r2][a]; - t_r=CL->residue_index[s2][r2][a+1]; - - if (hasch[t_s][t_r]) - { - if (field==WE) - {s=MIN(hasch[t_s][t_r],CL->residue_index[s2][r2][a+2]); - score=MAX(s,score); - } - } - } - - for (a=1; a< CL->residue_index[s1][r1][0]; a+=3) - { - t_s=CL->residue_index[s1][r1][a]; - t_r=CL->residue_index[s1][r1][a+1]; - hasch[t_s][t_r]=0; - } - hasch[s1][r1]=hasch[s2][r2]=0; - - return score*SCORE_K; - } - -int extend_residue_pair ( Constraint_list *CL, int s1, int r1, int s2, int r2) - { - double score=0; - - int a, t_s, t_r, p; - static int **hasch; - - static int *entry; - int *r; - int field; - - - - /* - This is the generic Function->works with everything - should be gradually phased out - - - int extend_residue_pair ( Constraint_list *CL, int s1, int r1, int s2, int r2, int field ) - - Computes the extended score for aligning residue seq1(r1) Vs seq2(r2) - Computes: matrix_score - non extended score - extended score - - The extended score depends on the function index_res_constraint_list. - This function can compare a sequence with itself. - - Associated functions: See util constraint list, list extention functions. - function documentation: end - */ - - - field=CL->weight_field; - - if ( r1<=0 || r2<=0)return 0; - else if ( !CL->L && CL->M) - { - return evaluate_matrix_score (CL, s1,r1, s2, r2); - } - - else if ( !CL->extend_jit) - { - if ( !entry) entry=vcalloc (LIST_N_FIELDS , sizeof (int)); - entry[SEQ1]=s1; - entry[SEQ2]=s2; - entry[R1]=r1; - entry[R2]=r2; - r=main_search_in_list_constraint( entry,&p,4,CL); - if (r==NULL)return 0; - else return r[field]; - } - else - { - if ( !hasch) - { - hasch=vcalloc ( (CL->S)->nseq, sizeof (int*)); - for ( a=0; a< (CL->S)->nseq; a++)hasch[a]=vcalloc ( (CL->S)->len[a]+1, sizeof (int)); - } - - CL=index_res_constraint_list ( CL, field); - - hasch[s1][r1]=100000; - for (a=1; a< CL->residue_index[s1][r1][0]; a+=3) - { - t_s=CL->residue_index[s1][r1][a]; - t_r=CL->residue_index[s1][r1][a+1]; - hasch[t_s][t_r]=CL->residue_index[s1][r1][a+2]; - } - - - for (a=1; a< CL->residue_index[s2][r2][0]; a+=3) - { - t_s=CL->residue_index[s2][r2][a]; - t_r=CL->residue_index[s2][r2][a+1]; - if (hasch[t_s][t_r]) - { - if (field==WE)score+=MIN(hasch[t_s][t_r],CL->residue_index[s2][r2][a+2] ); - - } - } - score=(CL->normalise)?((score*CL->normalise)/CL->max_ext_value):score; - for (a=1; a< CL->residue_index[s1][r1][0]; a+=3) - { - t_s=CL->residue_index[s1][r1][a]; - t_r=CL->residue_index[s1][r1][a+1]; - hasch[t_s][t_r]=0; - } - hasch[s1][r1]=hasch[s2][r2]=0; - - return (int)(score*SCORE_K); - } - } -/*********************************************************************************************/ -/* */ -/* FUNCTIONS FOR GETTING THE PW COST : CL->get_dp_cost */ -/* */ -/*********************************************************************************************/ -int get_dp_cost_blosum_matrix (Alignment *A, int**pos1, int ns1, int*list1, int col1, int**pos2, int ns2, int*list2, int col2, Constraint_list *CL) -{ - int s1, r1, s2, r2; - static int **matrix; - - if (!matrix) matrix=read_matrice ("blosum62mt"); - s1=A->order[list1[0]][0]; - s2=A->order[list2[0]][0]; - r1=pos1[list1[0]][col1]; - r2=pos2[list2[0]][col2]; - - /*dp cost function: works only with two sequences*/ - - if ( seq2R_template_profile (CL->S,s1) ||seq2R_template_profile (CL->S,s2)) - return evaluate_aln_profile_score ( CL, s1,r1, s2, r2) -CL->nomatch*SCORE_K; - else if (r1>0 && r2>0) - { - r1--; - r2--; - - - return matrix [(CL->S)->seq[s1][r1]-'A'][(CL->S)->seq[s2][r2]-'A']*SCORE_K -CL->nomatch*SCORE_K; - - } - else - return -CL->nomatch*SCORE_K ; -} -int get_dp_cost_pam_matrix (Alignment *A, int**pos1, int ns1, int*list1, int col1, int**pos2, int ns2, int*list2, int col2, Constraint_list *CL) -{ - int s1, r1, s2, r2; - static int **matrix; - - if (!matrix) matrix=read_matrice ("pam250mt"); - s1=A->order[list1[0]][0]; - s2=A->order[list2[0]][0]; - r1=pos1[list1[0]][col1]; - r2=pos2[list2[0]][col2]; - - /*dp cost function: works only with two sequences*/ - - - if ( seq2R_template_profile (CL->S,s1) ||seq2R_template_profile (CL->S,s2)) - return evaluate_aln_profile_score ( CL, s1,r1, s2, r2) -CL->nomatch*SCORE_K; - else if (r1>0 && r2>0) - { - r1--; - r2--; - - - return matrix [(CL->S)->seq[s1][r1]-'A'][(CL->S)->seq[s2][r2]-'A']*SCORE_K -CL->nomatch*SCORE_K; - - } - else - return -CL->nomatch*SCORE_K ; -} - -int get_dp_cost_pw_matrix (Alignment *A, int**pos1, int ns1, int*list1, int col1, int**pos2, int ns2, int*list2, int col2, Constraint_list *CL) -{ - int s1, r1, s2, r2; - - s1=A->order[list1[0]][0]; - s2=A->order[list2[0]][0]; - r1=pos1[list1[0]][col1]; - r2=pos2[list2[0]][col2]; - - /*dp cost function: works only with two sequences*/ - if ( seq2R_template_profile (CL->S,s1) ||seq2R_template_profile (CL->S,s2)) - return evaluate_aln_profile_score ( CL, s1,r1, s2, r2) -CL->nomatch*SCORE_K; - else if (r1>0 && r2>0) - { - r1--; - r2--; - - - return CL->M[(CL->S)->seq[s1][r1]-'A'][(CL->S)->seq[s2][r2]-'A']*SCORE_K -CL->nomatch*SCORE_K; - - } - else - return -CL->nomatch*SCORE_K ; -} - -/*********************************************************************************************/ -/* */ -/* FUNCTIONS FOR GETTING THE COST : CL->get_dp_cost */ -/* */ -/*********************************************************************************************/ - - - -int get_cdna_best_frame_dp_cost (Alignment *A, int**pos1, int ns1, int*list1, int col1, int**pos2, int ns2, int*list2, int col2, Constraint_list *CL) - { - int a, b; - int n=4; - int s; - char a1, a2; - static int l1, l2; - static Alignment *B; - static int **score; - - if ( !score)score=declare_int(3, 2); - - if (!A) - { - free_aln(B); - B=NULL; - return UNDEFINED; - } - if (!B) - { - if (ns1+ns2>2){fprintf ( stderr, "\nERROR: get_cdna_dp_cost mode is only for pair-wise ALN [FATAL]\n");crash("");} - free_aln (B); - B=copy_aln (A, NULL); - - l1=(int)strlen ( A->seq_al[list1[0]]); - for ( b=0; bseq_al[list1[0]][b]=translate_dna_codon (A->seq_al[list1[0]]+b, 'x'); - l2=(int)strlen ( A->seq_al[list2[0]]); - for ( b=0; bseq_al[list2[0]][b]=translate_dna_codon (A->seq_al[list2[0]]+b, 'x'); - } - -/*Set the frame*/ - - for ( a=0; a< 3; a++)score[a][0]=score[a][1]=0; - for ( a=col1-(n*3),b=col2-(n*3); a=l1 || b>=l2)continue; - - a1=tolower(B->seq_al[list1[0]][a]); - a2=tolower(B->seq_al[list2[0]][b]); - - score[a%3][0]+=(a1=='x' || a2=='x')?0:CL->M[a1-'A'][a2-'A']; - score[a%3][1]++; - } - - for ( a=0; a< 3; a++)score[a][0]=(score[a][1]>0)?(score[a][0]/score[a][1]):0; - if ( score[0][0]>score[1][0] && score[0][0]>score[2][0]) - s=score[0][0]; - else if ( score[1][0]>score[0][0] && score[1][0]>score[2][0]) - s=score[1][0]; - else s=score[2][0]; - - return s*SCORE_K; - - } - -int get_dp_cost_quadruplet ( Alignment *A, int**pos1, int ns1, int*list1, int col1, int**pos2, int ns2, int*list2, int col2, Constraint_list *CL) - { - int score; - - - if ( ns1==1 || ns2==1) - score=slow_get_dp_cost ( A, pos1, ns1, list1,col1, pos2, ns2, list2, col2, CL); - else - score=fast_get_dp_cost_quadruplet ( A, pos1, ns1, list1,col1, pos2, ns2, list2, col2, CL); - - return (score==UNDEFINED)?UNDEFINED:(score-SCORE_K*CL->nomatch); - } -int get_dp_cost ( Alignment *A, int**pos1, int ns1, int*list1, int col1, int**pos2, int ns2, int*list2, int col2, Constraint_list *CL) - { - int MODE=0; - int score; - - - - if (A==NULL)return 0; - - if (MODE!=2 || MODE==0 || (!CL->L && CL->M) || (!CL->L && CL->T)|| ns1==1 || ns2==1) - score=slow_get_dp_cost ( A, pos1, ns1, list1,col1, pos2, ns2, list2, col2, CL); - else if (MODE==1 || MODE==2) - score=fast_get_dp_cost ( A, pos1, ns1, list1,col1, pos2, ns2, list2, col2, CL); - else - score=0; - - - - return (score==UNDEFINED)?UNDEFINED:(score-SCORE_K*CL->nomatch); - } -int ***make_cw_lu (int **cons, int l, Constraint_list *CL); -int ***make_cw_lu (int **cons, int l, Constraint_list *CL) -{ - int ***lu; - int p, a,r; - - lu=declare_arrayN(3, sizeof (int),l,26, 2); - for ( p=0; pM[r][cons[p][a]-'A']; - lu[p][r][1]+=cons[p][a+1]; - } - } - } - return lu; -} - -int cw_profile_get_dp_cost ( Alignment *A, int**pos1, int ns1, int*list1, int col1, int**pos2, int ns2, int*list2, int col2, Constraint_list *CL) -{ - static int last_tag; - static int *pr, ***lu; - int score; - static int *list[2], ns[2], **cons[2], ref; - int eva_col,ref_col, a, p, r; - float t1, t2; - - - - - if (last_tag!=A->random_tag) - { - int n1, n2; - - last_tag=A->random_tag; - list[0]=list1;ns[0]=ns1; - list[1]=list2;ns[1]=ns2; - free_int (cons[0],-1);free_int (cons[1],-1);free_arrayN((void*)lu,3); - cons[0]=NULL;cons[1]=NULL;lu=NULL; - - n1=sub_aln2nseq_prf (A, ns[0], list[0]); - n2=sub_aln2nseq_prf (A, ns[1], list[1]); - if ( n1>1 || n2>1) - { - cons[0]=sub_aln2count_mat2 (A, ns[0], list[0]); - cons[1]=sub_aln2count_mat2 (A, ns[1], list[1]); - ref=(ns[0]>ns[1])?0:1; - lu=make_cw_lu(cons[ref],(int)strlen(A->seq_al[list[ref][0]]), CL); - } - } - - if (!lu) - { - char r1, r2; - r1=A->seq_al[list1[0]][col1]; - r2=A->seq_al[list2[0]][col2]; - if ( r1!='-' && r2!='-') - return CL->M[r1-'A'][r2-'A']*SCORE_K -SCORE_K*CL->nomatch; - else - return -SCORE_K*CL->nomatch; - } - else - { - eva_col= (ref==0)?col2:col1; - ref_col= (ref==0)?col1:col2; - pr=cons[1-ref][eva_col]; - t1=t2=0; - for (a=3; a< pr[1]; a+=3) - { - r=tolower(pr[a]); - p= pr[a+1]; - - t1+=lu[ref_col][r-'a'][0]*p; - t2+=lu[ref_col][r-'a'][1]*p; - } - score=(t2==0)?0:(t1*SCORE_K)/t2; - score -=SCORE_K*CL->nomatch; - return score; - } -} -int cw_profile_get_dp_cost_old ( Alignment *A, int**pos1, int ns1, int*list1, int col1, int**pos2, int ns2, int*list2, int col2, Constraint_list *CL) -{ - static int last_tag; - static int **cons1, **cons2; - int score; - - - if (last_tag!=A->random_tag) - { - last_tag=A->random_tag; - free_int (cons1,-1);free_int (cons2,-1); - cons1=sub_aln2count_mat2 (A, ns1, list1); - cons2=sub_aln2count_mat2 (A, ns2, list2); - } - score=cw_profile_profile (cons1[col1], cons2[col2], CL)-SCORE_K*CL->nomatch; - return score; -} - -int cw_profile_get_dp_cost_window ( Alignment *A, int**pos1, int ns1, int*list1, int col1, int**pos2, int ns2, int*list2, int col2, Constraint_list *CL) -{ - static int last_tag; - static int **cons1, **cons2; - int a, score, window_size=5, n, p1, p2; - - - if (last_tag!=A->random_tag) - { - last_tag=A->random_tag; - free_int (cons1,-1);free_int (cons2,-1); - cons1=sub_aln2count_mat2 (A, ns1, list1); - cons2=sub_aln2count_mat2 (A, ns2, list2); - } - - for (n=0,score=0,a=0; anomatch; - n++; - } - if (n>0)score/=n; - - return score; - } - -int consensus_get_dp_cost ( Alignment *A, int**pos1, int ns1, int*list1, int col1, int**pos2, int ns2, int*list2, int col2, Constraint_list *CL) - { - static int last_tag; - static char *seq1, *seq2; - - - /*Works only with matrix*/ - if (last_tag !=A->random_tag) - { - int ls1, ls2, lenls1, lenls2; - - last_tag=A->random_tag; - vfree (seq1);vfree (seq2); - seq1=sub_aln2cons_seq_mat (A, ns1, list1, "blosum62mt"); - seq2=sub_aln2cons_seq_mat (A, ns2, list2, "blosum62mt"); - ls1=list1[ns1-1];ls2=list2[ns2-1]; - lenls1=(int)strlen (A->seq_al[ls1]); lenls2=(int)strlen (A->seq_al[ls2]); - } - - return (CL->M[seq1[col1]-'A'][seq2[col2]-'A']*SCORE_K)-SCORE_K*CL->nomatch; - } - -int fast_get_dp_cost_quadruplet ( Alignment *A, int**pos1, int ns1, int*list1, int col1, int**pos2, int ns2, int*list2, int col2, Constraint_list *CL) - { - /*WARNING: WORKS ONLY WITH List to Extend*/ - /*That function does a quruple extension beween two columns by pooling the residues together*/ - - double score=0; - - int a,b, c; - int n_gap1=0; - int n_gap2=0; - - int s1, rs1, r1, t_r, t_s,t_w, q_r, q_s, q_w, s2, rs2, r2; - int **buf_pos, buf_ns, *buf_list, buf_col; - - static int **hasch1; - static int **hasch2; - - static int **n_hasch1; - static int **n_hasch2; - - static int **is_in_col1; - static int **is_in_col2; - - - if (ns2>ns1) - { - buf_pos=pos1; - buf_ns=ns1; - buf_list=list1; - buf_col=col1; - - pos1=pos2; - ns1=ns2; - list1=list2; - col1=col2; - - pos2=buf_pos; - ns2=buf_ns; - list2=buf_list; - col2=buf_col; - } - - CL=index_res_constraint_list ( CL, WE); - if ( !hasch1) - { - - hasch1=declare_int( (CL->S)->nseq, (CL->S)->max_len+1); - hasch2=declare_int( (CL->S)->nseq, (CL->S)->max_len+1); - n_hasch1=declare_int ( (CL->S)->nseq, (CL->S)->max_len+1); - n_hasch2=declare_int( (CL->S)->nseq, (CL->S)->max_len+1); - is_in_col1=declare_int( (CL->S)->nseq, (CL->S)->max_len+1); - is_in_col2=declare_int( (CL->S)->nseq, (CL->S)->max_len+1); - } - - for ( a=0; a< ns1; a++) - { - rs1= list1[a]; - s1=A->order[rs1][0]; - r1=pos1[rs1][col1]; - - if (r1<0)n_gap1++; - else - { - is_in_col1[s1][r1]=1; - for (b=1; b< CL->residue_index[s1][r1][0]; b+=3) - { - t_s=CL->residue_index[s1][r1][b]; - t_r=CL->residue_index[s1][r1][b+1]; - t_w=CL->residue_index[s1][r1][b+2]; - for ( c=1; c< CL->residue_index[t_s][t_r][0]; c+=3) - { - q_s=CL->residue_index[t_s][t_r][c]; - q_r=CL->residue_index[t_s][t_r][c+1]; - q_w=CL->residue_index[t_s][t_r][c+2]; - hasch1[q_s][q_r]+=MIN(q_w, t_w); - n_hasch1[q_s][q_r]++; - } - } - } - } - - for ( a=0; a< ns2; a++) - { - rs2=list2[a]; - s2=A->order[rs2][0]; - r2=pos2[rs2][col2]; - - if (r2<0)n_gap2++; - else - { - is_in_col2[s2][r2]=1; - for (b=1; b< CL->residue_index[s2][r2][0]; b+=3) - { - t_s=CL->residue_index[s2][r2][b]; - t_r=CL->residue_index[s2][r2][b+1]; - t_w=CL->residue_index[s2][r2][b+2]; - for ( c=1; c< CL->residue_index[t_s][t_r][0]; c+=3) - { - q_s=CL->residue_index[t_s][t_r][c]; - q_r=CL->residue_index[t_s][t_r][c+1]; - q_w=CL->residue_index[t_s][t_r][c+2]; - hasch2[q_s][q_r]+=MIN(t_w, q_w); - n_hasch2[q_s][q_r]++; - } - } - } - } - - - for ( a=0; a< ns2; a++) - { - rs2=list2[a]; - s2=A->order[rs2][0]; - r2=pos1[rs2][col2]; - - if (r2<0); - else - { - for (b=1; b< CL->residue_index[s2][r2][0]; b+=3) - { - t_s=CL->residue_index[s2][r2][b]; - t_r=CL->residue_index[s2][r2][b+1]; - - for ( c=1; c< CL->residue_index[t_s][t_r][0]; c+=3) - { - q_s=CL->residue_index[t_s][t_r][c]; - q_r=CL->residue_index[t_s][t_r][c+1]; - if ( hasch2[q_s][q_r] && hasch1[q_s][q_r]&& !(is_in_col1[q_s][q_r] || is_in_col2[q_s][q_r])) - { - score+=MIN(hasch2[q_s][q_r]*(n_hasch1[q_s][q_r]),hasch1[q_s][q_r]*(n_hasch2[q_s][q_r])); - } - else if ( hasch2[q_s][q_r] && is_in_col1[q_s][q_r]) - { - score+=hasch2[q_s][q_r]*(n_hasch1[q_s][q_r]+1); - } - else if (hasch1[q_s][q_r] && is_in_col2[q_s][q_r]) - { - score+=hasch1[q_s][q_r]*(n_hasch2[q_s][q_r]+1); - } - hasch2[q_s][q_r]=0; - n_hasch2[q_s][q_r]=0; - } - } - hasch2[s2][r2]=0; - is_in_col2[s2][r2]=0; - } - } - - - for ( a=0; a< ns1; a++) - { - rs1= list1[a]; - s1=A->order[rs1][0]; - r1=pos1[rs1][col1]; - - if (r1<0); - else - { - is_in_col1[s1][r1]=0; - hasch1[s1][r1]=0; - for (b=1; b< CL->residue_index[s1][r1][0]; b+=3) - { - t_s=CL->residue_index[s1][r1][b]; - t_r=CL->residue_index[s1][r1][b+1]; - for ( c=1; c< CL->residue_index[t_s][t_r][0]; c+=3) - { - q_s=CL->residue_index[t_s][t_r][c]; - q_r=CL->residue_index[t_s][t_r][c+1]; - hasch1[q_s][q_r]=0; - n_hasch1[q_s][q_r]=0; - } - } - } - } - - - score=(score*SCORE_K)/((ns1-n_gap1)*(ns2-n_gap2)); - score=(CL->normalise)?((score*CL->normalise)/CL->max_ext_value):score; - - return (int)(score-SCORE_K*CL->nomatch); - } - - -int fast_get_dp_cost ( Alignment *A, int**pos1, int ns1, int*list1, int col1, int**pos2, int ns2, int*list2, int col2, Constraint_list *CL) - { - /*WARNING: WORKS ONLY WITH List to Extend*/ - - double score=0; - - int a,b; - int n_gap1=0; - int n_gap2=0; - - int s1, rs1, r1, t_r, t_s, s2, rs2, r2; - static int **hasch1; - static int **hasch2; - - static int **n_hasch1; - static int **n_hasch2; - - static int **is_in_col1; - static int **is_in_col2; - - - - CL=index_res_constraint_list ( CL, WE); - if ( !hasch1) - { - - hasch1=declare_int( (CL->S)->nseq, (CL->S)->max_len+1); - hasch2=declare_int( (CL->S)->nseq, (CL->S)->max_len+1); - n_hasch1=declare_int ( (CL->S)->nseq, (CL->S)->max_len+1); - n_hasch2=declare_int( (CL->S)->nseq, (CL->S)->max_len+1); - is_in_col1=declare_int( (CL->S)->nseq, (CL->S)->max_len+1); - is_in_col2=declare_int( (CL->S)->nseq, (CL->S)->max_len+1); - } - - for ( a=0; a< ns1; a++) - { - rs1= list1[a]; - s1=A->order[rs1][0]; - r1=pos1[rs1][col1]; - - if (r1<0)n_gap1++; - else - { - is_in_col1[s1][r1]=1; - for (b=1; b< CL->residue_index[s1][r1][0]; b+=3) - { - t_s=CL->residue_index[s1][r1][b]; - t_r=CL->residue_index[s1][r1][b+1]; - hasch1[t_s][t_r]+=CL->residue_index[s1][r1][b+2]; - n_hasch1[t_s][t_r]++; - } - } - } - - - for ( a=0; a< ns2; a++) - { - rs2=list2[a]; - s2=A->order[rs2][0]; - r2=pos2[rs2][col2]; - - if (r2<0)n_gap2++; - else - { - is_in_col2[s2][r2]=1; - for (b=1; b< CL->residue_index[s2][r2][0]; b+=3) - { - t_s=CL->residue_index[s2][r2][b]; - t_r=CL->residue_index[s2][r2][b+1]; - - hasch2[t_s][t_r]+=CL->residue_index[s2][r2][b+2]; - n_hasch2[t_s][t_r]++; - } - } - } - /*return 2;*/ - - if ( ns2order[rs2][0]; - r2=pos1[rs2][col2]; - - if (r2<0); - else - { - for (b=1; b< CL->residue_index[s2][r2][0]; b+=3) - { - t_s=CL->residue_index[s2][r2][b]; - t_r=CL->residue_index[s2][r2][b+1]; - - if ( hasch2[t_s][t_r] && hasch1[t_s][t_r]&& !(is_in_col1[t_s][t_r] || is_in_col2[t_s][t_r])) - { - score+=MIN(hasch2[t_s][t_r]*(n_hasch1[t_s][t_r]),hasch1[t_s][t_r]*(n_hasch2[t_s][t_r])); - } - else if ( hasch2[t_s][t_r] && is_in_col1[t_s][t_r]) - { - score+=hasch2[t_s][t_r]*(n_hasch1[t_s][t_r]+1); - } - else if (hasch1[t_s][t_r] && is_in_col2[t_s][t_r]) - { - score+=hasch1[t_s][t_r]*(n_hasch2[t_s][t_r]+1); - } - hasch2[t_s][t_r]=0; - n_hasch2[t_s][t_r]=0; - } - hasch2[s2][r2]=0; - is_in_col2[s2][r2]=0; - } - } - - - for ( a=0; a< ns1; a++) - { - rs1= list1[a]; - s1=A->order[rs1][0]; - r1=pos1[rs1][col1]; - - if (r1<0); - else - { - is_in_col1[s1][r1]=0; - hasch1[s1][r1]=0; - for (b=1; b< CL->residue_index[s1][r1][0]; b+=3) - { - t_s=CL->residue_index[s1][r1][b]; - t_r=CL->residue_index[s1][r1][b+1]; - - hasch1[t_s][t_r]=0; - n_hasch1[t_s][t_r]=0; - } - } - } - } - else - { - for ( a=0; a< ns1; a++) - { - rs1=list1[a]; - s1=A->order[rs1][0]; - r1=pos1[rs1][col1]; - - if (r1<0); - else - { - for (b=1; b< CL->residue_index[s1][r1][0]; b+=3) - { - t_s=CL->residue_index[s1][r1][b]; - t_r=CL->residue_index[s1][r1][b+1]; - - if ( hasch1[t_s][t_r] && hasch2[t_s][t_r]&& !(is_in_col2[t_s][t_r] || is_in_col1[t_s][t_r])) - { - score+=MIN(hasch1[t_s][t_r]*(n_hasch2[t_s][t_r]),hasch2[t_s][t_r]*(n_hasch1[t_s][t_r])); - } - else if ( hasch1[t_s][t_r] && is_in_col2[t_s][t_r]) - { - score+=hasch1[t_s][t_r]*(n_hasch2[t_s][t_r]+1); - } - else if (hasch2[t_s][t_r] && is_in_col1[t_s][t_r]) - { - score+=hasch2[t_s][t_r]*(n_hasch1[t_s][t_r]+1); - } - hasch1[t_s][t_r]=0; - n_hasch1[t_s][t_r]=0; - } - hasch1[s1][r1]=0; - is_in_col1[s1][r1]=0; - } - } - - - for ( a=0; a< ns2; a++) - { - rs2= list2[a]; - s2=A->order[rs2][0]; - r2=pos1[rs2][col2]; - - if (r2<0); - else - { - is_in_col2[s2][r2]=0; - hasch1[s2][r2]=0; - for (b=1; b< CL->residue_index[s2][r2][0]; b+=3) - { - t_s=CL->residue_index[s2][r2][b]; - t_r=CL->residue_index[s2][r2][b+1]; - - hasch2[t_s][t_r]=0; - n_hasch2[t_s][t_r]=0; - } - } - } - } - score=(score*SCORE_K)/((ns1-n_gap1)*(ns2-n_gap2)); - score=(CL->normalise)?((score*CL->normalise)/CL->max_ext_value):score; - - return (int)(score-SCORE_K*CL->nomatch); - } - -int fast_get_dp_cost_2 ( Alignment *A, int**pos1, int ns1, int*list1, int col1, int**pos2, int ns2, int*list2, int col2, Constraint_list *CL) - { - double score=0; - - int a, b, s1, s2,r1, r2; - static int n_pair; - - int s; - int res_res=0; - int rs1, rs2; - static int last_ns1; - static int last_ns2; - static int last_top1; - static int last_top2; - static int **check_list; - - - /*New heuristic get dp_cost on a limited number of pairs*/ - /*This is the current default*/ - - - if ( last_ns1==ns1 && last_top1==list1[0] && last_ns2==ns2 && last_top2==list2[0]); - else - { - - - last_ns1=ns1; - last_ns2=ns2; - last_top1=list1[0]; - last_top2=list2[0]; - if ( check_list) free_int (check_list, -1); - check_list=declare_int ( (CL->S)->nseq*(CL->S)->nseq, 3); - - for ( n_pair=0,a=0; a< ns1; a++) - { - s1 =list1[a]; - rs1=A->order[s1][0]; - for ( b=0; b< ns2; b++, n_pair++) - { - s2 =list2[b]; - rs2=A->order[s2][0]; - check_list[n_pair][0]=s1; - check_list[n_pair][1]=s2; - check_list[n_pair][2]=(!CL->DM)?0:(CL->DM)->similarity_matrix[rs1][rs2]; - } - sort_int ( check_list, 3, 2, 0, n_pair-1); - } - } - - if ( !CL->evaluate_residue_pair){fprintf ( stderr, "\nWARNING: CL->evaluate_residue_pair Not set\nSet to: extend_residue_pair\n");CL->evaluate_residue_pair= extend_residue_pair;} - - - for ( a=n_pair-1; a>=0; a--) - { - s1= check_list[a][0]; - rs1=A->order[s1][0]; - r1 =pos1[s1][col1]; - - s2= check_list[a][1]; - rs2=A->order[s2][0]; - r2 =pos2[s2][col2]; - - if ( r1>0 && r2 >0) - {res_res++;} - if ( rs1>rs2) - { - SWAP (rs1, rs2); - SWAP (r1, r2); - } - - if ((s=(CL->evaluate_residue_pair)(CL, rs1, r1, rs2, r2))!=UNDEFINED) score+=s; - else - { - - return UNDEFINED; - } - if ( res_res>=CL->max_n_pair && CL->max_n_pair!=0)a=0; - } - - score=(res_res==0)?0:( (score)/res_res); - score=score-SCORE_K*CL->nomatch; - - return (int)score; - } - -int fast_get_dp_cost_3 ( Alignment *A, int**pos1, int ns1, int*list1, int col1, int**pos2, int ns2, int*list2, int col2, Constraint_list *CL) -{ - static int last_tag; - static Constraint_list *NCL; - int score; - - if ( ns1==1 && ns2==1) - { - return slow_get_dp_cost( A,pos1, ns1,list1, col1, pos2, ns2, list2, col2,CL); - } - - if ( last_tag !=A->random_tag) - { - int *ns, **ls; - - last_tag=A->random_tag; - ns=vcalloc (2, sizeof (int));ns[0]=ns1; ns[1]=ns2; - ls=vcalloc (2, sizeof (int*));ls[0]=list1; ls[1]=list2; - - NCL=progressive_index_res_constraint_list ( A, ns, ls, CL); - vfree (ls); vfree (ns); - } - score=residue_pair_extended_list ( NCL,list1[0],col1, list2[0], col2); - score=(CL->normalise)?((score*CL->normalise)/CL->max_ext_value):score; - score=(score-SCORE_K*CL->nomatch); - return score; -} - - - -int slow_get_dp_cost ( Alignment *A, int**pos1, int ns1, int*list1, int col1, int**pos2, int ns2, int*list2, int col2, Constraint_list *CL) - { - double score=0; - - int a, b, s1, s2,r1, r2; - int s; - int gap_gap=0; - int gap_res=0; - int res_res=0; - int rs1, rs2; - static int last_tag; - static int *dummy; - - if (col1<0 || col2<0) return 0; - if ( last_tag !=A->random_tag) - { - last_tag=A->random_tag; - if (!dummy) - { - dummy=vcalloc (10, sizeof(int)); - dummy[0]=1;/*Number of Amino acid types on colum*/ - dummy[1]=5;/*Length of Dummy*/ - dummy[3]='\0';/*Amino acid*/ - dummy[4]=1; /*Number of occurences*/ - dummy[5]=100; /*Frequency in the MSA column*/ - } - } - - if ( !CL->evaluate_residue_pair){fprintf ( stderr, "\nWARNING: CL->evaluate_residue_pair Not set\nSet to: extend_residue_pair\n");CL->evaluate_residue_pair= extend_residue_pair;} - - for ( a=0; a< ns1; a++) - { - for ( b=0; border[s1][0]; - r1 =pos1[s1][col1]; - - s2 =list2[b]; - rs2=A->order[s2][0]; - r2 =pos2[s2][col2]; - - if ( rs1>rs2) - { - SWAP (rs1, rs2); - SWAP (r1, r2); - } - - if (r1==0 && r2==0)gap_gap++; - else if ( r1<0 || r2<0) gap_res++; - else - { - res_res++; - if ((s=(CL->evaluate_residue_pair)(CL, rs1, r1, rs2, r2))!=UNDEFINED) score+=s; - else - { - - return UNDEFINED; - } - } - - } - } - - - score=(res_res==0)?0:( (score)/res_res); - - return score-SCORE_K*CL->nomatch; - - } -int slow_get_dp_cost_pc ( Alignment *A, int**pos1, int ns1, int*list1, int col1, int**pos2, int ns2, int*list2, int col2, Constraint_list *CL) - { - double score=0; - - int a, b, s1, s2,r1, r2; - int s; - int gap_gap=0; - int gap_res=0; - int res_res=0; - int rs1, rs2; - static int last_tag; - static int *dummy; - - if (col1<0 || col2<0) return 0; - if ( last_tag !=A->random_tag) - { - last_tag=A->random_tag; - if (!dummy) - { - dummy=vcalloc (10, sizeof(int)); - dummy[0]=1;/*Number of Amino acid types on colum*/ - dummy[1]=5;/*Length of Dummy*/ - dummy[3]='\0';/*Amino acid*/ - dummy[4]=1; /*Number of occurences*/ - dummy[5]=100; /*Frequency in the MSA column*/ - } - } - - if ( !CL->evaluate_residue_pair){fprintf ( stderr, "\nWARNING: CL->evaluate_residue_pair Not set\nSet to: extend_residue_pair\n");CL->evaluate_residue_pair= extend_residue_pair;} - - for ( a=0; a< ns1; a++) - { - for ( b=0; border[s1][0]; - r1 =pos1[s1][col1]; - - s2 =list2[b]; - rs2=A->order[s2][0]; - r2 =pos2[s2][col2]; - - if ( rs1>rs2) - { - SWAP (rs1, rs2); - SWAP (r1, r2); - } - - if (r1==0 && r2==0)gap_gap++; - else if ( r1<0 || r2<0) gap_res++; - else - { - res_res++; - if ((s=residue_pair_extended_list_pc(CL, rs1, r1, rs2, r2))!=UNDEFINED) score+=s; - else - { - - return UNDEFINED; - } - } - - } - } - - - score=(res_res==0)?0:( (score)/res_res); - - return score; - - } -int slow_get_dp_cost_test ( Alignment *A, int**pos1, int ns1, int*list1, int col1, int**pos2, int ns2, int*list2, int col2, Constraint_list *CL) - { - double score=0; - - int a, b, s1, s2,r1, r2; - int gap_gap=0, gap_res=0, res_res=0, rs1, rs2; - - for ( a=0; a< ns1; a++) - { - for ( b=0; border[s1][0]; - r1 =pos1[s1][col1]; - - s2 =list2[b]; - rs2=A->order[s2][0]; - r2 =pos2[s2][col2]; - - if ( rs1>rs2) - { - SWAP (rs1, rs2); - SWAP (r1, r2); - } - - if (r1==0 && r2==0)gap_gap++; - else if ( r1<0 || r2<0) gap_res++; - else - { - res_res++; - score+=residue_pair_extended_list_raw (CL, rs1, r1, rs2, r2); - } - } - } - - return (int)(score*10)/(ns1*ns2); - } - -int sw_get_dp_cost ( Alignment *A, int**pos1, int ns1, int*list1, int col1, int**pos2, int ns2, int*list2, int col2, Constraint_list *CL) - { - int a, b; - int s1,r1,rs1; - int s2,r2,rs2; - - - - - for ( a=0; a< ns1; a++) - { - s1 =list1[a]; - rs1=A->order[s1][0]; - r1 =pos1[s1][col1]; - if ( r1<=0)continue; - for ( b=0; b< ns2; b++) - { - - - s2 =list2[b]; - rs2=A->order[s2][0]; - r2 =pos2[s2][col2]; - - if (r2<=0)continue; - - - if (sw_pair_is_defined (CL, rs1, r1, rs2, r2)==UNDEFINED)return UNDEFINED; - } - } - - return slow_get_dp_cost ( A, pos1, ns1, list1, col1, pos2, ns2, list2,col2, CL); - - } - - - - - - - - -int get_domain_dp_cost ( Alignment *A, int**pos1, int ns1, int*list1, int col1, int**pos2, int ns2, int*list2, int col2,Constraint_list *CL , int scale , int gop, int gep) - - { - int a, b, s1, s2,r1, r2; - static int *entry; - int *r; - int score=0; - int gap_gap=0; - int gap_res=0; - int res_res=0; - int rs1, rs2; - int flag_list_is_aa_sub_mat=0; - int p; - -/*Needs to be cleanned After Usage*/ - - - - if ( entry==NULL) entry=vcalloc (LIST_N_FIELDS , sizeof (int)); - - for (a=0; a< ns1; a++) - { - s1=list1[a]; - rs1=A->order[s1][0]; - for ( b=0; border[s2][0]; - - entry[SEQ1]=rs1; - entry[SEQ2]=rs2; - r1=entry[R1]=pos1[s1][col1]; - r2=entry[R2]=pos2[s2][col2]; - - if ( !flag_list_is_aa_sub_mat) - { - if ( r1==r2 && rs1==rs2) - { - - return UNDEFINED; - } - else if (r1==0 && r2==0) - { - gap_gap++; - } - else if ( r1<=0 || r2<=0) - { - gap_res++; - } - else if ((r=main_search_in_list_constraint ( entry,&p,4,CL))!=NULL) - { - res_res++; - - if (r[WE]!=UNDEFINED) - { - score+=(r[WE]*SCORE_K)+scale; - } - else - { - fprintf ( stderr, "**"); - return UNDEFINED; - } - } - } - } - } - return score; - score=((res_res+gap_res)==0)?0:score/(res_res+gap_res); - return score; - } - -/*********************************************************************************************/ -/* */ -/* FUNCTIONS FOR ANALYSING AL OR MATRIX */ -/* */ -/*********************************************************************************************/ - -int aln2n_res ( Alignment *A, int start, int end) - { - int a, b; - int score=0; - - for ( a=start; anseq; b++)score+=!is_gap(A->seq_al[b][a]); - return score; - } - -float get_gop_scaling_factor ( int **matrix,float id, int l1, int l2) - { - return id* get_avg_matrix_mm(matrix, AA_ALPHABET); - } - -float get_avg_matrix_mm ( int **matrix, char *alphabet) - { - int a, b; - float naa; - float gop; - int l; - - - l=MIN(20,(int)strlen (alphabet)); - for (naa=0, gop=0,a=0; a=0){naa++;tot+=matrix[alphabet[a]-'A'][alphabet[b]-'A'];} - - } - return tot/naa; -} - -float measure_matrix_enthropy (int **matrix,char *alphabet) - { - - int a, b; - double s, p, q, h=0, tq=0; - float lambda; - float *frequency; - /*frequencies tqken from psw*/ - - frequency=set_aa_frequencies (); - - - lambda=compute_lambda(matrix,alphabet); - fprintf ( stderr, "\nLambda=%f", (float)lambda); - - for ( a=0; a< 20; a++) - for ( b=0; b<=a; b++) - { - s=matrix[alphabet[a]-'A'][alphabet[b]-'A']; - - - p=frequency[alphabet[a]-'A']*frequency[alphabet[b]-'A']; - - if ( p==0)continue; - - q=exp(lambda*s+log(p)); - - tq+=q; - h+=q*log(q/p)*log(2); - - } - - fprintf ( stderr,"\ntq=%f\n", (float)tq); - - return (float) h; - } -float compute_lambda (int **matrix,char *alphabet) -{ - - int a, b; - double lambda, best_lambda=0, delta, best_delta=0, p, tq,s; - static float *frequency; - - if ( !frequency)frequency=set_aa_frequencies (); - - for ( lambda=0.001; lambda<1; lambda+=0.005) - { - tq=0; - for ( a=0; a< 20; a++) - for ( b=0; b<20; b++) - { - p=frequency[alphabet[a]-'A']*frequency[alphabet[b]-'A']; - s=matrix[alphabet[a]-'A'][alphabet[b]-'A']; - tq+=exp(lambda*s+log(p)); - } - delta=fabs(1-tq); - if (lambda==0.001) - { - best_delta=delta; - best_lambda=lambda; - } - else - { - if (delta1)break; - } - fprintf ( stderr, "\nRESULT: %f %f ", best_lambda, best_delta); - return (float) best_lambda; -} - - - -float evaluate_random_match (char *mat, int n, int len,char *alp) -{ - int **matrix; - matrix=read_matrice ( mat); - fprintf ( stderr, "Matrix=%15s ", mat); - return evaluate_random_match2 (matrix, n,len,alp); - -} - -float evaluate_random_match2 (int **matrix, int n, int len,char *alp) -{ - int a, b, c, d, c1, c2, tot; - static int *list; - static float *freq; - float score_random=0; - float score_id=0; - float score_good=0; - float tot_len=0; - float tot_try=0; - - - if ( !list) - { - vsrand(0); - freq=set_aa_frequencies (); - list=vcalloc ( 10000, sizeof (char)); - } - - for (tot=0,c=0,a=0;a<20; a++) - { - b=freq[alp[a]-'A']*1000; - tot+=b; - for (d=0; d=0){score_good+=matrix[list[c1]-'A'][list[c2]-'A']; tot_len++;} - } - - - score_random=score_random/tot_len; - score_id=score_id/tot_len; - score_good=score_good/tot_len; - - fprintf ( stderr, "Random=%8.3f Id=%8.3f Good=%8.3f [%7.2f]\n",score_random, score_id, score_good, tot_len/tot_try); - - return score_random; -} -float compare_two_mat (char *mat1,char*mat2, int n, int len,char *alp) -{ - int **matrix1, **matrix2; - - evaluate_random_match (mat1, n, len,alp); - evaluate_random_match (mat2, n, len,alp); - matrix1=read_matrice ( mat1); - matrix2=read_matrice ( mat2); - matrix1=rescale_matrix(matrix1, 10, alp); - matrix2=rescale_matrix(matrix2, 10, alp); - compare_two_mat_array(matrix1,matrix2, n, len,alp); - return 0; -} - - -int ** rescale_two_mat (char *mat1,char*mat2, int n, int len,char *alp) -{ - float lambda; - int **matrix1, **matrix2; - - lambda=measure_lambda2 (mat1, mat2, n, len, alp)*10; - - fprintf ( stderr, "\nLambda=%.2f", lambda); - matrix2=read_matrice(mat2); - matrix2=neg_matrix2pos_matrix(matrix2); - matrix2=rescale_matrix( matrix2, lambda,"abcdefghiklmnpqrstvwxyz"); - - matrix1=read_matrice(mat1); - matrix1=neg_matrix2pos_matrix(matrix1); - matrix1=rescale_matrix( matrix1,10,"abcdefghiklmnpqrstvwxyz"); - - output_matrix_header ( "stdout", matrix2, alp); - evaluate_random_match2(matrix1, 1000, 100, alp); - evaluate_random_match2(matrix2, 1000, 100, alp); - compare_two_mat_array(matrix1,matrix2, n, len,alp); - - return matrix2; -} -float measure_lambda2(char *mat1,char*mat2, int n, int len,char *alp) -{ - int **m1, **m2; - float f1, f2; - - m1=read_matrice (mat1); - m2=read_matrice (mat2); - - m1=neg_matrix2pos_matrix(m1); - m2=neg_matrix2pos_matrix(m2); - - f1=measure_matrix_pos_avg( m1, alp); - f2=measure_matrix_pos_avg( m2, alp); - - return f1/f2; -} - - -float measure_lambda (char *mat1,char*mat2, int n, int len,char *alp) -{ - int c; - int **matrix1, **matrix2, **mat; - float a; - float best_quality=0, quality=0, best_lambda=0; - - matrix1=read_matrice ( mat1); - matrix2=read_matrice ( mat2); - matrix1=rescale_matrix(matrix1, 10, alp); - matrix2=rescale_matrix(matrix2, 10, alp); - - for (c=0, a=0.1; a< 2; a+=0.05) - { - fprintf ( stderr, "Lambda=%.2f\n", a); - mat=duplicate_int (matrix2,-1,-1); - mat=rescale_matrix(mat, a, alp); - quality=compare_two_mat_array(matrix1,mat, n, len,alp); - quality=MAX((-quality),quality); - - if (c==0 || (best_quality>quality)) - { - c=1; - fprintf ( stderr, "*"); - best_quality=quality; - best_lambda=a; - } - - - evaluate_random_match2(mat, 1000, 100, alp); - evaluate_random_match2(matrix1, 1000, 100, alp); - free_int (mat, -1); - } - - return best_lambda; - -} - -float compare_two_mat_array (int **matrix1,int **matrix2, int n, int len,char *alp) -{ - int a, b, c, d, c1, c2, tot; - static int *list; - static float *freq; - float delta_random=0; - float delta2_random=0; - - float delta_id=0; - float delta2_id=0; - - float delta_good=0; - float delta2_good=0; - - float delta; - - float tot_len=0; - float tot_try=0; - - - - if ( !list) - { - vsrand(0); - freq=set_aa_frequencies (); - list=vcalloc ( 10000, sizeof (char)); - } - - for (tot=0,c=0,a=0;a<20; a++) - { - b=freq[alp[a]-'A']*1000; - tot+=b; - for (d=0; d=0 || matrix2[list[c1]-'A'][list[c2]-'A'] ) - { - delta=matrix1[list[c1]-'A'][list[c2]-'A']-matrix2[list[c1]-'A'][list[c2]-'A']; - delta_good+=delta; - delta2_good+=MAX(delta,(-delta)); - tot_len++; - } - } - - - delta_random=delta_random/tot_len; - delta2_random=delta2_random/tot_len; - - - delta_id=delta_id/tot_len; - delta2_id=delta2_id/tot_len; - - delta_good=delta_good/tot_len; - delta2_good=delta2_good/tot_len; - - - fprintf ( stderr, "\tRand=%8.3f %8.3f\n\tId =%8.3f %8.3f\n\tGood=%8.3f %8.3f\n",delta_random, delta2_random, delta_id,delta2_id, delta_good,delta2_good); - - return delta_good; -} - - - -int ** rescale_matrix ( int **matrix, float lambda, char *alp) -{ - int a, b; - - - for ( a=0; a< 20; a++) - for ( b=0; b< 20; b++) - { - matrix[alp[a]-'A'][alp[b]-'A']= matrix[alp[a]-'A'][alp[b]-'A']*lambda; - } - return matrix; -} -int **mat2inverted_mat (int **matrix, char *alp) -{ - int a, b, min, max, v,l; - int c1,c2, C1, C2; - - l=(int)strlen (alp); - min=max=matrix[alp[0]-'A'][alp[0]-'A']; - for ( a=0; amax)?p[a][b]:max; - } - for (b='a'; b<='z'; b++) - { - p[a][b]=((p[a][b]-min)/(max-min))*10; - - } - } - - return p; -} -Constraint_list * choose_extension_mode ( char *extend_mode, Constraint_list *CL) -{ - if ( !CL) - { - fprintf ( stderr, "\nWarning: CL was not set"); - return CL; - } - else if ( strm ( extend_mode, "rna0")) - { - CL->evaluate_residue_pair=residue_pair_extended_list; - CL->get_dp_cost =slow_get_dp_cost; - } - else if ( strm ( extend_mode, "rna1") || strm (extend_mode, "rna")) - { - CL->evaluate_residue_pair=residue_pair_extended_list4rna1; - CL->get_dp_cost =slow_get_dp_cost; - } - else if ( strm ( extend_mode, "rna2")) - { - CL->evaluate_residue_pair=residue_pair_extended_list4rna2; - CL->get_dp_cost =slow_get_dp_cost; - } - else if ( strm ( extend_mode, "rna3")) - { - CL->evaluate_residue_pair=residue_pair_extended_list4rna3; - CL->get_dp_cost =slow_get_dp_cost; - } - else if ( strm ( extend_mode, "rna4")) - { - CL->evaluate_residue_pair=residue_pair_extended_list4rna4; - CL->get_dp_cost =slow_get_dp_cost; - } - else if ( strm ( extend_mode, "pc") && !CL->M) - { - CL->evaluate_residue_pair=residue_pair_extended_list_pc; - CL->get_dp_cost =slow_get_dp_cost; - } - else if ( strm ( extend_mode, "triplet") && !CL->M) - { - CL->evaluate_residue_pair=residue_pair_extended_list; - CL->get_dp_cost =get_dp_cost; - } - else if ( strm ( extend_mode, "relative_triplet") && !CL->M) - { - CL->evaluate_residue_pair=residue_pair_relative_extended_list; - CL->get_dp_cost =fast_get_dp_cost_2; - } - else if ( strm ( extend_mode, "g_coffee") && !CL->M) - { - CL->evaluate_residue_pair=residue_pair_extended_list_g_coffee; - CL->get_dp_cost =slow_get_dp_cost; - } - else if ( strm ( extend_mode, "g_coffee_quadruplets") && !CL->M) - { - CL->evaluate_residue_pair=residue_pair_extended_list_g_coffee_quadruplet; - CL->get_dp_cost =slow_get_dp_cost; - } - else if ( strm ( extend_mode, "fast_triplet") && !CL->M) - { - CL->evaluate_residue_pair=residue_pair_extended_list; - CL->get_dp_cost =fast_get_dp_cost; - } - else if ( strm ( extend_mode, "test_triplet") && !CL->M) - { - CL->evaluate_residue_pair=residue_pair_extended_list; - CL->get_dp_cost =fast_get_dp_cost_3; - } - else if ( strm ( extend_mode, "very_fast_triplet") && !CL->M) - { - CL->evaluate_residue_pair=residue_pair_extended_list; - CL->get_dp_cost =fast_get_dp_cost_2; - } - else if ( strm ( extend_mode, "slow_triplet") && !CL->M) - { - CL->evaluate_residue_pair=residue_pair_extended_list; - CL->get_dp_cost =slow_get_dp_cost; - } - else if ( strm ( extend_mode, "mixt") && !CL->M) - { - CL->evaluate_residue_pair=residue_pair_extended_list_mixt; - CL->get_dp_cost=slow_get_dp_cost; - } - else if ( strm ( extend_mode, "quadruplet") && !CL->M) - { - CL->evaluate_residue_pair=residue_pair_extended_list_quadruplet; - CL->get_dp_cost =get_dp_cost_quadruplet; - } - else if ( strm ( extend_mode, "test") && !CL->M) - { - CL->evaluate_residue_pair=residue_pair_test_function; - CL->get_dp_cost =slow_get_dp_cost_test; - } - else if ( strm ( extend_mode, "ssp")) - { - - CL->evaluate_residue_pair=evaluate_ssp_matrix_score; - CL->get_dp_cost=slow_get_dp_cost; - CL->normalise=1; - } - else if ( strm ( extend_mode, "tm")) - { - - CL->evaluate_residue_pair=evaluate_tm_matrix_score; - CL->get_dp_cost=slow_get_dp_cost; - CL->normalise=1; - } - else if ( strm ( extend_mode, "matrix")) - { - - CL->evaluate_residue_pair=evaluate_matrix_score; - CL->get_dp_cost=cw_profile_get_dp_cost; - CL->normalise=1; - } - else if ( strm ( extend_mode, "curvature")) - { - CL->evaluate_residue_pair=evaluate_curvature_score; - CL->get_dp_cost=slow_get_dp_cost; - CL->normalise=1; - } - else if ( CL->M) - { - CL->evaluate_residue_pair=evaluate_matrix_score; - CL->get_dp_cost=cw_profile_get_dp_cost; - CL->normalise=1; - } - else - { - fprintf ( stderr, "\nERROR: %s is an unknown extend_mode[FATAL:%s]\n", extend_mode, PROGRAM); - myexit (EXIT_FAILURE); - } - return CL; -} -int ** combine_two_matrices ( int **mat1, int **mat2) -{ - int naa, re1, re2, Re1, Re2, a, b, u, l; - - naa=(int)strlen (BLAST_AA_ALPHABET); - for ( a=0; a< naa; a++) - for ( b=0; b< naa; b++) - { - re1=BLAST_AA_ALPHABET[a]; - re2=BLAST_AA_ALPHABET[b]; - if (re1=='*' || re2=='*'); - else - { - - Re1=toupper(re1);Re2=toupper(re2); - re1-='A';re2-='A';Re1-='A';Re2-='A'; - - l=mat1[re1][re2]; - u=mat2[re1][re2]; - mat1[re1][re2]=mat2[re1][re2]=l; - mat2[Re1][Re2]=mat2[Re1][Re2]=u; - } - } - return mat1; -} - -/* Off the shelves evaluations */ -/*********************************************************************************************/ -/* */ -/* OFF THE SHELVES EVALUATION */ -/* */ -/*********************************************************************************************/ - - -int lat_sum_pair (Alignment *A, char *mat) -{ - int a,b,c, tot=0, v1, v2, score; - int **matrix; - - matrix=read_matrice (mat); - - for (a=0; anseq; a++) - for ( b=0; bnseq; b++) - { - for (c=1; clen_aln; c++) - { - char r11, r12; - - r11=A->seq_al[a][c-1]; - r12=A->seq_al[a][c]; - if (is_gap(r11) || is_gap(r12))continue; - else v1=matrix[r11-'A'][r12-'A']; - - r11=A->seq_al[b][c-1]; - r12=A->seq_al[b][c]; - if (is_gap(r11) || is_gap(r12))continue; - else v2=matrix[r11-'A'][r12-'A']; - - score+=(v1-v2)*(v1-v2); - tot++; - } - } - score=(100*score)/tot; - return (float)score; -} - - - -/* Off the shelves evaluations */ -/*********************************************************************************************/ -/* */ -/* OFF THE SHELVES EVALUATION */ -/* */ -/*********************************************************************************************/ - -int comp_pair ( int len,char *sa, char *sb, int seqA, int seqB,int *tgp_a, int *tgp_b,int gap_op,int gap_ex, int start, int end,int **matrix,int MODE); -int score_gap ( int len, char *sa, char *sb,int seqA, int seqB,int *tgp_a, int *tgp_b, int op, int ex, int start, int end, int MODE); -void evaluate_tgp_decoded_chromosome ( Alignment *A,int **TGP,int start, int end,int MODE); -int gap_type ( char a, char b); - - - -float sum_pair ( Alignment*A,char *mat_name, int gap_op, int gap_ex) - { - int a,b; - float pscore=0; - - int start, end; - static int **tgp; - double score=0; - int MODE=1; - int **matrix; - - matrix=read_matrice (mat_name); - matrix=mat2inverted_mat (matrix, "acdefghiklmnpqrstvwy"); - - start=0; - end=A->len_aln-1; - - if ( tgp==NULL) - tgp= declare_int (A->nseq,2); - - evaluate_tgp_decoded_chromosome ( A,tgp,start, end,MODE); - - for ( a=0; a< A->nseq-1; a++) - for (b=a+1; bnseq; b++) - { - pscore= comp_pair (A->len_aln,A->seq_al[a], A->seq_al[b],a, b,tgp[a], tgp[b],gap_op,gap_ex, start, end,matrix, MODE); - score+=pscore*100; - /*score+=(double)pscore*(int)(PARAM->OFP)->weight[A->order[a][0]][A->order[b][0]];*//*NO WEIGHTS*/ - } - - score=score/(A->nseq*A->nseq); - - return (float)score; - } - -int comp_pair ( int len,char *sa, char *sb, int seqA, int seqB,int *tgp_a, int *tgp_b,int gap_op,int gap_ex, int start, int end,int **matrix,int MODE) - { - int score=0, a, ex; - - - - if ( end-start>=0) - score+= score_gap (len, sa,sb, seqA, seqB,tgp_a, tgp_b, gap_op,gap_ex, start, end,MODE); - - ex=gap_ex; - - - for (a=start; a<=end; a++) - { - if ( is_gap(sa[a]) || is_gap(sb[a])) - { - if (is_gap(sa[a]) && is_gap(sb[a])); - else - { - - score +=ex; - } - } - else - { - score += matrix [sa[a]-'A'][sb[a]-'A']; - - } - } - return score; - } -int score_gap ( int len, char *sa, char *sb,int seqA, int seqB,int *tgp_a, int *tgp_b, int op, int ex, int start, int end, int MODE) - { - int a,b; - int ga=0,gb=0; - int score=0; - - - int right_gap, left_gap; - - - - - - int type; - int flag1=0; - int flag2=0; - int continue_loop; - int sequence_pattern[2][3]; - int null_gap; - int natural_gap=1; - - /*op= gor_gap_op ( 0,seqA, seqB, PARAM); - ex= gor_gap_ext ( 0, seqA, seqB, PARAM);*/ - - - - for (a=start; a<=end; ++a) - { - - type= gap_type ( sa[a], sb[a]); - - if ( type==2 && ga<=gb) - {++ga; - gb=0; - score += op; - } - else if (type==1 && ga >=gb) - { - ++gb; - ga=0; - score +=op; - } - else if (type==0) - { - ga++; - gb++; - } - - else if (type== -1) - ga=gb=0; - - - if (natural_gap==0) - { - if ( type== -1) - flag1=flag2=0; - else if ( type==0) - flag2=1; - else if ( (type==flag1) && flag2==1) - { - score+=op; - flag2=0; - } - else if ( (type!=flag1) && flag2==1) - { - flag1=type; - flag2=0; - } - else if ( flag2==0) - flag1=type; - } - } - /*gap_type -/-:0, X/X:-1 X/-:1, -/X:2*/ -/*evaluate the pattern of gaps*/ - - continue_loop=1; - sequence_pattern[0][0]=sequence_pattern[1][0]=0; - for ( a=start; a<=end && continue_loop==1; a++) - { - left_gap= gap_type ( sa[a], sb[a]); - if ( left_gap!= 0) - { - if ( left_gap==-1) - { - sequence_pattern[0][0]=sequence_pattern[1][0]=0; - continue_loop=0; - } - else - { - null_gap=0; - for (b=a; b<=end && continue_loop==1; b++) - {type=gap_type( sa[b], sb[b]); - if (type==0) - null_gap++; - if ( type!=left_gap && type !=0) - { - continue_loop=0; - sequence_pattern[2-left_gap][0]= b-a-null_gap; - sequence_pattern [1-(2-left_gap)][0]=0; - } - } - if ( continue_loop==1) - { - continue_loop=0; - sequence_pattern[2-left_gap][0]= b-a-null_gap; - sequence_pattern [1-(2-left_gap)][0]=0; - } - } - } - } - - sequence_pattern[0][2]=sequence_pattern[1][2]=1; - for ( a=start; a<=end; a++) - { - if ( !is_gap(sa[a])) - sequence_pattern[0][2]=0; - if ( !is_gap(sb[a])) - sequence_pattern[1][2]=0; - - } - continue_loop=1; - sequence_pattern[0][1]=sequence_pattern[1][1]=0; - for ( a=end; a>=start && continue_loop==1; a--) - { - right_gap= gap_type ( sa[a], sb[a]); - if ( right_gap!= 0) - { - if ( right_gap==-1) - { - sequence_pattern[0][1]=sequence_pattern[1][1]=0; - continue_loop=0; - } - else - { - null_gap=0; - for (b=a; b>=start && continue_loop==1; b--) - {type=gap_type( sa[b], sb[b]); - if ( type==0) - null_gap++; - if ( type!=right_gap && type !=0) - { - continue_loop=0; - sequence_pattern[2-right_gap][1]= a-b-null_gap; - sequence_pattern [1-(2-right_gap)][1]=0; - } - } - if ( continue_loop==1) - { - continue_loop=0; - sequence_pattern[2-right_gap][1]= a-b-null_gap; - sequence_pattern [1-(2-right_gap)][1]=0; - } - } - } - } - -/* -printf ( "\n*****************************************************"); -printf ( "\n%c\n%c", sa[start],sb[start]); -printf ( "\n%d %d %d",sequence_pattern[0][0] ,sequence_pattern[0][1], sequence_pattern[0][2]); -printf ( "\n%d %d %d",sequence_pattern[1][0] ,sequence_pattern[1][1], sequence_pattern[1][2]); -printf ( "\n*****************************************************"); -*/ - -/*correct the scoring*/ - - - if ( MODE==0) - { - if ( FABS(tgp_a[0])>1 && (FABS(tgp_a[0])>FABS( tgp_b[0]))) - score-= (sequence_pattern[0][0]>0)?op:0; - if ( FABS(tgp_b[0])>1 && (FABS(tgp_b[0])> FABS(tgp_a[0]))) - score-= (sequence_pattern[1][0]>0)?op:0; - } - else if ( MODE ==1 || MODE ==2) - { - if ( FABS(tgp_a[0])>1 && (FABS(tgp_a[0])>FABS( tgp_b[0])) && (tgp_a[1]!=1 || sequence_pattern[0][2]==0)) - score-= (sequence_pattern[0][0]>0)?op:0; - if ( FABS(tgp_b[0])>1 && (FABS(tgp_b[0])> FABS(tgp_a[0])) && (tgp_b[1]!=1 || sequence_pattern[1][2]==0)) - score-= (sequence_pattern[1][0]>0)?op:0; - - - if ( tgp_a[0]>=1 && tgp_a[0]==tgp_b[0]) - score -=(sequence_pattern[0][0]>0)?op:0; - if ( tgp_b[0]>=1 && tgp_a[0]==tgp_b[0]) - score-= (sequence_pattern[1][0]>0)?op:0; - - - if ( tgp_a[1]==1 && sequence_pattern[0][2]==0) - score -= ( sequence_pattern[0][1]>0)?op:0; - else if ( tgp_a[1]==1 && sequence_pattern[0][2]==1 && tgp_a[0]<=0) - score -= ( sequence_pattern[0][1]>0)?op:0; - - - if ( tgp_b[1]==1 && sequence_pattern[1][2]==0) - score -= ( sequence_pattern[1][1]>0)?op:0; - else if ( tgp_b[1]==1 && sequence_pattern[1][2]==1 && tgp_b[0]<=0) - score -= ( sequence_pattern[1][1]>0)?op:0; - - if ( MODE==2) - { - if ( tgp_a[0]>0) - score -=sequence_pattern[0][0]*ex; - if ( tgp_b[0]>0) - score -= sequence_pattern[1][0]*ex; - if ( tgp_a[1]>0) - score-=sequence_pattern[0][1]*ex; - if ( tgp_b[1]>0) - score-=sequence_pattern[1][1]*ex; - } - } - - - return score; - - - - } -void evaluate_tgp_decoded_chromosome ( Alignment *A,int **TGP,int start, int end,int MODE) - { - int a,b; - int continue_loop; - - - - if (MODE==11 || MODE==13|| MODE==14) - { - if ( start==0)for ( a=0; anseq; a++)TGP[a][0]=-1; - else for ( a=0; anseq; a++)TGP[a][0]=(is_gap(A->seq_al[a][start-1])==1)?0:1; - - if ( end==A->len_aln-1)for ( a=0; anseq; a++)TGP[a][1]=-1; - else for ( a=0; anseq; a++)TGP[a][1]=(is_gap(A->seq_al[a][start-1])==1)?0:1; - } - else - { - /* 0: in the middle of the alignement - 1: natural end - 2: q left gap is the continuation of another gap that was open outside the bloc ( don't open it) - */ - - for ( a=0; a< A->nseq; a++) - { - TGP[a][0]=1; - TGP[a][1]=1; - for ( b=0; b< start; b++) - if ( !is_gap(A->seq_al[a][b])) - TGP[a][0]=0; - if ( start>0 ) - { - if (is_gap(A->seq_al[a][start-1]) && TGP[a][0]!=1) - {TGP[a][0]=-1; - continue_loop=1; - for ( b=(start-1); b>=0 && continue_loop==1; b--) - {TGP[a][0]-= ( is_gap(A->seq_al[a][b])==1)?1:0; - continue_loop= (is_gap(A->seq_al[a][b])==1)?continue_loop:0; - } - } - } - else if (is_gap(A->seq_al[a][start-1]) && TGP[a][0]==1) - { - TGP[a][0]=1; - continue_loop=1; - for ( b=(start-1); b>=0 && continue_loop==1; b--) - {TGP[a][0]+= ( is_gap(A->seq_al[a][b])==1)?1:0; - continue_loop= (is_gap(A->seq_al[a][b])==1)?continue_loop:0; - } - } - for ( b=(A->len_aln-1); b>end; b--) - if ( !is_gap(A->seq_al[a][b])) - TGP[a][1]=0; - } - } - } -int gap_type ( char a, char b) - { - /*gap_type -/-:0, X/X:-1 X/-:1, -/STAR:2*/ - - if ( is_gap(a) && is_gap(b)) - return 0; - else if ( !is_gap(a) && !is_gap(b)) - return -1; - else if ( !is_gap(a)) - return 1; - else if ( !is_gap(b)) - return 2; - else - return -1; - } - -/*********************************COPYRIGHT NOTICE**********************************/ -/*© Centro de Regulacio Genomica */ -/*and */ -/*Cedric Notredame */ -/*Tue Oct 27 10:12:26 WEST 2009. */ -/*All rights reserved.*/ -/*This file is part of T-COFFEE.*/ -/**/ -/* T-COFFEE is free software; you can redistribute it and/or modify*/ -/* it under the terms of the GNU General Public License as published by*/ -/* the Free Software Foundation; either version 2 of the License, or*/ -/* (at your option) any later version.*/ -/**/ -/* T-COFFEE is distributed in the hope that it will be useful,*/ -/* but WITHOUT ANY WARRANTY; without even the implied warranty of*/ -/* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the*/ -/* GNU General Public License for more details.*/ -/**/ -/* You should have received a copy of the GNU General Public License*/ -/* along with Foobar; if not, write to the Free Software*/ -/* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA*/ -/*............................................... |*/ -/* If you need some more information*/ -/* cedric.notredame@europe.com*/ -/*............................................... |*/ -/**/ -/**/ -/* */ -/*********************************COPYRIGHT NOTICE**********************************/ diff --git a/binaries/src/tcoffee/t_coffee_source/evaluate_dirichlet.c b/binaries/src/tcoffee/t_coffee_source/evaluate_dirichlet.c deleted file mode 100644 index 960c142..0000000 --- a/binaries/src/tcoffee/t_coffee_source/evaluate_dirichlet.c +++ /dev/null @@ -1,599 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include "io_lib_header.h" -#include "util_lib_header.h" -#include "define_header.h" -#include "dp_lib_header.h" -static float dm[]={ -0.178091, -1.18065, 0.270671, 0.039848, 0.017576, 0.016415, 0.014268, 0.131916, 0.012391, 0.022599, 0.020358, 0.030727, 0.015315, 0.048298, 0.053803, 0.020662, 0.023612, 0.216147, 0.147226, 0.065438, 0.003758, 0.009621, -0.056591, -1.35583, 0.021465, 0.0103, 0.011741, 0.010883, 0.385651, 0.016416, 0.076196, 0.035329, 0.013921, 0.093517, 0.022034, 0.028593, 0.013086, 0.023011, 0.018866, 0.029156, 0.018153, 0.0361, 0.07177, 0.419641, -0.0960191, -6.66436 ,0.561459, 0.045448, 0.438366, 0.764167, 0.087364, 0.259114, 0.21494, 0.145928, 0.762204, 0.24732, 0.118662, 0.441564, 0.174822, 0.53084, 0.465529, 0.583402, 0.445586, 0.22705, 0.02951, 0.12109, -0.078123, -2.08141, 0.070143, 0.01114, 0.019479, 0.094657, 0.013162, 0.048038, 0.077, 0.032939, 0.576639, 0.072293, 0.02824, 0.080372, 0.037661, 0.185037, 0.506783, 0.073732, 0.071587, 0.042532, 0.011254, 0.028723, -0.0834977, -2.08101, 0.041103, 0.014794, 0.00561, 0.010216, 0.153602, 0.007797, 0.007175, 0.299635, 0.010849, 0.999446, 0.210189, 0.006127, 0.013021, 0.019798, 0.014509, 0.012049, 0.035799, 0.180085, 0.012744, 0.026466, -0.0904123, -2.56819, 0.115607, 0.037381, 0.012414, 0.018179, 0.051778, 0.017255, 0.004911, 0.796882, 0.017074, 0.285858, 0.075811, 0.014548, 0.015092, 0.011382, 0.012696, 0.027535, 0.088333, 0.94434, 0.004373, 0.016741, -0.114468, -1.76606, 0.093461, 0.004737, 0.387252, 0.347841, 0.010822, 0.105877, 0.049776, 0.014963, 0.094276, 0.027761, 0.01004, 0.187869, 0.050018, 0.110039, 0.038668, 0.119471, 0.065802, 0.02543, 0.003215, 0.018742, -0.0682132, -4.98768, 0.452171, 0.114613, 0.06246, 0.115702, 0.284246, 0.140204, 0.100358, 0.55023, 0.143995, 0.700649, 0.27658, 0.118569, 0.09747, 0.126673, 0.143634, 0.278983, 0.358482, 0.66175, 0.061533, 0.199373, -0.234585, -0.0995, 0.005193, 0.004039, 0.006722, 0.006121, 0.003468, 0.016931, 0.003647, 0.002184, 0.005019, 0.00599, 0.001473, 0.004158, 0.009055, 0.00363, 0.006583, 0.003172, 0.00369, 0.002967, 0.002772,0.002686}; - -double int_logB (int *i, int n) - { - static double *array; - int a; - - if ( array==NULL)array=vcalloc ( 1000, sizeof (double)); - - for ( a=0; a< n; a++) - array[a]=(double)i[a]; - return double_logB(array, n); - } -double float_logB (float *i, int n) - { - static double *array; - int a; - - if ( array==NULL)array=vcalloc ( 1000, sizeof (double)); - for ( a=0; a< n; a++) - array[a]=(double)i[a]; - return double_logB(array, n); - } - -double double_logB (double *x, int n) - { - double vx=0; - double result=0; - int i; - - - for ( i=0; iALPHA[a][b]+c); - - return lup; - } - -double double_logB2(int j, double *n,Mixture *D) - { - double vx=0; - double result=0; - int i; - - static double ***lup; - - - - if ( lup==NULL)lup=make_lup_table (D); - - - - for ( i=0; in_aa; i++)vx+=(double)n[i]+D->ALPHA[j][i]; - for ( i=0; in_aa; i++) - { - - - result+=lup[j][i][(int)n[i]]; - } - return result-lgamma2(vx); - } - -double compute_exponant ( double *n, int j, Mixture *D) - { - - if ( j>=9)fprintf ( stderr, "\nPB: j=%d", j); - - return double_logB2(j, n,D)-D->double_logB_alpha[j]; - } - - -double *compute_matrix_p ( double *n,int Nseq) - { - - /* - reads in a frquency list of various amino acids: - - sum freq(aa)=1 (gaps are ignored) - - aa[1]=x1 - aa[2]=x2 - .... - - Outputs a similar list with frequencies 'Blurred' using a pam250 mt - */ - - - - static int **matrix; - double *R; - int a, b; - double v,min, tot; - - - if ( !matrix) - { - matrix=read_matrice ( "pam250mt"); - } - - R=vcalloc ( 26, sizeof (double)); - - - for ( a=0; a<26; a++) - { - if (!is_aa(a+'a'))continue; - if ( n[a]==0)continue; - - for ( b=0; b< 26; b++) - { - if (!is_aa(b+'a'))continue; - v=n[a]*(matrix[a][b]); - if ( v>0) - { - R[b]+=v+(10*n[a]); - } - } - } - - min=R[0]; - for ( min=R[0],a=0; a< 26; a++)min=MIN(min,R[a]); - for ( tot=0, a=0; a< 26; a++) {R[a]-=min;tot+=R[a];} - for ( a=0; a< 26; a++)if ( is_aa(a+'a')){R[a]=R[a]*((float)(100)/(float)tot);} - return R; - } - - -double *compute_dirichlet_p ( double *n,int Nseq) - { - /* - Given a list of frequenceies measured for the residues, this function returns - the p_values associated with each residue in the column - */ - - int a, b; - double X_LIST[100]; - double sum, log_sum, max; - static Mixture *D; - static double *R; - - - - if (!D) - { - D=read_dirichlet (NULL); - - D->n_aa=20; - R=vcalloc ( D->n_aa, sizeof (double)); - D->double_logB_alpha=vcalloc (D->N_COMPONENT , sizeof (double)); - - D->exponant_list=vcalloc (D->N_COMPONENT , sizeof (double)); - precompute_log_B ( D->double_logB_alpha,D); - D->alpha_tot=vcalloc (D->N_COMPONENT , sizeof (double)); - for ( a=0; aN_COMPONENT; a++) - for ( b=0; b< D->n_aa; b++) - D->alpha_tot[a]+=D->ALPHA[a][b]; - } - - - - for ( D->tot_n=0,a=0; a< D->n_aa; a++)D->tot_n+=(double)n[a]; - max=D->exponant_list[0]=compute_exponant ( n, 0, D); - for ( a=1; aN_COMPONENT; a++) - { - D->exponant_list[a]=compute_exponant ( n, a,D); - max= ( max< D->exponant_list[a])?D->exponant_list[a]:max; - } - for ( a=1; aN_COMPONENT; a++)D->exponant_list[a]=D->exponant_list[a]-max; - - - for ( sum=0,log_sum=0,a=0; a< D->n_aa; a++) - { - sum+=X_LIST[a]=compute_X (n, a,D); - } - log_sum=log(sum); - - - for (a=0; an_aa; a++) - { - R[a]=(log(X_LIST[a])-log_sum); - } - - - /* - printf ( "\n["); - for ( a=0;a< n_aa; a++)printf ("%d ", n[a]); - printf ("] score=%f",(float) result ); - - fprintf ( stderr, "\nRESULT=%f", (float)result); - exit(0); - */ - return R; - - } - -void precompute_log_B ( double *table,Mixture *D) - { - int a; - for ( a=0; a< D->N_COMPONENT; a++) - { - table[a]=double_logB ( D->ALPHA[a], D->n_aa); - } - } -double compute_X (double *n,int i,Mixture *D) - { - int j; - double term1, term2,result; - double **alpha; - double *q; - - - - alpha=D->ALPHA; - q=D->DM_Q; - - for (result=0, j=0; jN_COMPONENT; j++) - { - term1=exp (D->exponant_list[j])*q[j]; - term2=(alpha[j][i]+(double)n[i])/(D->alpha_tot[j]+D->tot_n); - result+=term1*term2; - } - return result; - } -Mixture * read_dirichlet ( char *name) - { - FILE *fp; - int a,b, c; - float f; - Mixture *D; - - - D=vcalloc ( 1, sizeof (Mixture)); - - - D->N_COMPONENT=9; - D->ALPHA=vcalloc (9, sizeof (double*)); - for ( a=0; a< 9; a++) - D->ALPHA[a]=vcalloc (20, sizeof (double)); - D->DM_Q=vcalloc (9, sizeof (double)); - - if (name!=NULL) - { - fp=vfopen ( name, "r"); - for ( a=0; a< 9; a++) - { - fscanf(fp, "%f\n", &f); - D->DM_Q[a]=(double)f; - fscanf(fp, "%f", &f); - - for ( b=0; b<20; b++) - { - fscanf(fp, "%f", &f); - D->ALPHA[a][b]=(double)f; - } - fscanf(fp, "\n"); - } - for ( a=0; a< 9; a++) - { - fprintf(stderr, "\n%f\n",(float)D->DM_Q[a] ); - - for ( b=0; b<20; b++) - { - fprintf(stderr, "%f ", (float)D->ALPHA[a][b]); - } - fprintf(stderr, "\n"); - } - fprintf ( stderr, "\nN_C=%d",D->N_COMPONENT ); - vfclose ( fp); - } - else - { - for (c=0, a=0; a< 9;a++) - { - D->DM_Q[a]=dm[c++]; - for (b=0; b<20; b++) - D->ALPHA[a][b]=dm[c++]; - } - } - - return D; - } -int dirichlet_code( char aa) - { - - char x; - - x=tolower (aa); - - if ( (x<'a') || (x>'z')) - crash ( "CODE UNDEFINED"); - else if ( x<='a') - return x-'a'; - else if ( x<='i') - return x-('a'+1); - else if ( x<= 'n') - return x-('a'+2); - else if ( x<='t') - return x-('a'+3); - else if ( x<='w') - return x-('a'+4); - else if ( x=='y') - return x-('a'+5); - else - { - crash ("ERROR in dirichlet_code"); - return 0; - } - return 0; - - } - - -static const double -two52= 4.50359962737049600000e+15, /* 0x43300000, 0x00000000 */ -half= 5.00000000000000000000e-01, /* 0x3FE00000, 0x00000000 */ -one = 1.00000000000000000000e+00, /* 0x3FF00000, 0x00000000 */ -pi = 3.14159265358979311600e+00, /* 0x400921FB, 0x54442D18 */ -a0 = 7.72156649015328655494e-02, /* 0x3FB3C467, 0xE37DB0C8 */ -a1 = 3.22467033424113591611e-01, /* 0x3FD4A34C, 0xC4A60FAD */ -a2 = 6.73523010531292681824e-02, /* 0x3FB13E00, 0x1A5562A7 */ -a3 = 2.05808084325167332806e-02, /* 0x3F951322, 0xAC92547B */ -a4 = 7.38555086081402883957e-03, /* 0x3F7E404F, 0xB68FEFE8 */ -a5 = 2.89051383673415629091e-03, /* 0x3F67ADD8, 0xCCB7926B */ -a6 = 1.19270763183362067845e-03, /* 0x3F538A94, 0x116F3F5D */ -a7 = 5.10069792153511336608e-04, /* 0x3F40B6C6, 0x89B99C00 */ -a8 = 2.20862790713908385557e-04, /* 0x3F2CF2EC, 0xED10E54D */ -a9 = 1.08011567247583939954e-04, /* 0x3F1C5088, 0x987DFB07 */ -a10 = 2.52144565451257326939e-05, /* 0x3EFA7074, 0x428CFA52 */ -a11 = 4.48640949618915160150e-05, /* 0x3F07858E, 0x90A45837 */ -tc = 1.46163214496836224576e+00, /* 0x3FF762D8, 0x6356BE3F */ -tf = -1.21486290535849611461e-01, /* 0xBFBF19B9, 0xBCC38A42 */ -/* tt = -(tail of tf) */ -tt = -3.63867699703950536541e-18, /* 0xBC50C7CA, 0xA48A971F */ -t0 = 4.83836122723810047042e-01, /* 0x3FDEF72B, 0xC8EE38A2 */ -t1 = -1.47587722994593911752e-01, /* 0xBFC2E427, 0x8DC6C509 */ -t2 = 6.46249402391333854778e-02, /* 0x3FB08B42, 0x94D5419B */ -t3 = -3.27885410759859649565e-02, /* 0xBFA0C9A8, 0xDF35B713 */ -t4 = 1.79706750811820387126e-02, /* 0x3F9266E7, 0x970AF9EC */ -t5 = -1.03142241298341437450e-02, /* 0xBF851F9F, 0xBA91EC6A */ -t6 = 6.10053870246291332635e-03, /* 0x3F78FCE0, 0xE370E344 */ -t7 = -3.68452016781138256760e-03, /* 0xBF6E2EFF, 0xB3E914D7 */ -t8 = 2.25964780900612472250e-03, /* 0x3F6282D3, 0x2E15C915 */ -t9 = -1.40346469989232843813e-03, /* 0xBF56FE8E, 0xBF2D1AF1 */ -t10 = 8.81081882437654011382e-04, /* 0x3F4CDF0C, 0xEF61A8E9 */ -t11 = -5.38595305356740546715e-04, /* 0xBF41A610, 0x9C73E0EC */ -t12 = 3.15632070903625950361e-04, /* 0x3F34AF6D, 0x6C0EBBF7 */ -t13 = -3.12754168375120860518e-04, /* 0xBF347F24, 0xECC38C38 */ -t14 = 3.35529192635519073543e-04, /* 0x3F35FD3E, 0xE8C2D3F4 */ -u0 = -7.72156649015328655494e-02, /* 0xBFB3C467, 0xE37DB0C8 */ -u1 = 6.32827064025093366517e-01, /* 0x3FE4401E, 0x8B005DFF */ -u2 = 1.45492250137234768737e+00, /* 0x3FF7475C, 0xD119BD6F */ -u3 = 9.77717527963372745603e-01, /* 0x3FEF4976, 0x44EA8450 */ -u4 = 2.28963728064692451092e-01, /* 0x3FCD4EAE, 0xF6010924 */ -u5 = 1.33810918536787660377e-02, /* 0x3F8B678B, 0xBF2BAB09 */ -v1 = 2.45597793713041134822e+00, /* 0x4003A5D7, 0xC2BD619C */ -v2 = 2.12848976379893395361e+00, /* 0x40010725, 0xA42B18F5 */ -v3 = 7.69285150456672783825e-01, /* 0x3FE89DFB, 0xE45050AF */ -v4 = 1.04222645593369134254e-01, /* 0x3FBAAE55, 0xD6537C88 */ -v5 = 3.21709242282423911810e-03, /* 0x3F6A5ABB, 0x57D0CF61 */ -s0 = -7.72156649015328655494e-02, /* 0xBFB3C467, 0xE37DB0C8 */ -s1 = 2.14982415960608852501e-01, /* 0x3FCB848B, 0x36E20878 */ -s2 = 3.25778796408930981787e-01, /* 0x3FD4D98F, 0x4F139F59 */ -s3 = 1.46350472652464452805e-01, /* 0x3FC2BB9C, 0xBEE5F2F7 */ -s4 = 2.66422703033638609560e-02, /* 0x3F9B481C, 0x7E939961 */ -s5 = 1.84028451407337715652e-03, /* 0x3F5E26B6, 0x7368F239 */ -s6 = 3.19475326584100867617e-05, /* 0x3F00BFEC, 0xDD17E945 */ -r1 = 1.39200533467621045958e+00, /* 0x3FF645A7, 0x62C4AB74 */ -r2 = 7.21935547567138069525e-01, /* 0x3FE71A18, 0x93D3DCDC */ -r3 = 1.71933865632803078993e-01, /* 0x3FC601ED, 0xCCFBDF27 */ -r4 = 1.86459191715652901344e-02, /* 0x3F9317EA, 0x742ED475 */ -r5 = 7.77942496381893596434e-04, /* 0x3F497DDA, 0xCA41A95B */ -r6 = 7.32668430744625636189e-06, /* 0x3EDEBAF7, 0xA5B38140 */ -w0 = 4.18938533204672725052e-01, /* 0x3FDACFE3, 0x90C97D69 */ -w1 = 8.33333333333329678849e-02, /* 0x3FB55555, 0x5555553B */ -w2 = -2.77777777728775536470e-03, /* 0xBF66C16C, 0x16B02E5C */ -w3 = 7.93650558643019558500e-04, /* 0x3F4A019F, 0x98CF38B6 */ -w4 = -5.95187557450339963135e-04, /* 0xBF4380CB, 0x8C0FE741 */ -w5 = 8.36339918996282139126e-04, /* 0x3F4B67BA, 0x4CDAD5D1 */ -w6 = -1.63092934096575273989e-03; /* 0xBF5AB89D, 0x0B9E43E4 */ - -static const double zero= 0.00000000000000000000e+00; - -static double sin_pi(double x) -{ - double y,z; - int n,ix; - - ix=(*(long long *)&x)>>32; - ix &= 0x7fffffff; - - if(ix<0x3fd00000) return sin(pi*x); - y = -x; /* x is assume negative */ - - /* - * argument reduction, make sure inexact flag not raised if input - * is an integer - */ - z = floor(y); - if(z!=y) { /* inexact anyway */ - y *= 0.5; - y = 2.0*(y - floor(y)); /* y = |x| mod 2.0 */ - n = (int) (y*4.0); - } else { - if(ix>=0x43400000) { - y = zero; n = 0; /* y must be even */ - } else { - if(ix<0x43300000) z = y+two52; /* exact */ - n=(*(long long *)&x); - n &= 1; - y = n; - n<<= 2; - } - } - switch (n) { - case 0: y = sin(pi*y); break; - case 1: - case 2: y = cos(pi*(0.5-y)); break; - case 3: - case 4: y = sin(pi*(one-y)); break; - case 5: - case 6: y = -cos(pi*(y-1.5)); break; - default: y = sin(pi*(y-2.0)); break; - } - return -y; -} - -double lgamma2 ( double x) -{ - int s; - return lgamma_r ( x, &s); -} -double lgamma_r(double x, int *signgamp) -{ - double t,y,z,nadj=0,p,p1,p2,p3,q,r,w; - int i,hx,lx,ix; - - hx=(*(long long *)&x)>>32; - lx=(*(long long *)&x); - - /* purge off +-inf, NaN, +-0, and negative arguments */ - *signgamp = 1; - ix = hx&0x7fffffff; - if(ix>=0x7ff00000) return x*x; - if((ix|lx)==0) return one/fabs(x); - if(ix<0x3b900000) { /* |x|<2**-70, return -log(|x|) */ - if(hx<0) { - *signgamp = -1; - return -log(-x); - } else return -log(x); - } - if(hx<0) { - if(ix>=0x43300000) /* |x|>=2**52, must be -integer */ - return x/zero; - t = sin_pi(x); - if(t==zero) return one/fabs(t); /* -integer */ - nadj = log(pi/fabs(t*x)); - if(t=0x3FE76944) {y = one-x; i= 0;} - else if(ix>=0x3FCDA661) {y= x-(tc-one); i=1;} - else {y = x; i=2;} - } else { - r = zero; - if(ix>=0x3FFBB4C3) {y=2.0-x;i=0;} /* [1.7316,2] */ - else if(ix>=0x3FF3B4C4) {y=x-tc;i=1;} /* [1.23,1.73] */ - else {y=x-one;i=2;} - } - switch(i) { - case 0: - z = y*y; - p1 = a0+z*(a2+z*(a4+z*(a6+z*(a8+z*a10)))); - p2 = z*(a1+z*(a3+z*(a5+z*(a7+z*(a9+z*a11))))); - p = y*p1+p2; - r += (p-0.5*y); break; - case 1: - z = y*y; - w = z*y; - p1 = t0+w*(t3+w*(t6+w*(t9 +w*t12))); /* parallel comp */ - p2 = t1+w*(t4+w*(t7+w*(t10+w*t13))); - p3 = t2+w*(t5+w*(t8+w*(t11+w*t14))); - p = z*p1-(tt-w*(p2+y*p3)); - r += (tf + p); break; - case 2: - p1 = y*(u0+y*(u1+y*(u2+y*(u3+y*(u4+y*u5))))); - p2 = one+y*(v1+y*(v2+y*(v3+y*(v4+y*v5)))); - r += (-0.5*y + p1/p2); - } - } - else if(ix<0x40200000) { /* x < 8.0 */ - i = (int)x; - t = zero; - y = x-(double)i; - p = y*(s0+y*(s1+y*(s2+y*(s3+y*(s4+y*(s5+y*s6)))))); - q = one+y*(r1+y*(r2+y*(r3+y*(r4+y*(r5+y*r6))))); - r = half*y+p/q; - z = one; /* lgamma(1+s) = log(s) + lgamma(s) */ - switch(i) { - case 7: z *= (y+6.0); /* FALLTHRU */ - case 6: z *= (y+5.0); /* FALLTHRU */ - case 5: z *= (y+4.0); /* FALLTHRU */ - case 4: z *= (y+3.0); /* FALLTHRU */ - case 3: z *= (y+2.0); /* FALLTHRU */ - r += log(z); break; - } - /* 8.0 <= x < 2**58 */ - } else if (ix < 0x43900000) { - t = log(x); - z = one/x; - y = z*z; - w = w0+z*(w1+y*(w2+y*(w3+y*(w4+y*(w5+y*w6))))); - r = (x-half)*(t-one)+w; - } else - /* 2**58 <= x <= inf */ - r = x*(log(x)-one); - if(hx<0) r = nadj - r; - return r; -} -/*********************************COPYRIGHT NOTICE**********************************/ -/*© Centro de Regulacio Genomica */ -/*and */ -/*Cedric Notredame */ -/*Tue Oct 27 10:12:26 WEST 2009. */ -/*All rights reserved.*/ -/*This file is part of T-COFFEE.*/ -/**/ -/* T-COFFEE is free software; you can redistribute it and/or modify*/ -/* it under the terms of the GNU General Public License as published by*/ -/* the Free Software Foundation; either version 2 of the License, or*/ -/* (at your option) any later version.*/ -/**/ -/* T-COFFEE is distributed in the hope that it will be useful,*/ -/* but WITHOUT ANY WARRANTY; without even the implied warranty of*/ -/* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the*/ -/* GNU General Public License for more details.*/ -/**/ -/* You should have received a copy of the GNU General Public License*/ -/* along with Foobar; if not, write to the Free Software*/ -/* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA*/ -/*............................................... |*/ -/* If you need some more information*/ -/* cedric.notredame@europe.com*/ -/*............................................... |*/ -/**/ -/**/ -/* */ -/*********************************COPYRIGHT NOTICE**********************************/ diff --git a/binaries/src/tcoffee/t_coffee_source/evaluate_for_domain.c b/binaries/src/tcoffee/t_coffee_source/evaluate_for_domain.c deleted file mode 100644 index 6f53a12..0000000 --- a/binaries/src/tcoffee/t_coffee_source/evaluate_for_domain.c +++ /dev/null @@ -1,311 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include "io_lib_header.h" -#include "util_lib_header.h" -#include "define_header.h" - -#include "dp_lib_header.h" - - - - - -int evaluate_moca_domain ( Alignment *A, Constraint_list *CL) - { - /* - function documentation: start - int evaluate_moca_domain ( Alignment *A, Constraint_list *CL) - - This function evaluates a multiple local alignment - If the alignmnent is to be accepted, return score - Else return UNDEFINED - - function documentation: end - */ - - - int score=0; - int start, end, a, b; - Alignment *B=NULL; - char alp[200]; - - - score=UNDEFINED; - end=0; - start=0; - - sprintf ( alp, "acefghiklmnpqrstuvwy"); - - if ( A->len_aln>0) - { - score=(int)(output_maln_pval ( "/dev/null", A)*-100); - return score; - } - else - return 0; - - - - - while ((end+1)!=A->len_aln) - { - end=get_nol_aln_border (A,start,GO_RIGHT); - if ( end==start)break; - fprintf ( stderr, "\n**%d %d (%d)",start, end, A->len_aln); - B=copy_aln (A, B); - B=extract_aln (B,start,end); - for (a=0; anseq; a++) - for ( b=0; blen_aln; b++) - if ( is_gap (B->seq_al[a][b]))B->seq_al[a][b]=alp[(int)rand()%(strlen (alp))]; - - - start=end; - fprintf ( stderr, "==>%d",(int)(output_maln_pval ( "/dev/null", B)*-100) ); - if ( score==UNDEFINED)score=(int)(output_maln_pval ( "/dev/null", B)*-100); - else - score=MAX(score,(int)(output_maln_pval ( "/dev/null", B)*-100)); - - - } - free_aln (B); - return score; - } - - -int moca_slow_get_dp_cost ( Alignment *A, int**pos1, int ns1, int*list1, int col1, int**pos2, int ns2, int*list2, int col2, Constraint_list *CL) - { - int s; - - s=slow_get_dp_cost ( A, pos1, ns1, list1, col1, pos2, ns2, list2, col2, CL); - - - if ( s==UNDEFINED)return UNDEFINED; - else return s+(CL->moca)->moca_scale; - - } -int moca_evaluate_matrix_score ( Constraint_list *CL, int s1, int r1, int s2, int r2) -{ - /* - function documentation: start - int moca_residue_pair_extended_list ( Constraint_list *CL, int s1, int r1, int s2, int r2 ) - - THIS FUNCTION RETURNS THE EXTENDED SCORE OF A PAIR OF RESIDUES - it is meant to work with local aln pair_wise routines, by using (CL->moca)->forbiden_residues - a constant value is substracted from the extended score. - - This function is meant toi be used with omain_dp, therefore, it allows the match of identical residues. - - function documentation: end - */ - - if (unpack_seq_residues ( &s1, &r1, &s2, &r2, CL->packed_seq_lu)==UNDEFINED)return UNDEFINED; - else if ( (CL->moca)->forbiden_residues && ((CL->moca)->forbiden_residues[s1][r1]==UNDEFINED ||(CL->moca)->forbiden_residues[s2][r2]==UNDEFINED))return UNDEFINED; - else if ( s1==s2 && r1 == r2) return UNDEFINED; - else return evaluate_matrix_score(CL, s1, r1, s2, r2); - } - - - - -int moca_residue_pair_extended_list ( Constraint_list *CL, int s1, int r1, int s2, int r2 ) - { - /* - function documentation: start - int moca_residue_pair_extended_list ( Constraint_list *CL, int s1, int r1, int s2, int r2 ) - - THIS FUNCTION RETURNS THE EXTENDED SCORE OF A PAIR OF RESIDUES - it is meant to work with local aln pair_wise routines, by using (CL->moca)->forbiden_residues - a constant value is substracted from the extended score. - - This function is meant toi be used with omain_dp, therefore, it allows the match of identical residues. - - function documentation: end - */ - - if (unpack_seq_residues ( &s1, &r1, &s2, &r2, CL->packed_seq_lu)==UNDEFINED)return UNDEFINED; - else if ( (CL->moca)->forbiden_residues && ((CL->moca)->forbiden_residues[s1][r1]==UNDEFINED ||(CL->moca)->forbiden_residues[s2][r2]==UNDEFINED))return UNDEFINED; - else if ( s1==s2 && r1 == r2) return UNDEFINED; - else return residue_pair_extended_list (CL, s1, r1, s2, r2); - - - } - -int **cache_cl_with_moca_domain (Alignment *A, Constraint_list *CL) - { - /* - function documentation: start - int **cache_cl_with_moca_domain (Alignment *A, Constraint_list *CL) - - Read a multiple alignmnent - Given all the residues (CL->S)->seq[x][y] contained in the maln - Set (CL->moca)->forbiden_residues[x][y] to UNDEFINED - return (CL->moca)->forbiden_residues - - WARNING - You must make sure that the evalation strategy uses (CL->moca)->forbiden_residues - (CL->moca)->forbiden_residues[0][1]->first residue(1) of First sequence(0) - function documentation: end - */ - - int **pos; - int a, b; - - pos=aln2pos_simple(A, A->nseq); - - if ( !(CL->moca)->forbiden_residues)(CL->moca)->forbiden_residues=declare_int ((CL->S)->nseq, strlen ((CL->S)->seq[0])+1); - - for ( a=0; anseq;a++) - for ( b=0; b< A->len_aln; b++) - (CL->moca)->forbiden_residues[A->order[a][0]][pos[a][b]]=UNDEFINED; - - free_int (pos, -1); - return (CL->moca)->forbiden_residues; - } -Alignment *make_moca_nol_aln ( Alignment *A, Constraint_list *CL) -{ - - return A; -} - -/*********************************************************************************************/ -/* */ -/* DOMAIN Z SCORE EVALUATION */ -/* */ -/*********************************************************************************************/ - -int evaluate_domain_aln_z_score (Alignment *A, int start, int end,Constraint_list *CL, char *alphabet) - { - int a; - static Alignment *B; - double score, ref_score; - double N_EVAL=1000; - double sum=0, sum2=0; - - - if ( A==NULL || A->nseq==0 || A->len_aln==0)return 0; - ref_score=(double)evaluate_domain_aln (A,start, end,CL); - for (sum=0, sum2=0,a=0;anseq, end-start, alphabet); - score=(double)evaluate_domain_aln (B,0,B->len_aln,CL); - sum+=score; - sum2+=score*score; - } - score=(return_z_score(ref_score, sum, sum2, N_EVAL)*100)/A->len_aln; - - return(int) score; - } - -int evaluate_domain_aln ( Alignment *A, int start, int end,Constraint_list *CL) - { - int a, b, c; - int score, c1, c2; - static int **mat; - - /* - function documentation: start - - This function uses a pam250 to evaluate the sum of pairs score of A, - between position start(included) to position end (exluded), - - the numbering starts 0 - function documentation: end - */ - - if ( !mat)mat=read_matrice ( "pam250mt"); - - for ( c=start, score=0; cnseq-1; a++) - for ( b=a+1; b< A->nseq; b++) - { - c1=tolower(A->seq_al[a][c]); - c2=tolower(A->seq_al[b][c]); - - if ( !is_gap (c1) && !is_gap(c2))score+=mat[c1-'A'][c2-'A']; - } - } - return score; - } - -int unpack_seq_residues ( int *s1, int *r1, int *s2, int *r2, int **packed_seq_lu) - { - /* Given a series of sequences concatenated (packed), and the coordinates of two residues - This function translates the coordinates into the real ones and allows evaluation - Note for this function residues go from [1->N], sequences from [0->N[ - This is true for in and out comming residues number - NOTE: The sequence cannot be guessed when the residues r1 or r2 are GAPS, therefore UNDEFINED is returned - NOTE: Concatenated sequences are separated with X, such residues cause an UNDEFINED to be returned - */ - - if ( packed_seq_lu==NULL)return 1; - else if ( s1[0]!=s2[0])return 1; - else if ( r1[0]<=0 || r2[0]<=0)return UNDEFINED; - else if ( packed_seq_lu[r1[0]][0]==UNDEFINED || packed_seq_lu[r2[0]][0]==UNDEFINED)return UNDEFINED; - else - { - s1[0]=packed_seq_lu[r1[0]][0]; - r1[0]=packed_seq_lu[r1[0]][1]; - - s2[0]=packed_seq_lu[r2[0]][0]; - r2[0]=packed_seq_lu[r2[0]][1]; - } - return 1; - } - -Alignment * unpack_seq_aln ( Alignment *A,Constraint_list *CL) - { - int a, b, r_seq, r_start, r_len; - - - if (!CL->packed_seq_lu) return A; - - for (a=0; a< A->nseq; a++) - { - r_seq =CL->packed_seq_lu[A->order[a][1]+1][0]; - r_start=CL->packed_seq_lu[A->order[a][1]+1][1]; - - A->order[a][0]=r_seq; - A->order[a][1]=r_start-1; - - for ( r_len=0,b=0; b< A->len_aln; b++)r_len+=!is_gap(A->seq_al[a][b]); - sprintf ( A->name[a],"%s_%d_%d", (A->S)->name[r_seq], r_start, r_start+r_len-1); - } - - return A; - } - - -/*********************************COPYRIGHT NOTICE**********************************/ -/*© Centro de Regulacio Genomica */ -/*and */ -/*Cedric Notredame */ -/*Tue Oct 27 10:12:26 WEST 2009. */ -/*All rights reserved.*/ -/*This file is part of T-COFFEE.*/ -/**/ -/* T-COFFEE is free software; you can redistribute it and/or modify*/ -/* it under the terms of the GNU General Public License as published by*/ -/* the Free Software Foundation; either version 2 of the License, or*/ -/* (at your option) any later version.*/ -/**/ -/* T-COFFEE is distributed in the hope that it will be useful,*/ -/* but WITHOUT ANY WARRANTY; without even the implied warranty of*/ -/* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the*/ -/* GNU General Public License for more details.*/ -/**/ -/* You should have received a copy of the GNU General Public License*/ -/* along with Foobar; if not, write to the Free Software*/ -/* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA*/ -/*............................................... |*/ -/* If you need some more information*/ -/* cedric.notredame@europe.com*/ -/*............................................... |*/ -/**/ -/**/ -/* */ -/*********************************COPYRIGHT NOTICE**********************************/ diff --git a/binaries/src/tcoffee/t_coffee_source/evaluate_for_struc.c b/binaries/src/tcoffee/t_coffee_source/evaluate_for_struc.c deleted file mode 100644 index 1292642..0000000 --- a/binaries/src/tcoffee/t_coffee_source/evaluate_for_struc.c +++ /dev/null @@ -1,2674 +0,0 @@ -#include -#include -#include -#include -#include -#include - -#include "io_lib_header.h" -#include "util_lib_header.h" -#include "define_header.h" -#include "dp_lib_header.h" -void print_atom ( Atom*A); - -float **** quantile_apdb_filtration ( Alignment *A, float ****residues, Constraint_list *CL,Pdb_param *PP, FILE *fp); -float **** irmsdmin_apdb_filtration ( Alignment *A, float ****residues, Constraint_list *CL,Pdb_param *PP, FILE *fp); -int apdb ( int argc, char *argv[]) - { - - Constraint_list *CL=NULL; - Sequence *S=NULL; - Alignment *A=NULL; - Alignment *EA=NULL; - Pdb_param *pdb_param; - - Fname *F=NULL; - char *file_name; - int a,c; - - int n_pdb; - -/*PARAMETERS VARIABLES*/ - int garbage; - char *parameters; - FILE *fp_parameters; - - int quiet; - char *se_name; - FILE *le=NULL; - - char **list_file; - int n_list; - char **struc_to_use; - int n_struc_to_use; - - char *aln; - char *repeat_seq; - char *repeat_pdb; - - char *color_mode; - char *comparison_io; - - int n_excluded_nb; - - float maximum_distance; - float similarity_threshold; - float md_threshold; - - - int print_rapdb; - - char *outfile; - char *run_name; - - char *apdb_outfile; - char *cache; - - char **out_aln_format; - int n_out_aln_format; - - char *output_res_num; - char *local_mode; - float filter; - int filter_aln; - int irmsd_graph; - int nirmsd_graph; - int n_template_file; - char **template_file_list; - char *mode; - int prot_min_sim; - int prot_max_sim; - int prot_min_cov; - int pdb_min_sim; - int pdb_max_sim; - int pdb_min_cov; - - - - char *prot_blast_server; - char *pdb_blast_server; - - - char *pdb_db; - char *prot_db; - - - argv=standard_initialisation (argv, &argc); - -/*PARAMETER PROTOTYPE: READ PARAMETER FILE */ - declare_name (parameters); - get_cl_param(\ - /*argc*/ argc ,\ - /*argv*/ argv ,\ - /*output*/ &le ,\ - /*Name*/ "-parameters" ,\ - /*Flag*/ &garbage ,\ - /*TYPE*/ "R_F" ,\ - /*OPTIONAL?*/ OPTIONAL ,\ - /*MAX Nval*/ 1 ,\ - /*DOC*/ "Read the files in the parameter file" ,\ - /*Parameter*/ ¶meters ,\ - /*Def 1*/ "NULL" ,\ - /*Def 2*/ "stdin" ,\ - /*Min_value*/ "any" ,\ - /*Max Value*/ "any" \ - ); - if ( parameters && parameters[0]) - { - argv[argc]=vcalloc ( VERY_LONG_STRING, sizeof(char)); - a=0; - fp_parameters=vfopen (parameters, "r"); - while ((c=fgetc (fp_parameters))!=EOF)argv[1][a++]=c; - vfclose (fp_parameters); - argv[argc][a]='\0'; - argc++; - argv=break_list ( argv, &argc, "=:;, \n"); - } -/*PARAMETER PROTOTYPE*/ - declare_name (se_name); - get_cl_param(\ - /*argc*/ argc ,\ - /*argv*/ argv ,\ - /*output*/ &le ,\ - /*Name*/ "-quiet" ,\ - /*Flag*/ &quiet ,\ - /*TYPE*/ "W_F" ,\ - /*OPTIONAL?*/ OPTIONAL ,\ - /*MAX Nval*/ 1 ,\ - /*DOC*/ "ND" ,\ - /*Parameter*/ &se_name ,\ - /*Def 1*/ "stderr" ,\ - /*Def 2*/ "/dev/null" ,\ - /*Min_value*/ "any" ,\ - /*Max Value*/ "any" \ - ); - - le=vfopen ( se_name, "w"); - fprintf ( le, "\nPROGRAM: %s\n",argv[0]); - -/*PARAMETER PROTOTYPE: IN */ - list_file=declare_char ( 200, STRING); - n_list=get_cl_param(\ - /*argc*/ argc ,\ - /*argv*/ argv ,\ - /*output*/ &le ,\ - /*Name*/ "-in" ,\ - /*Flag*/ &garbage ,\ - /*TYPE*/ "S" ,\ - /*OPTIONAL?*/ OPTIONAL ,\ - /*MAX Nval*/ 200 ,\ - /*DOC*/ "ND" ,\ - /*Parameter*/ list_file ,\ - /*Def 1*/ "",\ - /*Def 2*/ "stdin" ,\ - /*Min_value*/ "any" ,\ - /*Max Value*/ "any" \ - ); -/*PARAMETER PROTOTYPE: IN */ - struc_to_use=declare_char ( 200, STRING); - n_struc_to_use=get_cl_param(\ - /*argc*/ argc ,\ - /*argv*/ argv ,\ - /*output*/ &le ,\ - /*Name*/ "-struc_to_use" ,\ - /*Flag*/ &garbage ,\ - /*TYPE*/ "S" ,\ - /*OPTIONAL?*/ OPTIONAL ,\ - /*MAX Nval*/ 200 ,\ - /*DOC*/ "ND" ,\ - /*Parameter*/ struc_to_use ,\ - /*Def 1*/ "",\ - /*Def 2*/ "stdin" ,\ - /*Min_value*/ "any" ,\ - /*Max Value*/ "any" \ - ); - -/*PARAMETER PROTOTYPE: COMPARISON IO */ - declare_name (comparison_io); - get_cl_param(\ - /*argc*/ argc ,\ - /*argv*/ argv ,\ - /*output*/ &le ,\ - /*Name*/ "-io_format" ,\ - /*Flag*/ &garbage ,\ - /*TYPE*/ "S" ,\ - /*OPTIONAL?*/ OPTIONAL ,\ - /*MAX Nval*/ 200 ,\ - /*DOC*/ "ND" ,\ - /*Parameter*/ &comparison_io,\ - /*Def 1*/ "hsgd0123456",\ - /*Def 2*/ "" ,\ - /*Min_value*/ "any" ,\ - /*Max Value*/ "any" \ - ); -/*PARAMETER PROTOTYPE: ALN */ - declare_name (aln); - get_cl_param(\ - /*argc*/ argc ,\ - /*argv*/ argv ,\ - /*output*/ &le ,\ - /*Name*/ "-aln" ,\ - /*Flag*/ &garbage ,\ - /*TYPE*/ "S" ,\ - /*OPTIONAL?*/ OPTIONAL ,\ - /*MAX Nval*/ 1 ,\ - /*DOC*/ "ND" ,\ - /*Parameter*/ &aln,\ - /*Def 1*/ "",\ - /*Def 2*/ "stdin" ,\ - /*Min_value*/ "any" ,\ - /*Max Value*/ "any" \ - ); -/*PARAMETER PROTOTYPE: ALN */ - - declare_name (repeat_seq); - get_cl_param(\ - /*argc*/ argc ,\ - /*argv*/ argv ,\ - /*output*/ &le ,\ - /*Name*/ "-repeat_seq" ,\ - /*Flag*/ &garbage ,\ - /*TYPE*/ "S" ,\ - /*OPTIONAL?*/ OPTIONAL ,\ - /*MAX Nval*/ 1 ,\ - /*DOC*/ "ND" ,\ - /*Parameter*/ &repeat_seq,\ - /*Def 1*/ "",\ - /*Def 2*/ "stdin" ,\ - /*Min_value*/ "any" ,\ - /*Max Value*/ "any" \ - ); - -/*PARAMETER PROTOTYPE: ALN */ - declare_name (repeat_pdb); - get_cl_param(\ - /*argc*/ argc ,\ - /*argv*/ argv ,\ - /*output*/ &le ,\ - /*Name*/ "-repeat_pdb" ,\ - /*Flag*/ &garbage ,\ - /*TYPE*/ "S" ,\ - /*OPTIONAL?*/ OPTIONAL ,\ - /*MAX Nval*/ 1 ,\ - /*DOC*/ "ND" ,\ - /*Parameter*/ &repeat_pdb,\ - /*Def 1*/ "",\ - /*Def 2*/ "stdin" ,\ - /*Min_value*/ "any" ,\ - /*Max Value*/ "any" \ - ); - -/*PARAMETER PROTOTYPE: Nb to exclude */ - get_cl_param(\ - /*argc*/ argc ,\ - /*argv*/ argv ,\ - /*output*/ &le ,\ - /*Name*/ "-n_excluded_nb" ,\ - /*Flag*/ &garbage ,\ - /*TYPE*/ "D" ,\ - /*OPTIONAL?*/ OPTIONAL ,\ - /*MAX Nval*/ 1 ,\ - /*DOC*/ "Exclude the N Nb on each side of the central residue. -1 triggers an automatic setting equal to the window size corresponding to the sphere" ,\ - /*Parameter*/ &n_excluded_nb ,\ - /*Def 1*/ "-1" ,\ - /*Def 2*/ "1" ,\ - /*Min_value*/ "any" ,\ - /*Max Value*/ "any" \ - ); -/*PARAMETER PROTOTYPE: diatances to count */ - get_cl_param(\ - /*argc*/ argc ,\ - /*argv*/ argv ,\ - /*output*/ &le ,\ - /*Name*/ "-similarity_threshold" ,\ - /*Flag*/ &garbage ,\ - /*TYPE*/ "F" ,\ - /*OPTIONAL?*/ OPTIONAL ,\ - /*MAX Nval*/ 1 ,\ - /*DOC*/ "ND" ,\ - /*Parameter*/ &similarity_threshold,\ - /*Def 1*/ "70" ,\ - /*Def 2*/ "70" ,\ - /*Min_value*/ "any" ,\ - /*Max Value*/ "any" \ - ); -/*PARAMETER PROTOTYPE: diatances to count */ - get_cl_param(\ - /*argc*/ argc ,\ - /*argv*/ argv ,\ - /*output*/ &le ,\ - /*Name*/ "-filter" ,\ - /*Flag*/ &garbage ,\ - /*TYPE*/ "F" ,\ - /*OPTIONAL?*/ OPTIONAL ,\ - /*MAX Nval*/ 1 ,\ - /*DOC*/ "Filter by only keeping the best quantile" ,\ - /*Parameter*/ &filter,\ - /*Def 1*/ "1.00" ,\ - /*Def 2*/ "1.00" ,\ - /*Min_value*/ "-1.00" ,\ - /*Max Value*/ "1.00" \ - ); -/*PARAMETER PROTOTYPE: diatances to count */ - get_cl_param(\ - /*argc*/ argc ,\ - /*argv*/ argv ,\ - /*output*/ &le ,\ - /*Name*/ "-filter_aln" ,\ - /*Flag*/ &garbage ,\ - /*TYPE*/ "D" ,\ - /*OPTIONAL?*/ OPTIONAL ,\ - /*MAX Nval*/ 1 ,\ - /*DOC*/ "Lower Case For Residues Filtered Out" ,\ - /*Parameter*/ &filter_aln,\ - /*Def 1*/ "0" ,\ - /*Def 2*/ "1" ,\ - /*Min_value*/ "0" ,\ - /*Max Value*/ "1" \ - ); -/*PARAMETER PROTOTYPE: diatances to count */ - get_cl_param(\ - /*argc*/ argc ,\ - /*argv*/ argv ,\ - /*output*/ &le ,\ - /*Name*/ "-irmsd_graph" ,\ - /*Flag*/ &garbage ,\ - /*TYPE*/ "D" ,\ - /*OPTIONAL?*/ OPTIONAL ,\ - /*MAX Nval*/ 1 ,\ - /*DOC*/ "Outputs the irmsd, position/position" ,\ - /*Parameter*/ &irmsd_graph,\ - /*Def 1*/ "0" ,\ - /*Def 2*/ "1" ,\ - /*Min_value*/ "0" ,\ - /*Max Value*/ "1" \ - ); -/*PARAMETER PROTOTYPE: diatances to count */ - get_cl_param(\ - /*argc*/ argc ,\ - /*argv*/ argv ,\ - /*output*/ &le ,\ - /*Name*/ "-nirmsd_graph" ,\ - /*Flag*/ &garbage ,\ - /*TYPE*/ "D" ,\ - /*OPTIONAL?*/ OPTIONAL ,\ - /*MAX Nval*/ 1 ,\ - /*DOC*/ "Outputs the NIRMSD VS N Removed Residues Curve" ,\ - /*Parameter*/ &nirmsd_graph,\ - /*Def 1*/ "0" ,\ - /*Def 2*/ "1" ,\ - /*Min_value*/ "0" ,\ - /*Max Value*/ "1" \ - ); -/*PARAMETER PROTOTYPE: -rmsd_threshold */ - get_cl_param(\ - /*argc*/ argc ,\ - /*argv*/ argv ,\ - /*output*/ &le ,\ - /*Name*/ "-md_threshold" ,\ - /*Flag*/ &garbage ,\ - /*TYPE*/ "F" ,\ - /*OPTIONAL?*/ OPTIONAL ,\ - /*MAX Nval*/ 1 ,\ - /*DOC*/ "ND" ,\ - /*Parameter*/ &md_threshold ,\ - /*Def 1*/ "1" ,\ - /*Def 2*/ "1" ,\ - /*Min_value*/ "any" ,\ - /*Max Value*/ "any" \ - ); - -/*PARAMETER PROTOTYPE: -maximum distances */ - get_cl_param(\ - /*argc*/ argc ,\ - /*argv*/ argv ,\ - /*output*/ &le ,\ - /*Name*/ "-maximum_distance" ,\ - /*Flag*/ &garbage ,\ - /*TYPE*/ "F" ,\ - /*OPTIONAL?*/ OPTIONAL ,\ - /*MAX Nval*/ 1 ,\ - /*DOC*/ "ND" ,\ - /*Parameter*/ &maximum_distance ,\ - /*Def 1*/ "10" ,\ - /*Def 2*/ "10" ,\ - /*Min_value*/ "any" ,\ - /*Max Value*/ "any" \ - ); - - -/*PARAMETER PROTOTYPE: -print_rapdb */ - get_cl_param(\ - /*argc*/ argc ,\ - /*argv*/ argv ,\ - /*output*/ &le ,\ - /*Name*/ "-print_rapdb" ,\ - /*Flag*/ &garbage ,\ - /*TYPE*/ "D" ,\ - /*OPTIONAL?*/ OPTIONAL ,\ - /*MAX Nval*/ 1 ,\ - /*DOC*/ "Prints the neighborhood of each pair of aligned residues, along with the associated local score" ,\ - /*Parameter*/ &print_rapdb ,\ - /*Def 1*/ "0" ,\ - /*Def 2*/ "1" ,\ - /*Min_value*/ "any" ,\ - /*Max Value*/ "any" \ - ); - -/*PARAMETER PROTOTYPE: RUN_NAME */ - declare_name (run_name); - get_cl_param(\ - /*argc*/ argc ,\ - /*argv*/ argv ,\ - /*output*/ &le ,\ - /*Name*/ "-run_name" ,\ - /*Flag*/ &garbage ,\ - /*TYPE*/ "W_F" ,\ - /*OPTIONAL?*/ OPTIONAL ,\ - /*MAX Nval*/ 1 ,\ - /*DOC*/ "ND" ,\ - /*Parameter*/ &run_name ,\ - /*Def 1*/ "default" ,\ - /*Def 2*/ "" ,\ - /*Min_value*/ "default" ,\ - /*Max Value*/ "any" \ - ); -/*PARAMETER PROTOTYPE: OUTFILE */ -/*PARAMETER PROTOTYPE: OUTFILE */ - declare_name ( outfile); - get_cl_param(\ - /*argc*/ argc ,\ - /*argv*/ argv ,\ - /*output*/ &le ,\ - /*Name*/ "-outfile" ,\ - /*Flag*/ &garbage ,\ - /*TYPE*/ "W_F" ,\ - /*OPTIONAL?*/ OPTIONAL ,\ - /*MAX Nval*/ 1 ,\ - /*DOC*/ "ND" ,\ - /*Parameter*/ &outfile ,\ - /*Def 1*/ "no" ,\ - /*Def 2*/ "default" ,\ - /*Min_value*/ "default" ,\ - /*Max Value*/ "any" \ - ); -/*PARAMETER PROTOTYPE: OUTFILE */ - declare_name ( apdb_outfile); - get_cl_param(\ - /*argc*/ argc ,\ - /*argv*/ argv ,\ - /*output*/ &le ,\ - /*Name*/ "-apdb_outfile" ,\ - /*Flag*/ &garbage ,\ - /*TYPE*/ "W_F" ,\ - /*OPTIONAL?*/ OPTIONAL ,\ - /*MAX Nval*/ 1 ,\ - /*DOC*/ "ND" ,\ - /*Parameter*/ &apdb_outfile ,\ - /*Def 1*/ "stdout" ,\ - /*Def 2*/ "default" ,\ - /*Min_value*/ "any" ,\ - /*Max Value*/ "any" \ - ); - -/*PARAMETER PROTOTYPE: OUTPUT_FORMAT */ - out_aln_format=declare_char ( 200, STRING); - n_out_aln_format=get_cl_param(\ - /*argc*/ argc ,\ - /*argv*/ argv ,\ - /*output*/ &le ,\ - /*Name*/ "-output" ,\ - /*Flag*/ &garbage ,\ - /*TYPE*/ "S" ,\ - /*OPTIONAL?*/ OPTIONAL ,\ - /*MAX Nval*/ 200 ,\ - /*DOC*/ "ND" ,\ - /*Parameter*/ out_aln_format,\ - /*Def 1*/ "score_html" ,\ - /*Def 2*/ "" ,\ - /*Min_value*/ "any" ,\ - /*Max Value*/ "any" \ - ); - - - -/*PARAMETER PROTOTYPE: INFILE */ - declare_name (color_mode); - get_cl_param(\ - /*argc*/ argc ,\ - /*argv*/ argv ,\ - /*output*/ &le ,\ - /*Name*/ "-color_mode" ,\ - /*Flag*/ &garbage ,\ - /*TYPE*/ "S" ,\ - /*OPTIONAL?*/ OPTIONAL ,\ - /*MAX Nval*/ 1 ,\ - /*DOC*/ "ND" ,\ - /*Parameter*/ &color_mode ,\ - /*Def 1*/ "apdb" ,\ - /*Def 2*/ "irmsd" ,\ - /*Min_value*/ "any" ,\ - /*Max Value*/ "any" \ - ); -/*PARAMETER PROTOTYPE: INFILE */ - declare_name (output_res_num); - get_cl_param(\ - /*argc*/ argc ,\ - /*argv*/ argv ,\ - /*output*/ &le ,\ - /*Name*/ "-seqnos" ,\ - /*Flag*/ &garbage ,\ - /*TYPE*/ "S" ,\ - /*OPTIONAL?*/ OPTIONAL ,\ - /*MAX Nval*/ 1 ,\ - /*DOC*/ "ND" ,\ - /*Parameter*/ &output_res_num ,\ - /*Def 1*/ "off" ,\ - /*Def 2*/ "on" ,\ - /*Min_value*/ "any" ,\ - /*Max Value*/ "any" \ - ); - declare_name (cache); - get_cl_param(\ - /*argc*/ argc ,\ - /*argv*/ argv ,\ - /*output*/ &le ,\ - /*Name*/ "-cache" ,\ - /*Flag*/ &garbage ,\ - /*TYPE*/ "W_F" ,\ - /*OPTIONAL?*/ OPTIONAL ,\ - /*MAX Nval*/ 1 ,\ - /*DOC*/ "use,ignore,update,local, directory name" ,\ - /*Parameter*/ &cache ,\ - /*Def 1*/ "use" ,\ - /*Def 2*/ "update" ,\ - /*Min_value*/ "any" ,\ - /*Max Value*/ "any" \ - ); - - declare_name (local_mode); - get_cl_param( \ - /*argc*/ argc ,\ - /*argv*/ argv ,\ - /*output*/ &le ,\ - /*Name*/ "-local_mode" ,\ - /*Flag*/ &garbage ,\ - /*TYPE*/ "W_F" ,\ - /*OPTIONAL?*/ OPTIONAL ,\ - /*MAX Nval*/ 1 ,\ - /*DOC*/ "Mode for choosing the Neighborhood (bubble or window)\nWhen selecting window, maximum distance becomes the window 1/2 size, in residues\nWhen using sphere, maximum_distance is the sphere radius in Angstrom" ,\ - /*Parameter*/ &local_mode ,\ - /*Def 1*/ "sphere" ,\ - /*Def 2*/ "window" ,\ - /*Min_value*/ "any" ,\ - /*Max Value*/ "any" \ - ); - -/*PARAMETER PROTOTYPE: IN */ - template_file_list=declare_char (100, STRING); - n_template_file=get_cl_param( \ - /*argc*/ argc , \ - /*argv*/ argv , \ - /*output*/ &le ,\ - /*Name*/ "-template_file" ,\ - /*Flag*/ &garbage ,\ - /*TYPE*/ "S" ,\ - /*OPTIONAL?*/ OPTIONAL ,\ - /*MAX Nval*/ 1000 ,\ - /*DOC*/ "List of templates file for the sequences",\ - /*Parameter*/ template_file_list , \ - /*Def 1*/ "_SELF_P_",\ - /*Def 2*/ "stdin" ,\ - /*Min_value*/ "any" ,\ - /*Max Value*/ "any" \ - ); - /*PARAMETER PROTOTYPE: MODE */ - declare_name (mode); - get_cl_param( \ - /*argc*/ argc , \ - /*argv*/ argv , \ - /*output*/ &le ,\ - /*Name*/ "-mode" ,\ - /*Flag*/ &garbage ,\ - /*TYPE*/ "S" ,\ - /*OPTIONAL?*/ OPTIONAL ,\ - /*MAX Nval*/ 1 ,\ - /*DOC*/ "Mode: irmsd, ",\ - /*Parameter*/ &mode , \ - /*Def 1*/ "irmsd",\ - /*Def 2*/ "stdin" ,\ - /*Min_value*/ "any" ,\ - /*Max Value*/ "any" \ - ); - - - - get_cl_param(\ - /*argc*/ argc ,\ - /*argv*/ argv ,\ - /*output*/ &le ,\ - /*Name*/ "-prot_min_sim" ,\ - /*Flag*/ &prot_min_sim ,\ - /*TYPE*/ "D" ,\ - /*OPTIONAL?*/ OPTIONAL ,\ - /*MAX Nval*/ 1 ,\ - /*DOC*/ "Minimum similarity between a sequence and its PDB target" ,\ - /*Parameter*/ &prot_min_sim ,\ - /*Def 1*/ "0" ,\ - /*Def 2*/ "20" ,\ - /*Min_value*/ "any" ,\ - /*Max Value*/ "any" \ - ); - set_int_variable ("prot_min_sim", prot_min_sim); - -get_cl_param(\ - /*argc*/ argc ,\ - /*argv*/ argv ,\ - /*output*/ &le ,\ - /*Name*/ "-prot_max_sim" ,\ - /*Flag*/ &prot_max_sim ,\ - /*TYPE*/ "D" ,\ - /*OPTIONAL?*/ OPTIONAL ,\ - /*MAX Nval*/ 1 ,\ - /*DOC*/ "Maximum similarity between a sequence and its BLAST relatives" ,\ - /*Parameter*/ &prot_max_sim ,\ - /*Def 1*/ "90" ,\ - /*Def 2*/ "100" ,\ - /*Min_value*/ "any" ,\ - /*Max Value*/ "any" \ - ); - set_int_variable ("prot_max_sim", prot_max_sim); - -get_cl_param(\ - /*argc*/ argc ,\ - /*argv*/ argv ,\ - /*output*/ &le ,\ - /*Name*/ "-prot_min_cov" ,\ - /*Flag*/ &prot_min_cov ,\ - /*TYPE*/ "D" ,\ - /*OPTIONAL?*/ OPTIONAL ,\ - /*MAX Nval*/ 1 ,\ - /*DOC*/ "Minimum coverage of a sequence by its BLAST relatives" ,\ - /*Parameter*/ &prot_min_cov ,\ - /*Def 1*/ "0" ,\ - /*Def 2*/ "0" ,\ - /*Min_value*/ "any" ,\ - /*Max Value*/ "any" \ - ); -set_int_variable ("prot_min_cov", prot_min_cov); - -get_cl_param(\ - /*argc*/ argc ,\ - /*argv*/ argv ,\ - /*output*/ &le ,\ - /*Name*/ "-pdb_min_sim" ,\ - /*Flag*/ &pdb_min_sim ,\ - /*TYPE*/ "D" ,\ - /*OPTIONAL?*/ OPTIONAL ,\ - /*MAX Nval*/ 1 ,\ - /*DOC*/ "Minimum similarity between a sequence and its PDB target" ,\ - /*Parameter*/ &pdb_min_sim ,\ - /*Def 1*/ "35" ,\ - /*Def 2*/ "35" ,\ - /*Min_value*/ "any" ,\ - /*Max Value*/ "any" \ - ); - - set_int_variable ("pdb_min_sim", pdb_min_sim); - get_cl_param( \ - /*argc*/ argc ,\ - /*argv*/ argv ,\ - /*output*/ &le ,\ - /*Name*/ "-pdb_max_sim" ,\ - /*Flag*/ &pdb_max_sim ,\ - /*TYPE*/ "D" ,\ - /*OPTIONAL?*/ OPTIONAL ,\ - /*MAX Nval*/ 1 ,\ - /*DOC*/ "Maximum similarity between a sequence and its PDB target" ,\ - /*Parameter*/ &pdb_max_sim ,\ - /*Def 1*/ "100" ,\ - /*Def 2*/ "0" ,\ - /*Min_value*/ "any" ,\ - /*Max Value*/ "any" \ - ); - set_int_variable ("pdb_max_sim", pdb_max_sim); - get_cl_param( \ - /*argc*/ argc ,\ - /*argv*/ argv ,\ - /*output*/ &le ,\ - /*Name*/ "-pdb_min_cov" ,\ - /*Flag*/ &pdb_min_cov ,\ - /*TYPE*/ "D" ,\ - /*OPTIONAL?*/ OPTIONAL ,\ - /*MAX Nval*/ 1 ,\ - /*DOC*/ "Minimum coverage of a sequence by its PDB target" ,\ - /*Parameter*/ &pdb_min_cov ,\ - /*Def 1*/ "50" ,\ - /*Def 2*/ "25" ,\ - /*Min_value*/ "any" ,\ - /*Max Value*/ "any" \ - ); -set_int_variable ("pdb_min_cov", pdb_min_cov); - - - -declare_name (pdb_blast_server); - get_cl_param(\ - /*argc*/ argc ,\ - /*argv*/ argv ,\ - /*output*/ &le ,\ - /*Name*/ "-pdb_blast_server" ,\ - /*Flag*/ &garbage ,\ - /*TYPE*/ "W_F" ,\ - /*OPTIONAL?*/ OPTIONAL ,\ - /*MAX Nval*/ 1 ,\ - /*DOC*/ "ND" ,\ - /*Parameter*/&pdb_blast_server ,\ - /*Def 1*/ "EBI" ,\ - /*Def 2*/ "default" ,\ - /*Min_value*/ "any" ,\ - /*Max Value*/ "any" \ - ); -declare_name (prot_blast_server); - get_cl_param(\ - /*argc*/ argc ,\ - /*argv*/ argv ,\ - /*output*/ &le ,\ - /*Name*/ "-blast" ,\ - /*Flag*/ &garbage ,\ - /*TYPE*/ "W_F" ,\ - /*OPTIONAL?*/ OPTIONAL ,\ - /*MAX Nval*/ 1 ,\ - /*DOC*/ "ND" ,\ - /*Parameter*/&prot_blast_server ,\ - /*Def 1*/ "" ,\ - /*Def 2*/ "" ,\ - /*Min_value*/ "any" ,\ - /*Max Value*/ "any" \ - ); - //make sure that -blast and -blast_server are both supported blast>blast_server - if ( !prot_blast_server[0]) - { - get_cl_param( \ - /*argc*/ argc ,\ - /*argv*/ argv ,\ - /*output*/ &le ,\ - /*Name*/ "-blast_server" ,\ - /*Flag*/ &garbage ,\ - /*TYPE*/ "W_F" ,\ - /*OPTIONAL?*/ OPTIONAL ,\ - /*MAX Nval*/ 1 ,\ - /*DOC*/ "ND" ,\ - /*Parameter*/&prot_blast_server ,\ - /*Def 1*/ "EBI" ,\ - /*Def 2*/ "default" ,\ - /*Min_value*/ "any" ,\ - /*Max Value*/ "any" \ - ); - } - set_string_variable ("blast_server", prot_blast_server); - - - - declare_name (pdb_db); - get_cl_param(\ - /*argc*/ argc ,\ - /*argv*/ argv ,\ - /*output*/ &le ,\ - /*Name*/ "-pdb_db" ,\ - /*Flag*/ &garbage ,\ - /*TYPE*/ "W_F" ,\ - /*OPTIONAL?*/ OPTIONAL ,\ - /*MAX Nval*/ 1 ,\ - /*DOC*/ "Non Redundant PDB database" ,\ - /*Parameter*/&pdb_db ,\ - /*Def 1*/ "pdb" ,\ - /*Def 2*/ "default" ,\ - /*Min_value*/ "any" ,\ - /*Max Value*/ "any" \ - ); - set_string_variable ("pdb_db", pdb_db); - - -declare_name (prot_db); - get_cl_param(\ - /*argc*/ argc ,\ - /*argv*/ argv ,\ - /*output*/ &le ,\ - /*Name*/ "-protein_db" ,\ - /*Flag*/ &garbage ,\ - /*TYPE*/ "W_F" ,\ - /*OPTIONAL?*/ OPTIONAL ,\ - /*MAX Nval*/ 1 ,\ - /*DOC*/ "ND" ,\ - /*Parameter*/&prot_db ,\ - /*Def 1*/ "uniprot" ,\ - /*Def 2*/ "default" ,\ - /*Min_value*/ "any" ,\ - /*Max Value*/ "any" \ - ); - // set the correct mode: - if ( strm (argv[0], "trmsd"))sprintf (mode, "trmsd"); - - set_string_variable ("prot_db", prot_db); - - - if (argc==1){myexit (EXIT_SUCCESS);} - - if ( strm (outfile,"no"))n_out_aln_format=0; - - get_cl_param( argc, argv,&le, NULL,NULL,NULL,0,0,NULL); - prepare_cache (cache); - - - if (strm ( aln, "")) - sprintf ( aln, "%s", argv[1]); - - if (!is_aln (aln)) - { - printf_exit (EXIT_FAILURE, stderr, "\n\n---- ERROR: File %s must be a valid alignment [FATAL:%s-%s]\n\n",aln,argv[0], PROGRAM); - } - - pdb_param=vcalloc ( 1, sizeof(Pdb_param)); - - pdb_param->similarity_threshold=similarity_threshold; - - pdb_param->md_threshold=md_threshold; - pdb_param->maximum_distance=maximum_distance; - - if ( n_excluded_nb>0) - pdb_param->n_excluded_nb=n_excluded_nb; - else if ( n_excluded_nb==-1) - pdb_param->n_excluded_nb=(int)((float)maximum_distance/(float)1.57); - /* Exclude all the nb within the bubble at +1, +2, +n*/ - pdb_param->print_rapdb=print_rapdb; - pdb_param->comparison_io=comparison_io; - - pdb_param->local_mode=local_mode; - pdb_param->color_mode=lower_string (color_mode); - pdb_param->filter=filter; - pdb_param->filter_aln=filter_aln; - pdb_param->irmsd_graph=irmsd_graph; - pdb_param->nirmsd_graph=nirmsd_graph; - - sprintf ( list_file[n_list++], "S%s", aln); - - - if (!strm (repeat_seq, "")) - { - - sprintf ( template_file_list[0], "%s", process_repeat (list_file[0], repeat_seq, repeat_pdb)); - fprintf ( le, "\n##Turn a repeat List into a Template File\n"); - le=display_file_content (le,template_file_list[0]); - fprintf ( le, "\n\n"); - } - S=read_seq_in_n_list (list_file, n_list, NULL, NULL); - - le=display_sequences_names ( S,le,0, 0); - - if ( n_template_file) - { - fprintf ( le, "\nLooking For Sequence Templates:\n"); - for ( a=0; a< n_template_file; a++) - { - fprintf ( le, "\n\tTemplate Type: [%s] Mode Or File: [%s] [Start", template_type2type_name(template_file_list[a]), template_file_list[a]); - S=seq2template_seq(S, template_file_list[a], F); - fprintf ( le, "]"); - } - } - - if ( !strm (run_name, "default")) - { - F=parse_fname(run_name); - sprintf (F->name, "%s", F->full); - } - else - { - F=parse_fname (aln); - } - - for ( a=0; a< S->nseq; a++) - { - char *p; - - p=seq2T_value (S, a, "template_file", "_P_"); - - if (p)sprintf (S->file[a], "%s",p); - } - - CL=declare_constraint_list ( S,NULL, NULL, 0,NULL, NULL); - CL->T=vcalloc (S->nseq,sizeof (Ca_trace*)); - - - for ( n_pdb=0,a=0; anseq; a++) - { - if ( !is_pdb_file ( S->file[a])){CL->T[a]=NULL;continue;} - CL->T[a]=read_ca_trace (S->file[a], "ATOM"); - CL->T[a]=trim_ca_trace (CL->T[a], S->seq[a]); - (CL->T[a])->pdb_param=pdb_param; - n_pdb++; - } - - A=declare_aln (S); - - - A->residue_case=KEEP_CASE; - A=main_read_aln(aln, A); - EA=copy_aln (A, EA); - A->CL=CL; - - if ( strm (apdb_outfile, "default")) - sprintf ( apdb_outfile, "%s.apdb_result", F->name); - - if ( n_pdb<2) - { - FILE *fp; - fp=vfopen (apdb_outfile, "w"); - fprintf (fp, "\nYour Alignment Does Not Contain Enough Sequences With a known Structure\n"); - fprintf (fp, "To Use APDB, your alignment must include at least TWO sequences with a known structure.\n"); - fprintf (fp, "These sequences must be named according to their PDB identifier, followed by the chain index (if any) ex: 1fnkA\n"); - fprintf (fp, "[FATAL:%s]\n", PROGRAM); - vfclose (fp); - } - else if ( strm (mode, "irmsd")) - { - EA=analyse_pdb ( A, EA, apdb_outfile); - } - else if ( strm (mode, "msa2tree") || strm (mode, "trmsd")) - { - EA=msa2struc_dist ( A, EA,F->name); - } - le=display_output_filename ( le, "APDB_RESULT", "APDB_RESULT_FORMAT_01", apdb_outfile, CHECK); - - if ( n_pdb>=2) - { - declare_name (file_name); - for ( a=0; a< n_out_aln_format; a++) - { - if ( strm2( outfile, "stdout", "stderr"))sprintf (file_name, "%s", outfile); - else if ( strm (outfile, "default")) - sprintf (file_name, "%s.%s",F->name, out_aln_format[a]); - else - sprintf (file_name, "%s.%s",outfile,out_aln_format[a]); - - output_format_aln (out_aln_format[a],A,EA,file_name); - le=display_output_filename ( le, "MSA", out_aln_format[a], file_name, CHECK); - } - } - return EXIT_SUCCESS; - } - - - -Constraint_list * set_constraint_list4align_pdb (Constraint_list *CL,int seq, char *dp_mode, char *local_mode, char *param_file) -{ - static Constraint_list *PWCL; - static Pdb_param *pdb_param; - char **x; - int n; - - if ( !CL) - { - free_constraint_list (PWCL); - return NULL; - } - else if ( !PWCL) - { - PWCL=declare_constraint_list ( CL->S,NULL, NULL, 0,NULL, NULL); - - pdb_param=vcalloc ( 1, sizeof(Pdb_param)); - pdb_param->N_ca=0; - pdb_param->max_delta=2.0; - pdb_param->maximum_distance=14; - declare_name (pdb_param->local_mode); - sprintf (pdb_param->local_mode, "%s", local_mode); - pdb_param->scale=50; - - PWCL->pw_parameters_set=1; - PWCL->S=CL->S; - PWCL->lalign_n_top=10; - PWCL->sw_min_dist=10; - - PWCL->T=vcalloc ( (PWCL->S)->nseq, sizeof (Ca_trace*)); - - PWCL->extend_jit=0; - PWCL->maximise=1; - /*PWCL->gop=-40;*/ - PWCL->gop=-50; - PWCL->gep=-20; - sprintf (CL->matrix_for_aa_group, "vasiliky"); - PWCL->use_fragments=0; - PWCL->ktup=0; - PWCL->TG_MODE=1; - } - - - if ( param_file && check_file_exists ( param_file) ) - { - if ( (x=get_parameter ( "-nca", &n, param_file))!=NULL){pdb_param->N_ca=atoi(x[0]);free_char (x, -1);} - if ( (x=get_parameter ( "-max_delta", &n, param_file))!=NULL){pdb_param->max_delta=atof(x[0]);free_char (x, -1);} - if ( (x=get_parameter ( "-maximum_distance", &n, param_file))!=NULL){pdb_param->maximum_distance=atoi(x[0]);free_char (x, -1);} - if ( (x=get_parameter ( "-local_mode", &n, param_file))!=NULL){sprintf (pdb_param->local_mode, "%s",x[0]);free_char (x, -1);} - if ( (x=get_parameter ( "-scale", &n, param_file))!=NULL){pdb_param->scale=atoi(x[0]);free_char (x, -1);} - if ( (x=get_parameter ( "-gapopen", &n, param_file))!=NULL){PWCL->gop=atoi(x[0]);free_char (x, -1);} - if ( (x=get_parameter ( "-gapext" , &n, param_file))!=NULL){PWCL->gep=atof(x[0]);free_char (x, -1);} - - } - - - - - sprintf ( PWCL->dp_mode, "%s", dp_mode); - - if (strm (PWCL->dp_mode, "lalign"))sprintf (PWCL->dp_mode,"sim_pair_wise_lalign"); - else if (strm (PWCL->dp_mode, "sw"))sprintf (PWCL->dp_mode,"gotoh_pair_wise_sw"); - - local_mode=pdb_param->local_mode; - if ( strm ( local_mode, "hasch_ca_trace_nb")) PWCL->evaluate_residue_pair=evaluate_ca_trace_nb; - else if ( strm ( local_mode, "hasch_ca_trace_bubble")) PWCL->evaluate_residue_pair=evaluate_ca_trace_bubble; - else if ( strm ( local_mode, "hasch_ca_trace_sap1_bubble")) PWCL->evaluate_residue_pair=evaluate_ca_trace_sap1_bubble; - else if ( strm ( local_mode, "hasch_ca_trace_sap2_bubble")) PWCL->evaluate_residue_pair=evaluate_ca_trace_sap2_bubble; - - else if ( strm ( local_mode, "hasch_ca_trace_transversal")) PWCL->evaluate_residue_pair=evaluate_ca_trace_transversal; - else if ( strm ( local_mode, "hasch_ca_trace_bubble_2")) PWCL->evaluate_residue_pair=evaluate_ca_trace_bubble_2; - else if ( strm ( local_mode, "hasch_ca_trace_bubble_3")) PWCL->evaluate_residue_pair=evaluate_ca_trace_bubble_3; - else if ( strm ( local_mode, "custom_pair_score_function1")) PWCL->evaluate_residue_pair=custom_pair_score_function1; - else if ( strm ( local_mode, "custom_pair_score_function2")) PWCL->evaluate_residue_pair=custom_pair_score_function2; - else if ( strm ( local_mode, "custom_pair_score_function3")) PWCL->evaluate_residue_pair=custom_pair_score_function3; - else if ( strm ( local_mode, "custom_pair_score_function4")) PWCL->evaluate_residue_pair=custom_pair_score_function4; - else if ( strm ( local_mode, "custom_pair_score_function5")) PWCL->evaluate_residue_pair=custom_pair_score_function5; - else if ( strm ( local_mode, "custom_pair_score_function6")) PWCL->evaluate_residue_pair=custom_pair_score_function6; - else if ( strm ( local_mode, "custom_pair_score_function7")) PWCL->evaluate_residue_pair=custom_pair_score_function7; - else if ( strm ( local_mode, "custom_pair_score_function8")) PWCL->evaluate_residue_pair=custom_pair_score_function8; - else if ( strm ( local_mode, "custom_pair_score_function9")) PWCL->evaluate_residue_pair=custom_pair_score_function9; - else if ( strm ( local_mode, "custom_pair_score_function10")) PWCL->evaluate_residue_pair=custom_pair_score_function10; - - - else - { - fprintf ( stderr, "\n%s is an unknown hasch mode, [FATAL]\n", local_mode); - myexit (EXIT_FAILURE); - } - - if ( PWCL->T[seq]); - else - { - PWCL->T[seq]=read_ca_trace (is_pdb_struc((CL->S)->name[seq]), "ATOM"); - (PWCL->T[seq])->pdb_param=pdb_param; - PWCL->T[seq]=trim_ca_trace (PWCL->T[seq], (CL->S)->seq[seq]); - PWCL->T[seq]=hasch_ca_trace(PWCL->T[seq]); - - } - - - return PWCL; -} - - - -int evaluate_ca_trace_nb (Constraint_list *CL, int s1, int r1, int s2, int r2) - { - - return (int)(neighborhood_match(CL, s1,r1, s2, r2, (CL->T[s1])->Chain,(CL->T[s2])->Chain )); - } -int evaluate_ca_trace_sap2_bubble (Constraint_list *CL, int s1, int r1, int s2, int r2) - { - - - - return sap2_neighborhood_match (CL, s1, r1, s2, r2, (CL->T[s1])->Bubble,(CL->T[s2])->Bubble ); - - } -int evaluate_ca_trace_sap1_bubble (Constraint_list *CL, int s1, int r1, int s2, int r2) - { - /* - Function documentation: start - - int evaluate_ca_trace_sap1_bubble (Constraint_list *CL, int s1, int s2, int r1, int r2) - This function evaluates the cost for matching two residues: - - a1 is the cost for matching the two neighborood ( bubble type), using sap - a1: [0,+100], +100 is the best possible match. - a2 is the residue type weight: - min=worst substitution value - best=best of r1/r1, r2/r2-min - - a2=(r1/r2 -min)/best --> a1:[0, 100] - - score=a1*a2-->[-inf, +10000]; - */ - - - - float a1; - - - a1=(int) sap1_neighborhood_match (CL, s1, r1, s2, r2, (CL->T[s1])->Bubble,(CL->T[s2])->Bubble ); - - return (int)a1; - - - } -int evaluate_ca_trace_bubble (Constraint_list *CL, int s1, int r1, int s2, int r2) - { - /* - Function documentation: start - - int evaluate_ca_trace_bubble (Constraint_list *CL, int s1, int s2, int r1, int r2) - This function evaluates the cost for matching two residues: - - a1 is the cost for matching the two neighborood ( bubble type) - a1: [-inf,+100-scale], +100-scale is the best possible match. - - */ - - - - float a1; - - - - a1=(int) neighborhood_match (CL, s1, r1, s2, r2, (CL->T[s1])->Bubble,(CL->T[s2])->Bubble )-((CL->T[s1])->pdb_param)->scale; - - return a1; - - - } -int evaluate_ca_trace_transversal (Constraint_list *CL, int s1, int r1, int s2, int r2) - { - return (int)(transversal_match (CL, s1, r1, s2, r2, (CL->T[s1])->Transversal,(CL->T[s2])->Transversal )); - } - -int evaluate_ca_trace_bubble_3 (Constraint_list *CL, int s1, int r1, int s2, int r2) - { - /*This Mode evaluates : - - 1-The Bubble - 2-The Match of the transversal residues - */ - - int a1, l1; - int a2, l2; - int a; - - l1=MAX(( (CL->T[s1])->Chain )->nb[r1][0] ,((CL->T[s2])->Chain )->nb[r2][0]); - l2=MAX(( (CL->T[s1])->Bubble)->nb[r1][0], ((CL->T[s2])->Bubble)->nb[r2][0]); - - a1=(int)(neighborhood_match (CL, s1, r1, s2, r2, (CL->T[s1])->Bubble,(CL->T[s2])->Bubble )); - a2=(int)(transversal_match (CL, s1, r1, s2, r2, (CL->T[s1])->Transversal,(CL->T[s2])->Transversal )); - - if ( !l1 && !l2)return 0; - a=(a1+a2)/2; - return a; - } -int evaluate_ca_trace_bubble_2 (Constraint_list *CL, int s1, int r1, int s2, int r2) - { - /*This Mode evaluates : - 1-The Ca neighborhood - 2-The Bubble - */ - - - return (int)((neighborhood_match (CL, s1, r1, s2, r2, (CL->T[s1])->Chain,(CL->T[s2])->Chain ))); - } - - -/*********************************************************************************************/ -/* */ -/* FUNCTIONS FOR COMPARING TWO NEIGHBORHOODS:START */ -/* */ -/*********************************************************************************************/ -float matrix_match (Constraint_list *CL, int s1, int r1, int s2, int r2, Struct_nb *nbs1, Struct_nb *nbs2) - - { - /* - Function documentation: start - - float matrix_match (Constraint_list *CL, int s1, int s2, int r1, int r2, Struct_nb *nbs1, Struct_nb *nbs2) - This function evaluates the matrix for matching two residues: - - min=worst substitution value - best=best of r1/r1, r2/r2-min - - a2=(r1/r2 -min)/best --> a1:[0, 100] - - score=a1*a2-->[-inf, +10000]; - */ - - - - float a2; - float m1, m2, m; - static float min=0; - int a, b; - - if ( !CL->M) - { - CL->M=read_matrice ( "pam250mt"); - min=CL->M[0][0]; - for ( a=0; a< 26; a++) - for ( b=0; b< 26; b++)min=MIN(min, CL->M[a][b]); - } - - if ( r1<=0 || r2<=0)return 0; - m1=CL->M[(CL->S)->seq[s1][r1-1]-'A'][(CL->S)->seq[s1][r1-1]-'A']-min; - m2=CL->M[(CL->S)->seq[s2][r2-1]-'A'][(CL->S)->seq[s2][r2-1]-'A']-min; - m=MAX(m1, m2); - a2=(CL->M[(CL->S)->seq[s1][r1-1]-'A'][(CL->S)->seq[s2][r2-1]-'A']-min)/m; - - return a2; - } - - -float transversal_match (Constraint_list *CL, int s1, int r1, int s2, int r2, Struct_nb *nbs1, Struct_nb *nbs2) - { - int a, l1, l2; - float score=0; - float delta, max_delta; - float max; - Pdb_param*PP; - - PP=(CL->T[s1])->pdb_param; - max_delta=PP->max_delta; - - l1=nbs1->nb[r1][0]; - l2=nbs2->nb[r2][0]; - - if ( l1!=l2 || l1<(PP->N_ca)) return 0; - - - max=MAX(l1, l2)*max_delta; - for ( delta=0,a=0; a< l2 ; a++) - { - - delta+=max_delta-FABS((nbs1->d_nb[r1][a]-nbs2->d_nb[r2][a])); - } - score=(delta*100)/max; - - - - return score; - } - -float neighborhood_match (Constraint_list *CL, int s1, int r1, int s2, int r2, Struct_nb *nbs1, Struct_nb *nbs2) - { - static float **table; - static int table_size; - int a, b, l1, l2; - float score=0; - float ins, del, sub; - float delta, max_delta; - float max; - Pdb_param*PP; - - - PP=(CL->T[s1])->pdb_param; - max_delta=PP->max_delta; - - - if ( r1> 0 && r2 >0) {r1--; r2--;} - else return 0; - - l1=nbs1->nb[r1][0]; - l2=nbs2->nb[r2][0]; - - if (table_size< (MAX(l1, l2)+1)) - { - table_size=MAX(l1, l2)+1; - if ( table)free_float (table, -1); - table=NULL; - } - if ( !table) table=declare_float (table_size, table_size); - - - max=MAX(l1, l2)*max_delta; - if ( max==0)return 0; - - - table[0][0]=0; - for ( b=1; b<=l2; b++) - { - table[0][b]=0; - } - for ( a=1; a<=l1; a++) - { - table[a][0]=0; - for ( b=1; b<=l2 ; b++) - { - - delta=max_delta-FABS((nbs1->d_nb[r1][a]-nbs2->d_nb[r2][b])); - - del=table[a-1][b]; - ins=table[a][b-1]; - sub= table[a-1][b-1]+delta; - - if ( del >= ins && del >= sub)score=del; - else if ( ins >= del && ins >= sub) score=ins; - else score=sub; - table[a][b]=score; - } - } - - - score=((((score)*100)/max)); - - - return score; - } - -float sap1_neighborhood_match (Constraint_list *CL, int s1, int r1, int s2, int r2, Struct_nb *nbs1, Struct_nb *nbs2) - { - /* - Function documentation: start - - float sap1_neighborhood_match (Constraint_list *CL, int s1, int s2, int r1, int r2, Struct_nb *nbs1, Struct_nb *nbs2) - This function is adapted from Taylor, Orengo, Protein Structure Alignment JMB 1989, (208)1-22 - It is the first function where - score= A/(|dra-drb|+b) - - Function documentation: end - */ - - static float **table; - static int table_size; - int a, b, l1, l2; - float score=0; - float ins, del, sub; - float delta; - float max; - - int A=50; - int B=5; - - - - - - - if ( r1> 0 && r2 >0) {r1--; r2--;} - else return 0; - - l1=nbs1->nb[r1][0]; - l2=nbs2->nb[r2][0]; - - if (table_size< (MAX(l1, l2)+1)) - { - table_size=MAX(l1, l2)+1; - if ( table)free_float (table, -1); - table=NULL; - } - if ( !table) table=declare_float (table_size, table_size); - - - max=MAX(l1, l2)*(A/B); - if ( max==0)return 0; - - - table[0][0]=0; - for ( b=1; b<=l2; b++) - { - table[0][b]=0; - } - for ( a=1; a<=l1; a++) - { - table[a][0]=0; - for ( b=1; b<=l2 ; b++) - { - - delta=A/(FABS((nbs1->d_nb[r1][a]-nbs2->d_nb[r2][b]))+B); - - del=table[a-1][b]; - ins=table[a][b-1]; - sub= table[a-1][b-1]+delta; - if ( del >= ins && del >= sub)score=del; - else if ( ins >= del && ins >= sub) score=ins; - else score=sub; - table[a][b]=score; - } - } - - - score=((score*100))/(max); - - - return score; - } - -float sap2_neighborhood_match (Constraint_list *CL, int s1, int r1, int s2, int r2, Struct_nb *nbs1, Struct_nb *nbs2) - { - /* - Function documentation: start - - float sap1_neighborhood_match (Constraint_list *CL, int s1, int s2, int r1, int r2, Struct_nb *nbs1, Struct_nb *nbs2) - This function is adapted from Taylor, Orengo, Protein Structure Alignment JMB 1989, (208)1-22 - It is the first function where - score= A/(|dra-drb|+b) - - Function documentation: end - */ - - static float **table; - static int table_size; - int a, b, l1, l2; - float score=0; - float ins, del, sub; - float delta; - float max; - - Amino_acid **pep1; - Amino_acid **pep2; - static Atom *vX_1, *vY_1, *vZ_1; - static Atom *vX_2, *vY_2, *vZ_2; - static Atom *ca1, *ca2; - float val; - - int A=50; - int B=2; - - - - - if ( r1> 0 && r2 >0) {r1--; r2--;} - else return 0; - - /*Make up the referencial*/ - pep1=(CL->T[s1])->peptide_chain; - pep2=(CL->T[s2])->peptide_chain; - - /*Get Referencial for CA1*/ - if ( (pep1[r1])->C)vX_1 =diff_atom(pep1[r1]->C,pep1[r1]->CA, vX_1); - if ( (pep1[r1])->N)vY_1 =diff_atom(pep1[r1]->N,pep1[r1]->CA, vY_1); - if ( (pep1[r1])->CB)vZ_1=diff_atom(pep1[r1]->CB,(pep1[r1])->CA,vZ_1); - else vZ_1=add_atom (vX_1, vY_1, vZ_1); - - - - - - /*Get Referencial for CA2*/ - if ( (pep2[r2])->C)vX_2 =diff_atom((pep2[r2])->C,(pep2[r2])->CA, vX_2); - if ( (pep2[r2])->N)vY_2 =diff_atom((pep2[r2])->N,(pep2[r2])->CA, vY_2); - if ( (pep2[r2])->CB)vZ_2=diff_atom((pep2[r2])->CB,(pep2[r2])->CA, vZ_2); - else vZ_2=add_atom (vX_2, vY_2, vZ_2); - - - - - /*END OF GETTING REFERENCIAL*/ - - /*Test - if ( r1>1 && r2>1) - { - fprintf (stdout,"\n\t*******"); - - fprintf (stdout, "RESIDUE %d %c", r1, (CL->S)->seq[s1][r1]); - if ( (pep1[r1])->CA)fprintf (stdout,"\n\tCA ");print_atom (pep1[r1]->CA ); - if ( (pep1[r1])->C)fprintf (stdout,"\n\tC ");print_atom (pep1[r1]->C ); - if ( (pep1[r1])->N)fprintf (stdout,"\n\tN ");print_atom (pep1[r1]->N ); - if ( (pep1[r1])->CB)fprintf (stdout,"\n\tCB ");print_atom (pep1[r1]->CB ); - fprintf (stdout,"\n\t*******"); - fprintf (stdout,"\n\tvX ");print_atom ( vX_1); - fprintf (stdout,"\n\tvY ");print_atom ( vY_1); - fprintf (stdout,"\n\tvZ ");print_atom ( vZ_1); - - ca1= copy_atom ((pep1[r1-1])->CA, ca1); - ca1 =diff_atom(ca1,(pep1[r1])->CA, ca1); - fprintf (stdout,"\n\tca ");print_atom ( ca1); - fprintf ( stdout, "\n\tSQ1=%d ", (int)square_atom(ca1)); - ca1=reframe_atom(vX_1, vY_1, vZ_1, ca1, ca1); - fprintf ( stdout, "\n\tSQ2=%d ", (int)square_atom(ca1)); - fprintf (stdout,"\n\tca ");print_atom ( ca1); - fprintf (stdout,"\n\n"); - } - */ - - l1=nbs1->nb[r1][0]; - l2=nbs2->nb[r2][0]; - - if (table_size< (MAX(l1, l2)+1)) - { - table_size=MAX(l1, l2)+1; - if ( table)free_float (table, -1); - table=NULL; - } - if ( !table) table=declare_float (table_size, table_size); - - - max=MAX(l1, l2)*(A/B); - - if ( max==0)return 0; - - - table[0][0]=0; - for ( b=1; b<=l2; b++) - { - table[0][b]=0; - } - - for ( a=1; a<=l1; a++) - { - ca1=copy_atom ((CL->T[s1])->structure[nbs1->nb[r1][a]], ca1); - ca1=diff_atom(ca1,(pep1[r1])->CA, ca1); - ca1=reframe_atom(vX_1, vY_1, vZ_1, ca1, ca1); - - table[a][0]=0; - for ( b=1; b<=l2 ; b++) - { - ca2 =copy_atom((CL->T[s2])->structure[nbs2->nb[r2][b]], ca2); - ca2 =diff_atom(ca2,(pep2[r2])->CA, ca2); - ca2 =reframe_atom(vX_2, vY_2, vZ_2, ca2, ca2); - - ca2=diff_atom(ca2,ca1,ca2); - val=square_atom (ca2); - - val=(float)sqrt ((double)val); - - delta=A/(val+B); - - - del=table[a-1][b]; - ins=table[a][b-1]; - sub= table[a-1][b-1]+delta; - - if ( del >= ins && del >= sub)score=del; - else if ( ins >= del && ins >= sub) score=ins; - else score=sub; - table[a][b]=score; - } - } - - - score=(((score*100))/(max)-50); - - - return score; - } - -/*********************************************************************************************/ -/* */ -/* APDB */ -/* */ -/*********************************************************************************************/ -float **** irmsdmin_apdb_filtration ( Alignment *A, float ****residues, Constraint_list *CL, Pdb_param *PP, FILE *fp) -{ - int s1, s2, a,col1, n,n2=0, t,flag; - int **pos, **list; - float nirmsd, min_nirmsd,max_nirmsd,ref_sum, sum, sum2; - float **normalized_len; - - normalized_len=declare_float (A->nseq+1, A->nseq+1); - for (s1=0; s1nseq; s1++) - { - int l1, l2, r1, r2, p; - for (s2=0; s2nseq; s2++) - { - for ( l1=l2=p=0; p< A->len_aln; p++) - { - r1=A->seq_al[s1][p]; - r2=A->seq_al[s2][p]; - if (!is_gap(r1) && isupper(r1))l1++; - if (!is_gap(r2) && isupper(r2))l2++; - } - normalized_len[s1][s2]=MIN(l1,l2); - } - } - - pos=aln2pos_simple (A, A->nseq); - for ( s1=0; s1< A->nseq; s1++) - for ( s2=0; s2nseq; s2++) - { - if ( s1==s2) continue; - else if (!(CL->T[A->order[s1][0]]) || !(CL->T[A->order[s2][0]]))continue; - - list=declare_int (A->len_aln, 2); - - for ( sum=0,n=0,col1=0; col1< A->len_aln; col1++) - { - if ( islower (A->seq_al[s1][col1]) || islower ( A->seq_al[s2][col1]))continue; - else if ( pos[s1][col1]<=0 || pos[s2][col1]<=0 ) continue; - else if ( residues[s1][s2][pos[s1][col1]-1][0]==0)continue; - - list[n][0]=pos[s1][col1]-1; - list[n][1]=(int)100000*residues[s1][s2][pos[s1][col1]-1][4]; - sum2+=residues[s1][s2][pos[s1][col1]-1][4]; - n++; - } - - if (n==0)return residues; - - sort_int_inv (list, 2, 1,0, n-1); - for (sum=0,a=0; a=max_nirmsd) && flag==1)break; - n2=a; - } - - sum=ref_sum; - for (a=0; anirmsd_graph) - { - fprintf ( stdout, "\n_NIRMSD_GRAPH %s %s POS: %4d Removed: %4d NiRMSD: %.2f", A->name[s1], A->name[s2], list[a][0],a,(nirmsd/100000)*normalized_len[s1][s2]); - } - } - } - - if ( PP->print_rapdb) - { - for ( a=0; a0 && a<=t)fprintf ( stdout, "\nRAPDB QUANTILE REMOVE S1: %3d S2: %3d COL: %3d SCORE*100: %d", s1, s2, list[a][0], list[a][1]); - else if ( list[a][1]>0 && a>t)fprintf ( stdout, "\nRAPDB QUANTILE KEEP S1: %3d S2: %3d COL: %3d SCORE*100: %d", s1, s2, list[a][0], list[a][1]); - } - } - - fprintf ( stdout, "\n# MINIMISATION FILTER ON: NiRMSD minimsation resulted in the removal of %d [out of %d] Columns On the alignment %s Vs %s\n", t, n, A->name[s1], A->name[s2]); - for ( a=0; a<=t; a++) - { - - residues[s1][s2][list[a][0]][0]=0; - residues[s1][s2][list[a][0]][1]=0; - residues[s1][s2][list[a][0]][2]=0; - residues[s1][s2][list[a][0]][3]=0; - residues[s1][s2][list[a][0]][4]=-1; - - } - - free_int (list, -1); - } - free_float (normalized_len, -1); - return residues; -} -float **** quantile_apdb_filtration ( Alignment *A, float ****residues, Constraint_list *CL, Pdb_param *PP,FILE *fp) -{ - int s1, s2, a,col1, n, t; - int **pos, **list; - - pos=aln2pos_simple (A, A->nseq); - for ( s1=0; s1< A->nseq; s1++) - for ( s2=0; s2nseq; s2++) - { - if ( s1==s2) continue; - else if (!(CL->T[A->order[s1][0]]) || !(CL->T[A->order[s2][0]]))continue; - - list=declare_int (A->len_aln, 2); - - for ( n=0,col1=0; col1< A->len_aln; col1++) - { - if ( islower (A->seq_al[s1][col1]) || islower ( A->seq_al[s2][col1]))continue; - else if ( pos[s1][col1]<=0 || pos[s2][col1]<=0 ) continue; - - list[n][0]=pos[s1][col1]-1; - list[n][1]=(int)100*residues[s1][s2][pos[s1][col1]-1][4]; - n++; - - } - - sort_int_inv (list, 2, 1,0, n-1); - - t=quantile_rank ( list,1, n,PP->filter); - - if ( PP->print_rapdb) - { - for ( a=0; a0 && a0 && a>t)fprintf ( stdout, "\nRAPDB QUANTILE KEEP S1: %3d S2: %3d COL: %3d SCORE*100: %d", s1, s2, list[a][0], list[a][1]); - } - } - - for ( a=0; aCL; - - for ( s1=0; s1< (A->S)->nseq; s1++) - if ( CL->T[s1]){PP=(CL->T[s1])->pdb_param;break;} - - if (PP->irmsd_graph)irmsd_graph =vfopen ((irmsd_file =vtmpnam (NULL)),"w"); - - fprintf ( fp, "\nAPDB_RESULT_FORMAT_02\n"); - residues=analyse_pdb_residues ( A, A->CL,PP); - if ( PP->filter>=0)residues=quantile_apdb_filtration (A, residues, A->CL,PP, fp); - else if ( PP->filter<0)residues=irmsdmin_apdb_filtration (A, residues, A->CL,PP, fp); - - pos=aln2pos_simple (A, A->nseq); - - - - - - /*Compute the alignment length for normalization*/ - normalize_len=declare_float (A->nseq+1, A->nseq+1); - for (s1=0; s1nseq; s1++) - { - int l1, l2, r1, r2; - for (s2=0; s2nseq; s2++) - { - for ( l1=l2=p=0; p< A->len_aln; p++) - { - r1=A->seq_al[s1][p]; - r2=A->seq_al[s2][p]; - if (!is_gap(r1) && isupper(r1))l1++; - if (!is_gap(r2) && isupper(r2))l2++; - } - normalize_len[s1][s2]=MIN(l1,l2); - } - } - - msa_len=msa_tot=msa_m1=msa_m2=msa_m3=msa_m4=msa_m5=0; - - for ( s1=0; s1< A->nseq; s1++) - { - if ( !(CL->T[A->order[s1][0]]))continue; - seq_len=seq_tot=seq_m1=seq_m2=seq_m3=seq_m4=seq_m5=0; - for ( s2=0; s2< A->nseq; s2++) - { - if ( s1==s2)continue; - if ( !(CL->T[A->order[s2][0]]))continue; - pair_tot=pair_m1=pair_m2=pair_m3=pair_m4=pair_m5=0; - for ( p=0; p< A->len_aln; p++) - { - r1=A->seq_al[s1][p]; - r2=A->seq_al[s2][p]; - b=pos[s1][p]-1; - - - if (PP->filter_aln) - { - if (is_gap(r1) || is_gap(r2) || residues[s1][s2][b][0]==0) - { - A->seq_al[s1][p]=tolower(r1); - A->seq_al[s2][p]=tolower(r2); - } - else - { - A->seq_al[s1][p]=toupper(r1); - A->seq_al[s2][p]=toupper(r2); - } - - } - - if ( PP->irmsd_graph && ( is_gap(r1) || is_gap(r2) || residues[s1][s2][b][0]==0)) - { - - fprintf ( irmsd_graph, "\n_IRMSD_GRAPH %10s %10s ALN: %c%c iRMSD: -1.00", A->name[s1], A->name[s2],A->seq_al[s1][p], A->seq_al[s2][p]); - } - - if (is_gap(r1) || is_gap(r2) || residues[s1][s2][b][0]==0)continue; - pair_tot++; - - /*APDB*/ - m2=(residues[s1][s2][b][2]*100)/residues[s1][s2][b][0]; - if (m2>PP->similarity_threshold){pair_m3++;} - - /*iRMSD*/ - - m4=residues[s1][s2][b][4]; - - if ( PP->irmsd_graph ) - { - fprintf ( irmsd_graph, "\nIRMSD_GRAPH %10s %10s ALN: %c%c iRMSD: %.2f", A->name[s1], A->name[s2],A->seq_al[s1][p], A->seq_al[s2][p], m4); - } - pair_m4+=m4; - } - pair_len=normalize_len[s1][s2]; - if ( s1>s2) - { - - fprintf ( pairwise, "\n\n#PAIRWISE: %s Vs %s",A->name[s1], A->name[s2]); - fprintf ( pairwise, "\n\tPAIRWISE EVALUATED: %6.2f %% [%s Vs %s] ", (pair_len==0)?-1:(pair_tot*100)/pair_len,A->name[s1], A->name[s2]); - fprintf ( pairwise, "\n\tPAIRWISE APDB: %6.2f %% [%s Vs %s] ", (pair_tot==0)?-1:(pair_m3*100)/pair_tot,A->name[s1], A->name[s2]); - fprintf ( pairwise, "\n\tPAIRWISE iRMSD: %6.2f Angs [%s Vs %s]", (pair_tot==0)?-1:pair_m4/pair_tot,A->name[s1], A->name[s2]); - fprintf ( pairwise, "\n\tPAIRWISE NiRMSD: %6.2f Angs [%s Vs %s] [%d pos]", (pair_tot==0)?-1:(pair_m4*pair_len)/(pair_tot*pair_tot), A->name[s1], A->name[s2], (int)pair_tot); - fprintf ( pairwise, "\n\tRAPDB PAIRS PAIRWISE N_NONEMPTY_PAIRS %d N_MAXIMUM_PAIRS %d",(int) pair_tot, (int)pair_len); - msa_m3+=pair_m3; - msa_m4+=pair_m4; - msa_tot+=pair_tot; - msa_len+=pair_len; - } - seq_m3+=pair_m3; - seq_m4+=pair_m4; - seq_tot+=pair_tot; - seq_len+=pair_len; - - } - - fprintf ( average, "\n\n#AVERAGE For Sequence %s", A->name[s1]); - fprintf ( average, "\n\tAVERAGE EVALUATED: %6.2f %% [%s]", (seq_len==0)?-1:(seq_tot*100)/seq_len, A->name[s1]); - fprintf ( average, "\n\tAVERAGE APDB: %6.2f %% [%s]", (seq_tot==0)?-1:(seq_m3*100)/seq_tot, A->name[s1]); - fprintf ( average, "\n\tAVERAGE iRMSD: %6.2f Angs [%s]", (seq_tot==0)?-1:seq_m4/seq_tot, A->name[s1]); - fprintf ( average, "\n\tAVERAGE NiRMS: %6.2f Angs [%s]", (seq_tot==0)?-1:(seq_m4*seq_len)/(seq_tot*seq_tot), A->name[s1]); - if ( strm (PP->color_mode, "apdb"))ST->score_seq[s1]=(seq_tot==0)?-1:(seq_m3*100)/pair_tot; - if (PP->print_rapdb)fprintf (average, "\n\tRAPDB PAIRS AVERAGE N_NONEMPTY_PAIRS %d N_MAXIMUM_PAIRS %d", (int)pair_tot, (int)pair_len); - - if ( strm (PP->color_mode, "irmsd"))ST->score_seq[s1]=(seq_tot==0)?-1:10*((seq_m4*pair_len)/(seq_tot*seq_tot)); - - } - fprintf ( total, "\n\n#TOTAL for the Full MSA"); - fprintf ( total, "\n\tTOTAL EVALUATED: %6.2f %% ", (msa_len==0)?-1:(msa_tot*100)/msa_len); - fprintf ( total, "\n\tTOTAL APDB: %6.2f %% ", (msa_tot==0)?-1:(msa_m3*100)/msa_tot); - fprintf ( total, "\n\tTOTAL iRMSD: %6.2f Angs", (msa_tot==0)?-1:msa_m4/msa_tot); - fprintf ( total, "\n\tTOTAL NiRMSD: %6.2f Angs", (msa_tot==0)?-1:(msa_m4*msa_len)/(msa_tot*msa_tot)); - if (PP->print_rapdb)fprintf (total, "\n\tRAPDB PAIRS TOTAL N_NONEMPTY_PAIRS: %d N_MAXIMUM_PAIRS %d", (int)msa_tot, (int)msa_len); - - if ( strm (PP->color_mode, "apdb")) ST->score_aln=ST->score=A->score_aln=A->score=(msa_tot==0)?-1:(msa_m3*100)/msa_tot; - if ( strm (PP->color_mode, "irmsd"))ST->score_aln=ST->score=A->score_aln=A->score=(msa_tot==0)?-1:10*((msa_m4*msa_len)/(msa_tot*msa_tot)); - - vfclose (average);vfclose (total); vfclose (pairwise);if (PP->irmsd_graph)vfclose (irmsd_graph); - fp=display_file_content (fp, pairwise_file); - fp=display_file_content (fp, average_file); - fp=display_file_content (fp, total_file); - if ( PP->irmsd_graph)fp=display_file_content (fp, irmsd_file); - - fprintf ( fp, "\n\n# EVALUATED: Fraction of Pairwise Columns Evaluated\n"); - fprintf ( fp, "# APDB: Fraction of Correct Columns according to APDB\n"); - fprintf ( fp, "# iRMDS: Average iRMSD over all evaluated columns\n"); - fprintf ( fp, "# NiRMDS: iRMSD*MIN(L1,L2)/Number Evaluated Columns\n"); - fprintf ( fp, "# Main Parameter: -maximum_distance %.2f Angstrom\n", PP->maximum_distance); - - fprintf ( fp, "# Undefined values are set to -1 and indicate LOW Alignment Quality\n"); - fp=print_program_information (fp, NULL); - - - - - /*Color Output*/ - for (iRMSD_max=0,iRMSD_min=10000,s1=0; s1nseq; s1++) - for ( s2=0; s2< A->nseq; s2++) - for (p=0; plen_aln; p++) - { - if ( residues[s1][s2][p][4]>0) - { - iRMSD_max=MAX(iRMSD_max, residues[s1][s2][p][4]); - iRMSD_min=MAX(iRMSD_min, residues[s1][s2][p][4]); - } - - } - iRMSD_unit=iRMSD_max/8; - - for (p=0; p< A->len_aln; p++) - for ( s1=0; s1< A->nseq; s1++) - { - - for ( p=0; p< A->len_aln; p++) - { - r1=A->seq_al[s1][p]; - b=pos[s1][p]-1; - if ( is_gap(r1) || !(CL->T[A->order[s1][0]])) - ST->seq_al[s1][p]=NO_COLOR_RESIDUE; - else - { - float tot_m2=0, tot_m4=0, v=0; - seq_m2=seq_m4=0; - - for (s2=0; s2< A->nseq; s2++) - { - r2=A->seq_al[s1][p]; - if ( s1==s2) continue; - if (is_gap(r2) || !(CL->T[A->order[s1][0]]) || residues[s1][s2][b][0]==0)continue; - - seq_m2+=m2=(residues[s1][s2][b][2]*100)/residues[s1][s2][b][0]; - tot_m2++; - - m4=residues[s1][s2][b][4]; - if (m4>=0) - { - seq_m4+=m4; - tot_m4++; - } - } - - if (strm ( PP->color_mode, "apdb")) - { - if (tot_m2==0)v=NO_COLOR_RESIDUE; - else v=MIN((seq_m2/(10*tot_m2)),9); - } - else if ( strm (PP->color_mode, "irmsd")) - { - if ( tot_m4==0)v=NO_COLOR_RESIDUE; - else v=(8-(int)((seq_m4/(iRMSD_unit*tot_m4))))+1; - } - ST->seq_al[s1][p]=v; - - } - } - } - for ( p=0; plen_aln; p++) ST->seq_al[A->nseq][p]=NO_COLOR_RESIDUE; - - - ST->generic_comment=vcalloc ( 100, sizeof (int)); - if ( strm (PP->color_mode, "apdb")) - { - sprintf ( ST->generic_comment, "# APDB Evaluation: Color Range Blue-[0 %% -- 100 %%]-Red\n# Sequence Score: APDB\n# Local Score: APDB\n\n"); - } - else if ( strm (PP->color_mode, "irmsd")) - { - sprintf ( ST->generic_comment, "\n# iRMSD Evaluation:\n# Sequence score: NiRMSD (Angstrom*10)\n# Local Score: iRMSD, Blue-[%.2f Ang. -- 0.00 Ang.]-Red \n", iRMSD_max); - } - - fprintf ( fp, "\n"); - vfclose (fp); - free_int (pos, -1); - return ST; - } -float **** analyse_pdb_residues ( Alignment *A, Constraint_list *CL, Pdb_param *pdb_param) - { - - int **pos; - int s1, s2, rs1, rs2; - int col1, col2; - float ****distances; - - /*Distances[Nseq][len_aln][4] - distances...[0]: Number of residues within the bubble - distances...[1]: Absolute difference of distance of residues within Bubble - distances...[2]: Number of residues within the bubble with Delta dist < md_threshold - distances ..[3]: Sum of squared difference of distances - distances ..[4]: iRMSD - */ - float d1, d2,delta; - int wd1, wd2; - int in_bubble=0; - int real_res1_col1=0; - int real_res1_col2; - int real_res2_col1; - int real_res2_col2; - Pdb_param *PP; - int print_rapdb; - float nrapdb, rapdb; - Alignment *BA=NULL; - - PP=pdb_param; - print_rapdb=PP->print_rapdb; - - distances=declare_arrayN(4, sizeof (float), A->nseq, A->nseq, 0, 0); - - /*Pre-computation of the internal distances----> T[seq]->ca_dist[len][len]*/ - /*Can be avoided if distance_on_request set to 1 */ - - for ( s1=0; s1< A->nseq; s1++) - { - rs1=A->order[s1][0]; - if (CL->T[rs1] && !(CL->T[rs1])->ca_dist)(CL->T[rs1])->ca_dist=measure_ca_distances(CL->T[rs1]); - for ( s2=0; s2< A->nseq; s2++) - { - distances[s1][s2]=declare_float ( A->len_aln, 6); - } - } - pos=aln2pos_simple (A, A->nseq); - - for ( s1=0; s1< A->nseq; s1++) - for ( col1=0; col1< A->len_aln; col1++) - for ( s2=0; s2nseq; s2++) - { - rs1=A->order[s1][0]; - rs2=A->order[s2][0]; - rapdb=0; - nrapdb=0; - if ( s1==s2) continue; - else if (!(CL->T[rs1]) || !(CL->T[rs2]))continue; - else if ( islower (A->seq_al[s1][col1]) || islower ( A->seq_al[s2][col1]))continue; - else if ( pos[s1][col1]<=0 || pos[s2][col1]<=0 ) continue; - - if ( print_rapdb && s2>s1) - { - - fprintf ( stdout, "RAPDB S1: %s S2: %s POS %d %d %c %d %c ", A->name[s1], A->name[s2], col1+1, pos[s1][col1],A->seq_al[s1][col1], pos[s2][col1],A->seq_al[s2][col1]); - BA=copy_aln (A, BA); - lower_string (BA->seq_al[s1]); - lower_string (BA->seq_al[s2]); - BA->seq_al[s1][col1]=toupper (BA->seq_al[s1][col1]); - BA->seq_al[s2][col1]=toupper (BA->seq_al[s2][col1]); - } - - for ( col2=0; col2len_aln; col2++) - { - - if (pos[s1][col2]<=0 || pos[s2][col2]<=0 )continue; - else if ( FABS((pos[s1][col2]-pos[s1][col1]))<=PP->n_excluded_nb)continue; - else if ( FABS((pos[s2][col2]-pos[s2][col1]))<=PP->n_excluded_nb)continue; - else if ( islower (A->seq_al[s1][col2]) || islower ( A->seq_al[s2][col2]))continue; - - real_res1_col1=pos[s1][col1]-1; - real_res1_col2=pos[s1][col2]-1; - - real_res2_col1=pos[s2][col1]-1; - real_res2_col2=pos[s2][col2]-1; - - d1=(CL->T[rs1])->ca_dist[real_res1_col1][real_res1_col2]; - d2=(CL->T[rs2])->ca_dist[real_res2_col1][real_res2_col2]; - - if ( d1==UNDEFINED || d2 == UNDEFINED) continue; - - - - if ( strm ( PP->local_mode, "sphere")) - { - in_bubble= (d1maximum_distance && d2maximum_distance)?1:0; ; - } - else if ( strm ( PP->local_mode, "window")) - { - wd1=FABS((pos[s1][col2]-pos[s1][col1])); - wd2=FABS((pos[s2][col2]-pos[s2][col1])); - in_bubble= (wd1maximum_distance && wd2maximum_distance)?1:0; ; - } - - if (in_bubble) - { - if ( print_rapdb && s2 >s1) - { - fprintf ( stdout, "NB %d %d %c %d %c ", col2, pos[s1][col2], A->seq_al[s1][col2], pos[s2][col2], A->seq_al[s2][col2]); - BA->seq_al[s1][col2]=toupper (BA->seq_al[s1][col2]); - BA->seq_al[s2][col2]=toupper (BA->seq_al[s2][col2]); - } - delta=FABS((d1-d2)); - if (deltamd_threshold) - distances[s1][s2][real_res1_col1][2]++; - distances[s1][s2][real_res1_col1][1]+=delta; - distances[s1][s2][real_res1_col1][0]++; - distances[s1][s2][real_res1_col1][3]+=delta*delta; - nrapdb++; - rapdb+=delta*delta; - } - } - - if ( nrapdb==0)distances[s1][s2][real_res1_col1][4]=-1; - else distances[s1][s2][real_res1_col1][4]=(float)sqrt((double)(rapdb/nrapdb)); - - if ( print_rapdb && s2>s1) - { - if (nrapdb==0) - { - fprintf ( stdout, "APDB: UNDEFINED\n"); - } - else - { - - fprintf ( stdout, " APDB: %.2f ",(float)sqrt((double)(rapdb/nrapdb))); - BA->residue_case=KEEP_CASE;unalign_residues (BA, s1, s2); - fprintf ( stdout,"SEQ1: %s %s SEQ2: %s %s\n", BA->name[s1], BA->seq_al[s1], BA->name[s2], BA->seq_al[s2]); - } - } - - } - - free_aln (BA); - free_int (pos, -1); - return distances; - } - - - -Alignment * msa2struc_dist ( Alignment *A, Alignment *ST, char *results) - { - - int **pos, c; - FILE *tl; - int s1, s2, rs1, rs2; - int col1, col2; - float ****distances; - float **dm; - int **count; - int **dm_int; - float min, max; - - /*Distances[Nseq][len_aln][4] - distances...[0]: Number of residues within the bubble - distances...[1]: Absolute difference of distance of residues within Bubble - distances...[2]: Number of residues within the bubble with Delta dist < md_threshold - distances ..[3]: Sum of squared difference of distances - distances ..[4]: iRMSD - */ - Pdb_param *pdb_param; - Constraint_list *CL; - int a, b, ncol; - float d1, d2,delta; - int wd1, wd2; - int in_bubble=0; - int real_res1_col1=0; - int real_res1_col2; - int real_res2_col1; - int real_res2_col2; - Pdb_param *PP; - int print_rapdb; - float nrapdb, rapdb; - Alignment *BA=NULL; - NT_node *T0,*T1,*T2,*PT, *POS; - NT_node BT0, BT10,BT50, BT100,RBT; - char **pair_pos_list; - - int ntree=0, ntree2; - - Alignment *B; - char *pos_list; - char *tot_pos_list; - char *struc_tree10; - char *struc_tree100; - char *struc_tree50; - char *struc_tree0; - - char *color_struc_tree; - int **score; - int proceed=1; - - - - declare_name(tot_pos_list); - sprintf ( tot_pos_list, "%s.tot_pos_list", results); - - declare_name(pos_list); - sprintf ( pos_list, "%s.pos_list", results); - - declare_name(struc_tree0); - sprintf ( struc_tree0, "%s.struc_tree_full",results); - - declare_name(struc_tree10); - sprintf ( struc_tree10, "%s.struc_tree10",results); - - declare_name(struc_tree100); - sprintf ( struc_tree100, "%s.struc_tree100",results); - - declare_name(struc_tree50); - sprintf ( struc_tree50, "%s.struc_tree50",results); - - declare_name(color_struc_tree); - sprintf ( color_struc_tree, "%s.struc_tree.html", results); - - pair_pos_list=declare_char (A->len_aln*A->len_aln+1, 100); - T1=vcalloc (A->len_aln*A->len_aln+1, sizeof (NT_node)); - T2=vcalloc (A->len_aln+1, sizeof (NT_node)); - - PT=vcalloc (A->len_aln*A->len_aln+1, sizeof (NT_node)); - POS=vcalloc (A->len_aln+1, sizeof (NT_node)); - - CL=A->CL; - - //Check all sequences have a PDB structure - for (a=0; anseq; a++) - { - if ( ! seq2P_template_file(A->S,a)) - { - fprintf ( stderr, "\n--- ERROR: %s has no structural template. All sequence in the MSA must have a known structure [FATAL]\n", (A->name[a])); - proceed=0; - } - } - if (!proceed) - printf_exit (EXIT_FAILURE, stderr, "\n\n---- ERROR: All provided sequences must have a valid PDB identifier [FATAL:tRMSD-%s]\n\n", PROGRAM); - - for ( s1=0; s1< (A->S)->nseq; s1++) - if ( CL->T[s1]){PP=(CL->T[s1])->pdb_param;break;} - - for ( s1=0; s1< A->nseq; s1++) - { - rs1=A->order[s1][0]; - if (CL->T[rs1] && !(CL->T[rs1])->ca_dist)(CL->T[rs1])->ca_dist=measure_ca_distances(CL->T[rs1]); - } - pos=aln2pos_simple (A, A->nseq); - dm=declare_float (A->nseq, A->nseq); - dm_int=declare_int (A->nseq, A->nseq); - count=declare_int (A->nseq, A->nseq); - PP->maximum_distance=10000; - - tl=vfopen (tot_pos_list, "w"); - for (ncol=0,ntree=0, col1=0; col1< A->len_aln; col1++) - { - int tree, cont; - output_completion (stderr, col1, A->len_aln,1, "Sample Columns"); - for (cont=1,s1=0; s1nseq; s1++)if ( is_gap (A->seq_al[s1][col1]))cont=0;//Stop if gap in column 1 - - if ( cont==0)continue; - - for (s1=0; s1nseq; s1++)for ( s2=0; s2nseq; s2++){count[s1][s2]=0;dm[s1][s2]=0;} - - for ( ntree2=0,col2=0; col2len_aln; col2++) - { - for (s1=0; s1< A->nseq-1; s1++) - { - for ( s2=s1+1; s2nseq; s2++) - { - rs1=A->order[s1][0]; - rs2=A->order[s2][0]; - cont=1; - - if ( s1==s2){dm[s1][s2]=0;continue;} - else if (!(CL->T[rs1]) || !(CL->T[rs2])){cont=0;} - else if ( islower (A->seq_al[s1][col1]) || islower ( A->seq_al[s2][col1])){cont=0;} - else if ( pos[s1][col1]<=0 || pos[s2][col1]<=0 ){cont=0;} - if (pos[s1][col2]<=0 || pos[s2][col2]<=0 ){cont=0;}//stop if Gap in Column 2 - else if ( FABS((pos[s1][col2]-pos[s1][col1]))<=PP->n_excluded_nb){cont=0;} - else if ( FABS((pos[s2][col2]-pos[s2][col1]))<=PP->n_excluded_nb){cont=0;} - else if ( islower (A->seq_al[s1][col2]) || islower ( A->seq_al[s2][col2])){cont=0;} - if ( cont==0){continue;} - - - real_res1_col1=pos[s1][col1]-1; - real_res1_col2=pos[s1][col2]-1; - - real_res2_col1=pos[s2][col1]-1; - real_res2_col2=pos[s2][col2]-1; - - d1=(CL->T[rs1])->ca_dist[real_res1_col1][real_res1_col2]; - d2=(CL->T[rs2])->ca_dist[real_res2_col1][real_res2_col2]; - - if ( d1==UNDEFINED || d2 == UNDEFINED) continue; - - if ( strm ( PP->local_mode, "sphere")) - { - in_bubble= (d1maximum_distance && d2maximum_distance)?1:0; ; - } - else if ( strm ( PP->local_mode, "window")) - { - wd1=FABS((pos[s1][col2]-pos[s1][col1])); - wd2=FABS((pos[s2][col2]-pos[s2][col1])); - in_bubble= (wd1maximum_distance && wd2maximum_distance)?1:0; ; - } - if (in_bubble) - { - delta=FABS((d1-d2)); - //delta=delta*delta; - dm[s1][s2]=dm[s2][s1]+=delta; - count[s1][s2]++; - count[s2][s1]++; - } - } - } - } - - - min=max=-1; - for (tree=1,s1=0; s1nseq-1; s1++) - for (s2=s1+1; s2nseq; s2++) - { - if ( count [s1][s2])dm[s1][s2]=dm[s2][s1]=dm[s1][s2]/(float)count[s1][s2]; - else - { - tree=0; - } - if (s1==0 && s2==1)min=max=dm[s1][s2]; - min=MIN(dm[s1][s2], min); - max=MAX(dm[s1][s2], max); - } - if (!tree || min==-1)continue; - for (s1=0; s1nseq-1; s1++) - for (s2=s1+1; s2nseq; s2++) - { - dm_int[s1][s2]=dm_int[s2][s1]=((dm[s1][s2])/(max))*100; - } - POS[col1]=T1[ntree]=compute_std_tree_2 ( A, dm_int, "_TMODE_upgma"); - fprintf (tl, "\n>Tree_%d Column\n", col1+1); - print_tree (T1[ntree], "newick", tl); - ntree++; - } - - vfclose (tl); - if (!ntree) - { - fprintf ( stderr, "\nERROR: No suitable pair of column supporting a tree [FATAL]\n", (A->name[a])); - exit (EXIT_SUCCESS); - } - - score=treelist2avg_treecmp (T1, NULL); - - display_output_filename( stdout,"TreeList","newick",tot_pos_list, CHECK); - - if ((BT10=treelist2filtered_bootstrap (T1, NULL,score, 0.1))) - { - vfclose (print_tree (BT10,"newick", vfopen (struc_tree10, "w"))); - display_output_filename( stdout,"Tree","newick",struc_tree10, CHECK); - } - - if ((BT50=treelist2filtered_bootstrap (T1, NULL, score,0.5))) - { - vfclose (print_tree (BT50,"newick", vfopen (struc_tree50, "w"))); - display_output_filename( stdout,"Tree","newick",struc_tree50, CHECK); - } - - if ((BT100=treelist2filtered_bootstrap (T1, NULL,score, 1.0))) - { - vfclose (print_tree (BT100,"newick", vfopen (struc_tree100, "w"))); - display_output_filename( stdout,"Tree","newick",struc_tree100, CHECK); - } - - - RBT=BT100; - if (RBT) - { - B=copy_aln (A, NULL); - for (a=0; alen_aln; a++) - { - int score; - Tree_sim *S; - - if (POS[a]) - { - S=tree_cmp (POS[a], RBT); - score=S->uw/10; - vfree (S); - } - else - { - score=NO_COLOR_RESIDUE; - } - - for (b=0; bnseq; b++) - { - if ( is_gap (B->seq_al[b][a]) || score == NO_COLOR_RESIDUE) - { - B->seq_al[b][a]=NO_COLOR_RESIDUE; - } - else - { - B->seq_al[b][a]=S->uw/10; - } - } - } - - output_format_aln ("score_html", A,B,color_struc_tree); - display_output_filename( stdout,"Colored MSA","score_html",color_struc_tree, CHECK); - free_aln (BA); - } - free_int (pos, -1); - exit (EXIT_SUCCESS); - return NULL; - } - -float square_atom ( Atom *X) -{ - - return X->x*X->x + X->y*X->y + X->z*X->z; -} -Atom* reframe_atom ( Atom *X, Atom*Y, Atom *Z, Atom *IN, Atom *R) - { - float new_x, new_y, new_z; - - if ( R==NULL)R=vcalloc ( 1, sizeof (Atom)); - - - new_x= X->x*IN->x + Y->x*IN->y +Z->x*IN->z; - new_y= X->y*IN->x + Y->y*IN->y +Z->y*IN->z; - new_z= X->z*IN->x + Y->z*IN->y +Z->z*IN->z; - - R->x=new_x; - R->y=new_y; - R->z=new_z; - return R; - } - -Atom* add_atom ( Atom *A, Atom*B, Atom *R) -{ - if ( R==NULL)R=vcalloc ( 1, sizeof (Atom)); - - R->x=A->x+B->x; - R->y=A->y+B->y; - R->z=A->z+B->z; - - return R; -} -Atom* diff_atom ( Atom *A, Atom*B, Atom *R) -{ - if ( R==NULL)R=vcalloc ( 1, sizeof (Atom)); - - R->x=A->x-B->x; - R->y=A->y-B->y; - R->z=A->z-B->z; - - return R; -} - -Atom * copy_atom ( Atom *A, Atom*R) -{ - if ( R==NULL)R=vcalloc ( 1, sizeof (Atom)); - R->num=A->num; - R->res_num=A->res_num; - R->x=A->x; - R->y=A->y; - R->z=A->z; - - sprintf( R->type, "%s", A->type); - return R; -} - void print_atom (Atom *A) -{ - fprintf ( stdout, "%.2f %.2f %.2f", A->x, A->y, A->z); -} -/************************************************************************/ -/* */ -/* NUSSINOV */ -/* */ -/************************************************************************/ - -/*---------prototypes ----------*/ -static void computeBasePairMatrix(int**M,char*S,int l, int T); -static int backtrack(int a,int b,int**M,char*S,char*P, int T); - - - -static int basePair(char x, char y) -{ - static short **mat; - - if (!mat) - { - char alp[20]; - int a, b, c1, c2, lc1, lc2; - mat=declare_short (256, 256); - sprintf ( alp, "AGCTUagctu"); - for (a=0; amax ){ - max = numBasePairs[i][j-1]; - index = n; - // j not basepaired with some k such that i max ){ - max = val; - index=i; - } - for(k=i; k<=j-THRESHOLD; k++){ - val = basePair(S[k],S[j]) + numBasePairs[i][k-1] - + numBasePairs[k+1][j-1]; - if (val > max) { - max = val; - index=k; - } - } - numBasePairs[i][j]=max; - if (index -#include -#include -// #include -#include -// #include - -#include "io_lib_header.h" -#include "util_lib_header.h" -#include "define_header.h" -#include "dp_lib_header.h" -#include "fastal_lib_header.h" -#include "fast_tree_header.h" - - -//TODO: seq_pair2diagonal delete num points from parameters -//TODO: reuse list - - -//Fastal_param *param_set; - - -/*! \mainpage T-Coffee Index Page - * - * \section intro_sec Introduction - * - * This is the introduction. - * - * \section install_sec Installation - * - * \subsection step1 Step 1: Opening the box - * - * etc... - * \section fastal_sec Fastal - * - * This program is a very fast aligner. It is capable of aligning huge sets of sequences because it keeps as much as necessary on hard disk. - */ - - - - - - - -/*! - * \file fastal.c - * \brief Source code for the fastal algorithm - */ - - -/** - * \brief Calculates scores for diagonal segments. - * - * \param seq1 Sequence 1 - * \param seq2 Sequence 2 - * \param *diagonals The diagonals. Three consecutive entries belong togehter. 1. pos in \a seq1 , 2. pos in \a seq2 and 3. length of diagonal - * \param num_diagonals Number of diagonals - * \param s1_length Length of \a seq1 - * \param list length of list. - * \param list An 2-dim array to save the scores in. - * \return new list - */ -int ** -diag2pair_list(char* seq1, - char* seq2, - int *diagonals, - int num_diagonals, - int ***list_in, - int *current_length, - int *current_num_points, - int additional_needed, - Fastal_param *param_set) -{ - int **mat = param_set->M; - int i, j, diag_length, pos1, pos2; - int **list = list_in[0]; - -// printf("NUM: %i\n",num_diagonals); - - int l1 = strlen(seq1), l2 = strlen(seq2); - int x = *current_num_points; - - - int s1_length = strlen(seq1); - int mini; - for (i = 0; i < num_diagonals; ++i) - { - pos1 = diagonals[i*3]; - pos2 = diagonals[i*3+1]; - diag_length = diagonals[i*3+2]; - mini = MIN(pos1, pos2); - pos1 -= mini; - pos2 -= mini; - while ((pos1 < l1) && (pos2 < l2)) - { - if (x==*current_length) - { - *current_length+=1000; - list=vrealloc (list,(*current_length)*sizeof(int*)); - } - if (!list[x]) - list[x]=vcalloc (7, sizeof (int)); - list[x][0] = pos1+1; - list[x][1] = pos2+1; - list[x][2] = mat[toupper(seq1[pos1])-'A'][toupper(seq2[pos2])-'A']; - - ++x; - ++pos1; - ++pos2; - } - } - *current_num_points = x; - list_in[0]=list; -} - -void -guessalignment(Fastal_profile prf) -{ - -} - -int -fastal_compare (const void * a, const void * b) -{ - return (*(int*)a - *(int*)b); -} - -int ** -diagonals2int(int *diagonals, - int num_diagonals, - char *seq1, - char *seq2, - int *num_points, - Fastal_param *param_set) -{ - int l1 = strlen(seq1); - int l2 = strlen(seq2); - int gep = param_set->gep; - - int current_size = l2+l1; - - int **list = vcalloc(current_size, sizeof(int*)); - int *diags = vcalloc(num_diagonals, sizeof(int)); - int i; -// printf("SEQ: %s\nSEQ:%s\n",seq1, seq2); -// printf("X: %i\n",num_diagonals); - for (i = 0; i < num_diagonals; ++i) - { - diags[i] = l1 - diagonals[i*3] + diagonals[i*3+1]; - } - - qsort (diags, num_diagonals, sizeof(int), fastal_compare); - - int *diagx = vcalloc(num_diagonals, sizeof(int)); - int *diagy = vcalloc(num_diagonals, sizeof(int)); - int *old_pos = vcalloc(num_diagonals, sizeof(int)); - - //+1 because diagonals start here at position 1, like in "real" dynamic programming - int a = 0, b = -1; - for (i = 0; i < num_diagonals; ++i) - { - - if (diags[i] < l1) - { - diagx[i] = l1 - diags[i]; - diagy[i] = 0; - a= i; - } - else - break; - } - ++a; - b=a-1; - for (; i < num_diagonals; ++i) - { - diagx[i] = 0; - diagy[i] = diags[i]-l1; - b = i; - } - - int tmpy_pos; - int tmpy_value; - int **M = param_set->M; - int *last_y = vcalloc(l2+1, sizeof(int)); - int *last_x = vcalloc(l1+1, sizeof(int)); - last_y[0] = 0; - - last_x[0] = 0; - list[0] = vcalloc(6, sizeof(int)); - - int list_pos = 1; - int dig_num = l1; - int tmp_l2 = l2 + 1; - - //left border - for (; list_pos < tmp_l2; ++list_pos) - { - list[list_pos] = vcalloc(6, sizeof(int)); - list[list_pos][0] = 0; - list[list_pos][1] = list_pos; - last_y[list_pos] = list_pos; - list[list_pos][2] = list_pos*gep; - list[list_pos][4] = list_pos-1; - } - - int pos_x = 0; - int diags_old = l2; - - int tmp = l1; - int y; - int tmp_l1 = l1-1; - while (pos_x < tmp_l1) - { - if (list_pos + num_diagonals+2 > current_size) - { - current_size += num_diagonals*1000; - list = vrealloc(list, current_size * sizeof(int*)); - } - //upper border - list[list_pos] = vcalloc(6, sizeof(int)); - list[list_pos][0] = ++pos_x; - list[list_pos][1] = 0; - list[list_pos][2] = pos_x * gep; - list[list_pos][3] = last_y[0]; - tmpy_value = list_pos; - tmpy_pos = 0; - last_x[pos_x] = list_pos; - ++list_pos; - - //diagonals - for (i = a; i <= b; ++i) - { - list[list_pos] = vcalloc(6, sizeof(int)); - - list[list_pos][0] = ++diagx[i]; - - list[list_pos][1] = ++diagy[i]; - list[list_pos][3] = last_y[diagy[i]]; - list[list_pos][4] = list_pos-1; - list[list_pos][5] = last_y[diagy[i]-1]; - list[list_pos][2] = M[toupper(seq1[diagx[i]-1])-'A'][toupper(seq2[diagy[i]-1])-'A']; - last_y[tmpy_pos] = tmpy_value; - tmpy_value = list_pos; - tmpy_pos = diagy[i]; - - ++list_pos; - } - last_y[tmpy_pos] = tmpy_value; - - - //lower border - if (list[list_pos-1][1] != l2) - { - list[list_pos] = vcalloc(6, sizeof(int)); - list[list_pos][0] = pos_x; - list[list_pos][1] = l2; - list[list_pos][3] = last_y[l2]; - - list[list_pos][2] = -1000; - list[list_pos][4] = list_pos-1; - if (pos_x > l2) - list[list_pos][5] = last_x[pos_x-l2]; - else - list[list_pos][5] = l2-pos_x; - last_y[l2] = list_pos; - ++list_pos; - - } - - - if ((b >= 0) && (diagy[b] == l2)) - --b; - - if ((a >0) && (diagx[a-1] == pos_x)) - --a; - } - - - dig_num = -1; - if (list_pos + l2+2 > current_size) - { - current_size += list_pos + l2 + 2; - list = vrealloc(list, current_size * sizeof(int*)); - } - - -// right border - list[list_pos] = vcalloc(6, sizeof(int)); - list[list_pos][0] = l1; - list[list_pos][1] = 0; - list[list_pos][3] = last_x[l1-1]; - list[list_pos][2] = -1000; - ++list_pos; - - - - for (i = 1; i <= l2; ++i) - { - list[list_pos] = vcalloc(6, sizeof(int)); - list[list_pos][0] = l1; - list[list_pos][1] = i; - list[list_pos][3] = last_y[i]; - list[list_pos][4] = list_pos-1; - y = last_y[i-1]; - if ((list[y][0] == l1-1) && (list[y][1] == i-1)) - { - list[list_pos][5] = y; - list[list_pos][2] = M[toupper(seq1[l1-1])-'A'][toupper(seq2[i-1])-'A']; - } - else - { - if (i <= l1) - { - list[list_pos][5] = last_x[l1-i]; - } - else - { - list[list_pos][5] = i-l1; - } - list[list_pos][2] = -1000; - } - ++list_pos; - } - - list[list_pos - l2][2] = -1000; - - *num_points = list_pos; - - -// int blb; -// for (blb = 0; blb gep; - - int current_size = l2+l1; - int **list = vcalloc(current_size, sizeof(int*)); - int *diags = vcalloc(num_diagonals, sizeof(int)); - int i; - for (i = 0; i < num_diagonals; ++i) - { - diags[i] = l1 - diagonals[i*3] + diagonals[i*3+1]; - - } - - qsort (diags, num_diagonals, sizeof(int), fastal_compare); - - int *diagx = vcalloc(num_diagonals, sizeof(int)); - int *diagy = vcalloc(num_diagonals, sizeof(int)); - int *old_pos = vcalloc(num_diagonals, sizeof(int)); - - //+1 because diagonals start here at position 1, like in "real" dynamic programming - int a = 0, b = -1; - for (i = 0; i < num_diagonals; ++i) - { - - if (diags[i] < l1) - { - - diagx[i] = l1 - diags[i]; - diagy[i] = 0; - - a= i; - } - else - break; - } - ++a; - b=a-1; - for (; i < num_diagonals; ++i) - { - diagx[i] = 0; - diagy[i] = diags[i]-l1; - b = i; - - } - - int tmpy_pos; - int tmpy_value; - int **M = param_set->M; - - int *last_y = vcalloc(l2+1, sizeof(int)); - int *last_x = vcalloc(l1+1, sizeof(int)); - last_y[0] = 0; - - last_x[0] = 0; - list[0] = vcalloc(6, sizeof(int)); -// list[0][3] = l1; - int list_pos = 1; - int dig_num = l1; - int tmp_l2 = l2 + 1; - - //left border - for (; list_pos < tmp_l2; ++list_pos) - { - list[list_pos] = vcalloc(6, sizeof(int)); - list[list_pos][0] = 0; - list[list_pos][1] = list_pos; - last_y[list_pos] = list_pos; - list[list_pos][2] = list_pos*gep; - list[list_pos][3] = ++dig_num; - list[list_pos][5] = list_pos-1; - } - - int pos_x = 0; - int diags_old = l2; - - int bla; - int bla2, bla3, tmp_x; - - int tmp = l1; - int y; - int tmp_l1 = l1-1; - while (pos_x < tmp_l1) - { - if (list_pos + num_diagonals+2 > current_size) - { - current_size += num_diagonals*50; - list = vrealloc(list, current_size * sizeof(int*)); - } - //upper border - list[list_pos] = vcalloc(6, sizeof(int)); - list[list_pos][0] = ++pos_x; - list[list_pos][1] = 0; - list[list_pos][2] = pos_x * gep; - list[list_pos][3] = --tmp; - list[list_pos][4] = last_y[0]; - tmpy_value = list_pos; - tmpy_pos = 0; - last_x[pos_x] = list_pos; - ++list_pos; - - //diagonals - for (i = a; i <= b; ++i) - { - list[list_pos] = vcalloc(6, sizeof(int)); - list[list_pos][0] = ++diagx[i]; - list[list_pos][1] = ++diagy[i]; - list[list_pos][3] = diags[i]; - - list[list_pos][4] = last_y[diagy[i]]; - list[list_pos][5] = list_pos-1; - list[list_pos][6] = last_y[diagy[i]-1]; - - list[list_pos][2] = 0; - - bla3 = 0; - bla2 = 0; - tmp_x = 0; - for (bla = 0; bla<10; ++bla) - { - - for (bla2 = 0; bla2<10; ++bla2) - { - bla3 += prf2->prf[bla2][diagy[i]-1] * prf1->prf[bla][diagx[i]-1]; - tmp_x += prf2->prf[bla2][diagy[i]-1] * prf1->prf[bla][diagx[i]-1] * M[pos2char[bla]-'A'][pos2char[bla2] -'A']; - - } - } - list[list_pos][2] = (int)tmp_x / bla3; - -// for (bla = 0; bla<10; ++bla) -// bla2 += prf2->prf[bla][diagy[i]-1]; -// bla2 = bla2/prf2->num_sequences; -// -// for (bla = 0; bla<10; ++bla) -// bla3 += prf1->prf[bla][diagy[i]-1]; -// -// bla3 = bla3/prf1->num_sequences; -// -// -// if ((bla2 > 0.7) && (bla3 > 0.7)) -// list[list_pos][2] = M[toupper(seq1[diagx[i]-1])-'A'][toupper(seq2[diagy[i]-1])-'A']; -// else if ((bla< 0.7) && (bla3 < 0.7)) -// list[list_pos][2] = M[toupper(seq1[diagx[i]-1])-'A'][toupper(seq2[diagy[i]-1])-'A'] = 3; -// else -// list[list_pos][2] = M[toupper(seq1[diagx[i]-1])-'A'][toupper(seq2[diagy[i]-1])-'A'] * ((bla< 0.7) && (bla3 < 0.7)); -// list[list_pos][2] = M[toupper(seq1[diagx[i]-1])-'A'][toupper(seq2[diagy[i]-1])-'A'];//* ((bla2+bla3)/2); - last_y[tmpy_pos] = tmpy_value; - tmpy_value = list_pos; - tmpy_pos = diagy[i]; - - ++list_pos; - } - last_y[tmpy_pos] = tmpy_value; - - - //lower border - if (list[list_pos-1][1] != l2) - { - list[list_pos] = vcalloc(6, sizeof(int)); - list[list_pos][0] = pos_x; - list[list_pos][1] = l2; - list[list_pos][4] = last_y[l2]; - - list[list_pos][2] = -1000; - list[list_pos][3] = l1 - pos_x + l2; - list[list_pos][5] = list_pos-1; - if (pos_x > l2) - list[list_pos][6] = last_x[pos_x-l2]; - else - list[list_pos][6] = l2-pos_x; - last_y[l2] = list_pos; - ++list_pos; - } - - - if ((b >= 0) && (diagy[b] == l2)) - --b; - - if ((a >0) && (diagx[a-1] == pos_x)) - --a; - } - - - dig_num = -1; - if (list_pos + l2+2 > current_size) - { - current_size += list_pos + l2 + 2; - list = vrealloc(list, current_size * sizeof(int*)); - } - - -// right border - list[list_pos] = vcalloc(6, sizeof(int)); - list[list_pos][0] = l1; - list[list_pos][1] = 0; - list[list_pos][3] = ++dig_num; - list[list_pos][4] = last_x[l1-1]; - list[list_pos][2] = -1000; - ++list_pos; - - for (i = 1; i <= l2; ++i) - { - list[list_pos] = vcalloc(6, sizeof(int)); - list[list_pos][0] = l1; - list[list_pos][1] = i; - list[list_pos][3] = ++dig_num; - list[list_pos][4] = last_y[i]; - list[list_pos][5] = list_pos-1; - y = last_y[i-1]; - if ((list[y][0] == l1-1) && (list[y][1] == i-1)) - { - list[list_pos][6] = y; - list[list_pos][2] = M[toupper(seq1[l1-1])-'A'][toupper(seq2[i-1])-'A']; - } - else - { - if (i <= l1) - { - list[list_pos][6] = last_x[l1-i]; - } - else - { - list[list_pos][6] = i-l1; - } - list[list_pos][2] = -1000; - } - ++list_pos; - } - - list[list_pos - l2][2] = -1000; - - *num_points = list_pos; - - return list; -} - - - - -//************************** sparse dynamic aligning ********************************************************** - - -void -combine_profiles2file(int **prf1, - int **prf2, - int pos1, - int pos2, - Fastal_param *param_set, - FILE *prof_f, - char state) -{ - int alphabet_size = param_set->alphabet_size; - char *pos2aa = &(param_set->pos2char[0]); - int i; - int x = 0; - if (state == 'M') - { - for (i = 0; i < alphabet_size; ++i) - if (prf1[i][pos1] + prf2[i][pos2] > 0) - { - if (x) - fprintf(prof_f," %c%i", pos2aa[i],prf1[i][pos1]+prf2[i][pos2]); - else - fprintf(prof_f,"%c%i", pos2aa[i],prf1[i][pos1]+prf2[i][pos2]); - x = 1; - } - fprintf(prof_f,"\n"); - } - else if (state == 'D') - { - for (i = 0; i < alphabet_size; ++i) - if (prf2[i][pos2] > 0) - { - if (x) - fprintf(prof_f," %c%i", pos2aa[i],prf2[i][pos2]); - else - fprintf(prof_f,"%c%i", pos2aa[i],prf2[i][pos2]); - x = 1; - } - fprintf(prof_f,"\n"); - } - else - { - for (i = 0; i < alphabet_size; ++i) - if (prf1[i][pos1] > 0) - { - if (x) - fprintf(prof_f," %c%i", pos2aa[i],prf1[i][pos1]); - else - fprintf(prof_f,"%c%i", pos2aa[i],prf1[i][pos1]); - x = 1; - } - fprintf(prof_f,"\n"); - } -} - - - -#define LIN(a,b,c) a[b*5+c] -/** - * Calculates a fast and sparse dynamic programming matrix - * - * \param prf1 Profile of first sequence. - * \param prf2 Profile of second sequence. - * \param param_set The parameter for the alignment. - * \param list The list of diagonals. - * \param n number of dots. - * \param edit_f File to save the edit information. - * \param prof_f File to save the profile. - * \param node_number Number of the new profile. - */ -int -list2linked_pair_wise_fastal(Fastal_profile *prf1, - Fastal_profile *prf2, - Fastal_param *param_set, - int **list, - int n, - FILE *edit_f, - FILE *prof_f, - int node_number) -{ - int a,b,c, i, j, LEN=0, start_trace; - int pi, pj,ij, delta_i, delta_j, prev_i, prev_j; - static int **slist; - static long *MI, *MJ, *MM,*MT2; - static int *sortseq; - static int max_size; - int gop, gep, igop, igep; - int l1, l2, l, ls; - char **al; - char **aln,*char_buf; - int ni=0, nj=0; - long score; - int nomatch = param_set->nomatch; - - l1=prf1->length; - l2=prf2->length; - - al=declare_char (2,l1+l2+1); - - - - igop=param_set->gop; - gep=igep=param_set->gep; - if (n>max_size) - { - max_size=n; - - vfree (MI);vfree (MJ); vfree (MM); - free_int (slist, -1); - - slist=declare_int (n,3); - - MI=vcalloc (5*n, sizeof (long)); - MJ=vcalloc (5*n, sizeof (long)); - MM=vcalloc (5*n, sizeof (long)); - - } - else - { - for (a=0; a1 && (ls=list[a][0]-list[ij][0])==(list[a][1]-list[ij][1])) - { - LIN(MM,a,0)=MAX3(LIN(MM,ij,0),LIN(MI,ij,0),LIN(MJ,ij,0))+list[a][2]-(ls*nomatch); - - LIN(MM,a,1)=ij; - LIN(MM,a,2)=ls; - LIN(MM,a,3)=ls; - if ( LIN(MM,ij,0)>=LIN(MI,ij,0) && LIN(MM,ij,0)>=LIN(MJ,ij,0))LIN(MM,a,4)='m'; - else if ( LIN(MI,ij,0) >= LIN(MJ,ij,0))LIN(MM,a,4)='i'; - else LIN(MM,a,4)='j'; - - } - else - { - LIN(MM,a,0)=UNDEFINED; - LIN(MM,a,1)=-1; - } - } - - a=start_trace; - if (LIN(MM,a,0)>=LIN(MI,a,0) && LIN(MM,a,0) >=LIN(MJ,a,0))MT2=MM; - else if ( LIN(MI,a,0)>=LIN(MJ,a,0))MT2=MI; - else MT2=MJ; - - score=MAX3(LIN(MM,a,0), LIN(MI,a,0), LIN(MJ,a,0)); - - i=l1; - j=l2; - - - while (!(i==0 &&j==0)) - { - int next_a; - l=MAX(LIN(MT2,a,2),LIN(MT2,a,3)); - // HERE ("%c from %c %d %d SCORE=%d [%d %d] [%2d %2d]", T2[a][5],T2[a][4], T2[a][2], T2[a][3], T2[a][0], gop, gep, i, j); - if (i==0) - { - while ( j>0) - { - al[0][LEN]=0; - al[1][LEN]=1; - j--; LEN++; - } - } - else if (j==0) - { - while ( i>0) - { - al[0][LEN]=1; - al[1][LEN]=0; - i--; LEN++; - } - } - - else if (l==0) {HERE ("L=0 i=%d j=%d",l, i, j);exit (0);} - else - { - for (b=0; bprf_number, prf2->prf_number, prf1->is_leaf, prf2->is_leaf); - fprintf(prof_f, "%i\n0\n%i\n1\n", node_number,LEN); - - char statec[] = {'M','D','I'}; - int num = 0; - int state = 0; - i = 0; - j = 0; - - for ( b=0; b< LEN; b++) - { - if ((al[0][b]==1) && (al[1][b]==1)) - { - - combine_profiles2file(prf1->prf, prf2->prf, i, j, param_set, prof_f, 'M'); - ++i; - ++j; - if (state != 0) - { - fprintf(edit_f, "%c%i\n",statec[state], num); - num =1; - state = 0; - } - else - ++num; - } - else if (al[0][b]==1) - { -// prf1->prf[param_set->alphabet_size-1] += prf2->num_sequences; - combine_profiles2file(prf1->prf, prf2->prf, i, j, param_set, prof_f, 'I'); - ++i; - if (state != 2) - { - fprintf(edit_f, "%c%i\n",statec[state], num); - num =1; - state = 2; - } - else - ++num; - } - else if (al[1][b]==1) - { -// prf2->prf[param_set->alphabet_size-1] += prf1->num_sequences; - combine_profiles2file(prf1->prf, prf2->prf, i, j, param_set, prof_f, 'D'); - ++j; - if (state != 1) - { - fprintf(edit_f, "%c%i\n",statec[state], num); - num =1; - state = 1; - } - else - ++num; - } - } - - - fprintf(edit_f, "%c%i\n",statec[state], num); - num =1; - state = 1; - - - fprintf(edit_f,"*\n"); - fprintf(prof_f,"*\n"); - free_char (al, -1); -// exit(0); - return LEN; -} - - - - - - -/** - * \brief Tuns a profile into a consensus sequence. - * - * The character with the highest number of occurences is used as consensus. Gaps are not included. For example: 10 '-' and one 'A' would give 'A' as consensus. - * \param profile The profile. - * \param file_name Name of the file to save the consensus sequence in. - * \param param_set The parameter of the fastal algorithm. - * \return the sequence - */ -char* -profile2consensus(Fastal_profile *profile, char *file_name, Fastal_param *param_set) -{ - FILE *cons_f = fopen(file_name,"w"); - fprintf(cons_f, ">%i\n", profile->prf_number); - char* seq = vcalloc(profile->length+1, sizeof(char)); - int i, j; - int most_pos, most; - int alphabet_size = param_set->alphabet_size; - int **prf = profile->prf; - char *pos2char = param_set->pos2char; - for (i = 0; i < profile->length; ++i) - { - most = -1; - for (j = 0; j < alphabet_size; ++j) - { - if (prf[j][i] > most) - { - most = prf[j][i]; - most_pos = j; - } - } - seq[i] = pos2char[most_pos]; - fprintf(cons_f, "%c",pos2char[most_pos]); - } - seq[i] = '\0'; - fprintf( cons_f, "\n"); - fclose(cons_f); - return seq; -} - - - - -/** - * \brief Calculates the diagonals between two sequences. - * - * Uses bl2seq to calculate the diagonals. - * \param seq_file1 File with sequence 1. - * \param seq_file2 File with sequence 2. - * \param diagonals An array where the diagonal points will be stored. - * \param dig_length length of \a diagonals . - * \param num_points Number of points in all diagonals. - * \return number of diagonals; - */ -int -seq_pair2blast_diagonal(char *seq_file_name1, - char *seq_file_name2, - int **diagonals, - int *dig_length, - int l1, - int l2, - int is_dna) -{ - int *diag = vcalloc(l1 + l2, sizeof(int)); - char *out_file = vtmpnam(NULL); - char blast_command[600]; - - if (is_dna) - sprintf(blast_command, "bl2seq -p blastn -i %s -j %s -D 1 -g F -o %s", seq_file_name1, seq_file_name2, out_file); - else - sprintf(blast_command, "bl2seq -p blastp -i %s -j %s -D 1 -g F -o %s", seq_file_name1, seq_file_name2, out_file); - system(blast_command); - - int *diags = diagonals[0]; - FILE *diag_f = fopen(out_file,"r"); - char line[300]; - fgets(line, 300, diag_f); - fgets(line, 300, diag_f); - fgets(line, 300, diag_f); - - - char delims[] = "\t"; - char *result = NULL; - int length, pos_q, pos_d, i; - int current_pos = 0; - while (fgets(line, 300, diag_f) != NULL) - { - strtok(line, delims); - strtok(NULL, delims); - strtok(NULL, delims); - length = atoi(strtok(NULL, delims)); - strtok(NULL, delims); - strtok(NULL, delims); - pos_q = atoi(strtok(NULL, delims))-1; - strtok(NULL, delims); - pos_d = atoi(strtok(NULL, delims))-1; - - if (current_pos >= *dig_length) - { - (*dig_length) += 90; - diags = vrealloc(diags, sizeof(int)*(*dig_length)); - } - if (diag[l1-pos_q+pos_d] == 0) - { - diag[l1-pos_q+pos_d] =1; - diags[current_pos++] = pos_q; - diags[current_pos++] = pos_d; - diags[current_pos++] = length; - } - } - vfree(diag); - fclose(diag_f); - diagonals[0] = diags; - return current_pos/3; -} - - - - -//******************************* OTHER STUFF *********************** - -/** - * \brief Reads the sequence from a given position in a fasta file and turns it into a profile. - * - * \param seq_file The file where the sequence is stored. - * \param off_set The off_set from the beginning of the file to the position of the sequence name. - * \param profile The profile where the sequence will be stored into. - * \param prf_number The number of this profile. - */ -void -file_pos2profile(FILE *seq_file, //File with sequences - long off_set, //offset of sequence from the beginning of file point to the sequence name, not to the sequence itself - Fastal_profile *profile, //profile to save into - int prf_number, //number of the profile - Fastal_param *param_set) -{ - int alphabet_size = param_set->alphabet_size; - profile->is_leaf = 1; - int *aa2pos = &(param_set->char2pos[0]); - const int LINE_LENGTH = 500; - char line[LINE_LENGTH]; - profile->num_sequences = 1; - profile->prf_number = prf_number; - fseek (seq_file , off_set , SEEK_SET ); - - fgets (line, LINE_LENGTH , seq_file); - int seq_length = 0; - int i, j; - - while(fgets(line, LINE_LENGTH, seq_file)!=NULL) - { - if (line[0] != '>') - { - - line[LINE_LENGTH-1] = '\n'; - if (seq_length + LINE_LENGTH >= profile->allocated_memory) - { - for (i = 0; i < alphabet_size; ++i) - { - profile->prf[i] = vrealloc(profile->prf[i], (profile->allocated_memory+PROFILE_ENLARGEMENT)*sizeof(int)); - } - profile->allocated_memory += PROFILE_ENLARGEMENT; - } - - i = 0; - while (line[i] != '\n') - { - for(j = 0; jprf[j][seq_length+i] = 0; - profile->prf[aa2pos[toupper(line[i])-'A']][seq_length+i] = 1; - ++i; - } - seq_length += i; - - } - else - break; - } - profile->length = seq_length; -} - - - -/** -* constructs index of fasta_file -*/ -int -make_index_of_file(char *file_name, //file with sequences - long **file_positions) //array to save the positions -{ - const int LINE_LENGTH = 150; - (*file_positions) = vcalloc(ENLARGEMENT_PER_STEP, sizeof(long)); - - int current_size = ENLARGEMENT_PER_STEP; - int current_pos = 0; - - FILE *file = fopen(file_name,"r"); - - char *sequence = vcalloc(3*LINE_LENGTH,sizeof(char)); - int seq_length=0; - int allocated_length=3*LINE_LENGTH; - char line[LINE_LENGTH]; - - int num_of_sequences = 0; - int mem_for_pos = ENLARGEMENT_PER_STEP; - - if (file == NULL) - { - printf("FILE NOT FOUND\n"); - exit(1); - } - else - { - (*file_positions)[num_of_sequences] = ftell(file); - while(fgets(line, LINE_LENGTH , file)!=NULL) - { - int length = strlen(line); - if (line[0] == '>') - { - ++num_of_sequences; - - if (num_of_sequences == mem_for_pos) - { - (*file_positions) = vrealloc((*file_positions),(ENLARGEMENT_PER_STEP+mem_for_pos) * sizeof(long)); - mem_for_pos += ENLARGEMENT_PER_STEP; - } - } - (*file_positions)[num_of_sequences] = ftell(file); - } - } - fclose(file); - return num_of_sequences; -} - - -/** -* reads a profile from a profile_file -*/ -profile_file2profile(Fastal_profile *prof, //structure to save the profile in - FILE *profile_f, //file where the profile is stored - long position, //position in profile_f where the profile is stored - Fastal_param *param_set) -{ - - int alphabet_size = param_set->alphabet_size; - - int *aa2pos = &(param_set->char2pos[0]); - - - fseek(profile_f,position,SEEK_SET); - const int LINE_LENGTH = 500; - char line[500]; - - fgets(line, LINE_LENGTH, profile_f); - - prof->prf_number = atoi(line); -// fgets(line, LINE_LENGTH, profile_f); -// prof->num_sequences = atoi(line); -// fgets(line, LINE_LENGTH, profile_f); //is-dna is already known - fgets(line, LINE_LENGTH, profile_f); - prof->is_leaf = atoi(line); - - fgets(line, LINE_LENGTH, profile_f); - prof->length = atoi(line); - fgets(line, LINE_LENGTH, profile_f); - prof->weight = atoi(line); - int i,j; - if (prof->length > prof->allocated_memory) - for (i = 0;i < alphabet_size; ++i) - { - prof->prf[i] = vrealloc(prof->prf[i],prof->length*sizeof(int)); - } - - char delims[] = " "; - char *result = NULL; - char *result_num = NULL; - - int length = prof->length; - - for (i = 0; i < length; ++i) - { - for(j = 0; jprf[j][i] = 0; - fgets(line, LINE_LENGTH , profile_f); - result = strtok( line, delims ); - - while( result != NULL) - { - result_num = &result[1]; - prof->prf[aa2pos[result[0]-'A']][i] = atoi(result_num); - result = strtok( NULL, delims ); - } - } -} - - - -/** -* writes a profile into a file -*/ -void -profile2file(Fastal_profile *profile, //the profile to save - FILE* file, //file to save in - Fastal_param *param_set) -{ - int alphabet_size = param_set->alphabet_size; - - char *pos2aa = &(param_set->pos2char[0]); - - fseek(file,0,SEEK_SET); - - fprintf(file,"%i\n", profile->prf_number); -// fprintf(file,"%i\n", profile->num_sequences); - - fprintf(file,"%i\n", profile->is_leaf); - fprintf(file,"%i\n", profile->length); - fprintf(file,"%i\n", profile->weight); - int i = 0, j = 0; - int max = profile->length; - int x= 0; - --alphabet_size; - while (i < max) - { - for (j = 0; j < alphabet_size; ++j) - if (profile->prf[j][i] > 0) - { - if (x) - fprintf(file," %c%i", pos2aa[j],profile->prf[j][i]); - else - fprintf(file,"%c%i", pos2aa[j],profile->prf[j][i]); - x = 1; - } - if (profile->prf[j][i] > 0) - if (x) - fprintf(file," %c%i", pos2aa[j],profile->prf[j][i]); - else - fprintf(file,"%c%i", pos2aa[j],profile->prf[j][i]); - x = 1; - x = 0; - fprintf(file,"\n"); - ++i; - } - fprintf(file,"*\n"); -} - - - -/** -* Reads the profile out of an alignment -*/ -void -file2profile(FILE* profile_f, //file to read the profile of - Fastal_profile *prof, //profile saved in here - int prf_number, //number of the profile - Fastal_param *param_set) -{ - int alphabet_size = param_set->alphabet_size; - - int *aa2pos = &(param_set->char2pos[0]); - - - fseek(profile_f,0,SEEK_SET); - const int LINE_LENGTH = 500; - char line[500]; - - fgets(line, LINE_LENGTH, profile_f); - prof->prf_number = atoi(line); -// fgets(line, LINE_LENGTH, profile_f); //is-dna is already known - fgets(line, LINE_LENGTH, profile_f); - prof->is_leaf = atoi(line); - - fgets(line, LINE_LENGTH, profile_f); - prof->length = atoi(line); - - fgets(line, LINE_LENGTH, profile_f); - prof->weight = atoi(line); - int i,j; - if (prof->length > prof->allocated_memory) - for (i = 0;i < alphabet_size; ++i) - { - prof->prf[i] = vrealloc(prof->prf[i],prof->length*sizeof(int)); - } - - char delims[] = " "; - char *result = NULL; - char *result_num = NULL; - - int length = prof->length; - - for (i = 0; i < length; ++i) - { - for(j = 0; jprf[j][i] = 0; - fgets(line, LINE_LENGTH , profile_f); - result = strtok( line, delims ); - - while( result != NULL) - { - result_num = &result[1]; - prof->prf[aa2pos[result[0]-'A']][i] = atoi(result_num); - result = strtok( NULL, delims ); - } - } -} - - - -/** -* This method takes a profile and turns it into a sumed up version of same size. -*/ -int** -sumup_profile(Fastal_profile *profile, //profile to sum-up - int **sumup, - Fastal_param *param_set) //summed_up_profile -{ - - char *pos2aa = &(param_set->pos2char[0]); - int alphabet_size = param_set->alphabet_size; - int **M = param_set->M; - int prof_length = profile->length; - - int i,j,k; - - for (i = 0; i < prof_length; ++i) - { - sumup[alphabet_size][i] = 0; - for (k = 0; k < alphabet_size; ++k) - { - sumup[k][i] = 0; - sumup[alphabet_size][i] += profile->prf[k][i]; - for (j = 0; j < alphabet_size; ++j) - { - sumup[k][i] += profile->weight * profile->prf[j][i] * M[pos2aa[j]-'A'][pos2aa[k]-'A']; - } - } - } - - return sumup; -} - - - -/** -* Turns the dynamic programming matrix into a editfile and calculates the new profile -*/ -int -nw_matrix2edit_file(double **prog_matrix, //dynamic programming matrix - Fastal_profile *prf1, //profile of dim1 - Fastal_profile *prf2, //profile of dim2 - FILE *edit_f, //file to safe the edit in - int **prf_field, //space to safe the new profile - int *field_length, - Fastal_param *param_set) //length of prf_field -{ - int **M = param_set->M; - int alphabet_size = param_set->alphabet_size; - double gap_cost = param_set -> gop; - fprintf(edit_f, "%i\n%i\n%i\n%i\n",prf1->prf_number, prf2->prf_number, prf1->is_leaf, prf2->is_leaf); - int sum[] = {0,0,0}; - char sumc[] = {'M','I','D'}; - int last = 0; - int n = 0; - int m = 0; - int field_pos = 0; - int i; - int prf1_length = prf1->length; - int prf2_length = prf2->length; - while ((n < prf1_length) && (m < prf2_length)) - { - //if necesarry allocate more memory for result - if ((*field_length)-alphabet_size < field_pos) - { - (*field_length) += ENLARGEMENT_PER_STEP; - - for (i = 0; i prf[i][n]; - } - ++n; - ++ field_pos; - - if (last != 1) - { - fprintf(edit_f,"%c%i\n",sumc[last],sum[last]); - sum[last] = 0; - } - last = 1; - ++sum[last]; - } - else if (prog_matrix[n][m] == (prog_matrix[n][m+1] +gap_cost)) - { - - for (i = 0; iprf[i][m]; - } - ++m; - ++ field_pos; - if (last != 2) - { - fprintf(edit_f,"%c%i\n",sumc[last],sum[last]); - sum[last] = 0; - } - last = 2; - ++sum[last]; - } - else - { - for (i = 0; iprf[i][n] + prf2->prf[i][m]; - } - ++n; - ++m; - ++ field_pos; - if (last != 0) - { - fprintf(edit_f,"%c%i\n",sumc[last],sum[last]); - sum[last] = 0; - } - last = 0; - ++sum[last]; - } - } - fprintf(edit_f,"%c%i\n",sumc[last],sum[last]); - - //gaps in prf2 - last = 0; - while (n < prf1_length) - { - for (i = 0; iprf[i][n]; - } - ++n; - ++ field_pos; - ++last; - } - if (last > 0) - fprintf(edit_f,"I%i\n",last); - - //gaps in prf1 - last = 0; - while (m < prf2_length) - { - for (i = 0; iprf[i][m]; - } - ++m; - ++ field_pos; - ++last; - } - if (last > 0) - fprintf(edit_f,"D%i\n",last); - fprintf(edit_f,"*\n"); - return field_pos; -} - - - - -/** - * \brief Pairwise alignments of profile is done here. - * - * \param profile1 Profile of sequence 1 - * \param profile2 Profile of sequence 2 - * \param prog_matrix Matrix for dynamic programming - * \param edit_file_name The edit_file_name - * \param sumup_prf The sumup version of profile 1, which later contains the aligned profile. - * \param sumup_length Contains the length of the aligned profile. - * \return length of the aligned profile - */ -int -prf_nw(Fastal_profile *profile1, //profile of sequence 1 - Fastal_profile *profile2, //profile of sequence 2 - double **prog_matrix, //matrix for dynamic programming (at least as long as necessary for alignment) - FILE *edit_file_name, //name of edit file - int **sumup_prf, //sum_up - int *sumup_length, - Fastal_param *param_set) //sum_up length -{ - int alphabet_size = param_set->alphabet_size; - double gap_cost = param_set->gop; - - int i; - if (*sumup_length < profile1->length) - { - for (i = 0; i < alphabet_size+1; ++i) - { - sumup_prf[i] = vrealloc(sumup_prf[i], profile1->length*sizeof(int)); - } - *sumup_length = profile1->length; - } - sumup_prf = sumup_profile(profile1, sumup_prf, param_set); - - - - int j,k; - int prof1_length = profile1->length; - int prof2_length = profile2->length; - - int** M = param_set->M; - double match_score; - int amino_counter; - int residue_pairs = 0; - - for (i = prof2_length; i > 0; --i) - { - prog_matrix[prof1_length][i] = gap_cost * (prof2_length-i); - } - - i = prof1_length-1; - prog_matrix[prof1_length][prof2_length] = 0.0; - while (i >=0) - { - j = prof2_length-1; - - prog_matrix[i][prof2_length] = gap_cost*(prof1_length-i); - while (j >=0) - { - match_score = 0.0; - residue_pairs = 0; - for (k = 0; k < alphabet_size; ++k) - { - residue_pairs += profile2->prf[k][j]; - match_score += (profile2->prf[k][j] * sumup_prf[k][i]); - } - match_score /= (residue_pairs * sumup_prf[alphabet_size][i]); - prog_matrix[i][j] = MAX3(prog_matrix[i+1][j+1]+match_score, prog_matrix[i+1][j]+gap_cost, prog_matrix[i][j+1]+gap_cost); - - --j; - } - --i; - } - return nw_matrix2edit_file(prog_matrix, profile1, profile2, edit_file_name, sumup_prf, sumup_length, param_set); -} - - - -/** - * \brief Writes the sequence into the alignment_file. - * - * \param aligned_sequence Pattern of aligned sequence. - * \param sequence_file File with sequences. - * \param sequence_position Positions of sequences in \a sequence_file. - * \param alignment_file The file to write the sequence into. - * -*/ -void -edit_seq2aligned_seq(char *aligned_sequence, //pattern for aligned sequence - FILE *sequence_file, //file with all the sequences - long sequence_position, //position in sequence file with the correct sequence - FILE *alignment_file) //file to write the alignment into -{ - fseek(sequence_file, sequence_position, SEEK_SET); - const int LINE_LENGTH = 300; - char line[LINE_LENGTH]; - fgets (line, LINE_LENGTH , sequence_file); - fprintf(alignment_file,"%s", line); //writing of sequence name - int pos = 0; - int i = 0; - while(fgets(line, LINE_LENGTH, sequence_file)!=NULL) - { - if (line[0] != '>') - { - line[LINE_LENGTH-1] = '\n'; - i = 0; - while (line[i] != '\n') - { - while (aligned_sequence[pos] == '-') - { - fprintf(alignment_file,"-"); - ++pos; - } - fprintf(alignment_file,"%c",line[i]); - ++i; - ++pos; - } - } - else - break; - } - while (aligned_sequence[pos] != '\n') - { - fprintf(alignment_file,"-"); - ++pos; - } - fprintf(alignment_file,"\n"); -} - - - -/** - * \brief Recursive function to turn the edit_file into the alignment. - * - * \param sequence_file File with all sequences. - * \param sequence_position The array of sequence positions in \a sequence_file - * \param edit_file File to safe the edit profiles in. - * \param edit_positions Array saving the coorespondence between edit profile and position in \a edit_file - * \param node_number The current node. - * \param number_of_sequences The number of sequences. - * \param aligned_sequence The sequence that is edited. - * \param alignment_length The length of the alignment. - * \param edit_seq_file File that saves the edited_sequences of the internal nodes. - * \param offset Saves the size of the edited_sequences. - * \param alignment_file File where the alignment is saved. - * - */ -void -edit2alignment(FILE *sequence_file, //sequence file - long *seq_positions, //sequence positions - FILE *edit_file, //file saving the edit profiles - long *edit_positions, //array saving the correspondence between edit profile and position in edit_file - int node_number, //the current node - int number_of_sequences, //number of sequences - char *aligned_sequence, //the sequence that is edited - int alignment_length, //length of the alignment - and thus of aligned_sequence - FILE *edit_seq_file, //file saving the edited_sequences of the internal nodes - int offset, //saves the size of the edited_sequence - FILE* alignment_file) //file saving the alignments -{ - fseek(edit_file, edit_positions[node_number-number_of_sequences], SEEK_SET); - const LINE_LENGTH = 50; - char line[LINE_LENGTH]; - fgets(line, LINE_LENGTH , edit_file); - int child1 = atoi(line); - fgets(line, LINE_LENGTH , edit_file); - int child2 = atoi(line); - fgets(line, LINE_LENGTH , edit_file); - int is_leaf1 = atoi(line); - fgets(line, LINE_LENGTH , edit_file); - int is_leaf2 = atoi(line); - - static char seq_line[10]; - - char x; - int number; - int pos = 0; - - //first child - while(fgets(line, LINE_LENGTH , edit_file)!=NULL) - { - x = line[0]; - if (x == '*') - break; - number = atoi(&line[1]); - if (x == 'M') - { - while (number > 0) - { - if (aligned_sequence[pos] == 'X') - --number; - ++pos; - } - } - else if (x == 'I') - { - while (number > 0) - { - if (aligned_sequence[pos] == 'X') - --number; - ++pos; - } - } - else if (x == 'D') - { - while (number > 0) - { - if (aligned_sequence[pos] == 'X') - { - aligned_sequence[pos] = '-'; - --number; - } - ++pos; - } - } - } - - if (is_leaf1) - { - edit_seq2aligned_seq(aligned_sequence, sequence_file, seq_positions[child1], alignment_file); - } - else - { - fprintf(edit_seq_file, "%s", aligned_sequence); - edit2alignment(sequence_file, seq_positions, edit_file, edit_positions, child1, number_of_sequences, aligned_sequence, alignment_length, edit_seq_file, offset, alignment_file); - } - - //second child - fseek(edit_seq_file, offset, SEEK_CUR); - fgets(aligned_sequence, alignment_length+3, edit_seq_file); - fseek(edit_seq_file, offset, SEEK_CUR); - - pos = 0; - fseek(edit_file, edit_positions[node_number-number_of_sequences], SEEK_SET); - while(fgets(line, LINE_LENGTH , edit_file)!=NULL) - { - x = line[0]; - if (x == '*') - break; - number = atoi(&line[1]); - if (x == 'M') - { - while (number > 0) - { - if (aligned_sequence[pos] == 'X') - --number; - ++pos; - } - } - else if (x == 'I') - { - while (number > 0) - { - if (aligned_sequence[pos] == 'X') - { - aligned_sequence[pos] = '-'; - --number; - } - ++pos; - } - } - else if (x == 'D') - { - while (number > 0) - { - if (aligned_sequence[pos] == 'X') - --number; - ++pos; - } - } - } - - if (is_leaf2) - { - edit_seq2aligned_seq(aligned_sequence, sequence_file, seq_positions[child2], alignment_file); - } - else - { - fprintf(edit_seq_file, "%s", aligned_sequence); - edit2alignment(sequence_file, seq_positions, edit_file, edit_positions, child2, number_of_sequences, aligned_sequence, alignment_length, edit_seq_file, offset, alignment_file); - } -} - - - - -// * The file has the follwing format (# and text behind are only comments and not included into the file): -// * 1 # Number of profile. -// * 1 # is DNA or not. -// * 5 # Number of columns in the profile. -// * 4A 1C # In this column are 4 'A' and 1 'C' -// * 3G # In this column are 3 'G' -// * 5A # In this column are 5 'A' -// * 2A 3C # In this column are 2 'A' and 3 'C' -// * 5C # In this column are 5 'C' -// * * # Marks the end of this profile - - - -/** - * \brief Writes a profile to a file. - * - * \param sumup_prf The profile array, not a real profile. - * \param length The length of the profile. - * \param file The FILE object to write the the profile into. - * \param is_dna The type of sequence. - * \param number The number of the profile. - */ -void -write2file(int **sumup_prf, - int length, - FILE *file, - int number, - Fastal_param *param_set) -{ - char *pos2aa = &(param_set->pos2char[0]); - fprintf(file,"%i\n0\n%i\n1\n",number, length ); - int i, j; - int alphabet_size = param_set->alphabet_size; - - i = 0; - int x = 0; - while (i < length) - { - for (j = 0; j < alphabet_size; ++j) - if (sumup_prf[j][i] > 0) - { - if (x) - fprintf(file," %c%i", pos2aa[j],sumup_prf[j][i]); - else - fprintf(file,"%c%i", pos2aa[j],sumup_prf[j][i]); - x = 1; - } -// x = 1; - x = 0; - fprintf(file,"\n"); - ++i; - } - fprintf(file,"*\n"); -} - - - - - - - - - -/** -* main of the fastal algorithm -*/ -int -fastal(int argc, //number of arguments - char **argv) //arguments first = fastal, second = tree -{ - - int test; - for (test = 0; test < argc; ++test) - { - printf("%s\n",argv[test]); - } - - struct fastal_arguments arguments; - - arguments.output_file = "out.aln"; - arguments.tree_file = NULL; - arguments.gep = -1; - arguments.gop = -10; - arguments.method = "fast"; - -// argp_parse (&argp, argc, argv, 0, 0, &arguments); - - Fastal_param *param_set = vcalloc(1,sizeof(Fastal_param)); - fill_parameters(arguments.is_dna, param_set, arguments.method); - param_set->gep = arguments.gep; - param_set->gop = arguments.gop; - - - int alphabet_size = param_set->alphabet_size; - - //sequence file management - char **seq_name; - long *file_positions = NULL; - long **tmp = &file_positions; - int number_of_sequences = make_index_of_file(arguments.sequence_file, tmp); - FILE *seq_file = fopen(arguments.sequence_file,"r"); - - - //edit file management - FILE *edit_file = fopen("edit_tmp","w+"); - long current_edit_pos; - long *edit_positions = vcalloc(number_of_sequences,sizeof(long)); - - - //profile management - Fastal_profile **profiles = vcalloc(3,sizeof(Fastal_profile*)); - initiate_profiles(profiles, param_set); - FILE * prof_file = fopen("prf_tmp","w+"); - long* profile_positions = vcalloc(4,sizeof(long*)); - int max_prof = 4; - int saved_prof = 0; - - - //dynamic programming matrix - double ** dyn_matrix = vcalloc(1,sizeof(double*)); - dyn_matrix[0] = vcalloc(1,sizeof(double)); - int *length1 = vcalloc(1,sizeof(int)); - int *length2 = vcalloc(1,sizeof(int)); - *length1 = 1; - *length2 = 2; - int i; - int **sumup_prf = vcalloc(alphabet_size+1,sizeof(int*)); - for (i = 0; i < alphabet_size+1; ++i) - sumup_prf[i] = vcalloc(1,sizeof(int)); - int *sumup_length = vcalloc(1,sizeof(int)); - *sumup_length = 1; - - - - if (arguments.tree_file == NULL) - { - arguments.tree_file = "HUMAN.tree"; - printf("CONSTRUCT TREE\n"); - make_partTree(arguments.sequence_file, arguments.tree_file, 4, 20); - } - - - printf("CONSTRUCT ALIGNMENT\n"); - //tree file management - FILE *tree_file = fopen(arguments.tree_file,"r"); - const int LINE_LENGTH = 100; - char line[LINE_LENGTH]; - char delims[] = " "; - int node[3]; - char *result = NULL; - int j; - int alignment_length; - - - //memory for sparse dynamic - int *diagonals = vcalloc(3,sizeof(int)); - int *dig_length = vcalloc(1,sizeof(int)); - *dig_length = 3; - int **list = NULL;//vcalloc(1,sizeof(int*)); -// list[0] = vcalloc(7,sizeof(int)); - int *list_length = vcalloc(1,sizeof(int)); - - *list_length = 0; - int ***list_p = vcalloc(1,sizeof(int**)); - - - - //bottom-up traversal - while(fgets(line, LINE_LENGTH, tree_file)!=NULL) - { - //read profiles - node[0] = atoi(strtok(line,delims)); - node[1] = atoi(strtok(NULL,delims)); - node[2] = atoi(strtok(NULL,delims)); - //getting profile of second child - if (node[1] < number_of_sequences) - { - file_pos2profile(seq_file, file_positions[node[1]], profiles[1], node[1], param_set); //profile to save into - } - else - { - profile_file2profile(profiles[1], prof_file, profile_positions[--saved_prof], param_set); - fseek (prof_file , profile_positions[saved_prof] , SEEK_SET); - } - - //getting profile of first child - if (node[0] < number_of_sequences) - { - file_pos2profile(seq_file, file_positions[node[0]], profiles[0], node[0], param_set); //profile to save into - } - else - { - profile_file2profile(profiles[0], prof_file, profile_positions[--saved_prof], param_set); - fseek (prof_file , profile_positions[saved_prof] , SEEK_SET); - } - if (saved_prof == max_prof) - { - max_prof += 5; - profile_positions = vrealloc(profile_positions, max_prof*sizeof(long)); - } - edit_positions[node[2]-number_of_sequences] = ftell(edit_file); - profile_positions[saved_prof] = ftell(prof_file); - ++saved_prof; - if (!strcmp(param_set->method,"nw")) - { - dyn_matrix = resize_dyn_matrix(dyn_matrix, length1, length2, profiles[0]->length+1, profiles[1]->length+1); - alignment_length = prf_nw(profiles[0], profiles[1], dyn_matrix, edit_file, sumup_prf, sumup_length, param_set); - write2file(sumup_prf, alignment_length, prof_file, node[2], param_set); - } - else if (!strcmp(param_set->method, "fast")) - { - char *file_name1 = vtmpnam(NULL); - char *file_name2 = vtmpnam(NULL); - char *seq1 = profile2consensus(profiles[0], file_name1, param_set); - char *seq2 = profile2consensus(profiles[1], file_name2, param_set); - int **diagonals_p = &diagonals; - int num_diagonals = seq_pair2blast_diagonal(file_name1, file_name2, diagonals_p, dig_length, strlen(seq1),strlen(seq2), arguments.is_dna); - diagonals = diagonals_p[0]; - char *p = ¶m_set->pos2char[0]; - list = diagonals2int(diagonals, num_diagonals, seq1, seq2, list_length, param_set);//, profiles[0], profiles[1], p); - alignment_length = list2linked_pair_wise_fastal(profiles[0], profiles[1], param_set, list, *list_length, edit_file, prof_file, node[2]); - int x; - - for (x = 0; x < *list_length; ++x) - { - vfree(list[x]); - } - vfree(list); - list = NULL; - vfree(seq1); - vfree(seq2); - } - } - - //free_memory & close files - vfree(diagonals); - fclose(tree_file); - fclose(prof_file); - free_fastal_profile(profiles[0], alphabet_size); - free_fastal_profile(profiles[1], alphabet_size); - vfree(profiles); - vfree(profile_positions); - free_dyn_matrix(*length1,dyn_matrix); - for (i = 0; i <= alphabet_size; ++i) - { - vfree(sumup_prf[i]); - } - vfree(sumup_prf); - vfree(param_set); - - //bottom-down traversal (edit_files --> alignment) - char file_name[FILENAMELEN]; - sprintf(file_name,arguments.output_file); - - FILE *alignment_file = fopen(file_name, "w"); - FILE *edit_seq_file = fopen("edit_seq.tmp","w+"); - - char *aligned_sequence = vcalloc(alignment_length+3, sizeof(char)); - - - long offset = ftell(edit_seq_file); - for (i = 0; i < alignment_length; ++i) - { - fprintf(edit_seq_file, "X"); - aligned_sequence[i] = 'X'; - } - aligned_sequence[i]= '\n'; - aligned_sequence[i+1]= '\0'; - fprintf(edit_seq_file, "\n"); - offset = (ftell(edit_seq_file) - offset)*-1; - - - edit2alignment(seq_file, file_positions, edit_file, edit_positions, node[2], number_of_sequences, aligned_sequence, alignment_length, edit_seq_file, offset, alignment_file); - - - //free_memory & close files - - vfree(edit_positions); - fclose(edit_file); - fclose(seq_file); - - return 0; -} - - - - -//****************** toolbox *************************** - - -/** -* enlargement of the dynamic programming matrix in case it is to small. -*/ -double** -resize_dyn_matrix(double **dyn_matrix, //the dynamic programming matrix - int *old_length1, //old length of dimension 1 - int *old_length2, //old length of dimension 2 - int length1, //new minimum length of dimension 1 - int length2) //new maximum length of dimension 2 -{ - int i; - if (*old_length1 < length1) - { - dyn_matrix = vrealloc(dyn_matrix,length1*sizeof(double*)); - for (i = *old_length1; i < length1; ++i) - dyn_matrix[i] = vcalloc(*old_length2,sizeof(double)); - *old_length1 = length1; - } - if (*old_length2 < length2) - { - for (i = 0;i<*old_length1; ++i) - dyn_matrix[i] = vrealloc(dyn_matrix[i], length2*sizeof(double)); - *old_length2 = length2; - } - return dyn_matrix; -} - - - -/** -* frees the memory of a dynamic programming matrix -*/ -void -free_dyn_matrix(int length1, //length of first dimension - double **dyn_matrix) //dynamic matrix -{ - int i = 0; - for (; ialphabet_size; - int i,j; - for (i =0; i < 3; ++i) - { - profiles[i] = vcalloc(1,sizeof(Fastal_profile)); - profiles[i]->weight = 1; - profiles[i]->is_leaf = 1; - profiles[i]->prf = vcalloc(alphabet_size, sizeof(int*)); - for (j = 0; j < alphabet_size; ++j) - { - profiles[i]->prf[j] = vcalloc(PROFILE_ENLARGEMENT, sizeof(int)); - } - profiles[i]->allocated_memory = PROFILE_ENLARGEMENT; - } -} - - -/** -* initalises the files where the profiles are temporarly stored. -*/ -void -initiate_profile_files(FILE **profile_files) -{ - char names[10]; - int i = 0; - for (;i < 4; ++i) - { - sprintf(names,"tmp_prf_%i",i); - profile_files[i] = fopen(names,"w+"); - } -} - - - -/** - * frees all memory occupied by the profile - */ -void -free_fastal_profile(Fastal_profile* profile, int alphabet_size) -{ - --alphabet_size; - for (;alphabet_size >= 0; --alphabet_size) - vfree(profile->prf[alphabet_size]); - vfree(profile->prf); -} - - -/** -* initialize the parameters -*/ -void -fill_parameters(int is_dna, Fastal_param *param_set, char *method) -{ - sprintf(param_set->method,"%s",method); - int i; - if (is_dna) - { - param_set->alphabet_size = 10; - char tmp1[] = {'A','C','G','T','N','R','Y','D','M','W'}; - int tmp2[] = { 0, -1, 1, 7, -1, -1, 2, -1, -1, -1, -1, -1, 8, 4, -1, -1, -1, 5, -1, 3, -1, -1, 9, -1, 6, -1}; - for (i = 0; ialphabet_size; ++i) - param_set->pos2char[i] = tmp1[i]; - for (i = 0; i<26; ++i) - param_set->char2pos[i] = tmp2[i]; - param_set->M = read_matrice("dna_idmat"); - } - else - { - param_set->alphabet_size = 24; - char tmp1[] = {'A','C','G','T','F','D','H','I','K','L','M','N','P','Q','R','S','E','V','W','Y','B','J','X','Z'}; - int tmp2[] = { 0, 20, 1, 5, 16, 4, 2, 6, 7, 21, 8, 9, 10, 11, -1, 12, 13, 14, 15, 3, -1, 17, 18,22, 19,23}; - for (i = 0; ialphabet_size; ++i) - param_set->pos2char[i] = tmp1[i]; - for (i = 0; i<26; ++i) - param_set->char2pos[i] = tmp2[i]; - param_set->M = read_matrice("blosum62mt"); - } -} -/*********************************COPYRIGHT NOTICE**********************************/ -/*© Centro de Regulacio Genomica */ -/*and */ -/*Cedric Notredame */ -/*Tue Oct 27 10:12:26 WEST 2009. */ -/*All rights reserved.*/ -/*This file is part of T-COFFEE.*/ -/**/ -/* T-COFFEE is free software; you can redistribute it and/or modify*/ -/* it under the terms of the GNU General Public License as published by*/ -/* the Free Software Foundation; either version 2 of the License, or*/ -/* (at your option) any later version.*/ -/**/ -/* T-COFFEE is distributed in the hope that it will be useful,*/ -/* but WITHOUT ANY WARRANTY; without even the implied warranty of*/ -/* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the*/ -/* GNU General Public License for more details.*/ -/**/ -/* You should have received a copy of the GNU General Public License*/ -/* along with Foobar; if not, write to the Free Software*/ -/* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA*/ -/*............................................... |*/ -/* If you need some more information*/ -/* cedric.notredame@europe.com*/ -/*............................................... |*/ -/**/ -/**/ -/* */ -/*********************************COPYRIGHT NOTICE**********************************/ diff --git a/binaries/src/tcoffee/t_coffee_source/fastal_lib_header.h b/binaries/src/tcoffee/t_coffee_source/fastal_lib_header.h deleted file mode 100644 index ab60176..0000000 --- a/binaries/src/tcoffee/t_coffee_source/fastal_lib_header.h +++ /dev/null @@ -1,213 +0,0 @@ - - -#define ENLARGEMENT_PER_STEP 50 -#define PROFILE_ENLARGEMENT 550 - -// static char pos2aa[] = {'A','C','D','E','F','G','H','I','K','L','M','N','P','Q','R','S','T','V','W','Y'}; - - -/** - * \brief Struct to save all informations of a profile. - * - */ -typedef struct -{ - /// Number of sequences in this profile - int num_sequences; - /// number of the profile - int prf_number; - ///0 = combination of two profiles, 1 = profile of a single sequence -> name1 = seq_name - int is_leaf; - ///length of the profile - int length; - ///weight of the sequence - int weight; - ///saves the amount of allocated memory - int allocated_memory; - ///the profile itself [alphabet_size][profile_length] - int **prf; -} -Fastal_profile; - -/** - * \brief Struct to save all parameters for fastal. - * - */ -typedef struct -{ - /// size of alphabet_size - int alphabet_size; - /// converting char2position (for profile) - int char2pos[26]; - /// converting pos2char (for profile) - char pos2char[26]; - /// gap opening costs - int gop; - /// gap extension costs - int gep; - /// nomatch??? - int nomatch; - ///method to align profile - char method[20]; - ///scoring Matrix; - int **M; -} -Fastal_param; - - - -//tree -void generate_random_tree(int number); - - -Fastal_profile* make_profile_of_sequence(char *seq_name, char *sequence, int number); - - - -//Definite use - -//********************* input/output ********************************** -void file2profile(FILE* profile_f, Fastal_profile *profile, int prf_number, Fastal_param *param_set); -void file_pos2profile(FILE *seq_file, long off_set, Fastal_profile *profile, int prf_number, Fastal_param *param_set); -void profile2file(Fastal_profile *profile, FILE* prf_f, Fastal_param *param_set); - -//index -int make_index_of_file(char *file_name, long **result); - - -//********************* pairwise alignment methods ************************ - - //Needleman-Wunsch - int prf_nw(Fastal_profile *profile1, Fastal_profile *profile2, double **prog_matrix, FILE *edit_file_name, int **sumup_prf, int *sumup_length, Fastal_param *param_set); - int nw_matrix2edit_file(double **prog_matrix, Fastal_profile *profile1, Fastal_profile *profile2, FILE *edit_f, int **prf_field, int *field_length, Fastal_param *param_set); - int** sumup_profile(Fastal_profile *profile, int **sumup_prf, Fastal_param *param_set); - void write2file(int **sumup_prf, int length, FILE *file, int number, Fastal_param *param_set); - - - //Sparse dynamic programming - int seq_pair2blast_diagonal(char *seq_file_name1, char *seq_file_name2, int **diagonals, int *dig_length, int l1, int l2, int is_dna); - - - - - - - - -//edit_files 2 alignment - void edit2alignment(FILE *sequence_file, long *seq_positions, FILE *edit_file, long *edit_positions, int node_number, int number_of_sequences, char *aligned_sequence, int alignment_length, FILE *edit_seq_file, int offset, FILE* alignment_file); -void edit_seq2aligned_seq(char *aligned_sequence, FILE *sequence_file, long sequence_position, FILE *alignment_file); - - -//main -int fastal(int argc, char **argv); - -//toolbox -void initiate_profile_files(FILE **profile_files); -void initiate_profiles(Fastal_profile **profiles, Fastal_param *param_set); -void free_fastal_profile(Fastal_profile *profile, int alphabet_size); -double **resize_dyn_matrix(double **dyn_matrix, int *old_length1, int *old_length2, int length1, int length2); -void free_dyn_matrix(int length1, double **dyn_matrix); -void fill_parameters(int is_dna, Fastal_param *param_set, char *method); - -struct fastal_arguments -{ -// char *args[2]; - char *method; - char *sequence_file; - char *tree_file; - char *output_file; - int is_dna; - int gop; - int gep; -}; - -// static char fastal_doc[] = "Fastal -- a program to align sequences with little memory"; -// // static char args_doc[] = "ARG1 ARG2"; -// static void* fastal_options[]= -// { -// {"in", 'i', "FILE", 0, "Sequence file (FASTA format)" }, -// {"output", 'o', "FILE", 0, "Output to FILE" }, -// {"tree", 't', "FILE", 0, "Tree file" }, -// {"method", 'm', "METHOD", 0, "Methods:\n fast (sparse dynamic),\n nw (Needleman-Wunsch) "}, -// {"is_dna", 'd', 0, 0, "Sequences are DNA"}, -// {"is_aa", 'a', 0, 0, "Sequences are AminoAcids"}, -// {"gop", 'g', "VALUE", 0, "Gap opening costs"}, -// {"gep", 'e', "VALUE", 0, "Gap extension costs"}, -// { 0 } -// }; - - - - - -/* -void -fastal_parse_opt (int key, char *arg) -{ -// struct fastal_arguments *arguments = state->input; - - switch (key) - { - case 'o': - arguments->output_file = arg; - break; - case 't': - arguments->tree_file = arg; - break; - case 'i': - arguments->sequence_file = arg; - break; - case 'm': - arguments->method = arg; - break; - case 'g': - arguments->gop = atoi(arg); - break; - case 'e': - arguments->gep = atoi(arg); - break; - case 'd': - arguments->is_dna = 1; - break; - case 'a': - arguments->is_dna = 0; - break; - default: - printf("UNKNOWN OPTION key"); - exit(1); - } - return 0; -}*/ -// -// -// static struct argp argp = { fastal_options, fastal_parse_opt,/* fastal_args_doc,*/ fastal_doc}; -/*********************************COPYRIGHT NOTICE**********************************/ -/*© Centro de Regulacio Genomica */ -/*and */ -/*Cedric Notredame */ -/*Tue Oct 27 10:12:26 WEST 2009. */ -/*All rights reserved.*/ -/*This file is part of T-COFFEE.*/ -/**/ -/* T-COFFEE is free software; you can redistribute it and/or modify*/ -/* it under the terms of the GNU General Public License as published by*/ -/* the Free Software Foundation; either version 2 of the License, or*/ -/* (at your option) any later version.*/ -/**/ -/* T-COFFEE is distributed in the hope that it will be useful,*/ -/* but WITHOUT ANY WARRANTY; without even the implied warranty of*/ -/* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the*/ -/* GNU General Public License for more details.*/ -/**/ -/* You should have received a copy of the GNU General Public License*/ -/* along with Foobar; if not, write to the Free Software*/ -/* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA*/ -/*............................................... |*/ -/* If you need some more information*/ -/* cedric.notredame@europe.com*/ -/*............................................... |*/ -/**/ -/**/ -/* */ -/*********************************COPYRIGHT NOTICE**********************************/ diff --git a/binaries/src/tcoffee/t_coffee_source/fsa_dp.c b/binaries/src/tcoffee/t_coffee_source/fsa_dp.c deleted file mode 100644 index c51d5ec..0000000 --- a/binaries/src/tcoffee/t_coffee_source/fsa_dp.c +++ /dev/null @@ -1,2541 +0,0 @@ -#include -#include -#include -#include -#include - -#include "io_lib_header.h" -#include "util_lib_header.h" -#include "define_header.h" -#include "dp_lib_header.h" - -#define hmm_add(x,y) ((x==UNDEFINED || y==UNDEFINED)?UNDEFINED:(x+y)) -#define MAX_EMISSION 256 - -/*********************************************************************************/ -/* */ -/* */ -/* Procons dp */ -/* */ -/* */ -/*********************************************************************************/ -char alphabetDefault[] = "ARNDCQEGHILKMFPSTWYV"; -double emitPairsDefault[20][20] = { - {0.02373072f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f}, - {0.00244502f, 0.01775118f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f}, - {0.00210228f, 0.00207782f, 0.01281864f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f}, - {0.00223549f, 0.00161657f, 0.00353540f, 0.01911178f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f}, - {0.00145515f, 0.00044701f, 0.00042479f, 0.00036798f, 0.01013470f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f}, - {0.00219102f, 0.00253532f, 0.00158223f, 0.00176784f, 0.00032102f, 0.00756604f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f}, - {0.00332218f, 0.00268865f, 0.00224738f, 0.00496800f, 0.00037956f, 0.00345128f, 0.01676565f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f}, - {0.00597898f, 0.00194865f, 0.00288882f, 0.00235249f, 0.00071206f, 0.00142432f, 0.00214860f, 0.04062876f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f}, - {0.00114353f, 0.00132105f, 0.00141205f, 0.00097077f, 0.00026421f, 0.00113901f, 0.00131767f, 0.00103704f, 0.00867996f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f}, - {0.00318853f, 0.00138145f, 0.00104273f, 0.00105355f, 0.00094040f, 0.00100883f, 0.00124207f, 0.00142520f, 0.00059716f, 0.01778263f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f}, - {0.00449576f, 0.00246811f, 0.00160275f, 0.00161966f, 0.00138494f, 0.00180553f, 0.00222063f, 0.00212853f, 0.00111754f, 0.01071834f, 0.03583921f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f}, - {0.00331693f, 0.00595650f, 0.00257310f, 0.00252518f, 0.00046951f, 0.00312308f, 0.00428420f, 0.00259311f, 0.00121376f, 0.00157852f, 0.00259626f, 0.01612228f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f}, - {0.00148878f, 0.00076734f, 0.00063401f, 0.00047808f, 0.00037421f, 0.00075546f, 0.00076105f, 0.00066504f, 0.00042237f, 0.00224097f, 0.00461939f, 0.00096120f, 0.00409522f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f}, - {0.00165004f, 0.00090768f, 0.00084658f, 0.00069041f, 0.00052274f, 0.00059248f, 0.00078814f, 0.00115204f, 0.00072545f, 0.00279948f, 0.00533369f, 0.00087222f, 0.00116111f, 0.01661038f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f}, - {0.00230618f, 0.00106268f, 0.00100282f, 0.00125381f, 0.00034766f, 0.00090111f, 0.00151550f, 0.00155601f, 0.00049078f, 0.00103767f, 0.00157310f, 0.00154836f, 0.00046718f, 0.00060701f, 0.01846071f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f}, - {0.00631752f, 0.00224540f, 0.00301397f, 0.00285226f, 0.00094867f, 0.00191155f, 0.00293898f, 0.00381962f, 0.00116422f, 0.00173565f, 0.00250962f, 0.00312633f, 0.00087787f, 0.00119036f, 0.00180037f, 0.01346609f, 0.0f, 0.0f, 0.0f, 0.0f}, - {0.00389995f, 0.00186053f, 0.00220144f, 0.00180488f, 0.00073798f, 0.00154526f, 0.00216760f, 0.00214841f, 0.00077747f, 0.00248968f, 0.00302273f, 0.00250862f, 0.00093371f, 0.00107595f, 0.00147982f, 0.00487295f, 0.01299436f, 0.0f, 0.0f, 0.0f}, - {0.00039119f, 0.00029139f, 0.00021006f, 0.00016015f, 0.00010666f, 0.00020592f, 0.00023815f, 0.00038786f, 0.00019097f, 0.00039549f, 0.00076736f, 0.00028448f, 0.00016253f, 0.00085751f, 0.00015674f, 0.00026525f, 0.00024961f, 0.00563625f, 0.0f, 0.0f}, - {0.00131840f, 0.00099430f, 0.00074960f, 0.00066005f, 0.00036626f, 0.00070192f, 0.00092548f, 0.00089301f, 0.00131038f, 0.00127857f, 0.00219713f, 0.00100817f, 0.00054105f, 0.00368739f, 0.00047608f, 0.00102648f, 0.00094759f, 0.00069226f, 0.00999315f, 0.0f}, - {0.00533241f, 0.00169359f, 0.00136609f, 0.00127915f, 0.00119152f, 0.00132844f, 0.00178697f, 0.00194579f, 0.00071553f, 0.01117956f, 0.00914460f, 0.00210897f, 0.00197461f, 0.00256159f, 0.00135781f, 0.00241601f, 0.00343452f, 0.00038538f, 0.00148001f, 0.02075171f} -}; - -static void DisplayMatState ( MatState *S, char *s); - - - - - - -void check_viterbiL ( Alignment *A,int *ns, int **ls, Constraint_list *CL); -MatState *viterbi2path2 ( double ***Sc, int ***St, Hmm *H, MatState *S, MatState *E); -void testfunc ( MatState *S, char *s); - -#ifdef IN_PGROGRESS -/*********************************************************************************/ -/* */ -/* */ -/* MSA Analyzer */ -/* */ -/* */ -/*********************************************************************************/ -Alignment * analyze_alignment ( Alignment *A) -{ - evaluate_alignment (A); - H=define_msa_model (-100); - M=seq_viterbi_hmm (A->seq_al[0], H); - path=seq_viterbi2path ( seq, H, M); -} - - -Hmm* define_msa_model(double penalty) -{ - Hmm *H; - double freeT=0; - int n=0; - HmmState *S; - - - H=declare_hmm(2); - H->freeT=freeT=0; - - H->forbiden=FORBIDEN; - H->start=START_STATE; - H->end=END_STATE; - - /*define START*/ - S=H->S[n]; - sprintf (S->name, "START"); S->state=n; - - S->DI=0; - S->DJ=0; - S->em=freeT; - - sprintf ( (S->T[S->nT])->name, "C") ;(S->T[S->nT])->tr=freeT ;S->nT++; - sprintf ( (S->T[S->nT])->name, "W");(S->T[S->nT])->tr=freeT ;S->nT++; - n++; - /*define END*/ - S=H->S[n]; - sprintf (S->name, "END"); S->state=n; - S->DI=0; - S->DJ=0; - S->em=freeT; - n++; - - /*define Correct*/ - S=H->S[n]; - sprintf (S->name, "C"); S->state=n; - S->DI=1; - S->DJ=0; - S->em=H->forbiden; - S->em_func=em_correct_msa; - - sprintf ( (S->T[S->nT])->name, "C") ;(S->T[S->nT])->tr=freeT;S->nT++; - sprintf ( (S->T[S->nT])->name, "W");(S->T[S->nT])->tr=penalty ;S->nT++; - sprintf ( (S->T[S->nT])->name, "END"); (S->T[S->nT])->tr=freeT;S->nT++; - n++; - - /*define Wrong*/ - S=H->S[n]; - sprintf (S->name, "INSERT"); S->state=n; - S->DI=1; - S->DJ=0; - S->em=H->forbiden; - S->em_func=em_wrong_msa; - sprintf ( (S->T[S->nT])->name, "C") ; (S->T[S->nT])->tr=penalty;S->nT++; - sprintf ( (S->T[S->nT])->name, "W"); (S->T[S->nT])->tr=freeT;S->nT++; - sprintf ( (S->T[S->nT])->name, "END"); (S->T[S->nT])->tr=-gop;S->nT++; - n++; - - /*define LInsert*/ - S=H->S[n]; - sprintf (S->name, "LINSERT"); S->state=n; - S->DI=1; - S->DJ=0; - S->em=lgep; - - sprintf ( (S->T[S->nT])->name, "INSERT") ;(S->T[S->nT])->tr=freeT;S->nT++; - sprintf ( (S->T[S->nT])->name, "LINSERT");(S->T[S->nT])->tr=freeT;S->nT++; - n++; - - H=bound_hmm ( H); - return H; -} -#endif -/*********************************************************************************/ -/* */ -/* */ -/* simple HMM: Viterbi */ -/* */ -/* */ -/*********************************************************************************/ -double pavie_em_func (Hmm*H, HmmState *S, int v); -Hmm* define_full_model(int nstate,char **state_list, char *model_name,Generic_em_func evaluation_func ); -char **produce_state_name (int nstate,char **list, char *model_name, Hmm* H); -double** seq_viterbi_hmm (char *seq, Hmm *H); -int * seq_viterbi2path (char *s, Hmm *H, double **M); -double analyze_sequence ( char *seq, Hmm*H); - -double pavie_emission (Hmm*H, HmmState *S, int v) -{ - char *n; - - - n=S->name; - - if ( v==n[0] || ( v=='*' && n[0]=='E')) return H->freeT; - return H->forbiden; -} -Hmm* define_full_model(int nstate, char **list, char *model_name, Generic_em_func emission_function) -{ - /*list: a list of the state names: does not include START or END*/ - /*model_name: a string that will be appended to the names in list*/ - - Hmm *H; - int a,n; - HmmState *S; - - - H=declare_hmm(nstate+2); - H->freeT=0; - - H->forbiden=FORBIDEN; - H->start=START_STATE; - H->end=END_STATE; - - list=produce_state_name (nstate,list, model_name, H); - nstate+=2; - - for (n=0; nS[n]; - S->state=n; - sprintf ( S->name, "%s", list[n]); - S->em_func2=emission_function; - if (n==H->end || n==H->start){S->DI=0;S->DJ=0;} - else S->DI=1;S->DJ=0; - - /*Emmissions*/ - S->em_func2=emission_function; - for (a=0; a< MAX_EMISSION; a++)S->em2[a]=H->freeT; - - for (a=0; aend; a++) - { - if (a!=H->start && !(n==H->start && a==H->end) ) - { - sprintf ( (S->T[S->nT])->name, "%s", list[a]); - (S->T[S->nT])->tr=H->freeT; - S->nT++; - } - } - } - return H; -} - -char **produce_state_name (int nstate,char **list, char *model_name, Hmm* H) -{ - int a,b,c; - char **new_list; - nstate+=2; - - new_list=declare_char ( nstate, 100); - for ( a=0, b=0, c=0; a< nstate; a++) - { - if ( a==H->start)sprintf ( new_list[a], "START"); - else if ( a==H->end)sprintf ( new_list[a], "END"); - else if ( list==NULL){sprintf ( new_list[a], "%c%s", 'a'+b, (model_name)?model_name:"");b++;} - else {sprintf ( new_list[a], "%c%s", list[a][c], (model_name)?model_name:"");c++;} - } - return new_list; -} - -int seq_viterbi_pair_wise (Alignment *A,int*ns, int **ls,Constraint_list *CL) -{ - ungap(A->seq_al[0]); - analyze_sequence (A->seq_al[0], NULL); - myexit (EXIT_FAILURE); - return 1; -} -double analyze_sequence ( char *seq, Hmm *H) -{ - - double **M; - int *path; - - if ( H==NULL) - { - H=define_full_model(5, NULL,"_first", pavie_emission); - H=bound_hmm(H); - DisplayHmm (H); - } - M=seq_viterbi_hmm (seq, H); - path=seq_viterbi2path (seq, H, M); - return M[H->end][strlen (seq)]; -} - - -double** seq_viterbi_hmm (char *seq, Hmm *H) -{ - /*Given a model H and a sequence seq*/ - double **M; - double e, v, max; - int i,pi, bestk, s, k, l1; - HmmState *S1, *S2; - - - l1=strlen (seq); - M=declare_double (H->nS*2,l1+2); - - /*Handle the start*/ - M[H->start][0]=0; - for ( i=0; i<=l1; i++) - { - for ( s=0; s< H->nS; s++) - { - S1=H->S[s]; - pi=i-S1->DI; - max=H->forbiden; - bestk=H->forbiden; - if ( pi<0){M[s][i]=H->forbiden;}/*Boundary*/ - else - { - if (pi==0) {max=H->T[(int)H->start][s];bestk=H->start;}/*Start*/ - else - { - for (k=1; k<=H->fromM[S1->state][0]; k++) - { - S2=H->S[H->fromM[s][k]]; - if ( S2->state==H->start || S2->state==H->end)continue; - v=hmm_add((M[S2->state][pi]),(H->T[S2->state][S1->state])); - if ( v!=H->forbiden && (max==H->forbiden || v>max)){max=v;bestk=S2->state;} - } - } - if (S1->em2)e=S1->em2[(int)seq[pi]]; - else e=S1->em_func2(H,S1, (int)seq[pi]); - - e=hmm_add (e,max); - - M[s][i]=e; - M[s+H->nS][i]=bestk; - } - } - } - /*Terminate viterbi: connect the path to the END state*/ - max=UNDEFINED; - bestk=UNDEFINED; - for (k=0; k< H->nS; k++) - { - if (k==H->start || k==H->end); - else - { - v=(M[k][l1]==H->forbiden || H->T[k][H->end]==H->forbiden)?H->forbiden:M[k][l1]+H->T[k][H->end]; - if ( max==H->forbiden || v>max){bestk=k;max=v;} - } - } - M[H->end][l1]=max; - M[H->nS+H->end][l1]=bestk; - return M; -} - -int * seq_viterbi2path (char *s, Hmm *H, double **M) -{ - int i,l,l1; - int *path; - HmmState *S1; - int cs; - - l1=strlen (s); - path=vcalloc (l1+1, sizeof (int)); - i=l1; - l=0; - cs=M[H->nS+H->end][i]; - - while (i>0) - { - - S1=H->S[cs]; - path[l++]=cs; - - cs=M[H->nS+cs][i]; - i-=S1->DI; - /*fprintf ( stderr, "%d", cs);*/ - } - invert_list_int (path, l); - path[l++]=H->forbiden; - - return path; -} - -/*********************************************************************************/ -/* */ -/* */ -/* pairHMM: Viterbi */ -/* */ -/* */ -/*********************************************************************************/ -Hmm* define_mnm_model(Constraint_list *CL); -int viterbi_pair_wise_OLD (Alignment *A,int*ns, int **ls,Constraint_list *CL) -{ - int l1, l2, a; - double ***M; - int *path; - Hmm * H; - - A->pos=aln2pos_simple( A, -1, ns, ls); - - // H=define_mnm_model (CL); - H=define_two_mat_model (CL); - - l1=strlen (A->seq_al[ls[0][0]]); - l2=strlen (A->seq_al[ls[1][0]]); - M=viterbi_hmm (A, ns, ls, H, CL); - path=viterbi2path (l1,l2, H,M); - A=viterbipath2aln (A,ns,ls,path, H); - A->score=A->score_aln=M[H->end][l1][l2]; - for ( a=0; a< H->nS*2; a++)free_double (M[a], -1); - vfree (M); - free_int (A->pos, -1); - A->pos=NULL; - - free_Hmm (H); - vfree (path); - - return A->score_aln; -} - -Alignment * viterbipath2aln (Alignment *A, int *ns,int **ls,int *tb, Hmm *H) -{ - char **aln; - char *char_buf; - int a, b, c, len, ch; - HmmState *S; - int l[2]; - - len=0;while (tb[len]!=H->forbiden)len++; - - if ( A->declared_len<=len)A=realloc_aln2 ( A,A->max_n_seq,2*len); - aln=A->seq_al; - - char_buf=vcalloc (len+1, sizeof (char)); - l[0]=strlen ( A->seq_al[ls[0][0]]); - l[1]=strlen ( A->seq_al[ls[1][0]]); - - for ( c=0; c< 2; c++) - for ( a=0; a< ns[c]; a++) - { - for (ch=0, b=0; bS[tb[b]]; - if ( (c==0 && S->DI)|| (c==1 && S->DJ) ) - char_buf[b]=aln[ls[c][a]][ch++]; - else - char_buf[b]='-'; - } - char_buf[b]='\0'; - sprintf (aln[ls[c][a]],"%s", char_buf); - if ( l[c]!=ch){fprintf (stderr, "\nERROR: Wrong Size Of Alignmnent (Real %d, Observed %d)[FATAL:%s]",l[c], ch, PROGRAM);} - } - A->len_aln=len; - A->nseq=ns[0]+ns[1]; - - vfree(char_buf); - return A; -} - -double*** viterbi_hmm (Alignment *A,int *ns, int **ls, Hmm *H, Constraint_list *CL) -{ - double ***M; - double e, v, max; - int a, i,pi, bestk,j,pj, s, k, l1, l2; - HmmState *S1, *S2; - - l1=strlen (A->seq_al[ls[0][0]]); - l2=strlen (A->seq_al[ls[1][0]]); - - M=vcalloc (H->nS*2, sizeof (double**)); - for ( a=0; anS*2; a++)M[a]=declare_double (l1+2, l2+2); - - /*Handle the start*/ - - M[H->start][0][0]=0; - for ( i=0; i<=l1; i++) - for ( j=0; j<=l2; j++) - { - for ( s=0; s< H->nS; s++) - { - S1=H->S[s]; - pi=i-S1->DI; - pj=j-S1->DJ; - max=H->forbiden; - bestk=H->forbiden; - if ( pi<0 ||pj<0){M[s][i][j]=H->forbiden;}/*Boundary*/ - else - { - if (pi+pj==0) {max=H->T[H->start][s];bestk=H->start;}/*Start*/ - else - { - for (k=1; k<=H->fromM[S1->state][0]; k++) - { - S2=H->S[H->fromM[s][k]]; - if ( S2->state==H->start || S2->state==H->end)continue; - v=(M[S2->state][pi][pj]==H->forbiden)?H->forbiden:(M[S2->state][pi][pj]+H->T[S2->state][S1->state]); - if ( v!=H->forbiden && (max==H->forbiden || v>max)){max=v;bestk=S2->state;} - } - } - - e=(S1->em==H->forbiden)?S1->em_func (A, A->pos, ns[0], ls[0],i-1, A->pos,ns[1], ls[1], j-1, CL):S1->em; - e=(max==H->forbiden || e==H->forbiden)?H->forbiden:e+max; - - M[s][i][j]=e; - M[s+H->nS][i][j]=bestk; - } - } - } - - /*Terminate viterbi: connect the path to the END state*/ - max=UNDEFINED; - bestk=UNDEFINED; - for (k=0; k< H->nS; k++) - { - if (k==H->start || k==H->end); - else - { - v=(M[k][l1][l2]==H->forbiden || H->T[k][H->end]==H->forbiden)?H->forbiden:M[k][l1][l2]+H->T[k][H->end]; - if ( max==H->forbiden || v>max){bestk=k;max=v;} - } - } - M[H->end][l1][l2]=max; - M[H->nS+H->end][l1][l2]=bestk; - - return M; -} - -/*********************************************************************************/ -/* */ -/* */ -/* HMM: Decode/Traceback */ -/* */ -/* */ -/*********************************************************************************/ -int * traceback (Alignment *A,int *ns, int **ls, Hmm *H, Constraint_list *CL,MatState *S, MatState *E, int **seg_list) - -{ - int *path; - int l=0; - MatState *N; - int l1, l2; - - l1=strlen (A->seq_al[ls[0][0]]); - l2=strlen (A->seq_al[ls[1][0]]); - path=vcalloc ( l1+l2+1, sizeof (int)); - - while ( S->st!=H->end) - { - DisplayMatState (S, "\n\tTraceback"); - N=S->n; - if ( N && S && (((N->i-S->i)>1) ||((N->j-S->j)>1))) - { - RviterbiD_hmm (A,ns,ls,H,CL,S,N,seg_list); - N=S->n; - } - - path[l++]=S->st; - ManageMatState (FREE,S); - S=N; - } - - path[l]=H->forbiden; - return path; -} - -int * viterbi2path (int l1,int l2, Hmm *H, double ***M) -{ - int i, j,l; - int *path; - HmmState *S1; - int cs; - - l=0; - path=vcalloc (l1+l2+1, sizeof (int)); - i=l1;j=l2; - l=0; - cs=M[H->nS+H->end][i][j]; - - while (i>0|| j>0) - { - - S1=H->S[cs]; - path[l++]=cs; - - cs=M[H->nS+cs][i][j]; - i-=S1->DI; - j-=S1->DJ; - /*fprintf ( stderr, "%d", cs);*/ - } - invert_list_int (path, l); - path[l++]=H->forbiden; - - return path; -} - -/*********************************************************************************/ -/* */ -/* */ -/* HMM Viterbi Linear */ -/* */ -/* */ -/*********************************************************************************/ - - -int viterbiL_pair_wise (Alignment *A,int*ns, int **ls,Constraint_list *CL) -{ - int l1, l2; - int *path; - Hmm * H; - MatState *Start; - MatState *End; - - A->pos=aln2pos_simple( A, -1, ns, ls); - Start=ManageMatState ( DECLARE, NULL); - End=ManageMatState ( DECLARE, NULL); - H=define_simple_model (CL); - l1=strlen (A->seq_al[ls[0][0]]); - l2=strlen (A->seq_al[ls[1][0]]); - - - - Start->i=0 ;Start->j=0 ; Start->st=H->start;Start->sc=0; - End->i =l1; End->j=l2; End ->st=H->end; - Start=RviterbiL_hmm (A, ns, ls, H, CL, Start,End); - path=traceback (A, ns, ls, H, CL, Start,NULL, NULL); - - A=viterbipath2aln (A,ns,ls,path, H); - - free_Hmm (H); - free_int (A->pos, -1); - A->pos=NULL; - - return A->score_aln; -} - - -MatState* RviterbiL_hmm (Alignment *A,int *ns, int **ls, Hmm *H, Constraint_list *CL,MatState *S, MatState *E) -{ - MatState *MS, *ME; - MS=S; - ME=E; - - viterbiL_hmm (A,ns, ls,H, CL, S, E); - - - if ( S->n==E)return S; - if ( E->sc==H->forbiden) - { - DisplayHmm (H); - fprintf ( stderr, "\nERROR: The Requested Model (Cf Above) Cannot Produce the Pair-Alignment\nYou must allow extra possible transitions\n[FATAL:%s]", PROGRAM); - myexit ( EXIT_FAILURE); - } - E=S->n; - - while (S!=ME) - { - int d1, d2, align; - d1=MinDeltaMatState(S,E); - d2=MaxDeltaMatState(S,E); - align=((d1==1 && d2==1) || ( d1==0))?0:1; - if (align)RviterbiL_hmm (A,ns, ls,H, CL,S,E); - S=E; - E=S->n; - } - return MS; -} - -#define Dim(i,j) (i*LenJ+j) -MatState* viterbiL_hmm (Alignment *A,int *ns, int **ls, Hmm *H, Constraint_list *CL, MatState *S,MatState *E) -{ - int current, previous,row, prow; - double v; - int a,i,j,pi,pj, s, k; - int start_i, start_j, end_i, end_j, l1, l2; - HmmState *S1, *S2; - MatState *CC, *PCC,*tS, *tE, *mark=NULL; - int midpoint; - - - static MatState ***M; - - - static int LenJ, LenI; - int MaxDelta=50, DeltaI, DeltaJ; - - - - DisplayMatState (S, "\n\tS"); - DisplayMatState (E, "\n\tE"); - - - if ( A==NULL) - { - for ( a=0; a<2; a++)memset(M[a],0,LenJ*LenI*sizeof (MatState*)); - free_arrayN((void **)M, 3);M=NULL; - ManageMatState ( FREE_ALL, NULL); - return NULL; - } - - - if ( MatStateAreIdentical ( S, E))return NULL; - l1=strlen (A->seq_al[ls[0][0]]);l2=strlen (A->seq_al[ls[1][0]]); - - midpoint=S->i+((E->i-S->i)/2); - DeltaI=E->i-S->i; - DeltaJ=E->j-S->j; - - start_i=S->i;end_i=E->i;start_j=S->j;end_j=E->j; - current=0;previous=1; - - - if ( !M) - { - LenI=l2+1; - LenJ=H->nS; - M=declare_arrayN(3, sizeof ( MatState),2,LenI*LenJ,0); - } - - - /*MAKE THE VITERBI FROM S(tart) to E(nd)*/ - mark=ManageMatState ( MARK, mark); - for (i=start_i; i<=end_i; i++) - { - row=current; - for ( j=start_j; j<=end_j; j++) - { - DeltaJ=((FABS(j-i))nS-1;s>=0; s--) - { - S1=H->S[s];pi=i-S1->DI;prow=S1->DI;pj=j-S1->DJ; - - CC=M[row][Dim(j,s)]=CopyMatState(NULL, M[row][Dim(j,s)]); - CC->i=i; CC->j=j; CC->st=s;PCC=NULL; - - if (i==start_i && j==start_j && s==S->st){CC=CopyMatState(S,CC);} - else if ( i==end_i && j==end_j && E->st!=H->end && s!=E->st)CC->sc=H->forbiden; - else if ( pisc=H->forbiden;} - else - { - for (k=1; k<=H->fromM[S1->state][0]; k++) - { - S2=H->S[H->fromM[s][k]]; - PCC=M[prow][Dim((j-S1->DJ),(S2->state))]; - - if ( !PCC)PCC=NULL; - else if ( pi+pj!=0 && S2->state==H->start); - else if ( !(pi==l1 && pj==l2) && s==H->end); - else - { - - v=hmm_add(CC->sc,H->T[PCC->st][CC->st]); - - v=lu_RviterbiD_hmm(A,ns, ls, H, CL,PCC,CC, NULL); - if ( v!=H->forbiden && (CC->sc==H->forbiden || v> CC->sc)){CC->sc=v; CC->pst=S2->state;CC->p=PCC;} - } - } - } - if (CC->sc==H->forbiden); - else if (i==midpoint || DeltaI<=MaxDelta||DeltaJ<=MaxDelta ||(i==start_i && j==start_j && s==S->st) ) - { - CC->m=(CC->p)?(CC->p)->m:NULL; - PCC=CopyMatState(CC,NULL); - PCC->m=CC->m;CC->m=PCC; - } - else CC->m=(CC->p)?(CC->p)->m:NULL; - } - } - prow=previous; - for ( j=start_j; j<=end_j && i!=end_i; j++) - { - for ( s=H->nS-1;s>=0; s--) - { - - CC=(M[prow][Dim(j,s)]);M[prow][Dim(j,s)]=M[row][Dim(j,s)];M[row][Dim(j,s)]=CC; - if (M[prow][Dim(j,s)]) M[row ][Dim(j,s)]=CopyMatState ( M[prow][Dim(j,s)], M[row][Dim(j,s)]); - - } - } - - } - - mark=ManageMatState ( MARK,mark); - row=current; - - - if ( E->st==H->end || E->st==H->forbiden){E=CopyMatState ((M[row][Dim(end_j,E->st)]),E);} - - - - - PCC=CopyMatState (M[row][Dim(end_j,E->st)], NULL); - - if ( MatStateAreIdentical(PCC,PCC->m))PCC=PCC->m; - tS=tE=PCC; - while (PCC->m) - { - tS=CopyMatState (PCC->m,NULL); tS->n=PCC; PCC->p=tS;PCC=tS; - } - - if (tS==tE); - else - { - S->n=tS->n; (S->n)->p=S; - E->p=tE->p; (E->p)->n=E; - } - for ( a=0; a<2; a++)memset(M[a],0,LenJ*LenI*sizeof (MatState*)); - ManageMatState ( FREE_MARK,mark); - - - while (S && S->p!=E){S->m=NULL;S=S->n;}/*Clean the memory of the rturned Cells*/ - return NULL; -} - -/*********************************************************************************/ -/* */ -/* */ -/* HMM Viterbi Diagonals */ -/* */ -/* */ -/*********************************************************************************/ -int viterbiD_pair_wise (Alignment *A,int*ns, int **ls,Constraint_list *CL) -{ - int l1, l2; - int *path; - Hmm * H; - MatState *Start; - MatState *End; - int **seg_list; - int a, b, c; - int main_i; - int main_j; - - - A->pos=aln2pos_simple( A, -1, ns, ls); - - Start=ManageMatState ( DECLARE, NULL); - End=ManageMatState ( DECLARE, NULL); - H=define_simple_model (CL); - l1=strlen (A->seq_al[ls[0][0]]); - l2=strlen (A->seq_al[ls[1][0]]); - - main_i=MAX(1,(l2-l1)+1); - main_j=MAX(1,(l1-l2)+1); - - seg_list=declare_arrayN(2, sizeof (int), l1+l2+3, 3); - seg_list[0][0]=DIAGONALS; - - - c=1; - for ( b=1,a=l1; a>= 1; a--) - { - if (a<50 || (b==main_i && a==main_j)) - { - seg_list[c][0]=a; - seg_list[c][1]=b; - seg_list[c][2]=MIN((l1-a), (l2-b)); - c++; - } - } - - - for ( b=2,a=1; b<= l2; b++, c++) - { - if (b<50 || (b==main_i && a==main_j)) - { - seg_list[c][0]=a; - seg_list[c][1]=b; - seg_list[c][2]=MIN((l1-a), (l2-b)); - } - } - - - seg_list[c][0]=FORBIDEN; - - Start->i=0 ;Start->j=0 ; Start->st=H->start;Start->sc=0; - End->i =l1; End->j=l2; End ->st=H->end; - Start=RviterbiD_hmm (A, ns, ls, H, CL, Start,End,seg_list); - - - path=traceback (A, ns, ls, H, CL, Start,NULL, NULL); - - - - A=viterbipath2aln (A,ns,ls,path, H); - - viterbiD_hmm (NULL, ns, ls, H, CL, Start,End, seg_list); - free_Hmm (H); - free_int (A->pos, -1); - free_arrayN((void **)seg_list, 2); - - A->pos=NULL; - return A->score_aln; -} - - -double lu_RviterbiD_hmm (Alignment *A,int *ns, int **ls, Hmm *H, Constraint_list *CL,MatState *S, MatState *E, int **seg_list) -{ - HmmState *S1; - double sc, sc2,e, t; - static MatState *cS=NULL, *cE=NULL; - double min, max; - max=MAX((E->i-S->i), (E->j-S->j)); - min=MIN((E->i-S->i), (E->j-S->j)); - - - if ( S->sc==H->forbiden) return H->forbiden; - else if (min==0) - { - e=hmm_add(S->sc,H->T[S->st][E->st]); - if ( H->T[E->st][E->st]!=H->forbiden)e=hmm_add(e, (max-1)*H->T[E->st][E->st]); - if ( (H->S[E->st])->em!=H->forbiden) e=hmm_add(e, max *(H->S[E->st])->em ); - return e; - } - else if ( min>0 && max>1) - { - - fprintf ( stderr, "\nWarning: Disjoined Diagonals"); - DisplayMatState (S, "\n\tS"); - DisplayMatState (E, "\n\tE"); - - - cS=CopyMatState ( S,cS); - cE=CopyMatState ( E,cE); - cE->sc=H->forbiden; - viterbiD_hmm (A,ns,ls, H,CL,cS, cE, NULL); - sc2=cE->sc; - - return sc2; - } - else - { - S1=H->S[E->st]; - t=H->T[S->st][E->st]; - e=(S1->em==H->forbiden)?S1->em_func (A, A->pos, ns[0], ls[0],E->i-1, A->pos,ns[1], ls[1], E->j-1, CL):S1->em; - sc=hmm_add(S->sc,t); - sc=hmm_add(sc,e); - return sc; - } - return H->forbiden; -} - - -MatState* RviterbiD_hmm (Alignment *A,int *ns, int **ls, Hmm *H, Constraint_list *CL,MatState *S, MatState *E, int **seg_list) -{ - MatState *MS, *ME; - MS=S; - ME=E; - - viterbiD_hmm (A,ns, ls,H, CL, S, E, seg_list); - - - if ( S->n==E)return S; - if ( E->sc==H->forbiden) - { - DisplayHmm (H); - fprintf ( stderr, "\nERROR: The Requested Model (Cf Above) Cannot Produce the Pair-Alignment\nYou must allow extra possible transitions\n[FATAL:%s]", PROGRAM); - myexit ( EXIT_FAILURE); - } - E=S->n; - - while (S!=ME) - { - int d1, d2, align; - d1=MinDeltaMatState(S,E); - d2=MaxDeltaMatState(S,E); - align=((d1==1 && d2==1) || ( d1==0))?0:1; - if (align)RviterbiD_hmm (A,ns, ls,H, CL,S,E, seg_list); - S=E; - E=S->n; - } - return MS; -} - -#define Dim(i,j) (i*LenJ+j) -MatState* viterbiD_hmm (Alignment *A,int *ns, int **ls, Hmm *H, Constraint_list *CL, MatState *S,MatState *E, int **seg_list) -{ - int current, previous,row, prow; - double v; - int a,b,i,j,pi,pj, s, k; - int start_i, start_j, end_i, end_j, l1, l2; - HmmState *S1, *S2; - MatState *CC, *PCC,*tS, *tE, *mark=NULL; - int midpoint; - - int dj; - int dc; - int *jlist=NULL; - static int **main_jlist; - static MatState ***M; - static int *toclean; - int ntoclean; - static int LenJ, LenI; - int MaxDelta=50, DeltaI, DeltaJ; - int mode; - - DisplayMatState (S, "\n\tS"); - DisplayMatState (E, "\n\tE"); - - - if ( A==NULL) - { - free_arrayN((void **)main_jlist, 2);main_jlist=NULL; - - for ( a=0; a<2; a++)memset(M[a],0,LenJ*LenI*sizeof (MatState*)); - free_arrayN((void **)M, 3);M=NULL; - vfree (toclean); - ManageMatState ( FREE_ALL, NULL); - return NULL; - } - - - if ( MatStateAreIdentical ( S, E))return NULL; - l1=strlen (A->seq_al[ls[0][0]]);l2=strlen (A->seq_al[ls[1][0]]); - - midpoint=S->i+((E->i-S->i)/2); - DeltaI=E->i-S->i; - - - start_i=S->i;end_i=E->i;start_j=S->j;end_j=E->j; - current=0;previous=1; - - - if ( !M) - { - LenI=l2+1; - LenJ=H->nS; - M=declare_arrayN(3, sizeof ( MatState),2,LenI*LenJ,0); - toclean=vcalloc ( LenI*LenJ, sizeof (int)); - } - - if ( !main_jlist)main_jlist= seglist2table(seg_list, l1, l2); - - - /*MAKE THE VITERBI FROM S(tart) to E(nd)*/ - mark=ManageMatState ( MARK, mark); - mode=(!seg_list)?ALL:seg_list[0][0]; - - for (ntoclean=0,i=start_i; i<=end_i; i++) - { - row=current; - - if ( mode==ALL)jlist=main_jlist[0]; - else if ( mode==DIAGONALS)jlist=(i==0)?main_jlist[0]:main_jlist[1]; - else if ( mode==SEGMENTS) jlist=main_jlist[i+2]; - - - for ( dj=1; dj<=jlist[0]; dj++) - { - DeltaJ=((FABS(dj-i))end_j)continue; - for ( s=H->nS-1;s>=0; s--) - { - S1=H->S[s];pi=i-S1->DI;prow=S1->DI; - - if ( S1->DI && S1->DJ){pj=j-S1->DJ;} - else if ( !S1->DJ)pj=j; - else if ( dj>1)pj=jlist[dj-S1->DJ]+dc; - else pj=-1; - - if (!M[row][Dim(j,s)])toclean[ntoclean]=Dim(j,s); - - CC=M[row][Dim(j,s)]=CopyMatState(NULL, M[row][Dim(j,s)]); - CC->i=i; CC->j=j; CC->st=s;PCC=NULL; - - if (i==start_i && j==start_j && s==S->st){CC=CopyMatState(S,CC);} - else if ( i==end_i && j==end_j && E->st!=H->end && s!=E->st)CC->sc=H->forbiden; - else if ( pisc=H->forbiden;} - else - { - for (k=1; k<=H->fromM[S1->state][0]; k++) - { - S2=H->S[H->fromM[s][k]]; - - if ( S1->DI && S1->DJ)PCC=M[prow][Dim((j-S1->DJ),(S2->state))]; - else PCC=M[prow][Dim((jlist[dj-S1->DJ]+dc),(S2->state))]; - - if ( !PCC)PCC=NULL; - else if ( pi+pj!=0 && S2->state==H->start); - else if ( !(pi==l1 && pj==l2) && s==H->end); - else - { - v=lu_RviterbiD_hmm(A,ns, ls, H, CL,PCC,CC, NULL); - if ( v!=H->forbiden && (CC->sc==H->forbiden || v> CC->sc)){CC->sc=v; CC->pst=S2->state;CC->p=PCC;} - } - } - } - if (CC->sc==H->forbiden); - else if (i==midpoint || DeltaI<=MaxDelta||DeltaJ<=MaxDelta ||(i==start_i && j==start_j && s==S->st) ) - { - CC->m=(CC->p)?(CC->p)->m:NULL; - PCC=CopyMatState(CC,NULL); - PCC->m=CC->m;CC->m=PCC; - } - else CC->m=(CC->p)?(CC->p)->m:NULL; - } - } - prow=previous; - for ( dj=1; dj<=jlist[0] && i!=end_i; dj++) - { - dc=(mode==DIAGONALS && dj!=1)?i:0; - j=jlist[dj]+dc; - if ( jend_j)continue; - - for ( s=H->nS-1;s>=0; s--) - { - - CC=(M[prow][Dim(j,s)]);M[prow][Dim(j,s)]=M[row][Dim(j,s)];M[row][Dim(j,s)]=CC; - if (!M[row][Dim(j,s)])toclean[ntoclean++]=Dim(j,s); - if (M[prow][Dim(j,s)]) M[row ][Dim(j,s)]=CopyMatState ( M[prow][Dim(j,s)], M[row][Dim(j,s)]); - - } - } - - } - - mark=ManageMatState ( MARK,mark); - row=current; - - - if ( E->st==H->end || E->st==H->forbiden){E=CopyMatState ((M[row][Dim(end_j,E->st)]),E);} - - - - - PCC=CopyMatState (M[row][Dim(end_j,E->st)], NULL); - - if ( MatStateAreIdentical(PCC,PCC->m))PCC=PCC->m; - tS=tE=PCC; - while (PCC->m) - { - tS=CopyMatState (PCC->m,NULL); tS->n=PCC; PCC->p=tS;PCC=tS; - } - - if (tS==tE); - else - { - S->n=tS->n; (S->n)->p=S; - E->p=tE->p; (E->p)->n=E; - } - - ManageMatState ( FREE_MARK,mark); - - - for ( a=0; ap!=E){S->m=NULL;S=S->n;}/*Clean the memory of the rturned Cells*/ - return NULL; -} - -/*********************************************************************************/ -/* */ -/* */ -/* HMM Viterbi Diagonals GLOBAL/LOCAL */ -/* */ -/* */ -/*********************************************************************************/ - -int viterbiDGL_pair_wise (Alignment *A,int*ns, int **ls,Constraint_list *CL) -{ - int l1, l2; - int *path; - Hmm * H; - MatState *Start; - MatState *End; - int **seg_list; - int a, b, c; - int main_i; - int main_j; - - - A->pos=aln2pos_simple( A, -1, ns, ls); - - Start=ManageMatState ( DECLARE, NULL); - End=ManageMatState ( DECLARE, NULL); - H=define_simple_model (CL); - l1=strlen (A->seq_al[ls[0][0]]); - l2=strlen (A->seq_al[ls[1][0]]); - - main_i=MAX(1,(l2-l1)+1); - main_j=MAX(1,(l1-l2)+1); - - seg_list=declare_arrayN(2, sizeof (int), l1+l2+3, 3); - seg_list[0][0]=DIAGONALS; - - - c=1; - for ( b=1,a=l1; a>= 1; a--) - { - if (a<50 || (b==main_i && a==main_j)) - { - seg_list[c][0]=a; - seg_list[c][1]=b; - seg_list[c][2]=MIN((l1-a), (l2-b)); - c++; - } - } - - - for ( b=2,a=1; b<= l2; b++, c++) - { - if (b<50 || (b==main_i && a==main_j)) - { - seg_list[c][0]=a; - seg_list[c][1]=b; - seg_list[c][2]=MIN((l1-a), (l2-b)); - } - } - - - seg_list[c][0]=FORBIDEN; - - Start->i=0 ;Start->j=0 ; Start->st=H->start;Start->sc=0; - End->i =l1; End->j=l2; End ->st=H->end; - Start=RviterbiDGL_hmm (A, ns, ls, H, CL, Start,End,seg_list); - - - path=traceback (A, ns, ls, H, CL, Start,NULL, NULL); - - - - A=viterbipath2aln (A,ns,ls,path, H); - - viterbiD_hmm (NULL, ns, ls, H, CL, Start,End, seg_list); - free_Hmm (H); - free_int (A->pos, -1); - free_arrayN((void **)seg_list, 2); - - A->pos=NULL; - return A->score_aln; -} - - -double lu_RviterbiDGL_hmm (Alignment *A,int *ns, int **ls, Hmm *H, Constraint_list *CL,MatState *S, MatState *E, int **seg_list) -{ - HmmState *S1; - double sc, sc2,e, t; - static MatState *cS=NULL, *cE=NULL; - double min, max; - max=MAX((E->i-S->i), (E->j-S->j)); - min=MIN((E->i-S->i), (E->j-S->j)); - - - - - if ( S==NULL || E==NULL || S->sc==H->forbiden) return H->forbiden; - else if ( S->st==H->start) return 0; - else if ( E->st==H->end) return S->sc; - else if (min==0) - { - e=hmm_add(S->sc,H->T[S->st][E->st]); - if ( H->T[E->st][E->st]!=H->forbiden)e=hmm_add(e, (max-1)*H->T[E->st][E->st]); - if ( (H->S[E->st])->em!=H->forbiden) e=hmm_add(e, max *(H->S[E->st])->em ); - return e; - } - else if ( min>0 && max>1) - { - - fprintf ( stderr, "\nWarning: Disjoined Diagonals"); - DisplayMatState (S, "\n\tS"); - DisplayMatState (E, "\n\tE"); - - - cS=CopyMatState ( S,cS); - cE=CopyMatState ( E,cE); - cE->sc=H->forbiden; - viterbiD_hmm (A,ns,ls, H,CL,cS, cE, NULL); - sc2=cE->sc; - - return sc2; - } - else - { - S1=H->S[E->st]; - t=H->T[S->st][E->st]; - e=(S1->em==H->forbiden)?S1->em_func (A, A->pos, ns[0], ls[0],E->i-1, A->pos,ns[1], ls[1], E->j-1, CL):S1->em; - sc=hmm_add(S->sc,t); - sc=hmm_add(sc,e); - return sc; - } - return H->forbiden; -} - - -MatState* RviterbiDGL_hmm (Alignment *A,int *ns, int **ls, Hmm *H, Constraint_list *CL,MatState *S, MatState *E, int **seg_list) -{ - MatState *MS, *ME; - MS=S; - ME=E; - - viterbiDGL_hmm (A,ns, ls,H, CL, S, E, seg_list); - - - if ( S->n==E)return S; - if ( E->sc==H->forbiden) - { - DisplayHmm (H); - fprintf ( stderr, "\nERROR: The Requested Model (Cf Above) Cannot Produce the Pair-Alignment\nYou must allow extra possible transitions\n[FATAL:%s]", PROGRAM); - myexit ( EXIT_FAILURE); - } - E=S->n; - - while (S!=ME) - { - int d1, d2, align; - d1=MinDeltaMatState(S,E); - d2=MaxDeltaMatState(S,E); - align=((d1==1 && d2==1) || ( d1==0))?0:1; - if (align)RviterbiDGL_hmm (A,ns, ls,H, CL,S,E, seg_list); - S=E; - E=S->n; - } - return MS; -} - - -#define Dim(i,j) (i*LenJ+j) -MatState* viterbiDGL_hmm (Alignment *A,int *ns, int **ls, Hmm *H, Constraint_list *CL, MatState *S,MatState *E, int **seg_list) -{ - int current, previous,row, prow; - double v; - int a,i,j,pi,pj, s, k; - int start_i, start_j, end_i, end_j, l1, l2; - HmmState *S1, *S2; - MatState *CC, *PCC,*tS, *tE,*bestE,*bestS, *mark=NULL; - int midpoint; - - int dj; - int dc; - int *jlist=NULL; - static int **main_jlist; - static MatState ***M; - static int *toclean; - int ntoclean; - static int LenJ, LenI; - int MaxDelta=50, DeltaI, DeltaJ; - int mode; - - - - DisplayMatState (S, "\n\tS"); - DisplayMatState (E, "\n\tE"); - - - if ( A==NULL) - { - free_arrayN((void **)main_jlist, 2);main_jlist=NULL; - - for ( a=0; a<2; a++)memset(M[a],0,LenJ*LenI*sizeof (MatState*)); - free_arrayN((void **)M, 3);M=NULL; - vfree (toclean); - ManageMatState ( FREE_ALL, NULL); - return NULL; - } - - - if ( MatStateAreIdentical ( S, E))return NULL; - l1=strlen (A->seq_al[ls[0][0]]);l2=strlen (A->seq_al[ls[1][0]]); - - midpoint=S->i+((E->i-S->i)/2); - DeltaI=E->i-S->i; - - - start_i=S->i;end_i=E->i;start_j=S->j;end_j=E->j; - current=0;previous=1; - - - if ( !M) - { - LenI=l2+1; - LenJ=H->nS; - M=declare_arrayN(3, sizeof ( MatState),2,LenI*LenJ,0); - toclean=vcalloc ( LenI*LenJ, sizeof (int)); - } - - if ( !main_jlist)main_jlist= seglist2table(seg_list, l1, l2); - - - /*MAKE THE VITERBI FROM S(tart) to E(nd)*/ - mark=ManageMatState ( MARK, mark); - mode=(!seg_list)?ALL:seg_list[0][0]; - bestE=CopyMatState (E, NULL); - bestS=CopyMatState (NULL, NULL); - for (ntoclean=0,i=start_i; i<=end_i; i++) - { - row=current; - - if ( mode==ALL)jlist=main_jlist[0]; - else if ( mode==DIAGONALS)jlist=(i==0)?main_jlist[0]:main_jlist[1]; - else if ( mode==SEGMENTS) jlist=main_jlist[i+2]; - - - for ( dj=1; dj<=jlist[0]; dj++) - { - DeltaJ=(FABS(dj-i)end_j)continue; - for ( s=H->nS-1;s>=0; s--) - { - if ( s==S->st)continue; - S1=H->S[s];pi=i-S1->DI;prow=S1->DI; - - if ( S1->DI && S1->DJ){pj=j-S1->DJ;} - else if ( !S1->DJ)pj=j; - else if ( dj>1)pj=jlist[dj-S1->DJ]+dc; - else pj=-1; - - if (!M[row][Dim(j,s)])toclean[ntoclean]=Dim(j,s); - - CC=M[row][Dim(j,s)]=CopyMatState(NULL, M[row][Dim(j,s)]); - CC->i=i; CC->j=j; CC->st=s;PCC=NULL; - - if (i==start_i && j==start_j && s==S->st){CC=CopyMatState(S,CC);} - else if ( s==S->st); - else if ( i==end_i && j==end_j && E->st!=H->end && s!=E->st)CC->sc=H->forbiden; - - else if ( pisc=H->forbiden;} - else - { - for (k=1; k<=H->fromM[S1->state][0]; k++) - { - S2=H->S[H->fromM[s][k]]; - - if ( S1->DI && S1->DJ)PCC=M[prow][Dim((j-S1->DJ),(S2->state))]; - else PCC=M[prow][Dim((jlist[dj-S1->DJ]+dc),(S2->state))]; - - if ( S2->state==H->start){PCC=bestS;PCC->st=0;PCC->sc=0;PCC->m=PCC->n=PCC->p=NULL;} - - v=lu_RviterbiDGL_hmm(A,ns, ls, H, CL,PCC,CC, NULL); - if ( v!=H->forbiden && (CC->sc==H->forbiden || v> CC->sc)){CC->sc=v; CC->pst=S2->state;CC->p=PCC;} - } - } - if ( CC->sc==H->forbiden); - else if ( bestE->sc==H->forbiden || bestE->sc>CC->sc) - { - bestE=CopyMatState(CC, bestE); - bestE->m=(CC->p)->m; - } - else if (CC->p && (CC->p)->st==H->start) - { - CC->m=CopyMatState (CC->p, NULL); - } - else if (i==midpoint || DeltaI<=MaxDelta||DeltaJ<=MaxDelta ||(i==start_i && j==start_j && s==S->st) ) - { - CC->m=(CC->p)?(CC->p)->m:NULL; - PCC=CopyMatState(CC,NULL); - PCC->m=CC->m;CC->m=PCC; - } - else CC->m=(CC->p)?(CC->p)->m:NULL; - } - } - prow=previous; - for ( dj=1; dj<=jlist[0] && i!=end_i; dj++) - { - dc=(mode==DIAGONALS && dj!=1)?i:0; - j=jlist[dj]+dc; - if ( jend_j)continue; - - for ( s=H->nS-1;s>=0; s--) - { - - CC=(M[prow][Dim(j,s)]);M[prow][Dim(j,s)]=M[row][Dim(j,s)];M[row][Dim(j,s)]=CC; - /*if (!M[row][Dim(j,s)])toclean[ntoclean++]=Dim(j,s);*/ - if (M[prow][Dim(j,s)]) M[row ][Dim(j,s)]=CopyMatState ( M[prow][Dim(j,s)], M[row][Dim(j,s)]); - - } - } - - } - - mark=ManageMatState ( MARK,mark); - row=current; - - - if ( E->st==H->end || E->st==H->forbiden){E=CopyMatState ((M[row][Dim(end_j,E->st)]),E);} - PCC=CopyMatState (bestE, NULL); - - if ( MatStateAreIdentical(PCC,PCC->m))PCC=PCC->m; - tS=tE=PCC; - while (PCC->m) - { - tS=CopyMatState (PCC->m,NULL); tS->n=PCC; PCC->p=tS;PCC=tS; - } - - if (tS==tE); - else - { - CopyMatState ( tS, S); - CopyMatState ( tE, E); - } - ManageMatState ( FREE_MARK,mark); - - for ( a=0; a<2; a++)memset(M[a],0,LenJ*LenI*sizeof (MatState*)); - - while (S && S->p!=E){S->m=NULL;S=S->n;}/*Clean the memory of the rturned Cells*/ - return NULL; -} - - -/*********************************************************************************/ -/* */ -/* */ -/* HMM Viterbi Diagonals PROCESSING */ -/* */ -/* */ -/*********************************************************************************/ -int **seglist2table ( int **seglist,int l1, int l2) - { - int **valuesT; - int *bvalues; - int line, a,si, sj, ei, j, c; - - /*All: 0*/ - valuesT=vcalloc ((l1+2)+3, sizeof (int*)); - valuesT[0]=vcalloc (l2+2, sizeof (int)); - for (a=0; a<=l2; a++)valuesT[0][++valuesT[0][0]]=a; - - if ( !seglist) return valuesT; - /*Diagonals: 1*/ - valuesT[1]=vcalloc (l1+l2+2, sizeof (int)); - bvalues=vcalloc (l1+l2+2, sizeof (int)); - c=1; - while (seglist[c][0]!=FORBIDEN) - { - - si=seglist[c][0]; - sj=seglist[c][1]; - - bvalues[(sj-si)+l1]=1; - c++; - } - valuesT[1][++valuesT[1][0]]=0; - for (a=0; a<=(l1+l2); a++) - { - if (bvalues[a]) - { - valuesT[1][++valuesT[1][0]]=a-l1; - } - - } - vfree (bvalues); - - /*Segments: 2*/ - valuesT[2]=vcalloc (l2+2, sizeof (int)); - for (a=0; a<=l2; a++)valuesT[2][++valuesT[2][0]]=a; - - bvalues=vcalloc (l2+2, sizeof (int)); - for ( line=1; line<=l1; line++) - { - bvalues[0]=c=0; - bvalues[++bvalues[0]]=0; - while (seglist[c][0]!=FORBIDEN) - { - si=seglist[c][0]; - ei=si+seglist[c][2]; - sj=seglist[c][1]; - j=sj+(line-si); - if ( lineei); - else if (j>=0 && j<=l2 && seglist[c][2]) - { - bvalues[++bvalues[0]]=j; - } - c++; - } - valuesT[line+2]=vcalloc (bvalues[0]+1, sizeof (int)); - for ( a=0; a<=bvalues[0]; a++) valuesT[line+2][a]=bvalues[a]; - } - vfree (bvalues); - return valuesT; - } - - - -/*********************************************************************************/ -/* */ -/* */ -/* HMM modeling */ -/* */ -/* */ -/*********************************************************************************/ - -Hmm* declare_hmm(int n) - { - Hmm *H; - int a, b; - - H=vcalloc (1, sizeof (Hmm)); - H->nS=n; - H->S=vcalloc (H->nS, sizeof (HmmState*)); - for (a=0; anS; a++) - { - H->S[a]=vcalloc (1, sizeof (HmmState)); - (H->S[a])->em2=vcalloc (MAX_EMISSION, sizeof (double)); - - (H->S[a])->T=vcalloc ( H->nS, sizeof (StateTrans*)); - for ( b=0; b< H->nS; b++) - (H->S[a])->T[b]=vcalloc (1, sizeof (StateTrans)); - } - return H; - } - -Hmm* free_Hmm(Hmm*H) - { - int a, b; - - H=vcalloc (1, sizeof (Hmm)); - free_double (H->T, -1); - free_int ( H->fromM, -1); - free_int ( H->toM, -1); - - for (a=0; a< H->nS; a++) - { - - for ( b=0; b< H->nS; b++) - { - vfree ((H->S[a])->em2); - vfree((H->S[a])->T[b]); - } - vfree((H->S[a])->T); - vfree(H->S[a]); - } - vfree (H->S); - vfree (H); - return NULL; - } - -void DisplayHmm ( Hmm *H) -{ - int a, b; - HmmState *S1, *S2; - - for ( a=0; a< H->nS; a++) - { - S1=H->S[a]; - fprintf ( stderr, "\nSTATE %d: %s\n",S1->state,S1->name); - fprintf ( stderr, "\n\tDI %d", S1->DI); - fprintf ( stderr, "\n\tDJ %d", S1->DJ); - fprintf ( stderr, "\n\tE %f", (float)S1->em); - - fprintf ( stderr, "\nReached FROM: "); - for ( b=1; b<=H->fromM[a][0]; b++) - { - S2=H->S[H->fromM[a][b]]; - fprintf ( stderr, "[ %s %f ] ", S2->name, H->T[S2->state][S1->state]); - } - fprintf ( stderr, "\nGoes TO: "); - for ( b=1; b<=H->toM[a][0]; b++) - { - S2=H->S[H->toM[a][b]]; - fprintf ( stderr, "[ %s %f ] ", S2->name, H->T[S1->state][S2->state]); - } - } - return; -} -Hmm * bound_hmm ( Hmm *H) -{ - int a, b, c; - char **name; - HmmState *S; - - name=declare_char(H->nS, 100); - H->T=declare_double ( H->nS, H->nS); - - for ( a=0; a< H->nS; a++) - { - sprintf ( name[a], "%s", (H->S[a])->name); - H->order=MAX(H->order, (H->S[a])->DI); - H->order=MAX(H->order, (H->S[a])->DJ); - } - - for ( a=0; a< H->nS; a++)for (b=0; bnS; b++)H->T[a][b]=H->forbiden; - for (a=0; a< H->nS; a++) - { - S=H->S[a]; - for ( b=0; b< S->nT; b++) - { - c=name_is_in_list ((S->T[b])->name, name, H->nS, 100); - if ( c!=-1)H->T[a][c]=(S->T[b])->tr; - } - } - - /*Bound the model: - bM[state][0]=n_allowed transitions - bM[state][1]=first allowed transition - */ - - H->toM=declare_int ( H->nS, H->nS); - H->fromM=declare_int ( H->nS, H->nS); - - for ( a=0; a< H->nS; a++) - for ( b=0; b< H->nS; b++) - { - if ( H->T[a][b]!=H->forbiden ) - { - {H->fromM[b][0]++; H->fromM[b][H->fromM[b][0]]=a;} - {H->toM[a][0]++; H->toM[a][H->toM[a][0]]=b;} - } - } - for ( a=0; a< H->nS; a++) - { - if (( H->S[a])->em!=H->forbiden)( H->S[a])->em*=SCORE_K; - for ( b=0; b< H->nS; b++) - if ( H->T[a][b]!=H->forbiden)H->T[a][b]*=SCORE_K; - } - free_arrayN((void**)name, 2); - return H; -} - -/*********************************************************************************/ -/* */ -/* */ -/* Memory Management */ -/* */ -/* */ -/*********************************************************************************/ - -MatState * ManageMatState(int Mode, MatState *C) -{ - static MatState *Fheap; - static MatState *Aheap; - MatState *Cmark, *Pmark; - static int alloc, free; - if (!Fheap || Fheap->Hp==NULL) - { - int c=0; - int extension=1000; - if (!Fheap){Fheap=vcalloc (1, sizeof (MatState));Fheap->free=1;free++;} - if (!Aheap)Aheap=vcalloc (1, sizeof (MatState)); - while ( c!=extension) - { - C=vcalloc ( 1, sizeof (MatState)); - C->free=1;Fheap->Hn=C;C->Hp=Fheap; - Fheap=C; - c++; - free++; - } - } - - if ( Mode==DECLARE) - { - - C=Fheap; - Fheap=Fheap->Hp; - C->Hn=C->Hp=NULL; - if ( Aheap){Aheap->Hn=C;C->Hp=Aheap;Aheap=C;} - else Aheap=C; - alloc++; - free--; - C->free=0; - C=CopyMatState(NULL, C); - return C; - } - else if ( Mode==FREE) - { - if ( !C || C->free==1); - else - { - C=CopyMatState(NULL, C); - C->free=1; - if (C->Hp==NULL && C==Aheap)crash (""); - if (C==Aheap)Aheap=C->Hp; - if (C->Hn){(C->Hn)->Hp=C->Hp;} - if (C->Hp){(C->Hp)->Hn=C->Hn;} - C->Hp=C->Hn=NULL; - Fheap->Hn=C;C->Hp=Fheap; - Fheap=C; - alloc--; - free++; - } - return NULL; - } - else if ( Mode==FREE_ALL) - { - while ( Aheap) - { - C=Aheap->Hp; - vfree (Aheap); - Aheap=C; - } - while ( Fheap) - { - C=Fheap->Hp; - vfree (Fheap); - Fheap=C; - } - } - else if ( Mode==INFO) - { - fprintf ( stderr, "\nAllocated: %d Free %d", alloc, free); - } - else if ( Mode==MARK) - { - - if (C==NULL); - else {C->Mn=Aheap;Aheap->Mp=C;} - - return Aheap; - } - else if ( Mode==UNMARK) - { - Pmark=Cmark=NULL; - } - else if ( Mode == FREE_MARK) - { - Cmark=C; - Pmark=C->Mp; - - if ( Cmark==Pmark)return NULL; - else if ( Cmark==Aheap) - {Aheap=Pmark;C=Pmark->Hn;Pmark->Hn=NULL;} - else - { - (Cmark->Hn)->Hp=Pmark; - C=Pmark->Hn; - Pmark->Hn=Cmark->Hn; - } - - Fheap->Hn=C; - C->Hp=Fheap; - Fheap=Cmark; - Fheap->Hn=NULL; - - C=Fheap; - while (C && !C->free) - { - free++;alloc--; - C->free=1; - C=C->Hp; - } - - } - return NULL; -} - - -MatState* CopyMatState ( MatState*I, MatState*O) -{ - if (O==NULL || O->free==1) O=ManageMatState(DECLARE, NULL); - if (I==NULL || I->free==1)I=NULL; - O->i =(I)?I->i:0; - O->j =(I)?I->j:0; - O->st =(I)?I->st:FORBIDEN; - O->pst=(I)?I->pst:FORBIDEN; - O->sc =(I)?I->sc:FORBIDEN; - O->n =(I)?I->n:NULL; - O->p =(I)?I->p:NULL; - O->m =(I)?I->m:NULL; - O->s =(I)?I->m:NULL; - - return O; -} - -/*********************************************************************************/ -/* */ -/* */ -/* Comparisons */ -/* */ -/* */ -/*********************************************************************************/ -int MaxDeltaMatState (MatState*S, MatState*E) -{ - if ( !S || !E) return -1; - else return MAX((E->i-S->i),(E->j-S->j)); -} -int MinDeltaMatState (MatState*S, MatState*E) -{ - if ( !S || !E) return -1; - else return MIN((E->i-S->i),(E->j-S->j)); -} -int MatStateAreIdentical (MatState*I, MatState*O) -{ - if ( !I || !O)return 0; - - if ( I->i!=O->i)return 0; - if ( I->j!=O->j)return 0; - if ( I->st!=O->st)return 0; - return 1; -} - - - -Hmm* define_probcons_model(Constraint_list *CL) -{ - Hmm *H; - double gop=-10; - double gep=-1; - double lgop=-100; - double lgep=-100; - double freeT=0; - int n=0; - HmmState *S; - - - H=declare_hmm(7); - H->freeT=freeT=0; - - H->forbiden=FORBIDEN; - H->start=START_STATE; - H->end=END_STATE; - - /*define START*/ - S=H->S[n]; - sprintf (S->name, "START"); S->state=n; - - S->DI=0; - S->DJ=0; - S->em=freeT; - - sprintf ( (S->T[S->nT])->name, "MATCH") ;(S->T[S->nT])->tr=freeT ;S->nT++; - sprintf ( (S->T[S->nT])->name, "INSERT");(S->T[S->nT])->tr=freeT ;S->nT++; - sprintf ( (S->T[S->nT])->name, "DELETE");(S->T[S->nT])->tr=freeT ;S->nT++; - sprintf ( (S->T[S->nT])->name, "END") ;(S->T[S->nT])->tr=freeT ;S->nT++; - - n++; - /*define END*/ - S=H->S[n]; - sprintf (S->name, "END"); S->state=n; - S->DI=0; - S->DJ=0; - S->em=freeT; - n++; - - /*define Match*/ - S=H->S[n]; - sprintf (S->name, "MATCH"); S->state=n; - S->DI=1; - S->DJ=1; - S->em=H->forbiden; - S->em_func=CL->get_dp_cost; - - sprintf ( (S->T[S->nT])->name, "MATCH") ;(S->T[S->nT])->tr=freeT;S->nT++; - sprintf ( (S->T[S->nT])->name, "INSERT");(S->T[S->nT])->tr=gop ;S->nT++; - sprintf ( (S->T[S->nT])->name, "DELETE");(S->T[S->nT])->tr=gop ;S->nT++; - sprintf ( (S->T[S->nT])->name, "END"); (S->T[S->nT])->tr=freeT;S->nT++; - - n++; - - /*define Insert*/ - S=H->S[n]; - sprintf (S->name, "INSERT"); S->state=n; - S->DI=1; - S->DJ=0; - S->em=gep; - sprintf ( (S->T[S->nT])->name, "MATCH") ; (S->T[S->nT])->tr=freeT;S->nT++; - sprintf ( (S->T[S->nT])->name, "INSERT"); (S->T[S->nT])->tr=freeT;S->nT++; - sprintf ( (S->T[S->nT])->name, "LINSERT");(S->T[S->nT])->tr=lgop ;S->nT++; - sprintf ( (S->T[S->nT])->name, "END"); (S->T[S->nT])->tr=-gop;S->nT++; - - n++; - - /*define LInsert*/ - S=H->S[n]; - sprintf (S->name, "LINSERT"); S->state=n; - S->DI=1; - S->DJ=0; - S->em=lgep; - - sprintf ( (S->T[S->nT])->name, "INSERT") ;(S->T[S->nT])->tr=freeT;S->nT++; - sprintf ( (S->T[S->nT])->name, "LINSERT");(S->T[S->nT])->tr=freeT;S->nT++; - - n++; - - - /*define Delete*/ - S=H->S[n]; - sprintf (S->name, "DELETE"); S->state=n; - S->DI=0; - S->DJ=1; - S->em=gep; - - sprintf ( (S->T[S->nT])->name, "MATCH") ;(S->T[S->nT])->tr=freeT;S->nT++; - sprintf ( (S->T[S->nT])->name, "DELETE") ;(S->T[S->nT])->tr=freeT;S->nT++; - sprintf ( (S->T[S->nT])->name, "LDELETE") ;(S->T[S->nT])->tr=lgop ;S->nT++; - sprintf ( (S->T[S->nT])->name, "END") ;(S->T[S->nT])->tr=-gop;S->nT++; - - n++; - - /*define LDelete*/ - S=H->S[n]; - sprintf (S->name, "LDELETE"); S->state=n; - S->DI=0; - S->DJ=1; - S->em=lgep; - sprintf ( (S->T[S->nT])->name, "DELETE") ;(S->T[S->nT])->tr=freeT;S->nT++; - sprintf ( (S->T[S->nT])->name, "LDELETE");(S->T[S->nT])->tr=freeT;S->nT++; - - n++; - - - H=bound_hmm ( H); - return H; -} - -Hmm* define_mnm_model(Constraint_list *CL) -{ - Hmm *H; - double gop=20; - - - - double freeT=0; - int n=0; - HmmState *S; - - - H=declare_hmm(6); - H->freeT=freeT=0; - - H->forbiden=FORBIDEN; - H->start=START_STATE; - H->end=END_STATE; - - /*define START*/ - S=H->S[n]; - sprintf (S->name, "START"); S->state=n; - - S->DI=0; - S->DJ=0; - S->em=freeT; - - sprintf ( (S->T[S->nT])->name, "MATCH") ;(S->T[S->nT])->tr=freeT ;S->nT++; - sprintf ( (S->T[S->nT])->name, "INSERT");(S->T[S->nT])->tr=freeT ;S->nT++; - sprintf ( (S->T[S->nT])->name, "DELETE");(S->T[S->nT])->tr=freeT ;S->nT++; - sprintf ( (S->T[S->nT])->name, "NOMATCH");(S->T[S->nT])->tr=freeT ;S->nT++; - sprintf ( (S->T[S->nT])->name, "END") ;(S->T[S->nT])->tr=freeT ;S->nT++; - - n++; - /*define END*/ - S=H->S[n]; - sprintf (S->name, "END"); S->state=n; - S->DI=0; - S->DJ=0; - S->em=freeT; - n++; - - /*define Match*/ - S=H->S[n]; - sprintf (S->name, "MATCH"); S->state=n; - S->DI=1; - S->DJ=1; - S->em=H->forbiden; - S->em_func=CL->get_dp_cost; - - sprintf ( (S->T[S->nT])->name, "MATCH") ;(S->T[S->nT])->tr=freeT;S->nT++; - sprintf ( (S->T[S->nT])->name, "NOMATCH");(S->T[S->nT])->tr=gop ;S->nT++; - sprintf ( (S->T[S->nT])->name, "END"); (S->T[S->nT])->tr=freeT;S->nT++; - - n++; - - /*define NOMatch*/ - S=H->S[n]; - sprintf (S->name, "NOMATCH"); S->state=n; - S->DI=1; - S->DJ=1; - S->em=freeT; - S->em_func=NULL; - - sprintf ( (S->T[S->nT])->name, "NOMATCH") ;(S->T[S->nT])->tr=freeT;S->nT++; - sprintf ( (S->T[S->nT])->name, "MATCH") ;(S->T[S->nT])->tr=freeT;S->nT++; - sprintf ( (S->T[S->nT])->name, "INSERT");(S->T[S->nT])->tr=freeT ;S->nT++; - sprintf ( (S->T[S->nT])->name, "DELETE");(S->T[S->nT])->tr=freeT ;S->nT++; - sprintf ( (S->T[S->nT])->name, "END"); (S->T[S->nT])->tr=freeT;S->nT++; - - n++; - /*define Insert*/ - S=H->S[n]; - sprintf (S->name, "INSERT"); S->state=n; - S->DI=1; - S->DJ=0; - S->em=freeT; - sprintf ( (S->T[S->nT])->name, "NOMATCH") ; (S->T[S->nT])->tr=freeT;S->nT++; - sprintf ( (S->T[S->nT])->name, "INSERT") ; (S->T[S->nT])->tr=freeT;S->nT++; - sprintf ( (S->T[S->nT])->name, "END"); (S->T[S->nT])->tr=freeT;S->nT++; - - n++; - - /*define Delete*/ - S=H->S[n]; - sprintf (S->name, "DELETE"); S->state=n; - S->DI=0; - S->DJ=1; - S->em=freeT; - - sprintf ( (S->T[S->nT])->name, "NOMATCH") ;(S->T[S->nT])->tr=freeT;S->nT++; - sprintf ( (S->T[S->nT])->name, "DELETE") ;(S->T[S->nT])->tr=freeT;S->nT++; - sprintf ( (S->T[S->nT])->name, "END") ;(S->T[S->nT])->tr=freeT;S->nT++; - - n++; - - - H=bound_hmm ( H); - return H; -} - -Hmm* define_simple_model(Constraint_list *CL) -{ - Hmm *H; - double gop=-10; - double gep=-1; - double freeT=0; - int n=0; - HmmState *S; - - - H=declare_hmm(5); - H->freeT=freeT=0; - - H->forbiden=FORBIDEN; - H->start=START_STATE; - H->end=END_STATE; - - /*define START*/ - S=H->S[n]; - sprintf (S->name, "START"); S->state=n; - - S->DI=0; - S->DJ=0; - S->em=freeT; - - sprintf ( (S->T[S->nT])->name, "MATCH") ;(S->T[S->nT])->tr=freeT ;S->nT++; - sprintf ( (S->T[S->nT])->name, "INSERT");(S->T[S->nT])->tr=freeT ;S->nT++; - sprintf ( (S->T[S->nT])->name, "DELETE");(S->T[S->nT])->tr=freeT ;S->nT++; - sprintf ( (S->T[S->nT])->name, "END") ;(S->T[S->nT])->tr=freeT ;S->nT++; - - n++; - /*define END*/ - S=H->S[n]; - sprintf (S->name, "END"); S->state=n; - S->DI=0; - S->DJ=0; - S->em=freeT; - n++; - - /*define Match*/ - S=H->S[n]; - sprintf (S->name, "MATCH"); S->state=n; - S->DI=1; - S->DJ=1; - S->em=H->forbiden; - S->em_func=CL->get_dp_cost; - - sprintf ( (S->T[S->nT])->name, "MATCH") ;(S->T[S->nT])->tr=freeT;S->nT++; - sprintf ( (S->T[S->nT])->name, "INSERT");(S->T[S->nT])->tr=gop ;S->nT++; - sprintf ( (S->T[S->nT])->name, "DELETE");(S->T[S->nT])->tr=gop ;S->nT++; - sprintf ( (S->T[S->nT])->name, "END"); (S->T[S->nT])->tr=freeT;S->nT++; - - n++; - - /*define Insert*/ - S=H->S[n]; - sprintf (S->name, "INSERT"); S->state=n; - S->DI=1; - S->DJ=0; - S->em=gep; - sprintf ( (S->T[S->nT])->name, "MATCH") ; (S->T[S->nT])->tr=freeT;S->nT++; - sprintf ( (S->T[S->nT])->name, "INSERT"); (S->T[S->nT])->tr=freeT;S->nT++; - sprintf ( (S->T[S->nT])->name, "DELETE"); (S->T[S->nT])->tr=freeT;S->nT++; - - sprintf ( (S->T[S->nT])->name, "END"); (S->T[S->nT])->tr=-gop;S->nT++; - - n++; - - - /*define Delete*/ - S=H->S[n]; - sprintf (S->name, "DELETE"); S->state=n; - S->DI=0; - S->DJ=1; - S->em=gep; - - sprintf ( (S->T[S->nT])->name, "MATCH") ;(S->T[S->nT])->tr=freeT;S->nT++; - sprintf ( (S->T[S->nT])->name, "DELETE") ;(S->T[S->nT])->tr=freeT;S->nT++; - sprintf ( (S->T[S->nT])->name, "INSERT"); (S->T[S->nT])->tr=freeT;S->nT++; - sprintf ( (S->T[S->nT])->name, "END") ;(S->T[S->nT])->tr=-gop;S->nT++; - - n++; - - H=bound_hmm ( H); - return H; -} - -Hmm* define_two_mat_model(Constraint_list *CL) -{ - Hmm *H; - double gop=-15; - double gep=-2; - double lgop=-6; - double lgep=-1; - double freeT=0; - int n=0; - HmmState *S; - - - H=declare_hmm(8); - H->freeT=freeT=0; - - H->forbiden=FORBIDEN; - H->start=START_STATE; - H->end=END_STATE; - - /*define START*/ - S=H->S[n]; - sprintf (S->name, "START"); S->state=n; - - S->DI=0; - S->DJ=0; - S->em=freeT; - - sprintf ( (S->T[S->nT])->name, "MATCH1") ;(S->T[S->nT])->tr=freeT ;S->nT++; - sprintf ( (S->T[S->nT])->name, "MATCH2") ;(S->T[S->nT])->tr=freeT ;S->nT++; - - sprintf ( (S->T[S->nT])->name, "INSERT");(S->T[S->nT])->tr=freeT ;S->nT++; - sprintf ( (S->T[S->nT])->name, "DELETE");(S->T[S->nT])->tr=freeT ;S->nT++; - sprintf ( (S->T[S->nT])->name, "END") ;(S->T[S->nT])->tr=freeT ;S->nT++; - - n++; - /*define END*/ - S=H->S[n]; - sprintf (S->name, "END"); S->state=n; - S->DI=0; - S->DJ=0; - S->em=freeT; - n++; - - /*define Match*/ - S=H->S[n]; - sprintf (S->name, "MATCH1"); S->state=n; - S->DI=1; - S->DJ=1; - S->em=H->forbiden; - S->em_func=get_dp_cost_pam_matrix; - - sprintf ( (S->T[S->nT])->name, "MATCH1") ;(S->T[S->nT])->tr=freeT;S->nT++; - sprintf ( (S->T[S->nT])->name, "INSERT");(S->T[S->nT])->tr=gop ;S->nT++; - sprintf ( (S->T[S->nT])->name, "DELETE");(S->T[S->nT])->tr=gop ;S->nT++; - sprintf ( (S->T[S->nT])->name, "END"); (S->T[S->nT])->tr=freeT;S->nT++; - - n++; - - /*define Match*/ - S=H->S[n]; - sprintf (S->name, "MATCH2"); S->state=n; - S->DI=1; - S->DJ=1; - S->em=H->forbiden; - S->em_func=get_dp_cost_blosum_matrix; - - sprintf ( (S->T[S->nT])->name, "MATCH2") ;(S->T[S->nT])->tr=freeT;S->nT++; - sprintf ( (S->T[S->nT])->name, "INSERT");(S->T[S->nT])->tr=gop ;S->nT++; - sprintf ( (S->T[S->nT])->name, "DELETE");(S->T[S->nT])->tr=gop ;S->nT++; - sprintf ( (S->T[S->nT])->name, "END"); (S->T[S->nT])->tr=freeT;S->nT++; - - n++; - - /*define Insert*/ - S=H->S[n]; - sprintf (S->name, "INSERT"); S->state=n; - S->DI=1; - S->DJ=0; - S->em=gep; - sprintf ( (S->T[S->nT])->name, "MATCH2") ; (S->T[S->nT])->tr=freeT;S->nT++; - sprintf ( (S->T[S->nT])->name, "MATCH1") ; (S->T[S->nT])->tr=freeT;S->nT++; - - sprintf ( (S->T[S->nT])->name, "INSERT"); (S->T[S->nT])->tr=freeT;S->nT++; - sprintf ( (S->T[S->nT])->name, "LINSERT");(S->T[S->nT])->tr=lgop ;S->nT++; - sprintf ( (S->T[S->nT])->name, "END"); (S->T[S->nT])->tr=-gop;S->nT++; - - n++; - - /*define LInsert*/ - S=H->S[n]; - sprintf (S->name, "LINSERT"); S->state=n; - S->DI=1; - S->DJ=0; - S->em=lgep; - - sprintf ( (S->T[S->nT])->name, "INSERT") ;(S->T[S->nT])->tr=freeT;S->nT++; - sprintf ( (S->T[S->nT])->name, "LINSERT");(S->T[S->nT])->tr=freeT;S->nT++; - - n++; - - - /*define Delete*/ - S=H->S[n]; - sprintf (S->name, "DELETE"); S->state=n; - S->DI=0; - S->DJ=1; - S->em=gep; - - sprintf ( (S->T[S->nT])->name, "MATCH2") ;(S->T[S->nT])->tr=freeT;S->nT++; - sprintf ( (S->T[S->nT])->name, "MATCH1") ;(S->T[S->nT])->tr=freeT;S->nT++; - - sprintf ( (S->T[S->nT])->name, "DELETE") ;(S->T[S->nT])->tr=freeT;S->nT++; - sprintf ( (S->T[S->nT])->name, "LDELETE") ;(S->T[S->nT])->tr=lgop ;S->nT++; - sprintf ( (S->T[S->nT])->name, "END") ;(S->T[S->nT])->tr=-gop;S->nT++; - n++; - - /*define LDelete*/ - S=H->S[n]; - sprintf (S->name, "LDELETE"); S->state=n; - S->DI=0; - S->DJ=1; - S->em=lgep; - sprintf ( (S->T[S->nT])->name, "DELETE") ;(S->T[S->nT])->tr=freeT;S->nT++; - sprintf ( (S->T[S->nT])->name, "LDELETE");(S->T[S->nT])->tr=freeT;S->nT++; - n++; - - if ( n!=H->nS) - { - fprintf ( stderr, "\nERROR in HMM definition [FATAL:%s]", PROGRAM); - myexit (EXIT_FAILURE); - } - - H=bound_hmm ( H); - return H; -} -void DisplayMatState ( MatState *S, char *s) -{ - if ( S==NULL)fprintf ( stderr, "%s: Cell is undefined", s); - else fprintf ( stderr, "%s: i=%d j=%d st=%d pst=%d sc=%d Free %d", s, S->i, S->j, S->st, S->pst, (int)S->sc, S->free); -} -void testfunc ( MatState *S, char *s) -{ - if ( S==NULL)return; - fprintf ( stderr, "\n#### %s ", s); - while ( S){DisplayMatState ( S,"\n\t");S=S->n;} - fprintf ( stderr, "\n"); -} - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -#ifdef BACKHERE - - if ( i>0 && j>0) - m=emit_pair_default[alphabetDefault[seq1[a]]][alphabetDefault[seq2[a]]]; - /*Match*/ - F[M][i][j]=F[M][i-1][j-1]; - - - - M[Match][i][j]=m+log_add3( M[Match][i-step_i][j-step_j],M[I][i-step_i][j],M[D][i][j-step_j]); - M[D ][i][j]=log_add3(gep,M[Match][i ][j-step_j]+gop,M[D][i ][j-step_j]); - M[I ][i][j]=log_add3(gep,M[Match][i-step_i][j ]+gop,M[I][i-step_i][j ]); - - /*Long gaps - M[Match][i][j]=log_add3(M[Match][i][j], M[LI][i-step_i][j],M[LD][i][j-step_j]); - M[LI ][i][j]=log_add3(lgep, M[I][i-step_i][j ]+lgop,M[LI][i-step_i][j ]); - M[LD ][i][j]=log_add3(lgep, M[D][i ][j-step_j]+lgop,M[LD][i ][j-step_j]); - */ - - } - } - retun M; -MatState* RviterbiL_hmm (Alignment *A,int *ns, int **ls, Hmm *H, Constraint_list *CL,MatState *S, MatState *E) -{ - MatState *Mid; - Mid=viterbiL_hmm (A,ns, ls,H, CL, S, E); - - if (!Mid) - { - return S; - } - else if ( Mid->n) - { - return Mid; - } - - else - { - Mid->p=S;S->n=Mid; - Mid->n=E;E->p=Mid; - RviterbiL_hmm (A,ns, ls,H, CL,S, Mid); - RviterbiL_hmm (A,ns, ls,H, CL,Mid, E); - return S; - } -} - -MatState* viterbiL_hmm (Alignment *A,int *ns, int **ls, Hmm *H, Constraint_list *CL, MatState *S,MatState *E) -{ - int current,memory, dim; - double e, v,t; - int i,j,pi,pj, s, k; - int start_i, start_j, end_i, end_j, l1, l2; - HmmState *S1, *S2; - static MatState ****M; - static int maxl; - MatState *Mid=NULL; - - MatState *CC, *PCC; - int midpoint; - int Delta; - - - if ( MatStateAreIdentical ( S, E))return NULL; - - l1=strlen (A->seq_al[ls[0][0]]); - l2=strlen (A->seq_al[ls[1][0]]); - - midpoint=S->i+(E->i-S->i)/2; - Delta=E->i-S->i; - - start_i=S->i;end_i=E->i; - start_j=S->j;end_j=E->j; - - dim=H->order+2;current=0;memory=H->order+1; - if (!M || (l2+1)>maxl) - { free_arrayN((void **)M, 4); - M=declare_arrayN(4, sizeof ( MatState), dim, maxl=(l2+1), H->nS,1); - } - - /*MAKE THE VITERBI FROM S(tart) to E(nd)*/ - for ( i=start_i; i<=end_i; i++) - { - M= (MatState****)recycle ( (void **)M,H->order+1,1); - for ( j=start_j; j<=end_j; j++) - { - for ( s=H->nS-1;s>=0; s--) - { - - S1=H->S[s]; - pi=i-S1->DI;pj=j-S1->DJ; - CC=M[current][j][s]; - CC->i=i; CC->j=j; CC->st=s;CC->sc=H->forbiden;CC->p=CC->n=CC->m=NULL;CC->sc=H->forbiden; - if (i==start_i && j==start_j && s==S->st) {CopyMatState(S,CC);} - else if ( i==end_i && j==end_j && s==E->st && s!=H->end) - { - S2=H->S[E->pst]; - CopyMatState(E,CC); - CC->p=M[S1->DI][j-S1->DJ][S2->state]; - } - else if ( pisc=H->forbiden;} - else - { - for (k=1; k<=H->fromM[S1->state][0]; k++) - { - S2=H->S[H->fromM[s][k]]; - PCC=M[S1->DI][j-S1->DJ][S2->state]; - - if ( pi+pj!=0 && S2->state==H->start) {t=H->forbiden;} - else if ( !(pi==l1 && pj==l2) && s==H->end){t=H->forbiden;} - else t=H->T[S2->state][S1->state]; - - v=hmm_add(t,PCC->sc); - if ( v!=H->forbiden && (CC->sc==H->forbiden || v> CC->sc)){CC->sc=v; CC->pst=S2->state;CC->p=PCC;} - } - - e=(S1->em==H->forbiden)?S1->em_func (A, A->pos, ns[0], ls[0],i-1, A->pos,ns[1], ls[1], j-1, CL):S1->em; - CC->sc=hmm_add(CC->sc,e); - } - - if (i==midpoint)CC->m=CopyMatState(CC, M[memory][j][s]); - else if (i>midpoint && CC->sc!=H->forbiden) CC->m=(M[S1->DI][j-S1->DJ][CC->pst])->m; - } - } - } - - if ( E->st==H->end)CopyMatState ((M[current][end_j][E->st]),E); - - if ( Delta>1) - { - Mid=CopyMatState ((M[current][end_j][E->st])->m,NULL); - } - else if ( Delta==1) - { - CC=M[current][E->j][E->st]; - Mid=E; - while (!MatStateAreIdentical (CC->p, S) ) - { - Mid->p=CopyMatState(CC->p,NULL); - (Mid->p)->n=Mid; - Mid=Mid->p;CC=CC->p; - } - Mid->p=S; - S->n=Mid; - Mid=S; - } - - return Mid; -} -#endif -/*********************************COPYRIGHT NOTICE**********************************/ -/*© Centre National de la Recherche Scientifique (CNRS) */ -/*and */ -/*Please Cite: Notredame*/ -/*Mon May 17 20:15:35 MDT 2004. */ -/*All rights reserved.*/ -/*NOTICE: |*/ -/* This file is an integral part of the */ -/* ALIGN_TWO_SEQ Software. */ -/* Its content is protected and all */ -/* the conditions mentioned in the licensing */ -/* agreement of the software apply to this file.*/ -/*............................................... |*/ -/* If you need some more information, or if you */ -/* wish to obtain a full license, please contact: */ -/* cedric.notredame@europe.com*/ -/*............................................... |*/ -/**/ -/**/ -/* */ -/*********************************COPYRIGHT NOTICE**********************************/ -/*********************************COPYRIGHT NOTICE**********************************/ -/*© Centro de Regulacio Genomica */ -/*and */ -/*Cedric Notredame */ -/*Tue Oct 27 10:12:26 WEST 2009. */ -/*All rights reserved.*/ -/*This file is part of T-COFFEE.*/ -/**/ -/* T-COFFEE is free software; you can redistribute it and/or modify*/ -/* it under the terms of the GNU General Public License as published by*/ -/* the Free Software Foundation; either version 2 of the License, or*/ -/* (at your option) any later version.*/ -/**/ -/* T-COFFEE is distributed in the hope that it will be useful,*/ -/* but WITHOUT ANY WARRANTY; without even the implied warranty of*/ -/* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the*/ -/* GNU General Public License for more details.*/ -/**/ -/* You should have received a copy of the GNU General Public License*/ -/* along with Foobar; if not, write to the Free Software*/ -/* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA*/ -/*............................................... |*/ -/* If you need some more information*/ -/* cedric.notredame@europe.com*/ -/*............................................... |*/ -/**/ -/**/ -/* */ -/*********************************COPYRIGHT NOTICE**********************************/ diff --git a/binaries/src/tcoffee/t_coffee_source/header_list b/binaries/src/tcoffee/t_coffee_source/header_list deleted file mode 100644 index eabfac3..0000000 --- a/binaries/src/tcoffee/t_coffee_source/header_list +++ /dev/null @@ -1,13 +0,0 @@ -/home/notredame/distributions/T-COFFEE_distribution_Version_8.14/t_coffee_source/define_header.h -/home/notredame/distributions/T-COFFEE_distribution_Version_8.14/t_coffee_source/dev1_lib_header.h -/home/notredame/distributions/T-COFFEE_distribution_Version_8.14/t_coffee_source/dev2_lib_header.h -/home/notredame/distributions/T-COFFEE_distribution_Version_8.14/t_coffee_source/dev3_lib_header.h -/home/notredame/distributions/T-COFFEE_distribution_Version_8.14/t_coffee_source/dev4_lib_header.h -/home/notredame/distributions/T-COFFEE_distribution_Version_8.14/t_coffee_source/dp_lib_header.h -/home/notredame/distributions/T-COFFEE_distribution_Version_8.14/t_coffee_source/fast_tree_header.h -/home/notredame/distributions/T-COFFEE_distribution_Version_8.14/t_coffee_source/fastal_lib_header.h -/home/notredame/distributions/T-COFFEE_distribution_Version_8.14/t_coffee_source/io_lib_header.h -/home/notredame/distributions/T-COFFEE_distribution_Version_8.14/t_coffee_source/matrices.h -/home/notredame/distributions/T-COFFEE_distribution_Version_8.14/t_coffee_source/perl_header_lib.h -/home/notredame/distributions/T-COFFEE_distribution_Version_8.14/t_coffee_source/t_coffee.h -/home/notredame/distributions/T-COFFEE_distribution_Version_8.14/t_coffee_source/util_lib_header.h diff --git a/binaries/src/tcoffee/t_coffee_source/hsearch.c b/binaries/src/tcoffee/t_coffee_source/hsearch.c deleted file mode 100644 index 72dd76f..0000000 --- a/binaries/src/tcoffee/t_coffee_source/hsearch.c +++ /dev/null @@ -1,290 +0,0 @@ - -#include -#include -#include -#include - - -#include "io_lib_header.h" -#include "util_lib_header.h" -#include "define_header.h" - -HaschT * hcreate ( int n_elements,struct Hasch_data * declare_data(struct Hasch_entry *), struct Hasch_data *free_data(struct Hasch_data *) ) - { - HaschT *T; - int a; - - n_elements=n_elements*2+1; - - T=vcalloc ( 1, sizeof (HaschT)); - T->ne=n_elements; - T->p=vcalloc (n_elements,sizeof ( Hasch_entry*)); - for ( a=0; ap[a]=allocate_hasch_entry(NULL,DECLARE,declare_data, free_data); - } - return T; - } -HaschT *hdestroy (HaschT *T,struct Hasch_data * declare_data(struct Hasch_entry *), struct Hasch_data *free_data(struct Hasch_data *) ) - - - { - int a; - Hasch_entry *p, *pp; - - if ( T==NULL)return NULL; - - for (a=0; a< T->ne; a++) - { - p=T->p[a]; - while (p) - { - pp=p; - p=p->n; - allocate_hasch_entry(pp,FREE, declare_data, free_data); - } - } - vfree (T->p); - vfree ( T); - return NULL; - } - - -Hasch_entry* hsearch (HaschT *T, int k, int action, struct Hasch_data * declare_data(struct Hasch_entry *), struct Hasch_data *free_data(struct Hasch_data *) ) - - - { - /*action: FIND,ADD, REMOVE*/ - Hasch_entry *p, *pi; - int h; - - - - /* find the key: k->h*/ - - h=k%T->ne; - - - if ( action==ADD || action==FIND) - { - p=pi=T->p[h]; - while (p && p->k!=k){p=p->n;} - if (action==ADD && !p) - { - p=insert_hasch_entry_in_list (pi, NULL, NULL, declare_data, free_data); - p->k=k; - } - else if (action==FIND && !p)p=NULL; - return p; - } - else if ( action==REMOVE) - { - allocate_hasch_entry(hsearch ( T, k, FIND, declare_data, free_data), FREE, declare_data, free_data); - return NULL; - } - return NULL; - } - - -Hasch_entry * extract_hasch_entry_from_list (Hasch_entry *e, struct Hasch_data * declare_data(struct Hasch_entry *), struct Hasch_data *free_data(struct Hasch_data *) ) - - - { - /*extracts entry e and returns p, or next if is NULL*/ - Hasch_entry *p=NULL, *n=NULL; - - if (!e); - else - { - p=e->p; - n=e->n; - - if (p)p->n=n; - if (n)n->p=p; - e->p=e->n=NULL; - } - return e; - } - -Hasch_entry * insert_hasch_entry_in_list (Hasch_entry *p, Hasch_entry *e, Hasch_entry *n, struct Hasch_data * declare_data(struct Hasch_entry *), struct Hasch_data *free_data(struct Hasch_data *) ) - - -{ - /*inserts entry e between entry p and entry n and returns e*/ - - if (!e)e=allocate_hasch_entry (NULL,DECLARE, declare_data, free_data); - - - - if (!p && !n); - else if ( !p)p=n->p; - else if ( !n)n=p->n; - - e->p=p; - if (p)p->n=e; - - e->n=n; - if (n)n->p=e; - - return e; - } - -Hasch_entry * allocate_hasch_entry (Hasch_entry *e, int action,struct Hasch_data * declare_data(struct Hasch_entry *), struct Hasch_data *free_data(struct Hasch_data *) ) - - -{ - static Hasch_entry *s; - Hasch_entry *ns; - - if ( !s)s=vcalloc ( 1, sizeof (Hasch_entry)); - - if ( action==DECLARE) - { - ns=s->p; - e=extract_hasch_entry_from_list (s, declare_data, free_data); - if ( e->free_data)(e->free_data)(e->data); - e->declare_data=declare_data; - e->free_data=free_data; - e->declare_data (e); - e->k=UNDEFINED; - s=ns; - } - else if ( action==FREE) - { - extract_hasch_entry_from_list (e,declare_data, free_data ); - e->k=UNDEFINED; - if ( e->free_data)e->data=(e->free_data)(e->data); - e->free_data=NULL; - e->declare_data=NULL; - s=insert_hasch_entry_in_list (s, e, NULL, declare_data, free_data); - - } - else if ( action==FREE_STACK) - { - while (s) - { - e=s->p; - allocate_hasch_entry (s, FREE, declare_data,free_data); - vfree (s); - s=e; - } - } - else crash ("Unknown MODE for allocate_hasch_entry\n"); - return e; - } - -/*********************************************************************/ -/* */ -/* Get string key */ -/* */ -/* */ -/*********************************************************************/ - - -int string2key (char *s, Char_node *n) -{ - static Char_node *root; - - if ( !root)root=declare_char_node (DECLARE); - - if ( n==NULL && s==NULL) - { - declare_char_node (FREE_STACK); - } - else if (n==NULL) - { - return string2key(s, root); - } - else if ( s[0]=='\0') - { - return n->key; - } - else - { - return string2key(s+1, (n->c[(int)s[0]])?(n->c[(int)s[0]]):(n->c[(int)s[0]]=declare_char_node (DECLARE))); - } - return 0; -} - -Char_node * declare_char_node (int action) -{ -static struct Char_node **heap; -static int heap_size, free_heap, a; -static int key; - if ( action==DECLARE) - { - if ( free_heap==0) - { - free_heap=100; - - heap=vrealloc (heap,(heap_size+free_heap)*sizeof (struct Char_node *)); - for ( a=heap_size; ac=vcalloc ( 256, sizeof (Char_node*)); - (heap[a])->key=key++; - } - heap_size+=free_heap; - } - return heap[heap_size-(free_heap--)]; - } - else if ( action==FREE_STACK) - { - for (a=0; a< heap_size; a++) - { - heap[a]->key=key++; - vfree ( heap[a]->c); - (heap[a])->c=vcalloc ( 256, sizeof (Char_node*)); - } - free_heap=heap_size; - return NULL; - } - return NULL; -} - -/* old declare_char_node (too hungry) -Char_node * declare_char_node (int action) -{ - static int key; - Char_node *cn; - static Char_node *root; - - if ( action==DECLARE) - { - cn=vcalloc (1, sizeof (Char_node)); - cn->key=++key; - cn->c=vcalloc (256, sizeof (Char_node *)); - - } - return cn; -} -*/ -/*********************************COPYRIGHT NOTICE**********************************/ -/*© Centro de Regulacio Genomica */ -/*and */ -/*Cedric Notredame */ -/*Tue Oct 27 10:12:26 WEST 2009. */ -/*All rights reserved.*/ -/*This file is part of T-COFFEE.*/ -/**/ -/* T-COFFEE is free software; you can redistribute it and/or modify*/ -/* it under the terms of the GNU General Public License as published by*/ -/* the Free Software Foundation; either version 2 of the License, or*/ -/* (at your option) any later version.*/ -/**/ -/* T-COFFEE is distributed in the hope that it will be useful,*/ -/* but WITHOUT ANY WARRANTY; without even the implied warranty of*/ -/* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the*/ -/* GNU General Public License for more details.*/ -/**/ -/* You should have received a copy of the GNU General Public License*/ -/* along with Foobar; if not, write to the Free Software*/ -/* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA*/ -/*............................................... |*/ -/* If you need some more information*/ -/* cedric.notredame@europe.com*/ -/*............................................... |*/ -/**/ -/**/ -/* */ -/*********************************COPYRIGHT NOTICE**********************************/ diff --git a/binaries/src/tcoffee/t_coffee_source/io_func.c b/binaries/src/tcoffee/t_coffee_source/io_func.c deleted file mode 100644 index 44ff9e4..0000000 --- a/binaries/src/tcoffee/t_coffee_source/io_func.c +++ /dev/null @@ -1,1492 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include "io_lib_header.h" -#include "util_lib_header.h" -#include "define_header.h" -#include "matrices.h" - -#define DEFAULT_COLOR -1 -#define GAP_COLOR -2 -#define INK_COLOR -3 - -Sequence * cw_read_sequences ( char *seq_name) - { - Sequence *S; - - - char **seq=NULL; - char **name=NULL; - int *len=NULL; - int nseq=0; - int min_len=0; - int max_len=0; - int a; - - get_sequence ( seq_name, &nseq, &seq, &name, &len, &min_len, &max_len); - - S=declare_sequence ( min_len, max_len, nseq); - for ( a=0; a< nseq; a++)sprintf ( S->file[a], "%s", seq_name); - - for ( a=0; anseq; a++) - { - S->len[a]=len[a]; - sprintf ( S->name[a],"%s", name[a]); - vfree ( name[a]); - sprintf ( S->seq[a], "%s", seq[a]); - vfree ( seq[a]); - } - vfree (seq); - vfree (name); - vfree (len); - S=get_sequence_type ( S); - return S; - } -char * get_string_type (char *S) - { - int a, l; - int protein=0, dna=0,rna=0, tot=0; - char *type; - static char *ltype; - static int warning; - - if ( !ltype) - declare_name(ltype); - - declare_name(type); - l=(int)strlen (S); - - if (l==0) - { - sprintf ( type, "UNKNOWN"); - return type; - } - - for ( a=0; a98 && rna>10)sprintf ( type, "RNA"); - else if ( dna>98)sprintf ( type, "DNA"); - else sprintf ( type, "PROTEIN"); - - sprintf ( ltype, "%s", type); - return type; - } - -Alignment* get_aln_type (Alignment *A) - { - if ( !A) return A; - - if ( A->S && !(A->S)->type)(A->S)->type=vcalloc (30, sizeof (char)); - - if ( A->S && (A->S)->type[0]!='\0') - { - ; - } - else if (A->S!=NULL && (A->S)->type[0]=='\0') - { - A->S=get_sequence_type (A->S); - } - else if (A->S==NULL) - { - A->S=aln2seq (A); - A->S=get_sequence_type(A->S); - } - return A; - } - - - -char *unset_mode () -{ - return set_mode (UNSET, NULL); -} -char *store_mode (char *val) -{ - return set_mode (SET, val); -} -char *retrieve_mode () -{ - return set_mode (GET,NULL); -} -char *set_mode (int mode, char *val) -{ - static char type[100]; - if (mode==SET) - { - if (!val)printf_exit (EXIT_FAILURE, stderr, "Error: programme mode unset in io_func.c:set_seq_type"); - sprintf ( type,"%s", val); - } - else if ( mode==GET) - { - ; - } - else if ( mode==UNSET) - { - type[0]='\0'; - } - else - { - printf_exit (EXIT_FAILURE, stderr, "Error: unknown mode in function io_func.c:set_seq_type, use SET, GET or UNSET"); - } - return type; -} -/************************************************************/ - - - -char *unset_seq_type () -{ - return set_seq_type (UNSET, NULL); -} -char *store_seq_type (char *val) -{ - return set_seq_type (SET, val); -} -char *retrieve_seq_type () -{ - return set_seq_type (GET,NULL); -} -char *set_seq_type (int mode, char *val) -{ - static char type[100]; - if (mode==SET) - { - if (!val)printf_exit (EXIT_FAILURE, stderr, "Error: sequence type unset in io_func.c:set_seq_type"); - sprintf ( type,"%s", val); - } - else if ( mode==GET) - { - ; - } - else if ( mode==UNSET) - { - type[0]='\0'; - } - else - { - printf_exit (EXIT_FAILURE, stderr, "Error: unknown mode in function io_func.c:set_seq_type, use SET, GET or UNSET"); - } - return type; -} -char * get_array_type (int n, char **seq) -{ - char *buf, *buf2; - int a, tot=0; - buf2=vcalloc ( 100, sizeof (char)); - - - for ( tot=0,a=0; atype, "%s", get_array_type (S->nseq, S->seq)); - return S; -} - - -void get_sequence (char *seq_file,int *NSEQ, char ***SEQ, char ***SN, int **sl, int *min, int *max) - { - int a,b; - int min_len; - int max_len; - int nseq; - - int **SL; - - nseq=NSEQ[0]= readseqs ( seq_file, SEQ, SN, &SL); - sl[0]=vcalloc ( nseq, sizeof (int)); - - - min_len= max_len= (SL)[0][0]; - for ( a=0; a (SL)[a][0])?(SL)[a][0]:min_len; - max_len= ( max_len < (SL)[a][0])?(SL)[a][0]:max_len; - } - min[0]=min_len; - max[0]=max_len; - } - -int ** get_matrix ( char *name, char *format) - { - - if ( strm ( "blast", format))return read_blast_matrix ( name); - else if ( strm ( "clustalw", format))return read_matrice(name); - else - { - fprintf ( stderr, "\nError:\nUnknowm Format %s for Matrix %s[FATAL]", format, name); - myexit (EXIT_FAILURE); - } - return NULL; - } -void display_matrix (int **mat); -int ** read_matrice (char *mat_name_in) - { - int a,b,c, l; - - char AA[]="abcdefghiklmnpqrstvwxyz"; - FILE *fp; - int **matrice; - int **matrix2; - char mat_name[200]; - int *vector=NULL; - - l=strlen(AA); - - - - - if ( strm2 (mat_name_in, "pam", "PAM"))sprintf ( mat_name, "pam250mt"); - else if (strm2 (mat_name_in, "blosum", "BLOSUM"))sprintf ( mat_name, "blosum62mt"); - else if (strm3 (mat_name_in, "id", "ID", "idmat"))sprintf ( mat_name, "idmat"); - else sprintf ( mat_name, "%s", mat_name_in); - - /*Read Header Matrices*/ - if (strm(mat_name, "pam250mt"))vector=pam250mt; - else if (strm(mat_name, "idmat"))vector=idmat; - else if (strm(mat_name, "dna_idmat"))vector=idmat; - else if (strm(mat_name, "est_idmat"))vector=est_idmat; - else if (strm(mat_name, "md_350mt"))vector=md_350mt; - else if (strm(mat_name, "md_250mt"))vector=md_250mt; - else if (strm(mat_name, "md_120mt"))vector=md_120mt; - else if (strm(mat_name, "md_40mt" ))vector= md_40mt; - else if (strm(mat_name, "pam350mt" ))vector=pam350mt; - else if (strm(mat_name, "pam160mt" ))vector=pam160mt; - else if (strm(mat_name, "pam120mt" ))vector=pam120mt; - - else if (strm(mat_name, "blosum80mt" ))vector=blosum80mt; - else if (strm(mat_name, "blosum62mt" ))vector=blosum62mt; - else if (strm(mat_name, "exon2mt" ))vector=blosum62mt; - else if (strm(mat_name, "blosum62mt3" ))vector=blosum62mt3; - - else if (strm(mat_name, "blosum62mt2" ))vector=blosum62mt2; - else if (strm(mat_name, "blosum55mt" ))vector=blosum55mt; - else if (strm(mat_name, "blosum50mt" ))vector=blosum50mt; - else if (strm(mat_name, "blosum45mt" ))vector=blosum45mt; - - else if (strm(mat_name, "blosum40mt" ))vector=blosum40mt; - else if (strm(mat_name, "blosum30mt" ))vector=blosum30mt; - else if (strm(mat_name, "beta_mat" ))vector=beta_mat; - else if (strm(mat_name, "alpha_mat" ))vector=alpha_mat; - else if (strm(mat_name, "coil_mat" ))vector=coil_mat; - - else if (strm(mat_name, "rblosum80mt" ))vector=rblosum80mt; - else if (strm(mat_name, "rblosum62mt" ))vector=rblosum62mt; - else if (strm(mat_name, "rblosum30mt" ))vector=rblosum30mt; - - else if (strm(mat_name, "rpam250mt" ))vector=rpam250mt; - else if (strm(mat_name, "rpam350mt" ))vector=rpam350mt; - else if (strm(mat_name, "rpam160mt" ))vector=rpam160mt; - else if (strm(mat_name, "rpam120mt" ))vector=rpam120mt; - - else if (strm(mat_name, "tmpam250mt" ))vector=tmpam250mt; - else if (strm(mat_name, "rtmpam250mt" ))vector=rtmpam250mt; - - else if (strm(mat_name, "rbeta_mat" ))vector=rbeta_mat; - else if (strm(mat_name, "ralpha_mat" ))vector=ralpha_mat; - else if (strm(mat_name, "rcoil_mat" ))vector=rcoil_mat; - else if (strm (mat_name, "jtttm250mt"))vector=jtttm250mt; - - /*Header Matrices*/ - if(vector) - { - matrice=declare_int ( 256, 256); - for (a=0; a0)return matrix; - else - { - for ( b=0; br; - g=&C->g; - b=&C->b; - - if ( val==10)val--; - sprintf ( C->html_color_class, "value%d",val); - - - if (val<=9 && val>=0) - { - - sprintf ( C->html_color, "%s", html_code[val]); - r[0]=ps_code[val][0]; - g[0]=ps_code[val][1]; - b[0]=ps_code[val][2]; - } - - else if (val==DEFAULT_COLOR || val==NO_COLOR_RESIDUE || val==NO_COLOR_GAP || (val>'A' && val<'z')) - { - C->html_color[0]='\0'; - sprintf ( C->html_color_class, "valuedefault"); - r[0]=1.; - g[0]=1; - b[0]=1; - - } - else if (val==GAP_COLOR) - { - C->html_color[0]='\0'; - sprintf ( C->html_color_class, "valuegap"); - r[0]=1.; - g[0]=1; - b[0]=1; - } - else if (val==INK_COLOR ) - { - sprintf ( C->html_color, "000000"); - sprintf ( C->html_color_class, "valueink"); - r[0]=0.; - g[0]=0; - b[0]=0; - } - return; - - - } -int output_color_format ( Alignment *B,Alignment *Sin,char *name, \ -FILE_format *(*vfopen_format) ( char *),\ -FILE_format *(*print_format_string) ( char * ,Color *, Color *, FILE_format*),\ -FILE_format *(*print_format_char) ( int ,Color *, Color *, FILE_format*),\ -void (*get_rgb_values_format) ( int ,Color *),\ -FILE_format* (*vfclose_format) ( FILE_format *)) - { - int a, b, c; - int max_name_len=15; - int max_len=0; - char *buf2, *buf3; - - static char *buf; - int s; - int *n_residues; - static FILE_format *fps; - Color *ink; - Color *box_c; - Color *white; - Alignment *S; - - - S=copy_aln (B, NULL); - - buf2=vcalloc (Sin->len_aln+1, sizeof (char)); - buf3=vcalloc ( B->len_aln+1, sizeof (char)); - for ( a=0; anseq; a++) - { - int i,n, r; - - i=name_is_in_list ( B->name[a], Sin->name, Sin->nseq, -1); - if (i==-1)continue; - sprintf (buf2, "%s", Sin->seq_al[i]);ungap(buf2); - sprintf (buf3, "%s", S->seq_al[a]);ungap(buf3); - - if ( strlen (buf2) !=strlen(buf3)) - { - - fprintf ( stderr, "\nERROR: Incompatible cache ON sEQ: %s\n", S->name[a]); - fprintf ( stderr, "\n%s\n%s", buf2, buf3); - fprintf ( stderr, "\n\n%s\n%s", Sin->seq_al[i],S->seq_al[a]); exit (EXIT_FAILURE); - } - - for (n=0,b=0;blen_aln; b++) - { - r=S->seq_al[a][b]; - if (!is_gap(r)) - { - S->seq_al[a][b]=buf2[n++]; - } - } - } - - S=aln2number(S); - vfree (buf2); - - box_c=vcalloc ( 1, sizeof (Color)); - get_rgb_values_format (DEFAULT_COLOR, (white=vcalloc ( 1, sizeof (Color)))); - get_rgb_values_format (INK_COLOR, (ink =vcalloc ( 1, sizeof (Color)))); - - n_residues=vcalloc ( B->nseq+1, sizeof (int)); - for ( a=0; anseq; a++)n_residues[a]=B->order[a][1]; - - fps=vfopen_format( name); - if ( buf==NULL) - { - buf=vcalloc (10000, sizeof (int)); - } - - if ( max_len==0) - { - for ( a=0; a< B->nseq; a++) - {if ( strlen (B->name[a])>max_len) - max_len= strlen ( (B->name[a])); - } - } - if ( max_len>max_name_len)max_len=max_name_len; - - sprintf (buf, "\n%s, %s(%s)\n%s\n",PROGRAM,VERSION,DATE, AUTHOR); - fps=print_format_string ( buf,white, ink, fps); - - fps=print_format_string ( "\n\n",white,ink, fps); - - fps->line-=max_len; - fps->line=fps->line-fps->line%3; - - - - - for (a=0; alen_aln; a+=fps->line) - { - - if ( (fps->n_line+(B->nseq+4))>fps->max_line_ppage && !((B->nseq+4)>fps->max_line_ppage)) - { - fps=print_format_char ( fps->eop,white, ink, fps); - } - - for (b=0; b<=B->nseq; b++) - { - sprintf (buf,"%-*.*s ",max_len+2, max_len,(b==B->nseq)?"":S->name[b]); - fps=print_format_string ( buf,white, ink, fps); - if(B->output_res_num) - { - sprintf (buf, " %4d ", n_residues[b]+1); - fps=print_format_string ( buf,white, ink, fps); - } - - for (fps->in_seq=1,c=a;cline && clen_aln;c++) - { - if (b==B->nseq) - { - n_residues[b]++; - get_rgb_values_format (DEFAULT_COLOR,box_c); - s=analyse_aln_column ( B, c); - } - else - { - n_residues[b]+=!is_gap(B->seq_al[b][c]); - s=B->seq_al[b][c]; - if (!is_gap(s) && S->seq_al[b][c]!=NO_COLOR_RESIDUE ) - { - get_rgb_values_format ( S->seq_al[b][c], box_c); - } - else - { - get_rgb_values_format (GAP_COLOR, box_c); - } - } - fps=print_format_char ( s,box_c, ink,fps); - } - fps->in_seq=0; - - if(B->output_res_num) - { - sprintf (buf, " %4d ", n_residues[b]); - fps=print_format_string ( buf,white, ink, fps); - } - - fps=print_format_char ( '\n', white, ink, fps); - - } - fps=print_format_string ( "\n\n",white, ink, fps); - } - fps=print_format_string ( "\n\n\n",white, ink,fps); - - - vfclose_format( fps); - free_aln (S); - vfree (n_residues); - return 1; - - } - -int output_reliability_format ( Alignment *B,Alignment *S,char *name, \ -FILE_format *(*vfopen_format) ( char *),\ -FILE_format *(*print_format_string) ( char * ,Color *, Color *, FILE_format*),\ -FILE_format *(*print_format_char) ( int ,Color *, Color *, FILE_format*),\ -void (*get_rgb_values_format) ( int ,Color *),\ -FILE_format* (*vfclose_format) ( FILE_format *)) - { - int a, b, c,l; - int max_name_len=15; - int max_len=0; - static char *buf,*buf2; - int s; - static FILE_format *fps; - Color *ink; - Color *box_c; - Color *white; - int *n_residues; - - - box_c=vcalloc ( 1, sizeof (Color)); - get_rgb_values_format (DEFAULT_COLOR, (white=vcalloc ( 1, sizeof (Color)))); - get_rgb_values_format (INK_COLOR, (ink =vcalloc ( 1, sizeof (Color)))); - - n_residues=vcalloc ( B->nseq+1, sizeof (int)); - for ( a=0; anseq; a++)n_residues[a]=B->order[a][1]; - - - fps=vfopen_format( name); - if ( buf==NULL) - { - buf=vcalloc (10000, sizeof (int)); - buf2=vcalloc (10000, sizeof (int)); - } - - if ( max_len==0) - { - for ( a=0; a< B->nseq; a++) - {if ( strlen (B->name[a])>max_len) - max_len= strlen ( (B->name[a])); - } - } - - if ( vfopen_format==vfopen_ascii) - { - fps->line+= max_len; - } - else if ( max_len>max_name_len)max_len=max_name_len; - - - - sprintf (buf, "%s, %s(%s)\n%s\nCPU TIME:%d sec.\n%s",PROGRAM,VERSION,DATE, AUTHOR, (B->cpu+get_time())/1000, (S->generic_comment)?S->generic_comment:""); - fps=print_format_string ( buf,white, ink, fps); - sprintf (buf, "SCORE=%d\n*\n", S->score_aln); - fps=print_format_string ( buf,white, ink, fps); - - sprintf ( buf2, " BAD AVG GOOD"); - l=strlen(buf2); - get_rgb_values_format ( DEFAULT_COLOR, box_c); - fps=print_format_char ( buf2[0],box_c, ink, fps); - for ( a=1; ascore_seq && a< B->nseq; a++) - { - get_rgb_values_format (S->score_seq[a]/10, box_c); - sprintf ( buf, "%-*.*s ", max_len+2,max_len, S->name[a]); - fps=print_format_string ( buf,box_c, ink,fps); - sprintf ( buf, ": %3d\n", S->score_seq[a]); - fps=print_format_string ( buf,white, ink,fps); - } - //Print the Consensus score - get_rgb_values_format (S->score_aln/10, box_c); - sprintf ( buf, "%-*.*s ", max_len+2,max_len, S->name[S->nseq]); - fps=print_format_string ( buf,box_c, ink,fps); - sprintf ( buf, ": %3d\n", S->score_aln); - fps=print_format_string ( buf,white, ink,fps); - - fps=print_format_string ( "\n",white, ink,fps); - - - - fps->line-=max_len; - fps->line=fps->line-(fps->line%3); - - for (a=0; alen_aln; a+=fps->line) - { - - if ( (fps->n_line+(B->nseq+4))>fps->max_line_ppage && !((B->nseq+4)>fps->max_line_ppage)) - { - fps=print_format_char ( fps->eop,white, ink, fps); - } - - for (b=0; b<=S->nseq; b++) - { - if ( b==S->nseq && print_format_string !=print_ascii_string) fps=print_format_string ( "\n",white, ink, fps); - sprintf (buf,"%-*.*s ",max_len+2,max_len,S->name[b]); - fps=print_format_string ( buf,white, ink, fps); - if(B->output_res_num) - { - sprintf (buf, " %4d ", n_residues[b]+1); - fps=print_format_string ( buf,white, ink, fps); - } - - for (fps->in_seq=1,c=a;cline && clen_aln;c++) - { - if (b==S->nseq) - { - - if (S->score_seq) - { - int s; - s=S->seq_al[b][c]; - if ( s>='0' && s<='9')s-='0'; - get_rgb_values_format (s,box_c); - } - else get_rgb_values_format (DEFAULT_COLOR,box_c); - n_residues[b]++; - s=analyse_aln_column ( B, c); - } - else - { - n_residues[b]+=!is_gap(B->seq_al[b][c]); - //s=toupper(B->seq_al[b][c]); - s=B->seq_al[b][c]; - if (!is_gap(s) && S->seq_al[b][c]!=NO_COLOR_RESIDUE ) - { - get_rgb_values_format ( S->seq_al[b][c], box_c); - - } - else - { - get_rgb_values_format (GAP_COLOR, box_c); - - } - - } - fps=print_format_char ( s,box_c, ink,fps); - } - fps->in_seq=0; - - if(B->output_res_num) - { - sprintf (buf, " %4d ",n_residues[b]); - fps=print_format_string ( buf,white, ink, fps); - } - - fps=print_format_char ( '\n', white, ink, fps); - - } - fps=print_format_string ( "\n\n",white, ink, fps); - } - fps=print_format_string ( "\n\n\n",white, ink,fps); - vfclose_format( fps); - return 1; - - } - - -/*****************************************************************************/ -/* PDF FUNCTIONS */ -/* */ -/*****************************************************************************/ -int output_color_pdf ( Alignment *B,Alignment *S, char *name) - { - char *tmp_name; - char command[LONG_STRING]; - - -#ifndef PS2PDF - fprintf (stderr, "\nPDF FORMAT IS NOT SUPPORTED: INSTALL THE PROGRAM PS2PDF\n"); - myexit (EXIT_FAILURE); -#else - tmp_name=vtmpnam(NULL); - - output_color_ps (B, S, tmp_name); - sprintf ( command, "%s %s %s", PS2PDF, tmp_name, name); - my_system ( command); - vremove ( tmp_name); -#endif - - - return 1; - } -int output_reliability_pdf ( Alignment *B,Alignment *S, char *name) - { - char *tmp_name; - char command[LONG_STRING]; - - - -#ifndef PS2PDF - fprintf (stderr, "\nPDF FORMAT IS NOT SUPPORTED: INSTALL THE PROGRAM PS2PDF\n"); - myexit (EXIT_FAILURE); -#else - tmp_name=vtmpnam(NULL); - - output_reliability_ps (B, S, tmp_name); - sprintf ( command, "%s %s %s", PS2PDF, tmp_name, name); - my_system ( command); - vremove ( tmp_name); -#endif - - - return 1; - } -/*****************************************************************************/ -/* POST SCRIPT FUNCTIONS */ -/* */ -/*****************************************************************************/ -int output_color_ps ( Alignment *B,Alignment *S, char *name) - { - output_color_format (B, S, name, vfopen_ps,print_ps_string,print_ps_char,get_rgb_values_ps, vfclose_ps); - return 1; - } -int output_reliability_ps ( Alignment *B,Alignment *S, char *name) - { - output_reliability_format (B, S, name, vfopen_ps,print_ps_string,print_ps_char,get_rgb_values_ps, vfclose_ps); - return 1; - } -FILE_format *print_ps_string( char *s, Color *box, Color *ink, FILE_format *fps) - { - int l; - int a; - - l=strlen (s); - - for ( a=0; a< l; a++) - { - fps=print_ps_char (s[a], box, ink, fps); - } - return fps; - } - - -FILE_format * print_ps_char ( int c, Color *box, Color *ink, FILE_format *f) - { - - int ch; - int cw; - - ch=f->font+3; - cw=f->font-2; - - if ( c=='(' || c==')')return f; - else if (c!='\n' && c!=f->eop) - { - fprintf(f->fp,"%d %d moveto\n", f->x,f->y); - fprintf(f->fp,"0 %d rlineto\n%d 0 rlineto\n0 -%d rlineto\nclosepath\n",ch,cw,ch ); - fprintf(f->fp,"%3.1f %3.1f %3.1f setrgbcolor\nfill\n%3.1f %3.1f %3.1f setrgbcolor\n", box->r,box->g,box->b, ink->r, ink->g, ink->b); - fprintf(f->fp,"%d %d moveto\n(%c) show\n", f->x+1,f->y+3, c); - - f->x+=cw; - } - else - { - f->n_line++; - if ( f->n_line==f->max_line_ppage || c==f->eop) - { - - f->n_line=0; - f->x=f->x0; - f->y=f->y0; - fprintf(f->fp,"showpage\n"); - f->n_pages++; - fprintf ( f->fp, "%c%cPage: %d %d\n",'%', '%', f->n_pages, f->n_pages); - } - else - { - f->x=f->x0; - f->y-=ch; - } - } - return f; - } -void get_rgb_values_ps ( int val, Color *C) - { - get_rgb_values ( val, C); - } - - - -FILE_format* vfopen_ps ( char *name) - { - FILE_format*fps; - - fps=vcalloc ( 1, sizeof ( FILE_format)); - fps->font=9; - fps->max_line_ppage=60; - fps->line=get_msa_line_length (0, 0);/*N char per line*/ - fps->x0=15; - fps->y0=750; - fps->eop='^'; - - fps->fp=vfopen ( name, "w"); - fprintf(fps->fp,"%%!PS-Adobe-2.0\n/Courier findfont\n%d scalefont\nsetfont\n",fps->font); - fprintf(fps->fp, "%%%%Pages: (atend)\n"); - fprintf(fps->fp,"newpath\n"); - ++(fps->n_pages); - fprintf (fps->fp, "%%%%Page: %d %d\n", fps->n_pages, fps->n_pages); - fprintf (fps->fp,"%d %d translate\n",fps->x0, fps->y0); - return fps; - } - -FILE_format* vfclose_ps ( FILE_format *fps) - { - - fprintf(fps->fp,"showpage\n"); - fprintf ( fps->fp, "%%%%Pages: %d\n", fps->n_pages); - fprintf(fps->fp,"%%%%EOF"); - fprintf(fps->fp,"%%%%\n"); - vfclose ( fps->fp); - vfree (fps); - return NULL; - } -/*****************************************************************************/ -/* HTML FUNCTIONS */ -/* */ -/*****************************************************************************/ - -/*****************************************************************************/ -//JM_ADD -/*****************************************************************************/ -void output_hit_matrix(char *fileName, float **ffpHitScoreMatrix, int nl) -{ - int i, j; - FILE *fp; - - fp=vfopen(fileName, "w"); - for(i = 0; i < nl; i++) - { - for(j = 0; j < i; j++) - fprintf(fp, "%6.2f ", ffpHitScoreMatrix[j][i-j]); - for(j = i; j < nl; j++) - fprintf(fp, "%6.2f ", ffpHitScoreMatrix[i][j-i]); - fprintf(fp, "\n"); - } - vfclose(fp); -} - -int output_hit_color_html (Alignment *B, float **ffPScoreTable, int nl, char *name) -{ - output_hit_color_format (B, ffPScoreTable, nl, name, vfopen_html,print_html_string,print_html_char,get_rgb_values_html, vfclose_html); - return 1; -} - -int output_hit_color_format (Alignment *B, float **ffPScoreTable, int nl, char *name, \ -FILE_format *(*vfopen_format) ( char *),\ -FILE_format *(*print_format_string) ( char * ,Color *, Color *, FILE_format*),\ -FILE_format *(*print_format_char) ( int ,Color *, Color *, FILE_format*),\ -void (*get_rgb_values_format) ( int ,Color *),\ -FILE_format* (*vfclose_format) ( FILE_format *)) -{ - int a, b; - int max_name_len=15; - int max_len=0; - - static char *buf; - static FILE_format *fps; - Color *ink; - Color *box_c; - Color *white; - - box_c=vcalloc ( 1, sizeof (Color)); - get_rgb_values_format (DEFAULT_COLOR, (white=vcalloc ( 1, sizeof (Color)))); - get_rgb_values_format (INK_COLOR, (ink =vcalloc ( 1, sizeof (Color)))); - - if ( max_len==0) - { - for ( a=0; a< B->nseq; a++) - { - if ( strlen (B->name[a])>max_len) - max_len= strlen ( (B->name[a])); - } - } - if ( max_len>max_name_len)max_len=max_name_len; - - if ( buf==NULL) - buf=vcalloc (10000, sizeof (int)); - int iEmptyChr = 32; //SPACE ASCIICODE - int iColorValue; - fps=vfopen_format(name); - for (a=0; a < nl; a++) - { - sprintf (buf,"%*d ", max_len+2, a); - fps=print_format_string ( buf,white, ink, fps); - for(b = 0; b < a; b++) - { - iColorValue = (int)((ffPScoreTable[b][a-b]*9)/100); - get_rgb_values_format (iColorValue, box_c); - fps=print_format_char (iEmptyChr,box_c, ink,fps); - } - for(b = a; b < nl; b++) - { - iColorValue = (int)((ffPScoreTable[a][b-a]*9)/100); - get_rgb_values_format (iColorValue, box_c); - fps=print_format_char (iEmptyChr,box_c, ink,fps); - } - fps=print_format_char ('\n', white, ink, fps); - } - vfclose_format(fps); - vfree(buf); - vfree(box_c); - return 1; -} - -/*****************************************************************************/ - -int output_color_html ( Alignment *B,Alignment *S, char *name) - { - output_color_format (B, S, name, vfopen_html,print_html_string,print_html_char,get_rgb_values_html, vfclose_html); - return 1; - } -int output_reliability_html ( Alignment *B,Alignment *S, char *name) - { - output_reliability_format (B, S, name, vfopen_html,print_html_string,print_html_char,get_rgb_values_html, vfclose_html); - return 1; - } -FILE_format *print_html_string( char *s, Color *box, Color *ink, FILE_format *fhtml) - { - int l; - int a; - - l=strlen (s); - - for ( a=0; a< l; a++) - { - fhtml=print_html_char (s[a], box, ink, fhtml); - } - fhtml=print_html_char (CLOSE_HTML_SPAN,NULL,NULL,fhtml); - return fhtml; - } - - -FILE_format * print_html_char ( int c, Color *box, Color *ink, FILE_format *f) - { - char html_color[100]; - int in_span, new_color; - char string[1000]; - - - if (c==CLOSE_HTML_SPAN) - { - if (f->in_html_span)fprintf ( f->fp, ""); - f->in_html_span=0; - return f; - } - - - in_span=f->in_html_span; - new_color=1-(strm (box->html_color_class, f->previous_html_color)); - - - - sprintf (f->previous_html_color, "%s", box->html_color_class); - sprintf ( html_color, "class=%s", box->html_color_class); - - - if ( c!=' ')sprintf ( string, "%c", c); - else sprintf ( string, " "); - - if ( !in_span && c!='\n' && c!=f->eop) - { - fprintf ( f->fp, "%s",html_color,string ); - f->in_html_span=1; - } - else if (in_span && !new_color && c!='\n' && c!=f->eop) - { - - fprintf ( f->fp, "%s",string); - } - else if (in_span && new_color && c!='\n' && c!=f->eop) - { - fprintf ( f->fp, "%s",html_color,string); - } - else if ( c=='\n') - { - if ( f->in_html_span)fprintf ( f->fp, ""); - fprintf ( f->fp, "
"); - sprintf ( f->previous_html_color, "no_color_set"); - f->in_html_span=0; - f->n_line++; - } - - - - - - return f; - } - -void get_rgb_values_html ( int val, Color *C) - { - get_rgb_values ( val, C); - } - -FILE_format* vfopen_html ( char *name) - { - FILE_format*fhtml; - Color *color; - int a; - - color=vcalloc ( 1, sizeof (Color)); - - fhtml=vcalloc ( 1, sizeof ( FILE_format)); - fhtml->font=11; - fhtml->max_line_ppage=100000; - fhtml->line=get_msa_line_length (0, 0);/*N char per line*/ - fhtml->x0=15; - fhtml->y0=800; - fhtml->eop='^'; - sprintf ( fhtml->previous_html_color, "no_value_set"); - fhtml->fp=vfopen ( name, "w"); - - fprintf(fhtml->fp,"\n"); - fprintf(fhtml->fp,""); - - return fhtml; - } -FILE_format* vfclose_html ( FILE_format *fhtml) - { - if ( fhtml->in_html_span)fprintf(fhtml->fp,""); - fprintf(fhtml->fp,"\n"); - vfclose ( fhtml->fp); - vfree (fhtml); - return NULL; - } -/*****************************************************************************/ -/* ascii FUNCTIONS */ -/* */ -/*****************************************************************************/ -int output_color_ascii ( Alignment *B,Alignment *S, char *name) - { - output_color_format (B, S, name, vfopen_ascii,print_ascii_string,print_ascii_char,get_rgb_values_ascii, vfclose_ascii); - return 1; - } -int output_reliability_ascii ( Alignment *B,Alignment *S, char *name) - { - output_reliability_format (B, S, name, vfopen_ascii,print_ascii_string,print_ascii_char,get_rgb_values_ascii, vfclose_ascii); - return 1; - } - -FILE_format *print_ascii_string( char *s, Color *box, Color *ink, FILE_format *fascii) - { - int l; - int a; - - l=strlen (s); - - for ( a=0; a< l; a++) - { - fascii=print_ascii_char (s[a], box, ink, fascii); - } - return fascii; - } - - -FILE_format * print_ascii_char ( int c, Color *box, Color *ink, FILE_format *f) - { - if (box->ascii_value>=0 && f->in_seq)fprintf ( f->fp, "%c", box->ascii_value); - else fprintf ( f->fp, "%c",c); - return f; - } - - -void get_rgb_values_ascii ( int val, Color *C) - { - - if ( val==NO_COLOR_RESIDUE)C->ascii_value='-'; - else if ( val==NO_COLOR_GAP)C->ascii_value='*'; - else if ( val>9)C->ascii_value='#'; - else if ( val>=0 && val<=9) C->ascii_value=val+'0'; - else C->ascii_value=val; - } - -FILE_format* vfopen_ascii ( char *name) - { - FILE_format*fascii; - fascii=vcalloc ( 1, sizeof ( FILE_format)); - fascii->font=11; - fascii->max_line_ppage=100000; - fascii->line=get_msa_line_length (0,0);/*N char per line*/ - fascii->x0=15; - fascii->y0=800; - fascii->eop='^'; - fascii->fp=vfopen ( name, "w"); - - - return fascii; - } -FILE_format* vfclose_ascii ( FILE_format *fascii) - { - vfclose ( fascii->fp); - vfree (fascii); - return NULL; - } - - -/*****************************************************************************/ -/* seq_score output */ -/* */ -/*****************************************************************************/ - -int output_seq_reliability_ascii ( Alignment *B,Alignment *S, char *name) -{ - FILE *fp; - int a; - int max_len=0; - for ( a=0; a< B->nseq; a++) - {if ( strlen (B->name[a])>max_len) - max_len= strlen ( (B->name[a])); - } - - fp=vfopen ( name, "w"); - fprintf ( fp, "ALN_SCORE %d\n", S->score_aln); - for ( a=0; a< S->nseq; a++)fprintf (fp, "SEQ_SCORE %*.*s %3d\n", max_len+2,max_len,S->name[a],S->score_seq[a]); - vfclose (fp); - - return 1; -} - -/*********************************COPYRIGHT NOTICE**********************************/ -/*© Centro de Regulacio Genomica */ -/*and */ -/*Cedric Notredame */ -/*Tue Oct 27 10:12:26 WEST 2009. */ -/*All rights reserved.*/ -/*This file is part of T-COFFEE.*/ -/**/ -/* T-COFFEE is free software; you can redistribute it and/or modify*/ -/* it under the terms of the GNU General Public License as published by*/ -/* the Free Software Foundation; either version 2 of the License, or*/ -/* (at your option) any later version.*/ -/**/ -/* T-COFFEE is distributed in the hope that it will be useful,*/ -/* but WITHOUT ANY WARRANTY; without even the implied warranty of*/ -/* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the*/ -/* GNU General Public License for more details.*/ -/**/ -/* You should have received a copy of the GNU General Public License*/ -/* along with Foobar; if not, write to the Free Software*/ -/* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA*/ -/*............................................... |*/ -/* If you need some more information*/ -/* cedric.notredame@europe.com*/ -/*............................................... |*/ -/**/ -/**/ -/* */ -/*********************************COPYRIGHT NOTICE**********************************/ diff --git a/binaries/src/tcoffee/t_coffee_source/io_lib_header.h b/binaries/src/tcoffee/t_coffee_source/io_lib_header.h deleted file mode 100644 index 45be7a4..0000000 --- a/binaries/src/tcoffee/t_coffee_source/io_lib_header.h +++ /dev/null @@ -1,1095 +0,0 @@ -/*DEBUGGING*/ -/*#include "mshell.h"*/ -/*MEMORY MANAGEMENT*/ -#include -#define MY_EPS 1000*DBL_EPSILON -//Maximum number of tries for interactibve things -#define MAX_N_TRIES 3 - -//Maximum CACHE and Temporary file size and age (Mb and days, 0: unlimited) -#define TMP_MAX_SIZE 0 -#define TMP_MAX_KEEP 10 -#define CACHE_MAX_SIZE 2000 -#define CACHE_MAX_KEEP 180 -#define MAX_N_PID 65536 -//Importnat Values Affecting the Program Behavior -#define SCORE_K 10 -#define NORM_F 1000 -#define PAVIE_MAT_FACTOR 1000 -#define MAXID 100 -#define CLEAN_FUNCTION NULL -#define MINSIM_4_TCOFFEE 25 //The minimum similarity between a sequence and its PDB template -#define MINCOV_4_TCOFFEE 25 //The minimum similarity between a sequence and its PDB template - - -#define TRACE_TYPE int -#define MAX_LEN_FOR_DP 600 - - -#define GIVE_MEMORY_BACK 0 -#define MEMSET0 1 -#define NO_MEMSET0 0 -/*OUTPUT DEFINITIONS*/ -#define NO_COLOR_RESIDUE 127 -#define NO_COLOR_GAP 126 -#define CLOSE_HTML_SPAN -1 -/*SPECIAL_CODES*/ -#define GAP_CODE 60 -/*TYPE DEFINITIONS*/ - -//Formats -#define BLAST_XML 100 -#define BLAST_TXT 101 - -/*SWITCHES*/ - - -#define USED 1 -#define UNUSED 2 - - -#define TEMPLATES 1 -#define NOTEMPLATES 0 - -#define EXTEND 1 -#define RESIZE 2 - -#define SEN 0 -#define SPE 1 -#define REC 2 -#define SEN2 2 - -#define ALL 1 -#define SEGMENTS 2 -#define DIAGONALS 3 - -#define START_STATE 0 -#define END_STATE 1 - -#define KEEP_CASE 2 /*Hard set in several places*/ -#define LOWER_CASE 0 -#define UPPER_CASE 1 -#define CHANGE_CASE 3 -#define KEEP_GAP 0 -#define RM_GAP 1 - -#define KEEP_NAME 1 - -#define CHECK 0 -#define NO_CHECK 1 -#define FORCE 2 -#define STORE 3 -#define FLUSH 4 - - -#define ON 8 -#define OFF 9 -#define LOCKED_ON 10 -#define LOCKED_OFF 11 - -#define YES 12 -#define NO 13 -#define MAYBE 14 - -#define NEVER 15 -#define ALWAYS 16 -#define SOMETIMES 17 - -#define UPPER 18 -#define LOWER 19 -#define DELETE 20 -#define SWITCHCASE 21 - -#define VECTOR 22 -#define NON_VECTOR 23 -#define NON_PROFILE 24 -#define BOOTSTRAP 25 - -#define HEADER 26 -#define NO_HEADER 27 - -#define VERY_VERBOSE 28 -#define VERBOSE 29 -#define SHORT 30 -#define VERY_SHORT 31 - -#define OVERLAP 32 -#define NO_OVERLAP 33 - -#define PRINT 34 -#define NO_PRINT 35 - -#define FREE_ALN 36 -#define DECLARE_ALN 37 -#define EXTRACT_ALN 38 -#define CLEAN 39 -#define INTERACTIVE 40 -#define NON_INTERACTIVE 41 -#define PAD 42 -#define NO_PAD 43 - -#define SET 44 -#define UNSET 45 -#define RESET 48 -#define ISSET 49 -#define GET 50 - -#define ENV 52 - -#define GOP 0 -#define GCP 1 -#define GEP 2 - -#define BOTTOM 0 -#define TOP 1 - -#define FORWARD -1 -#define BACKWARD 1 - -#define GO_LEFT -1 -#define GO_RIGHT 1 - -#define LOCAL 1 -#define GLOBAL 2 -#define LALIGN 3 -#define MOCCA 4 - -#define TRUE 1 -#define FALSE 0 - -#define NEW 1 -#define OLD 0 - -#define RANDOM 0 -#define DETERMINISTIC 1 - -#define GREEDY 1 -#define NON_GREEDY 0 - -#define IS_FATAL 1 -#define IS_NOT_FATAL 0 -#define NO_REPORT 2 -#define INSTALL 3 -#define INSTALL_OR_DIE 4 - -#define OPTIONAL 1 -#define NON_OPTIONAL 0 - -#define GV_MAXIMISE 1 -#define GV_MINIMISE 0 - -#define MAXIMISE 1 -#define MINIMISE 0 - -#define ALLOWED 0 -#define FORBIDEN -99999999 -#define END_ARRAY -99999990 -#define SOFT_COPY 1 -#define HARD_COPY 2 - -#define VERY_SLOW 0 -#define SLOW 1 -#define FAST 2 -#define VERY_FAST 3 -#define SUPER_FAST 4 -#define ULTRA_FAST 5 - -#define CODE 1 -#define DECODE 2 -#define CODELIST 3 - -/*Identity measure*/ -#define UNGAPED_POSITIONS 1 -#define ALIGNED_POSITIONS 2 -#define AVERAGE_POSITIONS 3 -#define NOMATRIX NULL -#define NOGROUP NULL -#define NOALN NULL - -/*SIZE DEFINITIONS*/ -#define SIZE_OF_INT 10 -#define UNDEFINED FORBIDEN -#define UNDEFINED_INT UNDEFINED -#define UNDEFINED_FLOAT UNDEFINED -#define UNDEFINED_DOUBLE UNDEFINED -#define UNDEFINED_CHAR 125 -#define UNDEFINED_SHORT -125 -#define UNDEFINED_2 0 -#define UNDEFINED_RESIDUE '>' - - - -#define FACTOR 1 -#define MAX_N_SEQ 1 -#define MAX_N_ALN 1 -#define MAX_LEN_ALN 1 -#define MAX_N_LIST 100 - -#define COMMENT_SIZE 1000 -#define MAXNAMES 100 -#define FILENAMELEN 500 /* Max. file name length */ -#define MAX_N_PARAM 2000 -#define MAX_PARAM_LEN 200 -#define MAX_LINE_LENGTH 10000 -#define ALN_LINE_LENGTH 60 -#define SHORT_STRING 10 -#define STRING 300 -#define LONG_STRING 1000 -#define VERY_LONG_STRING 10000 - -#define AA_ALPHABET "acdefghiklmnpqrstvwy-ACDEFGHIKLMNPQRSTVWY" -#define DNA_ALPHABET "AGCTUNRYMKSWHBVD-agctunrymkswhbvd" -#define RNAONLY_ALPHABET "Uu" -#define BLAST_AA_ALPHABET "arndcqeghilkmfpstwyvbzx*" -#define NAMES_ALPHABET "1234567890ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz_|�-!%@&#-+=." - -#define SIZEOF_AA_MAT 60 -#define GAP_LIST "-.#*~" -#define SSPACE " " - -#define MATCH 1 -#define UNALIGNED 2 -#define GAP 3 - -#define MNE 3 -#define CODE4PROTEINS 10 -#define CODE4DNA 20 - -#define STOCKHOLM_CHAR 'z' -#define STOCKHOLM_STRING "z" - - -/*CODE SHORT CUTS*/ - -/*1-COMMAND LINE PROCESSING*/ -#define GET_COMMAND_LINE_INFO ((strncmp ( argv[1], "-h",2)==0)||(strncmp ( argv[1], "-man",4)==0)||(strncmp ( argv[1], "-",1)!=0)) -#define NEXT_ARG_IS_FLAG ((argc<=(a+1)) ||(( argv[a+1][0]=='-') && !(is_number(argv[a+1])))) - - -/*UTIL MACROS*/ -#define BORDER(p1,l1,p2,l2) ((p1==0 || p2==0 || p1==l1 || p2==l2)?1:0) -#define GET_CASE(f,c) ((f==UPPER_CASE)?toupper(c):((f==LOWER_CASE)?tolower(c):c)) - -#define SWAP(x,y) {x=x+y;y=x+y; x=y-x; y=y-2*x;} -#define SWAPP(x,y,tp) {tp=y;y=x;x=tp;} - -#define MAX(x, y) (((x) >(y)) ? (x):(y)) -#define MAX2(x, y) (((x) >(y)) ? (x):(y)) -#define MAX3(x,y,z) (MAX(MAX(x,y),z)) -#define MAX4(a,b,c,d) (MAX(MAX(a,b),MAX(c,d))) -#define MAX5(a,b,c,d,e) (MAX2((MAX3(a,b,c)),(MAX2(d,e)))) -#define MAX6(a,b,c,d,e,f) (MAX2((MAX3(a,b,c)),(MAX3(c,d,e)))) - -#define MIN(x, y) (((x) <(y)) ? (x):(y)) -#define FABS(x) ((x<0)?(-x):(x)) -#define is_defined(x) ((x==UNDEFINED)?0:1) -#define a_better_than_b(x,y,m) ((m==1)?(((x)>(y))?1:0):(((x)<(y))?1:0)) -#define is_in_range(x,min,max) ((x>=min && x<=max)?1:0) -/*#define bod_a_b(x,y,m) ((m==1)?(MAX((x),(y))):(MIN((x),(y)))) -#define bo_a_b(x,y,m) ((x==UNEFINED)?y:((y==UNDEFINED)?x:bod_a_b(y,y,m))) -#define best_of_a_b(x,y,m) ((x==UNDEFINED && y==UNDEFINED)?(UNDEFINED):(bo_a_b(x,y,m))) -*/ - - -#define DIE(x) HERE(x);exit(0); -#define best_of_a_b(x,y,m) ((m==1)?(MAX((x),(y))):(MIN((x),(y)))) - -#define strm(x,y) ((vstrcmp((x),(y))==0)?1:0) -#define strnm(x,y,n) ((vstrncmp((x),(y),(n))==0)?1:0) -#define strm2(a,b,c) (strm(a,b) || strm(a,c)) -#define strm3(a,b,c,d) (strm2(a,b,c) || strm(a,d)) -#define strm4(a,b,c,d,e) (strm2(a,b,c) || strm2(a,d,e)) -#define strm5(a,b,c,d,e,f) (strm2(a,b,c) || strm3(a,d,e,f)) -#define strm6(a,b,c,d,e,f,g) (strm3(a,b,c,d) || strm3(a,e,f,g)) -#define declare_name(x) (x=vcalloc (MAX(FILENAMELEN,L_tmpnam)+1, sizeof (char))) -#define is_parameter(x) (x[0]=='-' && !isdigit(x[1])) - -/*Freing functions*/ -#define free_2(a, b) free(a);free(b) -#define free_1(a) free(a) -#define free_3(a, b, c) free_2(a,b);free_1(c) -#define free_4(a, b, c,d) free_2(a,b);free_2(c,d) -#define free_5(a, b, c,d,e) free_3(a,b,e);free_2(c,d) -#define free_6(a, b, c,d,e,f) free_3(a,b,e);free_3(c,d,f) -#define free_7(a, b, c,d,e,f,g) free_3(a,b,e);free_4(c,d,f,g) -/*2-FILE PARSING*/ -#define SEPARATORS "\n \t,;" -#define LINE_SEPARATOR "\n#TC_LINE_SEPARATOR\n" -#define TC_REC_SEPARATOR "#### TC REC SEPARATOR ###" - -/*END 1-*/ - - -/*WIDOWS/UNIX DISTINCTIONS -#if defined(_WIN32) || defined(__WIN32__) || defined(__WINDOWS__) || defined(__MSDOS__) || defined(__DOS__) || defined(__NT__) || defined(__WIN32__) -#define WIN32 -#define TO_NULL_DEVICE " >nul" -#define NULL_DEVICE "nul" -#define CWF "/" -#else -#define TO_NULL_DEVICE " >/dev/null 2>&1" -#define NULL_DEVICE "/dev/null" -*/ - -#if defined(_WIN32) || defined(__WIN32__) || defined(__WINDOWS__) || defined(__MSDOS__) || defined(__DOS__) || defined(__NT__) || defined(__WIN32__) -#define WIN32 -#define TO_NULL_DEVICE " >>t_coffee.log" -#define NULL_DEVICE "t_coffee.log" -#define CWF "/" /*ClustalW Flag*/ -#else -#define TO_NULL_DEVICE " >>/dev/null 2>&1" -#define NULL_DEVICE "/dev/null" - - -#define CWF "-" /*ClustaW Flag*/ -#endif - -/*Generic Data*/ -#define EMAIL "cedric.notredame@europe.com" -#define URL "http://www.tcoffee.org" - -#define PERL_HEADER "#!/usr/bin/env perl" - -//Optimize the Score Computation in DP -#define TC_SCORE_2(x,y) (SCORE_K*CL->M[Aln->seq_al[l_s[0][0]][x]-'A'][Aln->seq_al[l_s[1][0]][y]-'A']-SCORE_K*CL->nomatch) -#define TC_SCORE_N(x,y) ((CL->get_dp_cost)(Aln, pos, ns[0], l_s[0], x, pos, ns[1], l_s[1], y, CL)) -#define TC_SCORE(x,y) ((CL->get_dp_cost==slow_get_dp_cost && CL->evaluate_residue_pair==evaluate_matrix_score && ns[0]+ns[1]==2 && x>=0 && j>=0)? (TC_SCORE_2(x,y)):(TC_SCORE_N(x,y))) - -#define NULL_2 NULL,NULL -#define NULL_3 NULL_2,NULL -#define NULL_4 NULL_2,NULL_2 -#define NULL_5 NULL_3,NULL_2 -#define NULL_6 NULL_4,NULL_2 -#define NULL_7 NULL_5,NULL_2 -typedef struct - { - char *mode; - char *comments; - int nseq; - char **seq_name; - float **PW_SD; - float **PW_ID; - float *SEQ_W; - }Weights; - -typedef struct - { - int **list; - int tot_list; - int **stem; - int tot_stem; - int n_fields; - int nseq; - int *len; - int ***struc; - struct Sequence *S; - }Structure; - -struct Sequence - { - char **file; /* file[Nseq][FILENAMELEN] name of the file that contributed each sequence*/ - char **seq_comment; /* seq_comment[Nseq][LONG_STRING] comment read in the file */ - char **aln_comment; /*id*/ - char **seq; /*seq[Nseq][sequence] sequences*/ - int *len; /*len[Nseq] length of each sequence*/ - int max_len; /*Lenght of the longest seq */ - int min_len; /*Length of the shortest seq*/ - int nseq; /*nseq*/ - int max_nseq; /*Maximum number of sequences in the datastruct*/ - char **name; /*name[Nseq][MAXNAMELEN]*/ - int **dc; /*coordinates on the disk. Coordinates set if seq[i]==NULL -/*Constraint list*/ - struct Constraint_list *CL; - int contains_gap; /*set to 1 if gaps are to be kept*/ - char *type; /*PROTEIN, DNA*/ - Weights *W; /*Associated weights*/ - char template_file[FILENAMELEN+1]; - struct Template **T; - -}; -typedef struct Sequence Sequence; - -//_E_ -struct Template -{ - char seq_type[10]; - struct X_template *P;//PDB structure - struct X_template *F;//RNA secondary structure - struct X_template *S;//sequence - struct X_template *R;//Profile - struct X_template *G;//Genomic structure - struct X_template *T;//transmembrane - struct X_template *E;//secondary structure - struct X_template *U;//Unicode, strings - - struct X_template *RB; -}; -typedef struct Template Template; -//_E_ -struct X_template -{ - char seq_name[FILENAMELEN+1]; - char template_type[FILENAMELEN+1]; - char template_format[100]; - char template_name[FILENAMELEN+1]; - char template_file[FILENAMELEN+1]; - - struct P_template *VP; - struct F_template *VF; - struct S_template *VS; - struct R_template *VR; - struct G_template *VG; - struct T_template *VT; - struct E_template *VE; - struct U_template *VU; - - -}; -typedef struct X_template X_template; - -// -struct P_template -{ - char pdb_id[100]; -}; -typedef struct P_template P_template; - -//RNA secondary Structure -struct F_template -{ - int l; -}; -typedef struct F_template F_template; - - -struct S_template -{ - Sequence *S; -}; -typedef struct S_template S_template; - -//Prile associated with a sequence -struct R_template -{ - struct Alignment *A; -}; -typedef struct R_template R_template; - -//Genomic Information -struct G_template -{ - Sequence *S; -}; -typedef struct G_template G_template; - - -struct T_template -{ - Sequence *S; -}; -typedef struct T_template T_template; - -//_E_ -struct E_template -{ - Sequence *S; -}; -typedef struct E_template E_template; - -struct U_template -{ - int *list; -}; -typedef struct U_template U_template; - - -typedef struct - { - int max_len; - int alp_size; - char *alphabet; - int **count3; - int **count; - int **count2; - }Profile; - -struct Alignment - { -/*Size*/ - int max_len; - int min_len; - int * len; - //int *weight; - int declared_len; - int max_n_seq; - int nseq; - int len_aln; -/*Generic Information*/ - char *generic_comment; -/*Sequence Information*/ - char **file; - char **seq_comment; - char **aln_comment; - char **name; - - char **expanded_order; - char **tree_order; - char **seq_al; - - int **order; - Profile *P; - Sequence *S; - struct Dp_Result *Dp_result; - struct Constraint_list *CL; - - int **seq_cache; /*Contains the index of the residues: - The sequence Numbering is relative to the sequences, and not to the alignmnent - - seq_cache[0][1]=3 - indicates that in the aln residue (0)1 corresponds to [order[0][0]][3] - residues: 1...N - Sequences 0...M - */ - int **cdna_cache; /*Contains the information about wheather a nucleotide is coding or not*/ - /*Only defined if used */ - - - -/*Score*/ - int * score_seq; - int ** score_res; - int score_aln; - int score; - - int cpu; - int finished; - -/*Input/Output Options*/ - int output_res_num; - int residue_case; /*1 for lower, 0 for Upper, 2 for keeping unchanged*/ - int expand; -/*Must Not be copied*/ - int used; - int num; - int **pos; -/*For linked lists*/ - struct Alignment * A; - /*Misc*/ - int random_tag; - - }; - -typedef struct Alignment Alignment; -typedef struct - { - int in_seq; - FILE *fp; - int font; - int x0; - int y0; - int x; - int y; - int n_pages; - int max_line_ppage; - int n_line; - int line; - int eop; - int in_html_span; - char previous_html_color[100]; - - } -FILE_format; - -typedef struct - { - float r; - float g; - float b; - char html_color[30]; - char html_color_class[30]; - int ascii_value; - } -Color; - - -Sequence * fill_sequence_struc ( int nseq, char **sequences, char **seq_name); -Sequence * cw_read_sequences ( char *seq_name); -Sequence * get_sequence_type (Sequence *S); -char * get_array_type (int n, char **s); -Alignment* get_aln_type (Alignment *A); - -char * get_string_type (char *string); - -char *store_mode (char *val); -char *retrieve_mode (); -char *unset_mode (); -char *set_mode (int mode, char *val); - -char *store_seq_type (char *val); -char *retrieve_seq_type (); -char *unset_seq_type (); -char *set_seq_type (int mode, char *val); - -void get_sequence (char *seq_file,int *NSEQ, char ***SEQ, char ***SN, int **sl, int *min, int *max); - -int ** get_matrix ( char *name, char *format); -int ** read_matrice (char *mat_name); -int **neg_matrix2pos_matrix ( int **matrix); - - -void print_aln ( Alignment *B); - -int output_reliability_ps ( Alignment *B,Alignment *S, char *name); -int output_reliability_pdf ( Alignment *B,Alignment *S, char *name); -int output_reliability_html ( Alignment *B,Alignment *S, char *name); -int output_color_ps ( Alignment *B,Alignment *S, char *name); -int output_color_pdf ( Alignment *B,Alignment *S, char *name); -int output_color_html ( Alignment *B,Alignment *S, char *name); -int output_hit_color_html (Alignment *B, float **ffPScoreTable, int nl, char *name); //JM_ADD -void output_hit_matrix(char *fileName, float **ffpHitScoreMatrix, int nl); //JM_ADD -void get_rgb_values(int val, Color *C); -int output_reliability_format ( Alignment *B,Alignment *S, char *name, \ -FILE_format *(*vfopen_format) ( char *),\ -FILE_format *(*print_format_string) ( char * ,Color *, Color *, FILE_format*),\ -FILE_format *(*print_format_char) ( int ,Color *, Color *, FILE_format*),\ -void (*get_rgb_values_format) ( int ,Color *),\ -FILE_format* (*vfclose_format) ( FILE_format *)); -int output_score_format ( Alignment *B,Alignment *S, char *name, \ -FILE_format *(*vfopen_format) ( char *),\ -FILE_format *(*print_format_string) ( char * ,Color *, Color *, FILE_format*),\ -FILE_format *(*print_format_char) ( int ,Color *, Color *, FILE_format*),\ -void (*get_rgb_values_format) ( int ,Color *),\ -FILE_format* (*vfclose_format) ( FILE_format *)); - - -FILE_format * print_ps_string ( char *s , Color *box, Color *ink, FILE_format *f); -FILE_format * print_ps_char ( int c, Color *box, Color *ink, FILE_format *f); - - - -void get_rgb_values_ps ( int val, Color *C); -FILE_format* vfopen_ps ( char *name); -FILE_format* vfclose_ps ( FILE_format *fps); - -FILE_format *print_html_string( char *s, Color *box, Color *ink, FILE_format *fhtml); -FILE_format * print_html_char ( int c, Color *box, Color *ink, FILE_format *f); -void get_rgb_values_html ( int val, Color *C); -FILE_format* vfopen_html ( char *name); -FILE_format* vfclose_html ( FILE_format *fhtml); - -int output_reliability_ascii ( Alignment *B,Alignment *S, char *name); -int output_color_ascii ( Alignment *B,Alignment *S, char *name); - -FILE_format *print_ascii_string( char *s, Color *box, Color *ink, FILE_format *fascii); -FILE_format * print_ascii_char ( int c, Color *box, Color *ink, FILE_format *f); -void get_rgb_values_ascii ( int val, Color *C); - -FILE_format* vfopen_ascii ( char *name); -FILE_format* vfclose_ascii ( FILE_format *fascii); -int output_seq_reliability_ascii ( Alignment *B,Alignment *S, char *name); -/*********************CLUSTALW.H*********************************************/ -/****************************************************************************/ - - /* - Main header file for ClustalW. Uncomment ONE of the following 4 lines - depending on which compiler you wish to use. - */ - -#define VMS 1 /*VAX or ALPHA VMS */ - -/*#define MAC 1 Think_C for MacIntosh */ - -/*#define MSDOS 1 Turbo C for PC's */ - -/*#define UNIX 1 Ultrix/Decstation, Gnu C for - Sun, IRIX/SGI, OSF1/ALPHA */ - -/***************************************************************************/ -/***************************************************************************/ - - - - - -#define MAXTITLES 60 /* Title length */ - - -#define UNKNOWN 0 -#define EMBLSWISS 1 -#define PIR 2 -#define PEARSON 3 -#define GDE 4 -#define CLUSTAL 5 /* DES */ -#define MSF 6 /* DES */ -#define USER 7 /* DES */ - -#define PAGE_LEN 22 /* Number of lines of help sent to screen */ - -#ifdef VMS /* Defaults for VAX VMS */ -#define DIRDELIM ']' /* Last character before file name in full file - specs */ -#define SEQ_MAX_LEN 10000 /* Max Sequence Length */ -#define MAXN 500 /* Max Number of Sequences */ -#define FSIZE 25000 /* Work space for pairwise alignments */ -#define MAXTREE 5000 /* Max Nodes for phylogenetic tree */ -#define LINELENGTH 60 /* Output line length */ -#define GCG_LINELENGTH 50 /* Output line length for GCG output */ - -#elif MAC -#define DIRDELIM ':' -#define SEQ_MAX_LEN 1000 -#define MAXN 30 -#define FSIZE 5000 -#define MAXTREE 1000 -#define LINELENGTH 50 -#define GCG_LINELENGTH 50 - - -#elif MSDOS -#define DIRDELIM '\\' -#define SEQ_MAX_LEN 1300 -#define MAXN 30 -#define FSIZE 5000 -#define MAXTREE 1000 -#define LINELENGTH 50 -#define GCG_LINELENGTH 50 - -#elif UNIX -#define DIRDELIM '/' -#define SEQ_MAX_LEN 10000 -#define MAXN 500 -#define FSIZE 25000 -#define MAXTREE 5000 -#define LINELENGTH 60 -#define GCG_LINELENGTH 50 -#endif - -#define NUMRES 26 /* max size of comparison matrix */ - -#define INPUT 0 -#define ALIGNED 1 - -#define LEFT 1 -#define RIGHT 2 - -#define NODE 0 -#define LEAF 1 - -#define GAPCOL 32 /* position of gap open penalty in profile */ -#define LENCOL 33 /* position of gap extension penalty in profile */ - -typedef struct node { /* phylogenetic tree structure */ - struct node *left; - struct node *right; - struct node *parent; - float dist; - int leaf; - int order; - char name[64]; -} stree, *treeptr; - -void *ckalloc(size_t); -void * ckvrealloc(void *,size_t); -void ckfree(void *); - -int readseqs(char *saga_file,char ***SAGA_SEQ, char*** SAGA_NAMES, int ***SAGA_LEN) ;/*first_seq is the #no. of the first seq. to read */ - - -typedef struct treesim{ - float w; - float uw; - float d; - - float max_w; - float max_uw; - float max_d; - - int rf; - int n;//n nodes; - int nseq;// nseq in the common subset - }Tree_sim; - - -typedef struct tnode *NT_node; - -/** -* Node of a tree -*/ -typedef struct tnode{ - int visited; - char *name; - char *file; - - ///The parent node - NT_node parent; - ///Left child node - NT_node left; - ///Right child node - NT_node right; - NT_node bot; - /// is leaf? - int isseq; - int seq; - int maxnseq; - int nseq; - - ///contains a list of the sequences - int *lseq; - ///contains a coded version of the node: 10010101 - int *lseq2; - ///contains distances to the root, in nodes - int *idist; - ///contains real distances *1000 - int *ldist; - float dist; - float bootstrap; - float dp; - int order; - int aligned; - ///Number of leave below the considered node - int leaf; - ///Number of nodes below the considered node - int node; - int group; - float score; - int align; - char *seqal; - int index; - int fork; - }Treenode; - -typedef struct split_struc Split; - -typedef struct split_struc{ - char *split; - int n; - int tot; - float score; - char **tlist;//Not used yet - Sequence *S; - NT_node *L; -}Split_struc; - -NT_node main_prune_tree ( NT_node T, Sequence *S); -NT_node prune_tree ( NT_node T, Sequence *S); -/*********************************************************************/ -/* */ -/* dpa_tree_manipulation */ -/* */ -/* */ -/*********************************************************************/ -char *tree2Ngroup (Alignment *A, NT_node T, int max_n, char *fname, char *mat4dist); -int tree2group_file ( NT_node T,Sequence *S, int maxnseq, int minsim, char *name); - -NT_node seq2dpa_tree (Sequence *S, char *align_mode); -NT_node tree2dpa_tree (NT_node T, Alignment *A, char *matrix4distance); -FILE * tree2group ( NT_node T,Sequence *S,int maxnseq, int mindist,char *name, FILE *fp); - - -NT_node tree2collapsed_tree (NT_node T, int n, char **string); - -/*********************************************************************/ -/* */ -/* tree comparison */ -/* */ -/* */ -/*********************************************************************/ -int main_compare_cog_tree (NT_node T1, char *cogfile); -int main_compare_aln_tree (NT_node T1, Alignment *A, FILE *fp); -int compare_aln_tree (NT_node T, Alignment *A, int *n, FILE *fp); - -int main_compare_splits (NT_node T1, NT_node T2, char *mode, FILE *fp); -Tree_sim * tree_cmp( NT_node T1, NT_node T2); -NT_node tree_scan (Alignment *A,NT_node RT, char *pscan, char *ptree); - - -NT_node main_compare_trees ( NT_node T1, NT_node T2, FILE *fp); -float compare_trees ( NT_node T1, NT_node T2, int nseq, int mode); -float search_node ( NT_node B, NT_node T, int nseq, int mode); -float evaluate_node_similarity ( NT_node B, NT_node T, int nseq, int mode); - -int compare_node ( int *b1, int *b2, int n); -void display_node (NT_node N, char *string,int nseq); -NT_node index_tree_node (NT_node T); -NT_node simple_recode_tree (NT_node T, int nseq); -NT_node recode_tree ( NT_node T, Sequence *S); -int compare_branch2 ( int *b1, int *b2, int n); - -/*********************************************************************/ -/* */ -/* FJ_tree Computation */ -/* */ -/* */ -/*********************************************************************/ -NT_node similarities_file2tree (char *mat); -NT_node tree_compute ( Alignment *A, int n, char ** arg_list); -static NT_node compute_std_tree (Alignment *A, int n, char **arg_list); -NT_node compute_std_tree_2 (Alignment *A, int **s, char *arg_list); -NT_node aln2fj_tree(NT_node T, Alignment *A, int limit,char* mode); -Alignment * filter_aln4tree (Alignment *A, int n,int fg,char* mode); - -/*********************************************************************/ -/* */ -/* Tree Filters and MAnipulation */ -/* */ -/* */ -/*********************************************************************/ -int tree2star_nodes (NT_node R, int n_max); -NT_node aln2tree (Alignment *A); -NT_node reset_boot_tree ( NT_node R, int n); -NT_node tree_dist2normalized_tree_dist ( NT_node R, float max); -NT_node reset_dist_tree ( NT_node R, float n); -NT_node* free_treelist ( NT_node *R); -NT_node free_tree ( NT_node R); -NT_node realloc_tree( NT_node R, int n); -NT_node free_tree_node ( NT_node R); - -Sequence * tree2seq (NT_node R, Sequence *S); -NT_node rename_seq_in_tree ( NT_node R, char ***list); - -NT_node balance_tree (NT_node); -int tree2nseq ( NT_node R); -int tree_file2nseq ( char *file); - -int tree2nleaf ( NT_node R); -int tree2nnode ( NT_node R); -int tree2_nnode_unresolved (NT_node R, int *l); - -FILE* display_tree ( NT_node R, int n, FILE *fp); -void clear_tree (NT_node T); -int display_leaf ( NT_node T, FILE *fp); -int display_leaf_below_node ( NT_node T, FILE *fp); -NT_node display_leaf_nb (NT_node T, int n, FILE *fp, char *name); -NT_node display_splits (NT_node T,Sequence *S, FILE *fp); -int tree2split_list (NT_node T, int nseq, int **split_list, int *n); - -NT_node reroot_tree ( NT_node TREE, NT_node T); -NT_node straighten_tree ( NT_node P, NT_node C, float new_dist); -NT_node unroot_tree ( NT_node T); -FILE* print_tree_list ( NT_node *T,char *format, FILE *fp); -FILE* print_tree ( NT_node T,char *format, FILE *fp); -char *tree2string (NT_node T); -char *tree2file (NT_node T, char *name, char *mode); - -int print_newick_tree ( NT_node T, char *name); -FILE * rec_print_tree ( NT_node T, FILE *fp); - - -NT_node find_longest_branch ( NT_node T, NT_node L); -NT_node shift_root ( NT_node R); - -int ** tree2cluster (NT_node T, float thres); -int ** make_sub_tree_list ( NT_node **T, int nseq, int n_node); -void make_all_sub_tree_list ( NT_node N, int **list, int *n); -void make_one_sub_tree_list ( NT_node T, int *list); -NT_node main_read_tree(char *treefile); - -NT_node new_read_tree ( char *teefile); -NT_node new_get_node (NT_node T, FILE *fp); - - -NT_node** simple_read_tree(char *treefile); -void free_read_tree (NT_node **BT); -NT_node** read_tree(char *treefile, int *nnodes,int nseq, char **seq_names); -FILE * create_linear_tree ( char **name, int n, FILE *fp); -FILE * create_tree(NT_node ptree, NT_node parent,int *numseq,int *ntotal,int *nnodes,NT_node **lu, FILE *fp); -NT_node declare_tree_node (int nseq); -void set_info(NT_node p, NT_node parent, int pleaf, char *pname, float pdist, float bootstrap); -NT_node insert_tree_node(NT_node pptr); -FILE * skip_space(FILE *fd); -void create_tree_node(NT_node pptr, NT_node parent); -float calc_mean(NT_node nptr, float *maxdist, int nseq,NT_node **lu); -NT_node insert_root(NT_node p, float diff); -float calc_root_mean(NT_node root, float *maxdist, int neq, NT_node **lu); -NT_node reroot(NT_node ptree, int nseq, int ntotal, int nnodes, NT_node **lu); - - -Alignment *seq2seq_chain (Alignment *A,Alignment *B, char *arg); - -float display_avg_bootstrap ( NT_node T); -float tree2tot_dist ( NT_node T, int mode); -int tree2n_branches(NT_node T, int mode); -int **display_tree_from_node (NT_node T, int up, int down, int **array); -NT_node tree2node ( char *name, NT_node T); -NT_node * tree2node_list (NT_node T, NT_node *L); -NT_node tree2root ( NT_node T); -int new_tree_sort ( char *name, NT_node T); - - -NT_node split2tree ( NT_node RT,Sequence *LIST, char *param); -NT_node * read_tree_list (Sequence *S); - -int count_groups( Sequence *S, char *s); - -Split ** count_splits( NT_node RT, Sequence *S, char *s); -NT_node *treelist2prune_treelist (Sequence *S, Sequence *TS, FILE *out); -int** treelist2groups (Sequence *S, Sequence *ST, char *depth, FILE *out); -int treelist2splits (Sequence *S, Sequence *ST); -int treelist2leafgroup ( Sequence *S, Sequence *TS, char *taxon); -int ***tree2dist ( NT_node T, Sequence *S, int ***d); -int treelist2frame (Sequence *S, Sequence *TS); -int** treelist2lti ( Sequence *S, Sequence *TS, int nb, FILE *out); - -float simple_tree_cmp (NT_node T1, NT_node T2,Sequence *S, int mode); - -int treelist2dmat ( Sequence *S); -NT_node new_declare_tree_node (); -int count_tree_groups( Sequence *LIST, char *group_file); -int node_sort ( char *name, NT_node T); -int treelist2n (NT_node *L); -int ** treelist2avg_treecmp (NT_node *L, char *file); -NT_node treelist2bootstrap ( NT_node *L, char *file); -NT_node treelist2filtered_bootstrap ( NT_node *L, char *file, int **score,float f); - -Sequence * treelist2seq ( Sequence *S); -Sequence * treelist2sub_seq ( Sequence *S, int f); - -/* General purpose header file - rf 12/90 */ - -#ifndef _H_general -#define _H_general - - - -#define pint int /* cast ints in printf statements as pint */ -typedef int Boolean; /* Is already defined in THINK_C */ - -#undef TRUE -#undef FALSE -#define TRUE 1 -#define FALSE 0 - -#define EOS '\0' /* End-Of-String */ -#define MAXLINE 512 /* Max. line length */ - - -#endif /* ifndef _H_general */ -/*********************************COPYRIGHT NOTICE**********************************/ -/*© Centro de Regulacio Genomica */ -/*and */ -/*Cedric Notredame */ -/*Tue Oct 27 10:12:26 WEST 2009. */ -/*All rights reserved.*/ -/*This file is part of T-COFFEE.*/ -/**/ -/* T-COFFEE is free software; you can redistribute it and/or modify*/ -/* it under the terms of the GNU General Public License as published by*/ -/* the Free Software Foundation; either version 2 of the License, or*/ -/* (at your option) any later version.*/ -/**/ -/* T-COFFEE is distributed in the hope that it will be useful,*/ -/* but WITHOUT ANY WARRANTY; without even the implied warranty of*/ -/* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the*/ -/* GNU General Public License for more details.*/ -/**/ -/* You should have received a copy of the GNU General Public License*/ -/* along with Foobar; if not, write to the Free Software*/ -/* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA*/ -/*............................................... |*/ -/* If you need some more information*/ -/* cedric.notredame@europe.com*/ -/*............................................... |*/ -/**/ -/**/ -/* */ -/*********************************COPYRIGHT NOTICE**********************************/ diff --git a/binaries/src/tcoffee/t_coffee_source/makefile b/binaries/src/tcoffee/t_coffee_source/makefile deleted file mode 100644 index 4c17909..0000000 --- a/binaries/src/tcoffee/t_coffee_source/makefile +++ /dev/null @@ -1,11 +0,0 @@ - -t_coffee: util_constraints_list.o util_job_handling.o util_dps.o util_domain_constraints_list.o util_analyse_constraints_list.o util_aln_analyze.o aln_convertion_util.o util_declare.o hsearch.o random.o util_make_tree.o util.o reformat_struc.o reformat.o aln_compare.o io_func.o pb_util_read_sequence.o pb_util_read_seq_util.o tree_util.o util_graph_maln.o util_dp_clean_maln.o util_dp_ssec_pwaln.o util_dp_sim.o util_dp_mm_nw.o util_dp_gotoh_nw.o util_dp_suboptimal_nw.o util_dp_cdna_fasta_nw.o util_dp_generic_fasta_nw.o util_dp_fasta_nw.o util_dp_fasta_sw.o util_dp_gotoh_sw.o util_dp_est.o util_domain_dp_drivers.o util_dp_drivers.o util_domain_dp.o CUSTOM_evaluate_for_struc.o evaluate_for_struc.o evaluate_for_domain.o evaluate_dirichlet.o evaluate.o showpair.o fsa_dp.o pavie_dp.o dev1.o dev2.o dev3.o dev4.o fastal.o parttree.o t_coffee.o - $(CC) $(CFLAGS) -o t_coffee util_constraints_list.o util_job_handling.o util_dps.o util_domain_constraints_list.o util_analyse_constraints_list.o util_aln_analyze.o aln_convertion_util.o util_declare.o hsearch.o random.o util_make_tree.o util.o reformat_struc.o reformat.o aln_compare.o io_func.o pb_util_read_sequence.o pb_util_read_seq_util.o tree_util.o util_graph_maln.o util_dp_clean_maln.o util_dp_ssec_pwaln.o util_dp_sim.o util_dp_mm_nw.o util_dp_gotoh_nw.o util_dp_suboptimal_nw.o util_dp_cdna_fasta_nw.o util_dp_generic_fasta_nw.o util_dp_fasta_nw.o util_dp_fasta_sw.o util_dp_gotoh_sw.o util_dp_est.o util_domain_dp_drivers.o util_dp_drivers.o util_domain_dp.o CUSTOM_evaluate_for_struc.o evaluate_for_struc.o evaluate_for_domain.o evaluate_dirichlet.o evaluate.o showpair.o fsa_dp.o pavie_dp.o dev1.o dev2.o dev3.o dev4.o fastal.o parttree.o t_coffee.o -lm - -all: t_coffee TMalign - -clean: - rm *.o - -TMalign: - $(FCC) TMalign.f -o TMalign diff --git a/binaries/src/tcoffee/t_coffee_source/matrices.h b/binaries/src/tcoffee/t_coffee_source/matrices.h deleted file mode 100644 index 8340059..0000000 --- a/binaries/src/tcoffee/t_coffee_source/matrices.h +++ /dev/null @@ -1,960 +0,0 @@ - -char *amino_acid_order = "ABCDEFGHIKLMNPQRSTVWXYZ"; - -//Jones Taylor Thornton, tm matrix, -//H JOND940101 -//D The 250 PAM transmembrane protein exchange matrix (Jones et al., 1994) -//R LIT:2006072 PMID:8112466 -//A Jones, D.T., Taylor, W.R. and Thornton, J.M. -//T A mutation data matrix for transmembrane proteins -//J FEBS Lett. 339, 269-275 (1994) -int jtttm250mt[]={ - 2, - 0, 0, - 0, 0, 6, - 0, 0, -3, 12, - 0, 0, -3, 8, 13, - -2, 0, 1, -6, -6, 5, - 1, 0, -1, 3, 3, -4, 6, - -3, 0, -1, 3, 2, -3, -3, 11, - 0, 0, -1, -3, -4, -1, -2, -4, 2, - -2, 0, -3, 3, 1, -5, -1, 4, -4, 12, - -2, 0, -1, -5, -5, 1, -4, -4, 1, -4, 3, - -1, 0, -1, -3, -3, 0, -3, -3, 1, -1, 1, 3, - -1, 0, -1, 6, 1, -4, -2, 3, -3, 5, -4, -2, 11, - 0, 0, -4, -2, -3, -4, -2, -4, -3, -4, -1, -3, -2, 11, - -2, 0, -3, 2, 7, -4, -1, 7, -4, 6, -2, -2, 3, 0, 11, - -1, 0, -1, 1, 2, -4, 0, 5, -3, 9, -3, 0, 2, -3, 6, 7, - 2, 0, 1, 0, 0, -1, 1, -2, -1, -1, -2, -2, 2, -1, -1, -1, 3, - 1, 0, 0, 0, -1, -2, 0, -2, 0, -2, -1, 0, 1, -1, -2, -1, 2, 3, - 0, 0, 0, -3, -2, -1, -1, -4, 2, -4, 0, 1, -3, -3, -4, -2, -1, 0, 2, - -4, 0, 1, -4, -3, -3, -2, -1, -3, 3, -2, -2, -3, -6, 0, 5, -3, -4, -2, 12, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - -3, 0, 3, -2, -5, 2, -5, 6, -4, 1, -3, -3, -1, -5, 0, -1, 0, -3, -4, -2, 0, 10, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; - - -int blosum30mt[]={ - 4, - 0, 5, - -3, -2, 17, - 0, 5, -3, 9, - 0, 0, 1, 1, 6, - -2, -3, -3, -5, -4, 10, - 0, 0, -4, -1, -2, -3, 8, - -2, -2, -5, -2, 0, -3, -3, 14, - 0, -2, -2, -4, -3, 0, -1, -2, 6, - 0, 0, -3, 0, 2, -1, -1, -2, -2, 4, - -1, -1, 0, -1, -1, 2, -2, -1, 2, -2, 4, - 1, -2, -2, -3, -1, -2, -2, 2, 1, 2, 2, 6, - 0, 4, -1, 1, -1, -1, 0, -1, 0, 0, -2, 0, 8, - -1, -2, -3, -1, 1, -4, -1, 1, -3, 1, -3, -4, -3, 11, - 1, -1, -2, -1, 2, -3, -2, 0, -2, 0, -2, -1, -1, 0, 8, - -1, -2, -2, -1, -1, -1, -2, -1, -3, 1, -2, 0, -2, -1, 3, 8, - 1, 0, -2, 0, 0, -1, 0, -1, -1, 0, -2, -2, 0, -1, -1, -1, 4, - 1, 0, -2, -1, -2, -2, -2, -2, 0, -1, 0, 0, 1, 0, 0, -3, 2, 5, - 1, -2, -2, -2, -3, 1, -3, -3, 4, -2, 1, 0, -2, -4, -3, -1, -1, 1, 5, - -5, -5, -2, -4, -1, 1, 1, -5, -3, -2, -2, -3, -7, -3, -1, 0, -3, -5, -3, 20, - 0, -1, -2, -1, -1, -1, -1, -1, 0, 0, 0, 0, 0, -1, 0, -1, 0, 0, 0, -2, -1, - -4, -3, -6, -1, -2, 3, -3, 0, -1, -1, 3, -1, -4, -2, -1, 0, -2, -1, 1, 5, -1, 9, - 0, 0, 0, 0, 5, -4, -2, 0, -3, 1, -1, -1, -1, 0, 4, 0, -1, -1, -3, -1, 0, -2, 4}; - -int blosum40mt[]={ - 5, - -1, 5, - -2, -2, 16, - -1, 6, -2, 9, - -1, 1, -2, 2, 7, - -3, -3, -2, -4, -3, 9, - 1, -1, -3, -2, -3, -3, 8, - -2, 0, -4, 0, 0, -2, -2, 13, - -1, -3, -4, -4, -4, 1, -4, -3, 6, - -1, 0, -3, 0, 1, -3, -2, -1, -3, 6, - -2, -3, -2, -3, -2, 2, -4, -2, 2, -2, 6, - -1, -3, -3, -3, -2, 0, -2, 1, 1, -1, 3, 7, - -1, 4, -2, 2, -1, -3, 0, 1, -2, 0, -3, -2, 8, - -2, -2, -5, -2, 0, -4, -1, -2, -2, -1, -4, -2, -2, 11, - 0, 0, -4, -1, 2, -4, -2, 0, -3, 1, -2, -1, 1, -2, 8, - -2, -1, -3, -1, -1, -2, -3, 0, -3, 3, -2, -1, 0, -3, 2, 9, - 1, 0, -1, 0, 0, -2, 0, -1, -2, 0, -3, -2, 1, -1, 1, -1, 5, - 0, 0, -1, -1, -1, -1, -2, -2, -1, 0, -1, -1, 0, 0, -1, -2, 2, 6, - 0, -3, -2, -3, -3, 0, -4, -4, 4, -2, 2, 1, -3, -3, -3, -2, -1, 1, 5, - -3, -4, -6, -5, -2, 1, -2, -5, -3, -2, -1, -2, -4, -4, -1, -2, -5, -4, -3, 19, - 0, -1, -2, -1, -1, -1, -1, -1, -1, -1, -1, 0, -1, -2, -1, -1, 0, 0, -1, -2, -1, - -2, -3, -4, -3, -2, 4, -3, 2, 0, -1, 0, 1, -2, -3, -1, -1, -2, -1, -1, 3, -1, 9, - -1, 2, -3, 1, 5, -4, -2, 0, -4, 1, -2, -2, 0, -1, 4, 0, 0, -1, -3, -2, -1, -2, 5}; - -int blosum45mt[]={ - 5, - -1, 4, - -1, -2, 12, - -2, 5, -3, 7, - -1, 1, -3, 2, 6, - -2, -3, -2, -4, -3, 8, - 0, -1, -3, -1, -2, -3, 7, - -2, 0, -3, 0, 0, -2, -2, 10, - -1, -3, -3, -4, -3, 0, -4, -3, 5, - -1, 0, -3, 0, 1, -3, -2, -1, -3, 5, - -1, -3, -2, -3, -2, 1, -3, -2, 2, -3, 5, - -1, -2, -2, -3, -2, 0, -2, 0, 2, -1, 2, 6, - -1, 4, -2, 2, 0, -2, 0, 1, -2, 0, -3, -2, 6, - -1, -2, -4, -1, 0, -3, -2, -2, -2, -1, -3, -2, -2, 9, - -1, 0, -3, 0, 2, -4, -2, 1, -2, 1, -2, 0, 0, -1, 6, - -2, -1, -3, -1, 0, -2, -2, 0, -3, 3, -2, -1, 0, -2, 1, 7, - 1, 0, -1, 0, 0, -2, 0, -1, -2, -1, -3, -2, 1, -1, 0, -1, 4, - 0, 0, -1, -1, -1, -1, -2, -2, -1, -1, -1, -1, 0, -1, -1, -1, 2, 5, - 0, -3, -1, -3, -3, 0, -3, -3, 3, -2, 1, 1, -3, -3, -3, -2, -1, 0, 5, - -2, -4, -5, -4, -3, 1, -2, -3, -2, -2, -2, -2, -4, -3, -2, -2, -4, -3, -3, 15, - 0, -1, -2, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, 0, -1, -2, -1, - -2, -2, -3, -2, -2, 3, -3, 2, 0, -1, 0, 0, -2, -3, -1, -1, -2, -1, -1, 3, -1, 8, - -1, 2, -3, 1, 4, -3, -2, 0, -3, 1, -2, -1, 0, -1, 4, 0, 0, -1, -3, -2, -1, -2, 4}; - -int blosum50mt[] = { - 5, - -2, 5, - -1, -3, 13, - -2, 5, -4, 8, - -1, 1, -3, 2, 6, - -3, -4, -2, -5, -3, 8, - 0, -1, -3, -1, -3, -4, 8, - -2, 0, -3, -1, 0, -1, -2, 10, - -1, -4, -2, -4, -4, 0, -4, -4, 5, - -1, 0, -3, -1, 1, -4, -2, 0, -3, 6, - -2, -4, -2, -4, -3, 1, -4, -3, 2, -3, 5, - -1, -3, -2, -4, -2, 0, -3, -1, 2, -2, 3, 7, - -1, 4, -2, 2, 0, -4, 0, 1, -3, 0, -4, -2, 7, - -1, -2, -4, -1, -1, -4, -2, -2, -3, -1, -4, -3, -2, 10, - -1, 0, -3, 0, 2, -4, -2, 1, -3, 2, -2, 0, 0, -1, 7, - -2, -1, -4, -2, 0, -3, -3, 0, -4, 3, -3, -2, -1, -3, 1, 7, - 1, 0, -1, 0, -1, -3, 0, -1, -3, 0, -3, -2, 1, -1, 0, -1, 5, - 0, 0, -1, -1, -1, -2, -2, -2, -1, -1, -1, -1, 0, -1, -1, -1, 2, 5, - 0, -4, -1, -4, -3, -1, -4, -4, 4, -3, 1, 1, -3, -3, -3, -3, -2, 0, 5, - -3, -5, -5, -5, -3, 1, -3, -3, -3, -3, -2, -1, -4, -4, -1, -3, -4, -3, -3, 15, - -1, -1, -2, -1, -1, -2, -2, -1, -1, -1, -1, -1, -1, -2, -1, -1, -1, 0, -1, -3, -1, - -2, -3, -3, -3, -2, 4, -3, 2, -1, -2, -1, 0, -2, -3, -1, -1, -2, -2, -1, 2, -1, 8, - -1, 2, -3, 1, 5, -4, -2, 0, -3, 1, -3, -1, 0, -1, 4, 0, 0, -1, -3, -2, -1, -2, 5}; - - -int blosum55mt[]={ - 5, - -2, 5, - 0, -4, 13, - -2, 5, -4, 8, - -1, 1, -4, 2, 7, - -3, -5, -3, -5, -4, 9, - 0, -1, -3, -2, -3, -4, 8, - -2, 0, -4, -1, -1, -1, -2, 11, - -2, -4, -2, -4, -4, 0, -5, -4, 6, - -1, 0, -4, -1, 1, -4, -2, 0, -4, 6, - -2, -4, -2, -5, -4, 1, -5, -3, 2, -3, 6, - -1, -3, -2, -4, -3, 0, -3, -2, 2, -2, 3, 8, - -2, 4, -3, 2, 0, -4, 0, 1, -4, 0, -4, -3, 8, - -1, -2, -3, -2, -1, -5, -3, -3, -3, -1, -4, -3, -2, 10, - -1, 0, -4, 0, 2, -4, -2, 1, -4, 2, -3, 0, 0, -1, 7, - -2, -1, -4, -2, 0, -3, -3, 0, -4, 3, -3, -2, -1, -3, 1, 8, - 2, 0, -1, 0, 0, -3, 0, -1, -3, 0, -3, -2, 1, -1, 0, -1, 5, - 0, -1, -1, -1, -1, -3, -2, -2, -1, -1, -2, -1, 0, -1, -1, -1, 2, 6, - 0, -4, -1, -4, -3, -1, -4, -4, 4, -3, 1, 1, -4, -3, -3, -3, -2, 0, 5, - -4, -5, -4, -5, -3, 2, -3, -3, -3, -4, -3, -2, -5, -5, -2, -3, -4, -3, -4, 15, - -1, -1, -2, -2, -1, -2, -2, -1, -1, -1, -1, -1, -1, -2, -1, -1, -1, -1, -1, -3, -1, - -2, -3, -3, -3, -2, 4, -4, 2, -1, -2, -1, -1, -2, -4, -1, -2, -2, -2, -2, 3, -1, 9, - -1, 2, -4, 1, 5, -4, -3, 0, -4, 1, -3, -2, 0, -1, 4, 0, 0, -1, -3, -3, -1, -2, 5}; - -int blosum62mt[]={ - 4, - -2, 4, - 0, -3, 9, - -2, 4, -3, 6, - -1, 1, -4, 2, 5, - -2, -3, -2, -3, -3, 6, - 0, -1, -3, -1, -2, -3, 6, - -2, 0, -3, -1, 0, -1, -2, 8, - -1, -3, -1, -3, -3, 0, -4, -3, 4, - -1, 0, -3, -1, 1, -3, -2, -1, -3, 5, - -1, -4, -1, -4, -3, 0, -4, -3, 2, -2, 4, - -1, -3, -1, -3, -2, 0, -3, -2, 1, -1, 2, 5, - -2, 3, -3, 1, 0, -3, 0, 1, -3, 0, -3, -2, 6, - -1, -2, -3, -1, -1, -4, -2, -2, -3, -1, -3, -2, -2, 7, - -1, 0, -3, 0, 2, -3, -2, 0, -3, 1, -2, 0, 0, -1, 5, - -1, -1, -3, -2, 0, -3, -2, 0, -3, 2, -2, -1, 0, -2, 1, 5, - 1, 0, -1, 0, 0, -2, 0, -1, -2, 0, -2, -1, 1, -1, 0, -1, 4, - 0, -1, -1, -1, -1, -2, -2, -2, -1, -1, -1, -1, 0, -1, -1, -1, 1, 5, - 0, -3, -1, -3, -2, -1, -3, -3, 3, -2, 1, 1, -3, -2, -2, -3, -2, 0, 4, - -3, -4, -2, -4, -3, 1, -2, -2, -3, -3, -2, -1, -4, -4, -2, -3, -3, -2, -3, 11, - 0, -1, -2, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -2, -1, -1, 0, 0, -1, -2, -1, - -2, -3, -2, -3, -2, 3, -3, 2, -1, -2, -1, -1, -2, -3, -1, -2, -2, -2, -1, 2, -1, 7, - -1, 1, -3, 1, 4, -3, -2, 0, -3, 1, -3, -1, 0, -1, 3, 0, 0, -1, -2, -3, -1, -2, 4}; - -int blosum62mt3[]={ - 0, - -2, 0, - 0, -3, 0, - -2, 4, -3, 0, - -1, 1, -4, 2, 0, - -2, -3, -2, -3, -3, 0, - 0, -1, -3, -1, -2, -3, 0, - -2, 0, -3, -1, 0, -1, -2, 0, - -1, -3, -1, -3, -3, 0, -4, -3, 0, - -1, 0, -3, -1, 1, -3, -2, -1, -3, 0, - -1, -4, -1, -4, -3, 0, -4, -3, 2, -2, 0, - -1, -3, -1, -3, -2, 0, -3, -2, 1, -1, 2, 0, - -2, 3, -3, 1, 0, -3, 0, 1, -3, 0, -3, -2, 0, - -1, -2, -3, -1, -1, -4, -2, -2, -3, -1, -3, -2, -2, 0, - -1, 0, -3, 0, 2, -3, -2, 0, -3, 1, -2, 0, 0, -1, 0, - -1, -1, -3, -2, 0, -3, -2, 0, -3, 2, -2, -1, 0, -2, 1, 0, - 1, 0, -1, 0, 0, -2, 0, -1, -2, 0, -2, -1, 1, -1, 0, -1, 0, - 0, -1, -1, -1, -1, -2, -2, -2, -1, -1, -1, -1, 0, -1, -1, -1, 1, 0, - 0, -3, -1, -3, -2, -1, -3, -3, 3, -2, 1, 1, -3, -2, -2, -3, -2, 0, 0, - -3, -4, -2, -4, -3, 1, -2, -2, -3, -3, -2, -1, -4, -4, -2, -3, -3, -2, -3, 0, - 0, -1, -2, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -2, -1, -1, 0, 0, -1, -2, 0, - -2, -3, -2, -3, -2, 3, -3, 2, -1, -2, -1, -1, -2, -3, -1, -2, -2, -2, -1, 2, -1, 0, - -1, 1, -3, 1, 4, -3, -2, 0, -3, 1, -3, -1, 0, -1, 3, 0, 0, -1, -2, -3, -1, -2, 0}; - -int blosum62mt2[]={ - 0, - -2, 4, - 0, -3, 0, - -2, 4, -3, 2, - -1, 1, -4, 2, 2, - -2, -3, -2, -3, -3, 3, - 0, -1, -3, -1, -2, -3, 0, - -2, 0, -3, -1, 0, -1, -2, 2, - -1, -3, -1, -3, -3, 0, -4, -3, 2, - -1, 0, -3, -1, 1, -3, -2, -1, -3, 2, - -1, -4, -1, -4, -3, 0, -4, -3, 2, -2, 1, - -1, -3, -1, -3, -2, 0, -3, -2, 1, -1, 2, 0, - -2, 3, -3, 1, 0, -3, 0, 1, -3, 0, -3, -2, 0, - -1, -2, -3, -1, -1, -4, -2, -2, -3, -1, -3, -2, -2, 0, - -1, 0, -3, 0, 2, -3, -2, 0, -3, 1, -2, 0, 0, -1, 2, - -1, -1, -3, -2, 0, -3, -2, 0, -3, 2, -2, -1, 0, -2, 1, 0, - 1, 0, -1, 0, 0, -2, 0, -1, -2, 0, -2, -1, 1, -1, 0, -1, 1, - 0, -1, -1, -1, -1, -2, -2, -2, -1, -1, -1, -1, 0, -1, -1, -1, 1, 1, - 0, -3, -1, -3, -2, -1, -3, -3, 3, -2, 1, 1, -3, -2, -2, -3, -2, 0, 1, - -3, -4, -2, -4, -3, 1, -2, -2, -3, -3, -2, -1, -4, -4, -2, -3, -3, -2, -3, 1, - 0, -1, -2, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -2, -1, -1, 0, 0, -1, -2, -1, - -2, -3, -2, -3, -2, 3, -3, 2, -1, -2, -1, -1, -2, -3, -1, -2, -2, -2, -1, 2, -1, 3, - -1, 1, -3, 1, 4, -3, -2, 0, -3, 1, -3, -1, 0, -1, 3, 0, 0, -1, -2, -3, -1, -2, 4}; - - -int blosum80mt[]={ - 7, - -3, 6, - -1, -6, 13, - -3, 6, -7, 10, - -2, 1, -7, 2, 8, - -4, -6, -4, -6, -6, 10, - 0, -2, -6, -3, -4, -6, 9, - -3, -1, -7, -2, 0, -2, -4, 12, - -3, -6, -2, -7, -6, -1, -7, -6, 7, - -1, -1, -6, -2, 1, -5, -3, -1, -5, 8, - -3, -7, -3, -7, -6, 0, -7, -5, 2, -4, 6, - -2, -5, -3, -6, -4, 0, -5, -4, 2, -3, 3, 9, - -3, 5, -5, 2, -1, -6, -1, 1, -6, 0, -6, -4, 9, - -1, -4, -6, -3, -2, -6, -5, -4, -5, -2, -5, -4, -4, 12, - -2, -1, -5, -1, 3, -5, -4, 1, -5, 2, -4, -1, 0, -3, 9, - -3, -2, -6, -3, -1, -5, -4, 0, -5, 3, -4, -3, -1, -3, 1, 9, - 2, 0, -2, -1, -1, -4, -1, -2, -4, -1, -4, -3, 1, -2, -1, -2, 7, - 0, -1, -2, -2, -2, -4, -3, -3, -2, -1, -3, -1, 0, -3, -1, -2, 2, 8, - -1, -6, -2, -6, -4, -2, -6, -5, 4, -4, 1, 1, -5, -4, -4, -4, -3, 0, 7, - -5, -8, -5, -8, -6, 0, -6, -4, -5, -6, -4, -3, -7, -7, -4, -5, -6, -5, -5, 16, - -1, -3, -4, -3, -2, -3, -3, -2, -2, -2, -2, -2, -2, -3, -2, -2, -1, -1, -2, -5, -2, - -4, -5, -5, -6, -5, 4, -6, 3, -3, -4, -2, -3, -4, -6, -3, -4, -3, -3, -3, 3, -3, 11, - -2, 0, -7, 1, 6, -6, -4, 0, -6, 1, -5, -3, -1, -2, 5, 0, -1, -2, -4, -5, -1, -4, 6}; - -int pam120mt[]={ - 3, - 0, 4, - -3, -6, 9, - 0, 4, -7, 5, - 0, 3, -7, 3, 5, - -4, -5, -6, -7, -7, 8, - 1, 0, -4, 0, -1, -5, 5, - -3, 1, -4, 0, -1, -3, -4, 7, - -1, -3, -3, -3, -3, 0, -4, -4, 6, - -2, 0, -7, -1, -1, -7, -3, -2, -3, 5, - -3, -4, -7, -5, -4, 0, -5, -3, 1, -4, 5, - -2, -4, -6, -4, -3, -1, -4, -4, 1, 0, 3, 8, - -1, 3, -5, 2, 1, -4, 0, 2, -2, 1, -4, -3, 4, - 1, -2, -4, -3, -2, -5, -2, -1, -3, -2, -3, -3, -2, 6, - -1, 0, -7, 1, 2, -6, -3, 3, -3, 0, -2, -1, 0, 0, 6, - -3, -2, -4, -3, -3, -5, -4, 1, -2, 2, -4, -1, -1, -1, 1, 6, - 1, 0, 0, 0, -1, -3, 1, -2, -2, -1, -4, -2, 1, 1, -2, -1, 3, - 1, 0, -3, -1, -2, -4, -1, -3, 0, -1, -3, -1, 0, -1, -2, -2, 2, 4, - 0, -3, -3, -3, -3, -3, -2, -3, 3, -4, 1, 1, -3, -2, -3, -3, -2, 0, 5, - -7, -6, -8, -8, -8, -1, -8, -3, -6, -5, -3, -6, -4, -7, -6, 1, -2, -6, -8, 12, - -1, -1, -4, -2, -1, -3, -2, -2, -1, -2, -2, -2, -1, -2, -1, -2, -1, -1, -1, -5, -2, - -4, -3, -1, -5, -5, 4, -6, -1, -2, -5, -2, -4, -2, -6, -5, -5, -3, -3, -3, -2, -3, 8, - -1, 2, -7, 3, 4, -6, -2, 1, -3, -1, -3, -2, 0, -1, 4, -1, -1, -2, -3, -7, -1, -5, 4}; - -int pam160mt[]={ - 2, - 0, 3, - -2, -4, 9, - 0, 3, -5, 4, - 0, 2, -5, 3, 4, - -3, -4, -5, -6, -5, 7, - 1, 0, -3, 0, 0, -4, 4, - -2, 1, -3, 0, 0, -2, -3, 6, - -1, -2, -2, -3, -2, 0, -3, -3, 5, - -2, 0, -5, 0, -1, -5, -2, -1, -2, 4, - -2, -4, -6, -4, -3, 1, -4, -2, 2, -3, 5, - -1, -3, -5, -3, -2, 0, -3, -3, 2, 0, 3, 7, - 0, 2, -4, 2, 1, -3, 0, 2, -2, 1, -3, -2, 3, - 1, -1, -3, -2, -1, -4, -1, -1, -2, -2, -3, -2, -1, 5, - -1, 1, -5, 1, 2, -5, -2, 2, -2, 0, -2, -1, 0, 0, 5, - -2, -1, -3, -2, -2, -4, -3, 1, -2, 3, -3, -1, -1, -1, 1, 6, - 1, 0, 0, 0, 0, -3, 1, -1, -2, -1, -3, -2, 1, 1, -1, -1, 2, - 1, 0, -2, -1, -1, -3, -1, -2, 0, 0, -2, -1, 0, 0, -1, -1, 1, 3, - 0, -2, -2, -3, -2, -2, -2, -2, 3, -3, 1, 1, -2, -2, -2, -3, -1, 0, 4, - -5, -5, -7, -6, -7, -1, -7, -3, -5, -4, -2, -4, -4, -5, -5, 1, -2, -5, -6, 12, - 0, -1, -3, -1, -1, -3, -1, -1, -1, -1, -2, -1, 0, -1, -1, -1, 0, 0, -1, -4, -1, - -3, -3, 0, -4, -4, 5, -5, 0, -2, -4, -2, -3, -2, -5, -4, -4, -3, -3, -3, -1, -3, 8, - 0, 2, -5, 2, 3, -5, -1, 1, -2, 0, -3, -2, 1, -1, 3, 0, -1, -1, -2, -6, -1, -4, 3}; - -int pam250mt[]={ - 2, - 0, 3, - -2, -4, 12, - 0, 3, -5, 4, - 0, 3, -5, 3, 4, - -3, -4, -4, -6, -5, 9, - 1, 0, -3, 1, 0, -5, 5, - -1, 1, -3, 1, 1, -2, -2, 6, - -1, -2, -2, -2, -2, 1, -3, -2, 5, - -1, 1, -5, 0, 0, -5, -2, 0, -2, 5, - -2, -3, -6, -4, -3, 2, -4, -2, 2, -3, 6, - -1, -2, -5, -3, -2, 0, -3, -2, 2, 0, 4, 6, - 0, 2, -4, 2, 1, -3, 0, 2, -2, 1, -3, -2, 2, - 1, -1, -3, -1, -1, -5, 0, 0, -2, -1, -3, -2, 0, 6, - 0, 1, -5, 2, 2, -5, -1, 3, -2, 1, -2, -1, 1, 0, 4, - -2, -1, -4, -1, -1, -4, -3, 2, -2, 3, -3, 0, 0, 0, 1, 6, - 1, 0, 0, 0, 0, -3, 1, -1, -1, 0, -3, -2, 1, 1, -1, 0, 2, - 1, 0, -2, 0, 0, -3, 0, -1, 0, 0, -2, -1, 0, 0, -1, -1, 1, 3, - 0, -2, -2, -2, -2, -1, -1, -2, 4, -2, 2, 2, -2, -1, -2, -2, -1, 0, 4, - -6, -5, -8, -7, -7, 0, -7, -3, -5, -3, -2, -4, -4, -6, -5, 2, -2, -5, -6, 17, - 0, -1, -3, -1, -1, -2, -1, -1, -1, -1, -1, -1, 0, -1, -1, -1, 0, 0, -1, -4, -1, - -3, -3, 0, -4, -4, 7, -5, 0, -1, -4, -1, -2, -2, -5, -4, -4, -3, -3, -2, 0, -2, 10, - 0, 2, -5, 3, 3, -5, 0, 2, -2, 0, -3, -2, 1, 0, 3, 0, 0, -1, -2, -6, -1, -4, 3}; - -int pam350mt[]={ - 2, - 1, 3, - -2, -5, 18, - 1, 3, -6, 4, - 1, 3, -6, 4, 4, - -4, -5, -5, -6, -6, 13, - 2, 1, -4, 1, 1, -6, 5, - -1, 1, -4, 1, 1, -2, -2, 7, - 0, -2, -3, -2, -2, 2, -2, -2, 5, - -1, 1, -6, 1, 0, -6, -1, 1, -2, 5, - -2, -4, -7, -4, -4, 3, -4, -2, 4, -3, 8, - -1, -2, -6, -3, -2, 1, -3, -2, 3, 0, 5, 6, - 0, 2, -4, 2, 2, -4, 1, 2, -2, 1, -3, -2, 2, - 1, 0, -3, 0, 0, -5, 0, 0, -2, -1, -3, -2, 0, 6, - 0, 2, -6, 2, 3, -5, -1, 3, -2, 1, -2, -1, 1, 1, 4, - -1, 0, -4, -1, 0, -5, -2, 2, -2, 4, -3, 0, 1, 0, 2, 7, - 1, 1, 0, 1, 0, -4, 1, -1, -1, 0, -3, -2, 1, 1, 0, 0, 1, - 1, 0, -2, 0, 0, -3, 1, -1, 0, 0, -2, -1, 1, 1, 0, -1, 1, 2, - 0, -2, -2, -2, -2, -1, -1, -2, 4, -2, 3, 2, -2, -1, -2, -3, -1, 0, 5, - -7, -6,-10, -8, -8, 1, -8, -3, -6, -4, -2, -5, -5, -7, -5, 4, -3, -6, -7, 27, - 0, 0, -3, -1, 0, -2, -1, 0, 0, -1, -1, 0, 0, 0, 0, -1, 0, 0, 0, -5, -1, - -4, -4, 1, -5, -5, 11, -6, 0, 0, -5, 0, -2, -3, -6, -5, -5, -3, -3, -2, 1, -2, 14, - 0, 2, -6, 3, 3, -6, 0, 2, -2, 1, -3, -2, 2, 0, 3, 1, 0, 0, -2, -7, 0, -5, 3}; - -int md_40mt[]={ - 9, - 0, 0, - -7, 0, 16, - -6, 0,-13, 11, - -5, 0,-15, 3, 11, --11, 0, -5,-15,-16, 13, - -3, 0, -7, -4, -4,-15, 10, - -9, 0, -6, -4, -8, -7,-10, 14, - -6, 0,-11,-12,-12, -5,-13,-11, 11, - -8, 0,-12, -8, -3,-16, -9, -6,-11, 11, - -9, 0,-10,-14,-13, -1,-14, -7, -1,-12, 9, - -6, 0, -9,-12,-11, -7,-12, -9, 1, -7, 1, 14, - -6, 0, -8, 1, -5,-12, -5, 0, -8, -1,-12, -9, 12, - -2, 0,-11,-11,-11,-11, -9, -4,-11,-10, -5,-10, -9, 12, - -7, 0,-12, -6, 0,-14, -9, 2,-12, -1, -6, -8, -5, -3, 12, - -7, 0, -5,-10, -8,-15, -4, 0,-10, 3, -9, -8, -6, -6, 0, 11, - 0, 0, -2, -6, -8, -6, -2, -6, -8, -7, -7, -8, 1, -1, -7, -5, 9, - 1, 0, -7, -8, -8,-11, -7, -7, -2, -5, -9, -2, -2, -4, -7, -6, 1, 10, - -1, 0, -7, -9, -8, -6, -8,-12, 4,-12, -2, 0,-10, -9,-11,-11, -7, -4, 10, --14, 0, -4,-15,-15, -7, -7,-13,-13,-13, -8,-11,-14,-14,-11, -4, -9,-12,-10, 18, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, --13, 0, -2, -8,-14, 2,-13, 2, -9,-13, -9,-11, -6,-13, -9,-10, -7,-10,-11, -6, 0, 14, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; - -int md_120mt[]={ - 6, - 0, 0, - -3, 0, 14, - -2, 0, -7, 8, - -2, 0, -8, 5, 8, - -6, 0, -2, -9,-10, 11, - 0, 0, -3, 0, -1, -9, 8, - -4, 0, -2, -1, -3, -2, -4, 11, - -1, 0, -5, -7, -7, -1, -6, -6, 7, - -4, 0, -6, -2, 0, -9, -4, -1, -6, 8, - -4, 0, -5, -8, -8, 2, -8, -4, 2, -6, 7, - -2, 0, -5, -7, -6, -2, -6, -5, 3, -4, 3, 10, - -1, 0, -3, 3, -1, -6, -1, 2, -4, 1, -6, -5, 8, - 0, 0, -5, -5, -5, -5, -4, -1, -5, -4, -2, -5, -3, 9, - -3, 0, -6, -1, 2, -7, -4, 4, -6, 2, -3, -4, -1, 0, 9, - -3, 0, -2, -4, -3, -8, -1, 2, -6, 4, -5, -4, -2, -2, 2, 8, - 2, 0, 0, -2, -3, -3, 0, -2, -3, -3, -3, -3, 2, 1, -3, -2, 5, - 2, 0, -3, -3, -4, -6, -2, -3, 0, -2, -4, 0, 1, 0, -3, -3, 2, 6, - 1, 0, -3, -5, -5, -2, -4, -6, 5, -6, 1, 2, -5, -4, -6, -6, -3, 0, 7, - -8, 0, 0, -9, -9, -3, -3, -6, -7, -6, -4, -6, -8, -8, -6, -1, -5, -7, -6, 17, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - -7, 0, 2, -4, -7, 5, -8, 4, -5, -7, -4, -6, -2, -7, -4, -5, -3, -6, -6, -2, 0, 12, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; - -int md_250mt[]={ - 2, - 0, 0, - -1, 0, 11, - -1, 0, -3, 5, - -1, 0, -4, 4, 5, - -3, 0, 0, -5, -5, 8, - 1, 0, -1, 1, 1, -5, 5, - -2, 0, 0, 0, 0, 0, -2, 6, - 0, 0, -2, -3, -3, 0, -3, -3, 4, - -1, 0, -3, 0, 1, -5, -1, 1, -3, 5, - -1, 0, -2, -4, -4, 2, -4, -2, 2, -3, 5, - 0, 0, -2, -3, -3, 0, -3, -2, 3, -2, 3, 6, - 0, 0, -1, 2, 1, -3, 0, 1, -2, 1, -3, -2, 3, - 1, 0, -2, -2, -2, -2, -1, 0, -2, -1, 0, -2, -1, 6, - -1, 0, -3, 0, 2, -4, -1, 3, -3, 2, -2, -2, 0, 0, 5, - -1, 0, -1, -1, 0, -4, 0, 2, -3, 4, -3, -2, 0, -1, 2, 5, - 1, 0, 1, 0, -1, -2, 1, -1, -1, -1, -2, -1, 1, 1, -1, -1, 2, - 2, 0, -1, -1, -1, -2, 0, -1, 1, -1, -1, 0, 1, 1, -1, -1, 1, 2, - 1, 0, -2, -3, -2, 0, -2, -3, 4, -3, 2, 2, -2, -1, -3, -3, -1, 0, 4, - -4, 0, 1, -5, -5, -1, -1, -3, -4, -3, -2, -3, -4, -4, -3, 0, -3, -4, -3, 15, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - -3, 0, 2, -2, -4, 5, -4, 4, -2, -3, -1, -3, -1, -3, -2, -2, -1, -3, -3, 0, 0, 9, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; - -int md_350mt[]={ - 1, - 0, 0, - 0, 0, 9, - 0, 0, -2, 3, - 0, 0, -2, 3, 3, - -2, 0, 1, -3, -4, 6, - 1, 0, 0, 1, 1, -3, 4, - -1, 0, 0, 0, 0, 0, -1, 3, - 0, 0, -1, -2, -2, 1, -2, -2, 3, - -1, 0, -1, 0, 1, -3, 0, 1, -2, 3, - -1, 0, -1, -3, -3, 2, -2, -1, 2, -2, 3, - 0, 0, -1, -2, -2, 1, -2, -1, 2, -2, 2, 3, - 0, 0, -1, 1, 1, -2, 0, 1, -1, 1, -2, -1, 2, - 1, 0, -1, -1, -1, -2, -1, 0, -1, -1, 0, -1, 0, 4, - -1, 0, -2, 1, 1, -2, 0, 2, -2, 2, -1, -1, 0, 0, 3, - -1, 0, 0, 0, 0, -3, 0, 1, -2, 3, -2, -1, 0, 0, 2, 3, - 1, 0, 0, 0, 0, -1, 1, 0, -1, 0, -1, -1, 1, 1, 0, 0, 1, - 1, 0, 0, 0, -1, -1, 0, -1, 0, 0, -1, 0, 0, 1, -1, 0, 1, 1, - 0, 0, -1, -2, -2, 0, -1, -2, 2, -2, 1, 2, -1, -1, -2, -2, 0, 0, 2, - -3, 0, 1, -4, -3, 0, -1, -2, -3, -2, -1, -2, -3, -3, -2, 0, -2, -3, -2, 14, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - -2, 0, 2, -2, -2, 5, -3, 3, -1, -2, 0, -1, -1, -2, -1, -1, -1, -2, -2, 0, 0, 7, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; - -int idmat[]={ -10, - 0, 10, - 0, 0, 10, - 0, 0, 0, 10, - 0, 0, 0, 0, 10, - 0, 0, 0, 0, 0, 10, - 0, 0, 0, 0, 0, 0, 10, - 0, 0, 0, 0, 0, 0, 0, 10, - 0, 0, 0, 0, 0, 0, 0, 0, 10, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 10, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 10, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 10, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 10, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 10, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 10, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 10, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 10, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 10, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,10, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,10, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,10, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,10, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,10}; - -int dna_idmat[]={ -10, - -1, 10, - -1, -1, 10, - -1, -1, -1, 10, - -1, -1, -1, -1, 10, - -1, -1, -1, -1, -1, 10, - -1, -1, -1, -1, -1, -1, 10, - -1, -1, -1, -1, -1, -1, -1, 10, - -1, -1, -1, -1, -1, -1, -1, -1, 10, - -1, -1, -1, -1, -1, -1, -1, -1, -1, 10, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 10, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 10, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 10, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 10, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 10, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 10, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 10, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 10, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,10, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,10, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,10, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,10, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,10}; - -int est_idmat[]={ -10, - -10, 10, - -10, -10, 10, - -10, -10, -10, 10, - -10, -10, -10, -10, 10, - -10, -10, -10, -10, -10, 10, - -10, -10, -10, -10, -10, -10, 10, - -10, -10, -10, -10, -10, -10, -10, 10, - -10, -10, -10, -10, -10, -10, -10, -10, 10, - -10, -10, -10, -10, -10, -10, -10, -10, -10, 10, - -10, -10, -10, -10, -10, -10, -10, -10, -10, -10, 10, - -10, -10, -10, -10, -10, -10, -10, -10, -10, -10, -10, 10, - -10, -10, -10, -10, -10, -10, -10, -10, -10, -10, -10, -10, 10, - -10, -10, -10, -10, -10, -10, -10, -10, -10, -10, -10, -10, -10, 10, - -10, -10, -10, -10, -10, -10, -10, -10, -10, -10, -10, -10, -10, -10, 10, - -10, -10, -10, -10, -10, -10, -10, -10, -10, -10, -10, -10, -10, -10, -10, 10, - -10, -10, -10, -10, -10, -10, -10, -10, -10, -10, -10, -10, -10, -10, -10, -10, 10, - -10, -10, -10, -10, -10, -10, -10, -10, -10, -10, -10, -10, -10, -10, -10, -10, -10, 10, - -10, -10, -10, -10, -10, -10, -10, -10, -10, -10, -10, -10, -10, -10, -10, -10, -10, -10,10, - -10, -10, -10, -10, -10, -10, -10, -10, -10, -10, -10, -10, -10, -10, -10, -10, -10, -10, -10,10, - -10, -10, -10, -10, -10, -10, -10, -10, -10, -10, -10, -10, -10, -10, -10, -10, -10, -10, -10, -10,10, - -10, -10, -10, -10, -10, -10, -10, -10, -10, -10, -10, -10, -10, -10, -10, -10, -10, -10, -10, -10, -10,10, - -10, -10, -10, -10, -10, -10, -10, -10, -10, -10, -10, -10, -10, -10, -10, -10, -10, -10, -10, -10, -10, -10,10}; - -/*These are the three structure specific matrices descibed by Luthy, R., McLachlan, A.D. and Eisenberg, D. - in Proteins 10, 229-239 (1991), taken from http://www.genome.ad.jp/dbget/dbget2.html - ID: coil :LUTR910107 - ID: alpha:LUTR910108 - ID: beta :LUTR910109 -*/ -int coil_mat []={ - 11, - 0, 0, - 3, 0, 108, - 2, 0, -5, 27, - 2, 0, 7, -9, 15, --15, 0, 3, -11, -4, 83, - 1, 0, 4, -15, -4, 2, 41, - 5, 0, 4, 108, -6, -13, -3, 55, - 27, 0, -11, 3, -2, -14, 0, -1, 49, - -9, 0, 3, 4, -21, -2, -3, -4, -7, 38, - -5, 0, 14, 7, -5, 2, 2, -6, -3, 1, 48, - -9, 0, 3, 4, -6, -16, -23, -9, -7, -1, 7, 52, - 28, 0, 1, 2, 27, 7, -5, -9, -9, 108, -15, 3, 10, -108, 0, 8, 14, 41, 0, -2, -4, -10, -3, 1, -15, 4, 58, - 10, 0, 3, -9, 3, -4, 3, 3, 8, 6, -15, 15, 5, -6, 14, - 1, 0, 10, 3, 1, 108, 5, 27, -5, -9, -9, -15, 2, 3, 2, 28, - 3, 0, -15, 3, -3, -8, -12, -5, -6, -9, -19, -14, 4, 4, -2, 7, 19, - 7, 0, 6, 3, 0, -7, 55, -18, -8, -7, 6, -20, -11, 2, -5, 4, 3, 31, --11, 0, -6, -15, -23, -6, -4, -3, 19, -15, -6, -22, 3, -11, -4, 14, -1, -7, 37, - 4, 0, 15, 3, 2, -8, -1, -12, -22, -2, 5, -19, 14, 4, -21, 4, 0, -9, -1, 129, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 4, 0, -4, 8, -3, -30, -6, 49, -10, -7, -12, -18, 3, 3, -6, -11, 3, 2, 2, -5, 0, 59, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; - -int alpha_mat []={ - 23, - 0, 0, - 5, 0, 51, - 1, 0, 4, 28, - 0, 0, 5, 1, 23, - -4, 0, 5, 1, 3, 88, - 3, 0, 5, -4, 2, 4, 37, - 7, 0, 6, 51, 3, 5, -6, 52, - 28, 0, 1, 1, 6, -21, 2, -6, 38, - 1, 0, 0, 5, 4, -2, 3, -7, -10, 34, - 4, 0, 11, 5, 4, 3, 5, -6, -19, -6, 45, - 1, 0, 4, 6, 3, -23, -5, -12, -14, -4, 9, 40, - 19, 0, 3, 0, 28, 5, 4, 1, 1, 51, -4, 1, 13, - 51, 0, 10, 11, 37, 1, 1, -6, -8, -5, 2, -22, 5, 59, - 13, 0, 1, 1, 0, 2, 4, 5, 10, 7, -5, 23, 7, 3, 11, - -1, 0, 13, 5, 3, 51, 7, 28, 4, 1, 1, -4, 1, 1, 0, 19, - 1, 0, -5, 0, -6, 2, -1, -10, -14, -9, -6, -22, 6, 3, 6, 5, 17, - 5, 0, 7, 4, 2, 3, 52, -8, -18, 3, 4, -24, 1, 4, 4, 5, 2, 14, - 1, 0, 3, -5, -5, 5, -7, -19, 7, -6, -2, -28, 4, 7, 3, 11, 2, -38, 40, - 5, 0, 23, 5, 5, 5, -6, -9, -19, -2, 3, -4, 11, 5, 4, 6, 2, -43, -8, 162, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 6, 0, 2, 10, 3, -2, -6, 38, -14, -7, -1, -19, 0, 5, 3, 1, 4, -29, -6, -19, 0, 97, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; - -int beta_mat []={ - 23, - 0, 0, - 4, 0, 97, - 1, 0, -5, 49, - -1, 0, 9, -5, 31, --16, 0, 8, -11, 6, 57, - 2, 0, 7, -16, 12, 8, 54, - 13, 0, 9, 97, 2, -30, -4, 55, - 49, 0, -11, 0, 0, -22, 2, -7, 28, --19, 0, 0, 7, -18, 2, 17, -12, -10, 34, - -5, 0, 31, 9, 1, 7, 13, -9, -7, -3, 34, - -5, 0, 2, 9, 4, -9, -12, -17, -13, 1, 14, 25, - 40, 0, 2, -1, 49, 9, -5, -19, -5, 97, -16, 0, 15, - 97, 0, 17, 31, 54, 5, 5, -13, -34, -2, 8, -11, 7, 78, - 15, 0, 0, -19, 0, 12, 2, 8, 17, 11, -21, 31, 13, 2, 31, - -2, 0, 15, 4, 2, 97, 13, 49, -5, -5, -19, -16, 1, 0, -1, 40, - 0, 0, -21, 0, -4, -6, -5, -13, -12, -5, -23, -9, 9, 7, 0, 9, 26, - 9, 0, 11, 2, 2, 2, 55, -11, -14, -28, 12, -21, -11, 1, 1, 7, 4, 21, --11, 0, 2, -21, -12, -2, -12, -7, 13, 2, -1, -13, 2, -10, 6, 31, 2, -22, 23, - 7, 0, 31, 8, 13, -7, -7, -15, -15, -3, 10, -31, 31, 6, -18, 9, 6, -17, -5, 133, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 9, 0, 12, 17, 17, -12, -9, 28, -13, -3, 0, -12, 0, 8, 4, -11, 2, -5, -5, -5, 0, 47, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; -/*EXPERIMENTAL MATRICES: Blosum Series*/ -/*Blosum Series rescaled on blosum62mt, -Lambda Blosum30mt=17 -Lambda Blosum62mt=10 -Lambda Blosum80mt=4.5 -*/ - -int rblosum30mt []={ - 52, - 33, 56, - 18, 23, 113, - 33, 56, 18, 75, - 33, 33, 37, 37, 61, - 23, 18, 18, 9, 14, 80, - 33, 33, 14, 28, 23, 18, 71, - 23, 23, 9, 23, 33, 18, 18, 99, - 33, 23, 23, 14, 18, 33, 28, 23, 61, - 33, 33, 18, 33, 42, 28, 28, 23, 23, 52, - 28, 28, 33, 28, 28, 42, 23, 28, 42, 23, 52, - 37, 23, 23, 18, 28, 23, 23, 42, 37, 42, 42, 61, - 33, 52, 28, 37, 28, 28, 33, 28, 33, 33, 23, 33, 71, - 28, 23, 18, 28, 37, 14, 28, 37, 18, 37, 18, 14, 18, 85, - 37, 28, 23, 28, 42, 18, 23, 33, 23, 33, 23, 28, 28, 33, 71, - 28, 23, 23, 28, 28, 28, 23, 28, 18, 37, 23, 33, 23, 28, 47, 71, - 37, 33, 23, 33, 33, 28, 33, 28, 28, 33, 23, 23, 33, 28, 28, 28, 52, - 37, 33, 23, 28, 23, 23, 23, 23, 33, 28, 33, 33, 37, 33, 33, 18, 42, 56, - 37, 23, 23, 23, 18, 37, 18, 18, 52, 23, 37, 33, 23, 14, 18, 28, 28, 37, 56, - 9, 9, 23, 14, 28, 37, 37, 9, 18, 23, 23, 18, 0, 18, 28, 33, 18, 9, 18, 128, - 0, -1, -2, -1, -1, -1, -1, -1, 0, 0, 0, 0, 0, -1, 0, -1, 0, 0, 0, -2, -1, - -4, -3, -6, -1, -2, 3, -3, 0, -1, -1, 3, -1, -4, -2, -1, 0, -2, -1, 1, 5, -1, 9, - 0, 0, 0, 0, 5, -4, -2, 0, -3, 1, -1, -1, -1, 0, 4, 0, -1, -1, -3, -1, 0, -2, 4, - - -}; - - - -int rblosum62mt[]={ - 80, - 20, 80, - 40, 10, 130, - 20, 80, 10, 100, - 30, 50, 0, 60, 90, - 20, 10, 20, 10, 10, 100, - 40, 30, 10, 30, 20, 10, 100, - 20, 40, 10, 30, 40, 30, 20, 120, - 30, 10, 30, 10, 10, 40, 0, 10, 80, - 30, 40, 10, 30, 50, 10, 20, 30, 10, 90, - 30, 0, 30, 0, 10, 40, 0, 10, 60, 20, 80, - 30, 10, 30, 10, 20, 40, 10, 20, 50, 30, 60, 90, - 20, 70, 10, 50, 40, 10, 40, 50, 10, 40, 10, 20, 100, - 30, 20, 10, 30, 30, 0, 20, 20, 10, 30, 10, 20, 20, 110, - 30, 40, 10, 40, 60, 10, 20, 40, 10, 50, 20, 40, 40, 30, 90, - 30, 30, 10, 20, 40, 10, 20, 40, 10, 60, 20, 30, 40, 20, 50, 90, - 50, 40, 30, 40, 40, 20, 40, 30, 20, 40, 20, 30, 50, 30, 40, 30, 80, - 40, 30, 30, 30, 30, 20, 20, 20, 30, 30, 30, 30, 40, 30, 30, 30, 50, 90, - 40, 10, 30, 10, 20, 30, 10, 10, 70, 20, 50, 50, 10, 20, 20, 10, 20, 40, 80, - 10, 0, 20, 0, 10, 50, 20, 20, 10, 10, 20, 30, 0, 0, 20, 10, 10, 20, 10, 150, - 0, -1, -2, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -2, -1, -1, 0, 0, -1, -2, -1, - -2, -3, -2, -3, -2, 3, -3, 2, -1, -2, -1, -1, -2, -3, -1, -2, -2, -2, -1, 2, -1, 7, - -1, 1, -3, 1, 4, -3, -2, 0, -3, 1, -3, -1, 0, -1, 3, 0, 0, -1, -2, -3, -1, -2, 4, - - }; -int rblosum80mt[]={ -77, - 25, 72, - 36, 10, 108, - 25, 72, 5, 93, - 31, 46, 5, 51, 82, - 20, 10, 20, 10, 10, 93, - 41, 31, 10, 25, 20, 10, 87, - 25, 36, 5, 31, 41, 31, 20, 103, - 25, 10, 31, 5, 10, 36, 5, 10, 77, - 36, 36, 10, 31, 46, 15, 25, 36, 15, 82, - 25, 5, 25, 5, 10, 41, 5, 15, 51, 20, 72, - 31, 15, 25, 10, 20, 41, 15, 20, 51, 25, 56, 87, - 25, 67, 15, 51, 36, 10, 36, 46, 10, 41, 10, 20, 87, - 36, 20, 10, 25, 31, 10, 15, 20, 15, 31, 15, 20, 20, 103, - 31, 36, 15, 36, 56, 15, 20, 46, 15, 51, 20, 36, 41, 25, 87, - 25, 31, 10, 25, 36, 15, 20, 41, 15, 56, 20, 25, 36, 25, 46, 87, - 51, 41, 31, 36, 36, 20, 36, 31, 20, 36, 20, 25, 46, 31, 36, 31, 77, - 41, 36, 31, 31, 31, 20, 25, 25, 31, 36, 25, 36, 41, 25, 36, 31, 51, 82, - 36, 10, 31, 10, 20, 31, 10, 15, 62, 20, 46, 46, 15, 20, 20, 20, 25, 41, 77, - 15, 0, 15, 0, 10, 41, 10, 20, 15, 10, 20, 25, 5, 5, 20, 15, 10, 15, 15, 124, - -1, -3, -4, -3, -2, -3, -3, -2, -2, -2, -2, -2, -2, -3, -2, -2, -1, -1, -2, -5, -2, - -4, -5, -5, -6, -5, 4, -6, 3, -3, -4, -2, -3, -4, -6, -3, -4, -3, -3, -3, 3, -3, 11, - -2, 0, -7, 1, 6, -6, -4, 0, -6, 1, -5, -3, -1, -2, 5, 0, -1, -2, -4, -5, -1, -4, 6, -}; -int rpam120mt[]={ - 52, - 38, 57, - 24, 9, 81, - 38, 57, 4, 62, - 38, 52, 4, 52, 62, - 19, 14, 9, 4, 4, 77, - 43, 38, 19, 38, 33, 14, 62, - 24, 43, 19, 38, 33, 24, 19, 72, - 33, 24, 24, 24, 24, 38, 19, 19, 67, - 28, 38, 4, 33, 33, 4, 24, 28, 24, 62, - 24, 19, 4, 14, 19, 38, 14, 24, 43, 19, 62, - 28, 19, 9, 19, 24, 33, 19, 19, 43, 38, 52, 77, - 33, 52, 14, 48, 43, 19, 38, 48, 28, 43, 19, 24, 57, - 43, 28, 19, 24, 28, 14, 28, 33, 24, 28, 24, 24, 28, 67, - 33, 38, 4, 43, 48, 9, 24, 52, 24, 38, 28, 33, 38, 38, 67, - 24, 28, 19, 24, 24, 14, 19, 43, 28, 48, 19, 33, 33, 33, 43, 67, - 43, 38, 38, 38, 33, 24, 43, 28, 28, 33, 19, 28, 43, 43, 28, 33, 52, - 43, 38, 24, 33, 28, 19, 33, 24, 38, 33, 24, 33, 38, 33, 28, 28, 48, 57, - 38, 24, 24, 24, 24, 24, 28, 24, 52, 19, 43, 43, 24, 28, 24, 24, 28, 38, 62, - 4, 9, 0, 0, 0, 33, 0, 24, 9, 14, 24, 9, 19, 4, 9, 43, 28, 9, 0, 96, - -1, -1, -4, -2, -1, -3, -2, -2, -1, -2, -2, -2, -1, -2, -1, -2, -1, -1, -1, -5, -2, - -4, -3, -1, -5, -5, 4, -6, -1, -2, -5, -2, -4, -2, -6, -5, -5, -3, -3, -3, -2, -3, 8, - -1, 2, -7, 3, 4, -6, -2, 1, -3, -1, -3, -2, 0, -1, 4, -1, -1, -2, -3, -7, -1, -5, 4, -}; -int rpam160mt[]={ - 47, - 36, 52, - 26, 15, 83, - 36, 52, 10, 57, - 36, 47, 10, 52, 57, - 20, 15, 10, 5, 10, 73, - 41, 36, 20, 36, 36, 15, 57, - 26, 41, 20, 36, 36, 26, 20, 68, - 31, 26, 26, 20, 26, 36, 20, 20, 62, - 26, 36, 10, 36, 31, 10, 26, 31, 26, 57, - 26, 15, 5, 15, 20, 41, 15, 26, 47, 20, 62, - 31, 20, 10, 20, 26, 36, 20, 20, 47, 36, 52, 73, - 36, 47, 15, 47, 41, 20, 36, 47, 26, 41, 20, 26, 52, - 41, 31, 20, 26, 31, 15, 31, 31, 26, 26, 20, 26, 31, 62, - 31, 41, 10, 41, 47, 10, 26, 47, 26, 36, 26, 31, 36, 36, 62, - 26, 31, 20, 26, 26, 15, 20, 41, 26, 52, 20, 31, 31, 31, 41, 68, - 41, 36, 36, 36, 36, 20, 41, 31, 26, 31, 20, 26, 41, 41, 31, 31, 47, - 41, 36, 26, 31, 31, 20, 31, 26, 36, 36, 26, 31, 36, 36, 31, 31, 41, 52, - 36, 26, 26, 20, 26, 26, 26, 26, 52, 20, 41, 41, 26, 26, 26, 20, 31, 36, 57, - 10, 10, 0, 5, 0, 31, 0, 20, 10, 15, 26, 15, 15, 10, 10, 41, 26, 10, 5, 99, - 0, -1, -3, -1, -1, -3, -1, -1, -1, -1, -2, -1, 0, -1, -1, -1, 0, 0, -1, -4, -1, - -3, -3, 0, -4, -4, 5, -5, 0, -2, -4, -2, -3, -2, -5, -4, -4, -3, -3, -3, -1, -3, 8, - 0, 2, -5, 2, 3, -5, -1, 1, -2, 0, -3, -2, 1, -1, 3, 0, -1, -1, -2, -6, -1, -4, 3, -}; -int rpam250mt[]={ - 42, - 34, 47, - 25, 17, 85, - 34, 47, 12, 51, - 34, 47, 12, 47, 51, - 21, 17, 17, 8, 12, 72, - 38, 34, 21, 38, 34, 12, 55, - 30, 38, 21, 38, 38, 25, 25, 60, - 30, 25, 25, 25, 25, 38, 21, 25, 55, - 30, 38, 12, 34, 34, 12, 25, 34, 25, 55, - 25, 21, 8, 17, 21, 42, 17, 25, 42, 21, 60, - 30, 25, 12, 21, 25, 34, 21, 25, 42, 34, 51, 60, - 34, 42, 17, 42, 38, 21, 34, 42, 25, 38, 21, 25, 42, - 38, 30, 21, 30, 30, 12, 34, 34, 25, 30, 21, 25, 34, 60, - 34, 38, 12, 42, 42, 12, 30, 47, 25, 38, 25, 30, 38, 34, 51, - 25, 30, 17, 30, 30, 17, 21, 42, 25, 47, 21, 34, 34, 34, 38, 60, - 38, 34, 34, 34, 34, 21, 38, 30, 30, 34, 21, 25, 38, 38, 30, 34, 42, - 38, 34, 25, 34, 34, 21, 34, 30, 34, 34, 25, 30, 34, 34, 30, 30, 38, 47, - 34, 25, 25, 25, 25, 30, 30, 25, 51, 25, 42, 42, 25, 30, 25, 25, 30, 34, 51, - 8, 12, 0, 4, 4, 34, 4, 21, 12, 21, 25, 17, 17, 8, 12, 42, 25, 12, 8, 107, - 0, -1, -3, -1, -1, -2, -1, -1, -1, -1, -1, -1, 0, -1, -1, -1, 0, 0, -1, -4, -1, - -3, -3, 0, -4, -4, 7, -5, 0, -1, -4, -1, -2, -2, -5, -4, -4, -3, -3, -2, 0, -2, 10, - 0, 2, -5, 3, 3, -5, 0, 2, -2, 0, -3, -2, 1, 0, 3, 0, 0, -1, -2, -6, -1, -4, 3, -}; -int rpam350mt[]={ - 39, - 36, 43, - 26, 16, 92, - 36, 43, 13, 46, - 36, 43, 13, 46, 46, - 19, 16, 16, 13, 13, 76, - 39, 36, 19, 36, 36, 13, 49, - 29, 36, 19, 36, 36, 26, 26, 56, - 33, 26, 23, 26, 26, 39, 26, 26, 49, - 29, 36, 13, 36, 33, 13, 29, 36, 26, 49, - 26, 19, 9, 19, 19, 43, 19, 26, 46, 23, 59, - 29, 26, 13, 23, 26, 36, 23, 26, 43, 33, 49, 53, - 33, 39, 19, 39, 39, 19, 36, 39, 26, 36, 23, 26, 39, - 36, 33, 23, 33, 33, 16, 33, 33, 26, 29, 23, 26, 33, 53, - 33, 39, 13, 39, 43, 16, 29, 43, 26, 36, 26, 29, 36, 36, 46, - 29, 33, 19, 29, 33, 16, 26, 39, 26, 46, 23, 33, 36, 33, 39, 56, - 36, 36, 33, 36, 33, 19, 36, 29, 29, 33, 23, 26, 36, 36, 33, 33, 36, - 36, 33, 26, 33, 33, 23, 36, 29, 33, 33, 26, 29, 36, 36, 33, 29, 36, 39, - 33, 26, 26, 26, 26, 29, 29, 26, 46, 26, 43, 39, 26, 29, 26, 23, 29, 33, 49, - 9, 13, 0, 6, 6, 36, 6, 23, 13, 19, 26, 16, 16, 9, 16, 46, 23, 13, 9, 122, - 0, 0, -3, -1, 0, -2, -1, 0, 0, -1, -1, 0, 0, 0, 0, -1, 0, 0, 0, -5, -1, - -4, -4, 1, -5, -5, 11, -6, 0, 0, -5, 0, -2, -3, -6, -5, -5, -3, -3, -2, 1, -2, 14, - 0, 2, -6, 3, 3, -6, 0, 2, -2, 1, -3, -2, 2, 0, 3, 1, 0, 0, -2, -7, 0, -5, 3, -}; -int ralpha_mat []={ - 42, - 27, 27, - 31, 27, 60, - 28, 27, 30, 46, - 27, 27, 31, 28, 42, - 25, 27, 31, 28, 29, 84, - 29, 27, 31, 25, 29, 30, 51, - 32, 27, 31, 60, 29, 31, 23, 61, - 46, 27, 28, 28, 31, 14, 29, 23, 52, - 28, 27, 27, 31, 30, 26, 29, 23, 21, 49, - 30, 27, 35, 31, 30, 29, 31, 23, 15, 23, 57, - 28, 27, 30, 31, 29, 12, 24, 20, 18, 25, 33, 53, - 40, 27, 29, 27, 46, 31, 30, 28, 28, 60, 25, 28, 36, - 60, 27, 34, 35, 51, 28, 28, 23, 22, 24, 29, 13, 31, 66, - 36, 27, 28, 28, 27, 29, 30, 31, 34, 32, 24, 42, 32, 29, 35, - 27, 27, 36, 31, 29, 60, 32, 46, 30, 28, 28, 25, 28, 28, 27, 40, - 28, 27, 24, 27, 23, 29, 27, 21, 18, 22, 23, 13, 31, 29, 31, 31, 38, - 31, 27, 32, 30, 29, 29, 61, 22, 16, 29, 30, 12, 28, 30, 30, 31, 29, 36, - 28, 27, 29, 24, 24, 31, 23, 15, 32, 23, 26, 9, 30, 32, 29, 35, 29, 3, 53, - 31, 27, 42, 31, 31, 31, 23, 22, 15, 26, 29, 25, 35, 31, 30, 31, 29, 0, 22, 132, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 6, 0, 2, 10, 3, -2, -6, 38, -14, -7, -1, -19, 0, 5, 3, 1, 4, -29, -6, -19, 0, 97, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -}; - -int rbeta_mat[]={ - 44, - 26, 26, - 29, 26, 102, - 27, 26, 22, 64, - 25, 26, 33, 22, 50, - 14, 26, 32, 17, 31, 70, - 28, 26, 31, 14, 35, 32, 68, - 36, 26, 33, 102, 28, 3, 23, 69, - 64, 26, 17, 26, 26, 9, 28, 21, 48, - 11, 26, 26, 31, 12, 28, 39, 17, 18, 52, - 22, 26, 50, 33, 27, 31, 36, 19, 21, 24, 52, - 22, 26, 28, 33, 29, 19, 17, 13, 16, 27, 37, 45, - 57, 26, 28, 25, 64, 33, 22, 11, 22, 102, 14, 26, 38, -102, 26, 39, 50, 68, 30, 30, 16, 0, 24, 32, 17, 31, 87, - 38, 26, 26, 11, 26, 35, 28, 32, 39, 35, 10, 50, 36, 28, 50, - 24, 26, 38, 29, 28, 102, 36, 64, 22, 22, 11, 14, 27, 26, 25, 57, - 26, 26, 10, 26, 23, 21, 22, 16, 17, 22, 8, 19, 33, 31, 26, 33, 46, - 33, 26, 35, 28, 28, 28, 69, 17, 15, 4, 35, 10, 17, 27, 27, 31, 29, 42, - 17, 26, 28, 10, 17, 24, 17, 21, 36, 28, 25, 16, 28, 18, 31, 50, 28, 9, 44, - 31, 26, 50, 32, 36, 21, 21, 14, 14, 24, 34, 2, 50, 31, 12, 33, 31, 13, 22, 130, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 9, 0, 12, 17, 17, -12, -9, 28, -13, -3, 0, -12, 0, 8, 4, -11, 2, -5, -5, -5, 0, 47, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -}; -int rcoil_mat[]={ - 36, - 27, 27, - 29, 27, 124, - 28, 27, 22, 51, - 28, 27, 33, 18, 40, - 13, 27, 29, 17, 23, 101, - 27, 27, 30, 13, 23, 28, 64, - 31, 27, 30, 124, 21, 15, 24, 76, - 51, 27, 17, 29, 25, 14, 27, 26, 71, - 18, 27, 29, 30, 8, 25, 24, 23, 20, 61, - 22, 27, 39, 33, 22, 28, 28, 21, 24, 27, 70, - 18, 27, 29, 30, 21, 12, 6, 18, 20, 26, 33, 73, - 52, 27, 27, 28, 51, 33, 22, 18, 18, 124, 13, 29, 36, -124, 27, 34, 39, 64, 27, 25, 23, 18, 24, 27, 13, 30, 79, - 36, 27, 29, 18, 29, 23, 29, 29, 34, 32, 13, 40, 31, 21, 39, - 27, 27, 36, 29, 27, 124, 31, 51, 22, 18, 18, 13, 28, 29, 28, 52, - 29, 27, 13, 29, 24, 19, 16, 22, 21, 18, 9, 14, 30, 30, 25, 33, 44, - 33, 27, 32, 29, 27, 20, 76, 10, 19, 20, 32, 9, 17, 28, 22, 30, 29, 55, - 17, 27, 21, 13, 6, 21, 23, 24, 44, 13, 21, 7, 29, 17, 23, 39, 26, 20, 60, - 30, 27, 40, 29, 28, 19, 26, 16, 7, 25, 31, 9, 39, 30, 8, 30, 27, 18, 26, 143, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 4, 0, -4, 8, -3, -30, -6, 49, -10, -7, -12, -18, 3, 3, -6, -11, 3, 2, 2, -5, 0, 59, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -}; -int tmpam250mt[]={ - 2, - 0, 0, - 2, 0, 6, - -1, 0, 0, 12, - 0, 0, 6, -1, 13, - -3, 0, 1, -3, 3, 5, - 1, 0, 3, -3, 1, 7, 6, - 6, 0, 2, 6, 0, -1, -3, 11, - 12, 0, -3, -2, -2, -5, 5, 0, 2, - -1, 0, 0, 3, -1, 0, 3, -3, -3, 12, - 0, 0, 11, 6, 3, 5, 3, -3, -2, -2, 3, - -1, 0, 2, 2, -1, -2, -1, -3, -4, -1, 9, 3, - 7, 0, 1, 0, 12, 6, 0, -1, -1, 6, -3, -2, 11, - 6, 0, 8, 11, 6, 0, 2, -4, -1, -2, 3, -4, 3, 11, - 11, 0, -2, -1, 0, 1, 2, 1, 8, 7, -3, 13, 6, 0, 11, - -1, 0, 11, 2, 1, 6, 6, 12, 0, -1, -1, -3, -1, -2, 0, 7, - -2, 0, -3, 0, -3, -3, -3, -4, -2, -3, -3, -4, 2, 2, -2, 6, 3, - 6, 0, 7, 2, 5, -2, 11, -2, -5, -1, 6, -6, -3, -1, 3, 3, 1, 3, - -3, 0, 0, -3, -1, 0, -3, -2, 1, -3, 4, -6, 2, 1, 3, 11, 0, -3, 2, - 3, 0, 13, 1, 3, -2, 0, -4, -4, -2, 1, 1, 11, 2, -1, 2, -1, -4, -1, 12, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 2, 0, 1, 8, 3, -4, -3, 2, -4, -3, -1, -4, 0, 0, -1, -3, 1, 5, 0, -3, 0, 10, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; -int rtmpam250mt[]={ - 32, - 24, 24, - 32, 24, 49, - 20, 24, 24, 73, - 24, 24, 49, 20, 77, - 12, 24, 28, 12, 36, 45, - 28, 24, 36, 12, 28, 53, 49, - 49, 24, 32, 49, 24, 20, 12, 69, - 73, 24, 12, 16, 16, 4, 45, 24, 32, - 20, 24, 24, 36, 20, 24, 36, 12, 12, 73, - 24, 24, 69, 49, 36, 45, 36, 12, 16, 16, 36, - 20, 24, 32, 32, 20, 16, 20, 12, 8, 20, 61, 36, - 53, 24, 28, 24, 73, 49, 24, 20, 20, 49, 12, 16, 69, - 49, 24, 57, 69, 49, 24, 32, 8, 20, 16, 36, 8, 36, 69, - 69, 24, 16, 20, 24, 28, 32, 28, 57, 53, 12, 77, 49, 24, 69, - 20, 24, 69, 32, 28, 49, 49, 73, 24, 20, 20, 12, 20, 16, 24, 53, - 16, 24, 12, 24, 12, 12, 12, 8, 16, 12, 12, 8, 32, 32, 16, 49, 36, - 49, 24, 53, 32, 45, 16, 69, 16, 4, 20, 49, 0, 12, 20, 36, 36, 28, 36, - 12, 24, 24, 12, 20, 24, 12, 16, 28, 12, 40, 0, 32, 28, 36, 69, 24, 12, 32, - 36, 24, 77, 28, 36, 16, 24, 8, 8, 16, 28, 28, 69, 32, 20, 32, 20, 8, 20, 73, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 2, 0, 1, 8, 3, -4, -3, 2, -4, -3, -1, -4, 0, 0, -1, -3, 1, 5, 0, -3, 0, 10, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -}; -/*********************************COPYRIGHT NOTICE**********************************/ -/*© Centro de Regulacio Genomica */ -/*and */ -/*Cedric Notredame */ -/*Tue Oct 27 10:12:26 WEST 2009. */ -/*All rights reserved.*/ -/*This file is part of T-COFFEE.*/ -/**/ -/* T-COFFEE is free software; you can redistribute it and/or modify*/ -/* it under the terms of the GNU General Public License as published by*/ -/* the Free Software Foundation; either version 2 of the License, or*/ -/* (at your option) any later version.*/ -/**/ -/* T-COFFEE is distributed in the hope that it will be useful,*/ -/* but WITHOUT ANY WARRANTY; without even the implied warranty of*/ -/* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the*/ -/* GNU General Public License for more details.*/ -/**/ -/* You should have received a copy of the GNU General Public License*/ -/* along with Foobar; if not, write to the Free Software*/ -/* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA*/ -/*............................................... |*/ -/* If you need some more information*/ -/* cedric.notredame@europe.com*/ -/*............................................... |*/ -/**/ -/**/ -/* */ -/*********************************COPYRIGHT NOTICE**********************************/ diff --git a/binaries/src/tcoffee/t_coffee_source/parttree.c b/binaries/src/tcoffee/t_coffee_source/parttree.c deleted file mode 100644 index 8a6a413..0000000 --- a/binaries/src/tcoffee/t_coffee_source/parttree.c +++ /dev/null @@ -1,1040 +0,0 @@ -#include -#include -#include -#include -#include -// #include - -#include "fast_tree_header.h" -#include "io_lib_header.h" -#include "util_lib_header.h" -#include "define_header.h" -#include "dp_lib_header.h" -//TODO: -change kick-out value -// -pass arrays in partTree_r - -/*! - * \file parttree.c - * \brief Source code for PartTree algorithm. - */ - - - -#define ENLARGEMENT_PER_STEP 50 - - - -void -print_fastal_tree(Tree_fastal *tree, - int pos, - FILE *tree_file, - int num_seq) -{ - if (tree[pos].left >= num_seq) - print_fastal_tree(tree, tree[pos].left-num_seq, tree_file, num_seq); - if (tree[pos].right >= num_seq) - print_fastal_tree(tree, tree[pos].right-num_seq, tree_file, num_seq); - - fprintf(tree_file, "%i %i %i\n", tree[pos].left, tree[pos].right, tree[pos].name); -} - - - - - -PartTree_param *param_set; - - -//********************** UPGMA ***************************** - -/** - * Function to write tree to file in fastal_format. Leafs in \a tree are leafs in the complete tree as well. - * - * \param tree The root node of the (sub)tree. - * \param param_set Parameter of PartTree. - * \param start_write_here Current node to write into. - * \return position in tree - * \see tree_process - */ -int -tree_process_simple(NT_node tree, - PartTree_param *param_set, - int start_write_here) -{ - if (tree->isseq) - { -// printf("T: %s\n", tree->name); - return atoi(tree->name); - } - else - { - Tree_fastal *tree_flat = ¶m_set->tree[start_write_here]; - tree_flat->name = start_write_here +param_set->num_sequences; - if (start_write_here == param_set->pos_tree) - { - ++param_set->pos_tree; - } - start_write_here = param_set->pos_tree; - int left = tree_process_simple(tree->left, param_set, start_write_here); - start_write_here = param_set->pos_tree; - int right = tree_process_simple(tree->right, param_set, start_write_here); - tree_flat->index = NULL; - tree_flat->right = right; - tree_flat->left = left; - return tree_flat->name; - } -} - - - - -/** -* Function to write tree to file in fastal_format. Leafs in \a tree do not need to be leafs in the complete tree. -* -* \param tree The root node of the (sub)tree. -* \param param_set Parameter of PartTree. -* \param clusters Number of sequences in each cluster. -* \param subgroup The sequences for each cluster. -* \param start_write_here Current node to write into. -* \return position in tree -* \see tree_process_simple -*/ -int -tree_process(NT_node tree, - PartTree_param *param_set, - int *clusters, - int *subgroup, - int start_write_here) -{ - if (tree->isseq) - { - int node_num = atoi(tree->name); - int num_in_sub = clusters[node_num+1] - clusters[node_num]; -// printf("NUM: %i %i %i %i\n",node_num, num_in_sub, clusters[node_num+1], clusters[node_num]); - if (num_in_sub > 1) - { - Tree_fastal *tree_flat = ¶m_set->tree[start_write_here]; - tree_flat->name = start_write_here +param_set->num_sequences; - if (start_write_here == param_set->pos_tree) - { - ++param_set->pos_tree; - } - tree_flat->left = -1; - tree_flat->right = -1; - tree_flat->index = &subgroup[clusters[node_num]]; - - tree_flat->num_leafs = num_in_sub; - return tree_flat->name; - } - else - { - return(subgroup[clusters[node_num]]); - } - } - else - { -// printf("TREEPOS: %i\n",param_set->pos_tree); - Tree_fastal *tree_flat = ¶m_set->tree[start_write_here]; - tree_flat->name = start_write_here +param_set->num_sequences; - if (start_write_here == param_set->pos_tree) - { - ++param_set->pos_tree; - } - start_write_here = param_set->pos_tree; - int left = tree_process(tree->left, param_set, clusters, subgroup, start_write_here); - start_write_here = param_set->pos_tree; - int right = tree_process(tree->right, param_set, clusters, subgroup, start_write_here); - tree_flat->index = NULL; - tree_flat->right = right; - tree_flat->left = left; - return tree_flat->name; - } -} - - -/** -* \brief Calculates tree out of distance matrix. -* -* Calculates the upgma tree using a given distance matrix. -* \param mat The distance matrix. -* \param nseq Number of sequences. -* \param fname Filename for temporary storage. -* \param seqnames Names of the sequences. -* \return The calculated UPGMA Tree. -*/ -NT_node ** int_dist2upgma_tree_fastal (int **mat, int nseq, char *fname, char **seqnames) -{ - NT_node *NL, T; - int a, n, *used; - int tot_node; - if (upgma_node_heap (NULL)) - { - printf_exit ( EXIT_FAILURE,stderr, "\nERROR: non empty heap in upgma [FATAL]"); - } - NL=vcalloc (nseq, sizeof (NT_node)); - - for (a=0; aname, "%s", seqnames[a]); - NL[a]->isseq=1; - NL[a]->leaf=1; - } - used=vcalloc ( nseq, sizeof (int)); - n=nseq; - while (n>1) - { - T=upgma_merge(mat, NL,used, &n, nseq); - } - vfree (used); - vfclose (print_tree (T, "newick", vfopen (fname, "w"))); - upgma_node_heap (NULL); - vfree (NL); - - return read_tree (fname,&tot_node, nseq, seqnames); -} - - - - -//Part_Tree - -/*! -* \brief Constructs a guide tree for multiple sequence alignment. -* -* This algorithm is an implementation of the partTree algorithm (PartTree: an algorithm to build an approximate tree from a large number of unaligned -* sequences. Katoh et al. 2007). -* \param sequence_f Filename of file with sequences. -* \param tree_f Filename of file where the tree will be stored. -* \param ktup Size of the ktups. -* \param subgroup Parameter for subgroupsize. -*/ -void -make_partTree(char *sequence_f, - char *tree_f, - int ktup, - int subgroup) -{ - param_set = vcalloc(1,sizeof(PartTree_param)); - param_set->ktup = ktup; - param_set->subgroup = subgroup; - - long *file_positions = NULL; - long **tmp1 = &file_positions; - int *seq_lengths = NULL; - int **tmp2 = &seq_lengths; - - //make index - int number_of_sequences = make_pos_len_index_of_file(sequence_f, "KTUP_table", tmp1, tmp2, ktup, "DNA"); - param_set->num_sequences = number_of_sequences; - param_set->ktup_positions = file_positions; - param_set->seq_lengths = seq_lengths; - param_set->threshold = 0.01; - param_set->ktup_table_f = fopen("KTUP_table","r"); - - Tree_fastal *tree = vcalloc(number_of_sequences-1,sizeof(Tree_fastal)); - param_set->tree = tree; - - int i; - partTree_r(param_set); -// for (i = 0; i < number_of_sequences-1; ++i) -// { -// printf("%i %i %i\n", tree[i].left, tree[i].right, tree[i].name); -// } - FILE * tree_file = fopen(tree_f,"w"); - print_fastal_tree(tree, 0, tree_file, number_of_sequences); - fclose(tree_file); - vfree(tree); -} - - -/** -* Filters seed set. -* -* \param sequence_group Sequences to filter. -* \param dist_mat The distance matrix. -* \param seed_set_cleaned ordered_seed_set. -* \param param_set Parameters for PartTree algorithm. -* \return number in the filtered set. -*/ -int -filter(int *sequence_group, - int **dist_mat, - int *seed_set_cleaned, - PartTree_param *param_set) -{ - int i, j; - int num_in_subgroup = param_set->subgroup; - int *seq_lengths = param_set->seq_lengths; - int num_in_clean = 0; - double threshold = param_set->threshold; -// printf("threshold: %f\n", threshold); - double min; - for (i = 0; i < num_in_subgroup; ++i) - { - if (!seed_set_cleaned[i]) - continue; - for (j = i+1; j < num_in_subgroup; ++j) - { - min = MIN(seq_lengths[sequence_group[i]], seq_lengths[sequence_group[j]]); -// printf("MINIMUM: %i\n",min); - min = (threshold * min); -// printf("MINIMUM: %f\n",min); - if (seed_set_cleaned[j] &&(dist_mat[i][j] < min)) - { - if (seq_lengths[sequence_group[i]] < seq_lengths[sequence_group[j]]) - { - seed_set_cleaned[i] = 0; - break; - } - else - seed_set_cleaned[j] = 0; - } - } - } - - for (i = 0; i < num_in_subgroup; ++i) - { - num_in_clean += seed_set_cleaned[i]; - } - int max = num_in_subgroup -1; - i = 0; - int tmp; -// printf("CLEAN: %i\n", num_in_clean); - while (i < num_in_clean) - { - if (seed_set_cleaned[i]) - { - ++i; - } - else - { - seed_set_cleaned[i] = seed_set_cleaned[max]; - seed_set_cleaned[max] = 0; - tmp = sequence_group[i]; - sequence_group[i] = sequence_group[max]; - sequence_group[max] = tmp; - --max; - } - } - return num_in_clean; -} - - - - - -/*! -* \brief Function to create a tree using the PartTree algorithm. -* -* \param param_set A \a PartTree_param object containing all necessary parameters and the data. -* \return The node_number. -*/ -void -partTree_r(PartTree_param *param_set) -{ - - int num_of_tree_nodes = param_set->num_sequences-1; - int loop_tree_node; - - Tree_fastal *tree = param_set->tree; -// int this_node = param_set->pos_tree; - - int i; - int tsize = param_set->tsize; - - - //get some memory - short *table1 = vcalloc(tsize, sizeof(short)); - short *table2 = vcalloc(tsize, sizeof(short)); - int *seed_set = vcalloc(param_set->subgroup, sizeof(int)); - char **names = declare_char(param_set->subgroup, 8); - int **dist_mat = declare_int(param_set->subgroup, param_set->subgroup); - int **dist_mat2 = declare_int(param_set->subgroup, param_set->subgroup); - char * file_name_tmp = vtmpnam(NULL); - int *seed_set_cleaned = vcalloc(param_set->subgroup, sizeof(int)); - FILE *table_f = param_set->ktup_table_f; - long *file_positions = param_set->ktup_positions; - int max_n_group = param_set->subgroup; - int num_in_subgroup = param_set->subgroup; - int *seq_lengths = param_set->seq_lengths; - int *clusters = vcalloc(param_set->subgroup+1, sizeof(int)); - int *min_dist = vcalloc(param_set->num_sequences, sizeof(int)); - int *belongs_to = vcalloc(param_set->num_sequences, sizeof(int)); - - - - - - - //Prepare first node - - tree[0].index = vcalloc(param_set->num_sequences,sizeof(int)); - int *index = tree[0].index; - for (i = 0; i< param_set->num_sequences; ++i) - index[i] = i; - tree[0].name = param_set->pos_tree +param_set->num_sequences; - - tree[0].num_leafs = param_set->num_sequences; - int *sequence_group2 = vcalloc(param_set->num_sequences,sizeof(int)); - - Tree_fastal *current_node; - for (loop_tree_node = 0; loop_tree_node < num_of_tree_nodes; ++loop_tree_node) - { -// printf("ROUND: %i\n", loop_tree_node); - current_node = &tree[loop_tree_node]; - index= current_node->index; - if (current_node->index == NULL) - { - continue; - } - int num_sequences = current_node->num_leafs; - - //if number of sequences in this group smaller than number subgoup size: make tree, finisch - if (num_sequences <= max_n_group) - { - int j; - dist_mat = make_distance_matrix(table_f, file_positions, index, num_sequences, dist_mat); - for (i = 0; i < num_sequences; ++i) - { - sprintf(names[i],"%i", current_node->index[i]); - } - NT_node **tree= (int_dist2upgma_tree_fastal (dist_mat, num_sequences, file_name_tmp , names)); - tree_process_simple(tree[0][0], param_set,loop_tree_node); - continue; - } - - - for (i = 0; i < num_in_subgroup; ++i) - { - seed_set_cleaned[i] = 1; - } - - //finde longest sequence and put into the first field - - int index_longest = 0; - int length_of_longest = 0; - - for(i = 0; i < num_sequences; ++i) - { - if (seq_lengths[index[i]] > length_of_longest) - { - index_longest = i; - length_of_longest = seq_lengths[index[i]]; - } - } - int tmp = index[index_longest]; - index[index_longest] = index[0]; - index[0] = tmp; - - //distance of longest to rest - int seq_index = 1; - int min= euclidean_dist(table_f, file_positions[index[0]], file_positions[index[1]], table1, table2, param_set->tsize); - for (i = 2; i < num_sequences; ++i) - { - tmp = euclidean_dist_half(table_f, file_positions[index[i]], table1, table2, param_set->tsize); - if (tmp < min) - { - min = tmp; - seq_index = i; - } - } - - //get the new seed_set in the first n spaces - tmp = index[1]; - index[1] = index[seq_index]; - index[seq_index] = tmp; - int r,j; - num_in_subgroup = param_set->subgroup; - - - for (i = 2; i < num_in_subgroup; ++i) - { - r = i + rand() / ( RAND_MAX / ( num_sequences-i) + 1 ); -// printf("RANDOM: %i\n",r); - tmp = index[r]; - index[r] = index[i]; - index[i] = tmp; - } - - //Calculate matrix - dist_mat = make_distance_matrix(table_f, file_positions, index, param_set->subgroup, dist_mat); - - //Filter out sequences that are to similar & reorder - - NT_node **upgma_tree; - - - int num_in_clean = filter(index, dist_mat, seed_set_cleaned, param_set); -// if (num_in_clean == 1) -// { -// int j; -// // dist_mat = make_distance_matrix(table_f, file_positions, index, upgma_tree, dist_mat); -// for (i = 0; i < param_set->subgroup; ++i) -// { -// sprintf(names[i],"%i", current_node->index[i]); -// } -// upgma_tree= (int_dist2upgma_tree_fastal (dist_mat, param_set->subgroup, file_name_tmp , names)); -// tree_process_simple(upgma_tree[0][0], param_set,loop_tree_node); -// continue; -// } -// else -// { - - if (num_in_clean ==1) - { - num_in_clean = 2; - seed_set_cleaned[1] = 1; - } - //make_tree - int col = 0; - int row = 0; - for (i = 0; i < num_in_subgroup; ++i) - { - if (seed_set_cleaned[i]) - { - row = col+1; - for (j = i+1; j < num_in_subgroup; ++j) - { - if (seed_set_cleaned[j]) - { - dist_mat2[row][col] = dist_mat2[col][row] = dist_mat[i][j]; - ++row; - } - } - ++col; - } - } - for (i = 0; i < num_in_clean; ++i) - { - sprintf(names[i],"%i",i); - } - upgma_tree= (int_dist2upgma_tree_fastal (dist_mat2, num_in_clean, file_name_tmp , names)); -// } - -// int *pos_tree_p = ¶m_set->pos_tree; - - - int leaf = 0; - - //cluster - //calculate distances from n' to N - get_table(table1, table_f, file_positions[index[0]]); - for (j = num_in_clean; j < num_sequences; ++j) - { - min_dist[j] = euclidean_dist_half(table_f, file_positions[index[j]], table1, table2, param_set->tsize); - belongs_to[j] = 0; - } - for(i = 1; i < num_in_clean; ++i) - { - get_table(table1, table_f, file_positions[index[i]]); - belongs_to[i] = i; - for (j = num_in_clean; j < num_sequences; ++j) - { - tmp = euclidean_dist_half(table_f, file_positions[index[j]], table1, table2, param_set->tsize); - if (tmp < min_dist[j]) - { - min_dist[j] = tmp; - belongs_to[j] = i; - } - } - } - - //how_many sequences has each cluster - for (j = 0; j <= num_in_subgroup; ++j) - { - clusters[j] = 0; - } - for (j = 0; j < num_sequences; ++j) - { - ++clusters[belongs_to[j]]; - } -// for (j = 0; j <= num_in_subgroup; ++j) -// { -// printf("CL: %i ",clusters[j]); -// } -// printf("\n"); - for(i = 1; i < num_in_clean; ++i) - { - clusters[i] += clusters[i-1]; - } - clusters[num_in_clean] = clusters[num_in_clean-1]; - - for (i = 0; i < num_sequences; ++i) - { - sequence_group2[--clusters[belongs_to[i]]] = index[i]; - } - - for (i = 0; i < num_sequences; ++i) - { - index[i] = sequence_group2[i]; - } - - - for (i = 0; i < num_in_clean; ++i) - { - sprintf(names[i],"%i",i); - } - tree_process(upgma_tree[0][0], param_set, clusters, index, loop_tree_node); - NT_node tmp_tree = upgma_tree[3][0]; - vfree(upgma_tree[0]); - vfree(upgma_tree[1]); - vfree(upgma_tree[2]); - vfree(upgma_tree[3]); - vfree(upgma_tree); - free_tree(tmp_tree); - } - vfree(min_dist); - vfree(belongs_to); - vfree(clusters); -} - - - -/*! - * \brief Makes the distance matrix between all sequences. - * - * \param table_file File with the ktup tables - * \param file_positions Index of positions where the tabels are stored in \a table_file - * \param sequence_group the group of sequences - * \param number number of sequences - * \param dist_mat distance matrix - * \return the distance matrix. (same as \a dist_mat ) -*/ -int ** -make_distance_matrix(FILE *table_f, - long *file_positions, - int *sequence_group, - int number, - int **dist_mat) -{ - static short *table1 = NULL; - static short *table2; - int tsize = param_set->tsize; - if (table1 == NULL) - { - table1 = vcalloc(tsize, sizeof(short)); - table2 = vcalloc(tsize, sizeof(short)); - } - int i, j, num = number-1; - for (i = 0; i < num; ++i) - { - j = i+1; - dist_mat[i][j] = dist_mat[j][i]= euclidean_dist(table_f, file_positions[sequence_group[i]], file_positions[sequence_group[j]], table1, table2, tsize); - ++j; - for (; j < number; ++j) - { - dist_mat[i][j] = dist_mat[j][i] = euclidean_dist_half(table_f, file_positions[sequence_group[j]], table1, table2, tsize); - } - } - return dist_mat; -} - - - - -/** -* Replaces the coded sequence with coded tuples -* -* \param coded_seq The coded sequence which will be replaced by the tuple number -* \param ktup Size of the ktup -* \param ng Coded alphabet size -* \param length Lengths of coded sequence -*/ -void -makepointtable_fast(int *coded_seq, //sequence - int ktup, //ktup size - int ng, //hmm... - int length) //length of coded_seq -{ - int point, a; - register int *p; - static int *prod; - - if (!prod) - { - prod=vcalloc ( ktup, sizeof (int)); - for ( a=0; a 0) - fprintf(tables_f, "%i %i\n", point, table[point]); - } - fprintf(tables_f, "*\n"); -} - - -/** JUST FOR TEST */ -void -make_fast_tree(char *file_name, - int n, - int ktup) -{ - - make_partTree(file_name, "TREE_OUT", ktup, n); - -} - - - -/** -* \brief Reads ktup_table from file -* -* \param table Table to save the file content in. -* \param tables_f File in which the tables are stored. -* \param index Position of the table in \a tables_f -*/ -void -get_table(short *table, //Table to save the readings in - FILE* tables_f, //File with tables - long index) //index positin of ktup-tables -{ - fseek(tables_f, index, SEEK_SET); - const int LINE_LENGTH = 101; - char line[LINE_LENGTH]; - fgets(line, LINE_LENGTH, tables_f); - - char delims[] = " "; - char *result = NULL; - int code; - - while (line[0] != '*') - { - result = strtok( line, delims ); - code = atoi(result); - table[code] = atoi(strtok( NULL, delims)); - fgets(line, LINE_LENGTH, tables_f); - } -} - - - -/** -* \brief calculates the euclidean ktub distance between two sequences -* -* @param ktup_f, ktup_file -* @param pos1 position of sequence 1 in \a ktup_f -* @param pos2 position of sequence 2 in \a ktup_f -* @param table1 Saves the number of occurences for each ktup in sequence 1 -* @param table2 Saves the number of occurences for each ktup in sequence 2 -*/ -int -euclidean_dist(FILE* ktup_f, //ktup_file - long pos1, //position of table1 - long pos2, //position of table2 - short *table1, //table to save ktups in - short *table2, //table to save ktups in - int length) -{ - const int LINE_LENGTH = 101; - char line[LINE_LENGTH]; - - - char delims[] = " "; - char *result = NULL; - int code; - - fseek(ktup_f, pos1, SEEK_SET); - fgets(line, LINE_LENGTH, ktup_f); - int i; - for (i = 0; i < length; ++i) - { - table1[i] = 0; - table2[i] = 0; - } - while (line[0] != '*') - { - result = strtok( line, delims ); - code = atoi(result); - table1[code] = atoi(strtok( NULL, delims)); - fgets(line, LINE_LENGTH, ktup_f); - } - fseek(ktup_f, pos2, SEEK_SET); - fgets(line, LINE_LENGTH, ktup_f); - while (line[0] != '*') - { - result = strtok( line, delims ); - code = atoi(result); - table2[code] = atoi(strtok( NULL, delims)); - fgets(line, LINE_LENGTH, ktup_f); - } - - int dist = 0; - for (i = 0; i < length; ++i) - { - dist += (table1[i]-table2[i])*(table1[i]-table2[i]); - } - return dist; -} - - - -/** - * \brief calculates the euclidean ktub distance between two sequences. - * - * The difference to \a euclidean_dist is, that this uses the ktups stored in \a table1 - * @param ktup_f, ktup_file - * @param pos2 position of sequence 2 in \a ktup_f - * @param table1 Saves the number of occurences for each ktup in sequence 1 - * @param table2 Saves the number of occurences for each ktup in sequence 2 - * \see euclidean_dist - */ -int -euclidean_dist_half(FILE* ktup_f, //ktup_file - long pos2, //position of table1 - short *table1, //table to save ktups in - short *table2, //table to save ktups in - int length) -{ - const int LINE_LENGTH = 101; - char line[LINE_LENGTH]; - - - char delims[] = " "; - char *result = NULL; - int code; - - fseek(ktup_f, pos2, SEEK_SET); - fgets(line, LINE_LENGTH, ktup_f); - int i; - for (i = 0; i < length; ++i) - { - table2[i] = 0; - } - while (line[0] != '*') - { - result = strtok( line, delims ); - code = atoi(result); - table2[code] = atoi(strtok( NULL, delims)); - fgets(line, LINE_LENGTH, ktup_f); - } - - int dist = 0; - for (i = 0; i < length; ++i) - { - dist += (table1[i]-table2[i])*(table1[i]-table2[i]); - } - return dist; -} - - - - -/** -* Makes an index of a file -*/ -int -make_pos_len_index_of_file(char *file_name, //file with sequences - char *ktable_f, //file with the ktup-tables - long **file_positions, //array to save the positions - int **seq_lengths, //array to save the sequence length - int ktup, //length of ktup - char *type) //type of the seuqence -{ - //preparations for recoding sequence - int *aa; - int a, b, l; - - int ng = 0; - char **gl; - if ( strm (type, "DNA") || strm (type, "RNA")) - { - gl=declare_char (5,13); - sprintf ( gl[ng++], "Aa"); - sprintf ( gl[ng++], "Gg"); - sprintf ( gl[ng++], "TtUu"); - sprintf ( gl[ng++], "Cc"); - sprintf ( gl[ng++], "NnRrYyDdMmWw"); - } - else - { - gl=make_group_aa ( &ng, "mafft"); - } - aa=vcalloc ( 256, sizeof (int)); - for ( a=0; atsize = tsize; - param_set->ng = ng; - - int *table=vcalloc ( tsize,sizeof (int)); - - - //Reading and recoding squences - const int LINE_LENGTH = 501; - int *coded_seq = vcalloc(2*LINE_LENGTH, sizeof(int)); - int allocated_mem = 2*LINE_LENGTH; - - (*file_positions) = vcalloc(ENLARGEMENT_PER_STEP, sizeof(long)); - (*seq_lengths) = vcalloc(ENLARGEMENT_PER_STEP, sizeof(int)); - int current_size = ENLARGEMENT_PER_STEP; - int current_pos = 0; - - FILE *file = fopen(file_name,"r"); - - - int seq_length=0; - char line[LINE_LENGTH]; - - int num_of_sequences = 0; - int str_len = 0; - int mem_for_pos = ENLARGEMENT_PER_STEP; - int tmp; - int real_len; - int *c_seq; - - FILE *tables_f = fopen(ktable_f, "w"); - - - if (file == NULL) - { - printf("FILE NOT FOUND\n"); - exit(1); - } - else - { - - while(fgets(line, LINE_LENGTH , file)!=NULL) - { - if ( str_len >= allocated_mem - LINE_LENGTH) - { - allocated_mem += LINE_LENGTH; - coded_seq = vrealloc(coded_seq, allocated_mem*sizeof(int)); - } - - int i; - int length = strlen(line); - if (line[0] == '>') - { - if (num_of_sequences >0) - { - (*seq_lengths)[num_of_sequences-1] = str_len; -// printf("len: %i\n", str_len); - c_seq = coded_seq; - makepointtable_fast(coded_seq,ktup,ng, str_len); - - (*file_positions)[num_of_sequences-1] = ftell(tables_f ); - for (i=0; i < tsize; ++i) - table[i] = 0; - makecompositiontable_fastal(tables_f, table, coded_seq,tsize ); - - - } - str_len = 0; - ++num_of_sequences; - - if (num_of_sequences == mem_for_pos) - { - mem_for_pos += ENLARGEMENT_PER_STEP; - (*file_positions) = vrealloc((*file_positions), mem_for_pos * sizeof(long)); - (*seq_lengths) = vrealloc((*seq_lengths), mem_for_pos * sizeof(int)); - } - } - else - { - int i; - real_len = strlen(line); - if (line[real_len-1] == '\n') - --real_len; - for (i = 0; i < real_len; ++i) - { - coded_seq[str_len++] = aa[line[i]]; - } - } - } - } - - (*seq_lengths)[num_of_sequences-1] = str_len; - c_seq = coded_seq; - makepointtable_fast(coded_seq,ktup,ng, str_len); - (*file_positions)[num_of_sequences] = ftell(tables_f ); - makecompositiontable_fastal(tables_f, table, coded_seq,tsize ); - fclose(file); - fclose(tables_f); - return num_of_sequences; -} -/*********************************COPYRIGHT NOTICE**********************************/ -/*© Centro de Regulacio Genomica */ -/*and */ -/*Cedric Notredame */ -/*Tue Oct 27 10:12:26 WEST 2009. */ -/*All rights reserved.*/ -/*This file is part of T-COFFEE.*/ -/**/ -/* T-COFFEE is free software; you can redistribute it and/or modify*/ -/* it under the terms of the GNU General Public License as published by*/ -/* the Free Software Foundation; either version 2 of the License, or*/ -/* (at your option) any later version.*/ -/**/ -/* T-COFFEE is distributed in the hope that it will be useful,*/ -/* but WITHOUT ANY WARRANTY; without even the implied warranty of*/ -/* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the*/ -/* GNU General Public License for more details.*/ -/**/ -/* You should have received a copy of the GNU General Public License*/ -/* along with Foobar; if not, write to the Free Software*/ -/* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA*/ -/*............................................... |*/ -/* If you need some more information*/ -/* cedric.notredame@europe.com*/ -/*............................................... |*/ -/**/ -/**/ -/* */ -/*********************************COPYRIGHT NOTICE**********************************/ diff --git a/binaries/src/tcoffee/t_coffee_source/pavie_dp.c b/binaries/src/tcoffee/t_coffee_source/pavie_dp.c deleted file mode 100644 index 4353b80..0000000 --- a/binaries/src/tcoffee/t_coffee_source/pavie_dp.c +++ /dev/null @@ -1,1411 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include "io_lib_header.h" -#include "util_lib_header.h" -#include "define_header.h" -#include "dp_lib_header.h" - - -static double mc_delta_matrix ( int ***mat1, int ***mat2, char **alp, int nch); -static double delta_matrix ( int **mat1,int **mat2, char *alp); -static double ***pavie_seq2pavie_fmat (Sequence *S,double *gop, double *gep, char **mat, char *idmat, int id_threshold, int sample_size, int nch, char *param ); -static int **pavie_fmat2pavie_logodd_mat (double **fmat, char *alp); -static double **pavie_aln2fmat(Alignment *A, double **fmat, char *idmat, int id_threshold, int ch, int nch, char *param); -static int pavie_mat2pavie_id_mat ( int **mat,char *in_name, char *alp, char *ignore, char *force,int T, char *out_name); -static double paviemat2gep ( int **mat, char *alp); -static Alignment *align_pavie_sequences (char *seq0,char *seq1,char **mat,double *gop,double *gep,int nch, char *param); -static int pavie_score (char *s0,int p0, char *s1,int p1,char **mat_file, double *gop, double *gep, int nch, float factor, char *param); -static char **seq2pavie_alp (Sequence *S, int nch); -static Sequence * seq2pavie_seq ( Sequence *S, int nch); -static FILE* output_pavie_aln (Alignment *A, int nch, FILE *fp); -static char **output_pavie_mat_list ( int ***current_mat, double *gep, char **alp, int nch,char *prefix,int cycle, char **mat_name); -static float pavie_aln2id ( Alignment *A, int mode); -static int check_pavie_cl ( char *string); -float pavie_aln2delta_age ( Alignment *A,int s0, int s1, int a0, int a1); - -static float tgep_factor; -static int id_thres_used_aln; -static int log_odd_mode; -Sequence * pavie_seq2noisy_seq ( Sequence *S, int freq, char *alp) -{ - int a, b, l1, l2; - - vsrand(0); - - if (alp==NULL) - { - char **x; - x=seq2pavie_alp (S,1); - alp=x[0]; - } - - l2=strlen (alp); - for (a=0; a< S->nseq; a++) - { - l1=strlen (S->seq[a]); - for ( b=0; bseq[a][b]=alp[rand()%l2]; - } - } - return S; -} -Sequence * pavie_seq2random_seq ( Sequence *S, char *subst) -{ - int a, b, r, l; - - - vsrand (0); - r=subst[0]; subst++; - l=strlen (subst); - for ( a=0; a< S->nseq; a++) - for (b=0; blen[a]; b++) - if ( S->seq[a][b]==r)S->seq[a][b]=subst[rand()%l]; - return S; -} - -double **pavie_seq2pavie_aln(Sequence *S,char *mat, char *mode) -{ - int a, b,c, nch=0; - char **mat_list; - char *buf; - - double *gep, *gop; - Alignment *A; - char **alp; - char *pavie_idmat; - FILE *fp; - double **dist_mat; - float score; - - check_pavie_cl (mode); - - mat_list=declare_char (100, 100); - - if ( is_matrix (mat)) - { - sprintf ( mat_list[nch++], "%s", mat); - } - else - { - fp=vfopen (mat,"r"); - while ( (c=fgetc(fp))!=EOF) - { - ungetc(c, fp); - fscanf (fp, "%s\n",mat_list[nch++]); - } - vfclose (fp); - } - - alp=seq2pavie_alp (S, nch); - S=seq2pavie_seq (S, nch); - - gop=vcalloc (nch, sizeof (double)); - gep=vcalloc (nch, sizeof (double)); - - for ( a=0; a< nch; a++) - { - int **m; - char *st; - int v; - m=read_matrice (mat_list[a]); - if ((st=vstrstr(mode, "_GEP"))) - { - sscanf ( st, "_GEP%d_", &v); - gep[a]=v*-1; - } - else if ( m[0][GAP_CODE]==0) - { - gep[a]=paviemat2gep(m,alp[a]); - } - else - { - gep[a]=m[0][GAP_CODE]; - } - free_int (m, -1); - } - - - if ( (buf=vstrstr (mode, "_TGEPF"))) - { - - sscanf (buf, "_TGEPF%f_", &tgep_factor); - tgep_factor/=(float)100; - } - else - { - tgep_factor=0.5; - } - - pavie_idmat=vtmpnam(NULL); - - pavie_mat2pavie_id_mat (NULL,"idmat", alp[0],"X","",1,pavie_idmat); - dist_mat=declare_double ( S->nseq, S->nseq); - - - - for ( a=0; a< S->nseq-1; a++) - { - for ( b=a+1; b< S->nseq; b++) - { - int a0, a1; - float delta_a; - - - if ( ! strstr (mode, "_MSA_")) - { - A=align_pavie_sequences (S->seq[a],S->seq[b],mat_list,gop,gep,nch, mode); - sprintf ( A->name[0], "%s", S->name[a]); - sprintf ( A->name[1], "%s", S->name[b]); - } - else - { - - A=strings2aln ( 2, S->name[a], S->seq[a], S->name[b], S->seq[b]); - sprintf ( A->seq_al[0], "%s", S->seq[a]); - sprintf ( A->seq_al[1], "%s", S->seq[b]); - A->len_aln=strlen (S->seq[a]); - ungap_aln (A); - } - - if (strm (mode, "_ID01_"))A->score=score=pavie_aln2id (A, 1); - else if ( vstrstr (mode, "_ID02_"))A->score=score=pavie_aln2id (A, 2); - else if ( vstrstr (mode, "_ID04_"))A->score=score=pavie_aln2id (A, 4); - else if ( vstrstr (mode, "_ID05_"))A->score=score=pavie_aln2id (A, 5); - else if ( vstrstr (mode, "_ID06_"))A->score=score=pavie_aln2id (A, 6); - - else A->score=score=pavie_aln2id (A, 1); - - a0=S->seq[a][strlen(S->seq[a])+1]; - a1=S->seq[b][strlen(S->seq[b])+1]; - - delta_a=pavie_aln2delta_age (A, 0, 1, a0, a1); - - if ( vstrstr (mode, "_MATDIST_")) - dist_mat[a][b]=dist_mat[b][a]=(double)(vstrstr (mode, "_ID05") || vstrstr (mode, "_ID06"))?-score*100:(100-score); - else if ( vstrstr (mode, "_MATSIM_")) - dist_mat[a][b]=dist_mat[b][a]=(double)(score); - - - if ( !vstrstr (mode, "_MAT") ) - { - fprintf ( stdout, "#############\nAlignment %s %s: %d %% ID SCORE %d DELTA_AGE %.2f\n", S->name[a], S->name[b], A->score, A->score_aln, delta_a); - output_pavie_aln (A,nch, stdout); - } - free_aln(A); - } - } - - if ( vstrstr (mode, "_MAT") && !vstrstr ( mode, "_NOPRINT_")) - { - if ( vstrstr (mode, "_MFORMAT2")) - { - int max, n; - float *tot,s, bigtot=0; - - for (max=0, a=0; a< S->nseq; a++)max=MAX(max,(strlen (S->name[a]))); - tot=vcalloc ( S->nseq, sizeof (float)); - fprintf (stdout, "# TC_DISTANCE_MATRIX_FORMAT_01\n"); - for ( a=0; anseq; a++) - fprintf ( stdout, "# SEQ_INDEX %s %d\n",S->name[a],a); - fprintf ( stdout, "# PW_SEQ_DISTANCES \n"); - for (n=0,a=0;a< S->nseq-1; a++) - { - for ( b=a+1; bnseq; b++, n++) - { - s=dist_mat[a][b]; - - fprintf (stdout, "BOT\t %4d %4d\t %5.2f %*s\t %*s\t %5.2f\n", a,b,s,max,S->name[a], max, S->name[b], s); - fprintf (stdout, "TOP\t %4d %4d\t %5.2f %*s\t %*s\t %5.2f\n", b,a,s,max,S->name[b], max, S->name[a], s); - tot[a]+=s; - tot[b]+=s; - bigtot+=s; - } - } - for ( a=0; a< S->nseq; a++) - { - fprintf (stdout, "AVG\t %d\t %*s\t %*s\t %5.2f\n", a,max,S->name[a], max, "*", tot[a]/(S->nseq-1)); - } - vfree (tot); - fprintf (stdout, "TOT\t %*s\t %*s\t %5.2f\n", max,"TOT", max, "*", bigtot/n); - vfclose (stdout); - } - else - { - for ( a=0; anseq; a++) - { - fprintf ( stdout, "\n%s ", S->name[a]); - for ( b=0; b< S->nseq; b++) - fprintf ( stdout, "%6d ", (int)(dist_mat[a][b]*100)); - } - } - } - - return dist_mat; -} - -float pavie_aln2delta_age ( Alignment *A,int s0, int s1, int a0, int a1) -{ - int a,r0, r1, g0, g1, n; - float delta; - for (n=0, delta=0, a=0; a< A->len_aln; a++) - { - r0=A->seq_al[s0][a]; - r1=A->seq_al[s1][a]; - - g0=!is_gap(r0); - g1=!is_gap(r1); - - a0+=g0;a1+=g1; - if ( g0 && g1) - { - delta+=FABS((a0-a1)); - n++; - } - } - delta/=(float)((n)?n:1); - return delta; -} - -int **pavie_seq2trained_pavie_mat(Sequence *S, char *param) -{ - double ***fmat; - int ***current_mat; - int ***previous_mat; - char **alp; - - char **mat_file; - double d,delta_min=10; - double *gep; - double *gop; - - char ignore[100]; - char force [100]; - char pavie_idmat[100]; - int id_threshold; - int sample_size; - char *b; - int a,n=0,nch=1; - char *buf; - - check_pavie_cl (param); - - if ( !param)param=vcalloc (1, sizeof (char)); - - if ((b=vstrstr(param,"_THR")))sscanf ( b, "_THR%d_", &id_threshold); - else id_threshold=0; - - sample_size=0; - if ((b=vstrstr(param,"_SAMPLE")))sscanf ( b, "_SAMPLE%d_", &sample_size); - if ((b=vstrstr(param,"_PARALOGOUS"))) - { - sscanf ( b, "_PARALOGOUS%d_", &sample_size); - sample_size*=-1; - } - - if ((b=vstrstr(param,"_CHANNEL")))sscanf ( b, "_CHANNEL%d_", &nch); - else nch=1; - - if ( (buf=vstrstr (param, "_TGEPF"))) - { - sscanf (buf, "_TGEPF%f_", &tgep_factor); - tgep_factor/=(float)100; - } - else - { - tgep_factor=0.5; - } - if ( (buf=vstrstr (param, "_PAMLOGODD_"))) - { - log_odd_mode=1; - } - /*Declare Arrays*/ - gep=vcalloc (nch, sizeof (double)); - gop=vcalloc (nch, sizeof (double)); - mat_file=declare_char ( nch, 100); - current_mat =vcalloc ( nch, sizeof (double**)); - previous_mat=vcalloc ( nch, sizeof (double**)); - - - sprintf (ignore, "X"); - force[0]='\0'; - sprintf ( pavie_idmat, "pavie_idmat"); - - - - alp=seq2pavie_alp (S, nch); - - S=seq2pavie_seq (S, nch); - - pavie_mat2pavie_id_mat (NULL,"idmat", alp[0],ignore,force,1,pavie_idmat); - - for ( a=0; adelta_min) - { - - fprintf ( stdout, "\nDelta=%d: ",(int) d); - for (a=0; anseq-exclude_id; a++) - { - - output_completion ( stderr,a+1,S->nseq,1, ""); - - for ( b=a+exclude_id; b< S->nseq; b++) - { - tot++; - - A=align_pavie_sequences (S->seq[a],S->seq[b],mat,gop,gep,nch,param); - - for ( chan=0; chan< nch; chan++) - { - - fmat[chan]=pavie_aln2fmat (A, fmat[chan], idmat, id_threshold, chan, nch, param); - } - free_aln (A); - } - } - } - else - { - int c; - static int **list; - - if ( sample_size>0 && !list) - { - if ( exclude_id==0)sample_size*=3; - if (!list) - { - list=declare_int ((sample_size+1), 2); - vsrand(0); - tot=0; - while (totnseq);b=rand()%(S->nseq); - if ( a!=b) - { - list[tot][0]=a;list[tot][1]=b; - tot++; - if ( exclude_id==0) - { - list[tot][0]=a;list[tot][1]=a; - tot++; - list[tot][0]=b;list[tot][1]=b; - tot++; - } - } - } - } - } - else if ( sample_size<0 && !list) - { - - int **sim; - int m; - sim=seq2sim_mat (S, "idmat"); - sample_size*=-1; - list=declare_int (S->nseq*S->nseq, 2); - - m=S->nseq-exclude_id; - for (a=0; anseq; b++) - { - if ( sim[a][b]>sample_size) - { - list[tot][0]=a; - list[tot][1]=b; - tot++; - fprintf ( stderr, "\n%s %s: %d", S->name[a], S->name[b], sim[a][b]); - fprintf ( stderr, "\nKeep %s Vs %s : %d%% ID", S->name[a], S->name[b], sim[a][b]); - } - } - free_int(sim, -1); - } - - for (c=0; cseq[a],S->seq[b],mat,gop,gep,nch, param); - for (chan=0; chan< nch; chan++) - fmat[chan]=pavie_aln2fmat (A, fmat[chan], idmat, id_threshold,chan, nch, param); - - free_aln (A); - output_completion ( stderr,c,tot,1, ""); - } - } - fprintf ( stderr, "\n\tSample_size: %d Used alignments: %d\n", tot, id_thres_used_aln); - return fmat; -} - - - -int **pavie_fmat2pavie_logodd_mat (double **fmat, char *alp) -{ - int s1, s2,S1, S2; - double r1, r2; - int **mat; - int a, b; - int ns; - int logodd=0; - - - ns=strlen (alp); - mat=declare_int (256, 256); - - for ( a=0; alen_aln/nch); - start=l*ch; - A->len_aln=l; - A->seq_al[0]+=start; - A->seq_al[1]+=start; - - - - if ( fmat==NULL)fmat=declare_double(300, 300); - - if ( vstrstr (param, "_TWE00_"))w=100; - else if ( vstrstr (param, "_TWE01_"))w=pavie_aln2id (A, 1); - else if ( vstrstr (param, "_TWE02_"))w=pavie_aln2id (A, 2); - else if ( vstrstr (param, "_TWE03_"))w=pavie_aln2id (A, 3); - else if ( vstrstr (param, "_TWE04_"))w=pavie_aln2id (A, 4); - else if ( vstrstr (param, "_TWE05_"))w=pavie_aln2id (A, 5); - else if ( vstrstr (param, "_TWE06_"))w=pavie_aln2id (A, 6); - - else w=pavie_aln2id (A, 3); - - id=pavie_aln2id(A, 3); - - - if (idlen_aln*=nch; - A->seq_al[0]-=start;A->seq_al[1]-=start; - return fmat; - } - else - { - id_thres_used_aln++; - for ( a=0; alen_aln; a++) - { - c1=tolower(A->seq_al[0][a]); - c2=tolower(A->seq_al[1][a]); - fmat[c1][c2]+=w; - - fmat[c1][0]++; - fmat[c1][1]+=w; - - fmat[c2][0]++; - fmat[c1][1]+=w; - - fmat[0][0]+=2; - fmat[1][1]+=2*w; - } - A->len_aln*=nch; - A->seq_al[0]-=start;A->seq_al[1]-=start; - - return fmat; - } -} - -int pavie_mat2pavie_id_mat ( int **mat,char *in_name, char *alp, char *ignore, char *force,int T, char *out_name) -{ - int n1, n2, n3; - int s1, s2, S1, S2; - int a, b; - int **idmat; - - if (mat==NULL && in_name==NULL) return 0; - else if (mat==NULL) - { - mat=read_matrice (in_name); - } - - - idmat=declare_int ( 256, 256); - n1=strlen (alp); - n2=strlen (ignore); - n3=strlen (force); - - for (a=0; a< n1; a++) - for ( b=0; b=T)?PAVIE_MAT_FACTOR:0; - } - for (a=0; alen_aln=strlen (seq0); - A->nseq=2; - A->score=A->score_aln=100; - sprintf ( A->seq_al[0], "%s", seq1); - sprintf ( A->seq_al[1], "%s", seq0); - return A; - } - - - x=seq0; - y=seq1; - - XL=strlen (x)/nch; - YL=strlen (y)/nch; - - - ax=vcalloc ( (YL+XL)*nch+1, sizeof (char)); - ay=vcalloc ( (YL+XL)*nch+1, sizeof (char)); - bufx=vcalloc ( (YL+XL)*nch+1, sizeof (char)); - bufy=vcalloc ( (YL+XL)*nch+1, sizeof (char)); - - F=declare_double (XL+2, YL+2); - T=declare_int (XL+2, YL+2); - - - /*Fill stage*/ - F[0][0] = 0; - for(i = 1; i <=XL; i++) - { - - F[i][0] = F[i-1][0]+pavie_score (x,i-1,NULL,GAP_CODE,mat, gop, gep, nch, factor, param) /*CL->M[x[i-1]-'A'][gap]*/; - - T[i][0] = GY; - } - - for(j = 1; j <= YL; j++) - { - - F[0][j] = F[0][j-1]+pavie_score (NULL,GAP_CODE,y,j-1,mat, gop, gep, nch, factor, param)/*CL->M[y[j-1]-'A'][gap]*/; - T[0][j] = GX; - } - - - for(i = 1; i <= XL; i++) - { - for(j = 1; j <= YL; j++) - { - - match = F[i-1][j-1] + /*CL->M[x[i-1]-'A'][y[j-1]-'A']*/pavie_score (x,i-1,y, j-1,mat, gop, gep, nch, 1, param); - gap_inY= F[i-1][j] + /*CL->M[x[i-1]-'A'][gap]*/ pavie_score (x,i-1, NULL,GAP_CODE,mat, gop, gep, nch, (j==YL)?factor:1, param); - gap_inX= F[i][j-1] + /*+ CL->M[y[j-1]-'A'][gap]*/ pavie_score (NULL,GAP_CODE,y, j-1,mat, gop, gep, nch, (i==XL)?factor:1, param); - - if ( match >= gap_inY && match >=gap_inX){F[i][j]=match; T[i][j]=MXY;} - else if ( gap_inX>=gap_inY){F[i][j]=gap_inX; T[i][j]=GX;} - else {F[i][j]=gap_inY; T[i][j]=GY;} - } - } - /*Trace back stage*/ - - - i = XL; - j = YL; - len=0; - while(!(i==0 && j==0)) - { - - if (T[i][j]==MXY) - { - ax[len]=1;i--; - ay[len]=1;j--; - } - else if ( T[i][j]==GY) - { - ax[len]=1;i--; - ay[len]='-'; - } - else if ( T[i][j]==GX) - { - ax[len]='-'; - ay[len]=1;j--; - } - len++; - } - - for (a=0; alen_aln=strlen (ax); - A->nseq=2; - A->score=A->score_aln=F[XL][YL]; - - for (a=0, b=0, c=0; alen_aln; a++) - { - if (ax[a]==1)ax[a]=seq0[b++]; - if (ay[a]==1)ay[a]=seq1[c++]; - } - - - - sprintf ( A->seq_al[0], "%s", ax); - sprintf ( A->seq_al[1], "%s", ay); - - vfree (ax); vfree(ay);vfree (bufx); vfree (bufy);free_double(F, -1); free_int (T, -1); - return A; -} - - -int pavie_score (char *s0,int p0, char *s1,int p1,char **mat_file, double *gop, double *gep, int nch, float factor, char *param) - - { - static char *cmat; - static int ***mat; - static int use_age; - static int mchscore=-1; - - int l0, l1, c0, c1; - int a, score=0; - - if ( !use_age) - { - strget_param ( param, "_AGECHANNEL", "-1", "%d", &use_age); - - } - if (mchscore==-1) - { - strget_param (param, "_MCHSCORE", "0", "%d", &mchscore); - - } - - if ( !cmat || !mat_file || !strm (cmat, mat_file[0])) - { - if ( !cmat)cmat=vcalloc ( 100, sizeof (char)); - sprintf ( cmat, "%s", (mat_file)?mat_file[0]:"idmat"); - if ( !mat)mat=vcalloc ( nch, sizeof (int**)); - for ( a=0; a< nch; a++) - { - if ( mat[a])free_int (mat[a], -1); - mat[a]=read_matrice ((mat_file)?mat_file[a]:"idmat"); - - } - } - - l0=(s0)?strlen (s0)/nch:0; - l1=(s1)?strlen (s1)/nch:0; - - if (mchscore==0); - else if (mchscore==1) score=999999; - else if (mchscore==2)score=-9999999; - else - { - HERE ("Error: mchscore >2 [FATAL]\n"); - exit (EXIT_FAILURE); - } - for ( a=0; a< nch; a++) - { - int s; - c0=(s0)?s0[l0*a+p0]-'A':p0; - c1=(s1)?s1[l1*a+p1]-'A':p1; - if ( c0==GAP_CODE)s=(gep[a]!=0)?gep[a]:mat[a][c1][GAP_CODE]; - else if ( c1==GAP_CODE)s=(gep[a]!=0)?gep[a]:mat[a][c0][GAP_CODE]; - else s=mat[a][c0][c1]; - - if (mchscore==0)score+=s; - else if (mchscore==1)score=MIN(s, score); - else if (mchscore==2)score=MAX(s, score); - - - } - - if ( use_age>0 && s0 && s1) - { - - int a0, a1; - int s; - - a0=s0[strlen(s0)+1]; - a1=s1[strlen(s1)+1]; - - a0+=p0; - a1+=p1; - s=use_age*FABS((a0-a1))*-1; - - if (mchscore==0)score+=s; - else if (mchscore==1)score=MIN(s, score); - else if (mchscore==2)score=MAX(s, score); - } - - - score*=factor; - return score; - } -Sequence * seq2pavie_seq ( Sequence *S, int nch) - { - char *buf, *p; - int a, b; - - S->nseq/=nch; - - for (b=0; bnseq; b++) - { - - buf=vcalloc ((strlen (S->seq[b])*nch)+10, sizeof (char)); - for ( a=0; a< nch; a++) - { - strcat (buf, S->seq[b+(S->nseq)*a]); - vfree ( S->seq[b+(S->nseq)*a]); - } - S->seq[b]=buf; - /*Code Age on the byte just after the string termination*/ - - if ((p=strstr (S->seq_comment[b], "FIRSTYEAR"))) - { - sscanf ( p, "FIRSTYEAR%d", (int*)&(S->seq[strlen(buf)+1])); - } - - } - return S; - } -char **seq2pavie_alp (Sequence *S, int nch) - { - int a, n; - char **alp; - - n=S->nseq/nch; - alp=vcalloc (nch, sizeof (char*)); - for ( a=0; a< nch; a++) - { - alp[a]=array2alphabet (S->seq+n*a, n, "-."); - } - return alp; - } -FILE *output_pavie_aln (Alignment *A, int nch, FILE *fp) -{ - int a, b, c,d, l, start, end; - Alignment *B; - Sequence *S; - B=declare_aln2(A->nseq*nch+nch, A->len_aln); - - - - l=A->len_aln/nch; - - for ( a=0; a< nch; a++) - { - for (b=0; b< A->nseq; b++, B->nseq++) - { - sprintf (B->name[B->nseq], "%s.c%d", A->name[b], a); - start=l*a;end=start+l; - for (d=0,c=start; cseq_al[B->nseq][d]=A->seq_al[b][c]; - B->seq_al[B->nseq][d]='\0'; - } - if ( a!=nch-1) - { - B->name[B->nseq][0]='\0'; - for ( b=0; bseq_al[B->nseq][b]='^'; - B->nseq++; - } - } - - B->len_aln=l; - fp=output_Alignment_without_header (B,fp); - S=free_aln (B); - free_sequence (S, S->nseq); - return fp; - -} -char **output_pavie_mat_list ( int ***current_mat, double *gep, char **alp, int nch,char *prefix,int cycle, char **mat_name) -{ - int a; - char mat_list_name[100]; - FILE *fp; - char latest[1000]; - char current[1000]; - char command[1000]; - - sprintf ( mat_list_name, "pavie_matrix%s.cycle_%d.mat_list", prefix, cycle+1); - fp=vfopen ( mat_list_name, "w"); - fprintf ( stderr, "\n\tOutput Pavie Matrix: %s", mat_list_name); - for ( a=0; a< nch; a++) - { - sprintf ( mat_name[a], "pavie_matrix%s.ch_%d.cy_%d.pavie_mat", prefix,a+1, cycle+1); - sprintf (latest, "pavie_matrix%s.ch_%d.cy_last.pavie_mat",prefix,a+1); - sprintf ( current, "matrix.ch%d.pavie_mat", a); - fprintf ( stderr, "\n\t Channel %d Matrix: %s",a+1, mat_name[a]); - output_pavie_mat (current_mat[a],mat_name[a],gep[a], alp[a]); - sprintf ( command, "cp %s %s", mat_name[a], latest); - system (command); - sprintf ( command, "cp %s %s", latest, current); - system (command); - fprintf ( fp, "%s\n", mat_name[a]); - } - vfclose (fp); - return mat_name; -} - - -int pavie_pair_wise (Alignment *A,int *ns, int **l_s,Constraint_list *CL ) -{ - double **F; int **T; - char *x,*y; - char *ax, *ay; - int XL, YL, len; - int i, j; - double match, gap_inX, gap_inY, MXY=1, GX=2, GY=3; - int gap=GAP_CODE; - char *ix, *iy; - float factor=0.5; - - - /*factor: - decreases terminal gap penalties with a factor X - factor=1: terminal gap penalties <=> internal gap penalties - */ - - - - x=A->seq_al[l_s[0][0]]; - y=A->seq_al[l_s[1][0]]; - XL=strlen (x); - YL=strlen (y); - - ax=vcalloc ( YL+XL+1, sizeof (char)); - ay=vcalloc ( YL+XL+1, sizeof (char)); - - - F=declare_double (XL+2, YL+2); - T=declare_int (XL+2, YL+2); - - - /*Fill stage*/ - F[0][0] = 0; - for(i = 1; i <=XL; i++) - { - - F[i][0] = F[i-1][0]+(CL->M[x[i-1]-'A'][gap]*factor); - T[i][0] = GY; - } - - for(j = 1; j <= YL; j++) - { - F[0][j] = F[0][j-1]+CL->M[y[j-1]-'A'][gap]*factor; - T[0][j] = GX; - } - - - for(i = 1; i <= XL; i++) - { - for(j = 1; j <= YL; j++) - { - - match = F[i-1][j-1] + CL->M[x[i-1]-'A'][y[j-1]-'A']; - gap_inY= F[i-1][j] + (CL->M[x[i-1]-'A'][gap]*(j==YL)?factor:1); - gap_inX= F[i][j-1] + (CL->M[y[j-1]-'A'][gap]*(i==XL)?factor:1); - - if ( match >= gap_inY && match >=gap_inX){F[i][j]=match; T[i][j]=MXY;} - else if ( gap_inX>=gap_inY){F[i][j]=gap_inX; T[i][j]=GX;} - else {F[i][j]=gap_inY; T[i][j]=GY;} - } - } - /*Trace back stage*/ - A->score=A->score_aln=F[XL][YL]; - - i = XL; - j = YL; - len=0; - while(!(i==0 && j==0)) - { - - if (T[i][j]==MXY) - { - ax[len]=x[--i]; - ay[len]=y[--j]; - } - else if ( T[i][j]==GY) - { - ax[len]=x[--i]; - ay[len]='-'; - } - else if ( T[i][j]==GX) - { - ax[len]='-'; - ay[len]=y[--j]; - } - len++; - } - ax[len]='\0'; - ay[len]='\0'; - - ix=invert_string (ax); iy=invert_string(ay); - A=realloc_aln (A,len+1); - - sprintf ( A->seq_al[l_s[0][0]], "%s", ix); - sprintf ( A->seq_al[l_s[1][0]], "%s", iy); - A->nseq=2; - A->len_aln=len; - - vfree (ax); vfree(ay);vfree(ix); vfree(iy); free_double(F, -1); free_int (T, -1); - return A->score; -} - -float pavie_aln2id ( Alignment *A, int mode) -{ - int a, id=0, match=0, l1=0, l2=0, r1, r2, is_res1, is_res2; - - - - for (a=0; alen_aln; a++) - { - r1=A->seq_al[0][a]; - r2=A->seq_al[1][a]; - - - is_res1=(!is_gap(r1) && r1!='x' && r1!='X')?1:0; - is_res2=(!is_gap(r2) && r2!='x' && r2!='X')?1:0; - - l1+=is_res1; - l2+=is_res2; - - - if ( is_res1 && is_res2 ) - { - match++; - id+=(r1==r2)?1:0; - } - } - - - if ( mode==1)return (match==0)?0:((id*100)/match); - else if (mode ==2) return (A->len_aln==0)?0:((id*100)/A->len_aln); - else if (mode ==3) return (MIN(l1,l2)==0)?0:((id*100)/(MIN(l1,l2))); - else if (mode ==4) return (MAX(l1,l2)==0)?0:((id*100)/(MAX(l1,l2))); - else if (mode ==5)return (A->score_aln * -1)/*/PAVIE_MAT_FACTOR*/; - else if (mode ==6)return ((MAX(l1,l2)==0)?0:((A->score_aln)/(MAX(l1,l2))))*-1; - else - { - fprintf ( stderr, "\nUnknown Mode [pavie_aln2id:FATAL:%s]", PROGRAM); - myexit (EXIT_FAILURE); - return EXIT_FAILURE; - } - -} - -int check_pavie_cl ( char *string) -{ - if ( !string || string[0]=='\0' ) return 1; - else if (( string[0]!='_') ||string [strlen (string)-1]!='_') - { - fprintf ( stderr, "ERROR: parameters must start and finish with an underscore: _parameters_ [FATAL:%s]\n", PROGRAM); - myexit (EXIT_FAILURE); - } - return 1; -} - -Alignment *pavie_seq2pavie_sort ( Sequence *S, char *mat, char *mode) -{ - int a, b, c=0, avg_c; - int **avg; - double **dm; - Alignment *A=NULL; - char **new_order; - - if ( vstrstr (mode, "_IDSORT_") || vstrstr (mode, "_MASTERSORT")) - { - char *buf; - buf=vcat ( mode, "_MATDIST_NOPRINT_"); - dm=pavie_seq2pavie_aln (S, mat,buf); - avg=declare_int (S->nseq, 2); - if ( vstrstr (mode,"_IDSORT_")) - { - - for ( a=0; a< S->nseq; a++) - { - avg[a][0]=a; - for ( b=0; bnseq; b++) - if ( b!=a)avg[a][1]+=(int) dm[a][b]; - avg[a][1]/=S->nseq-1; - } - sort_int ( avg, 2, 1, 0, S->nseq-1); - - c=avg[0][0]; - avg_c=avg[0][1]; - fprintf ( stderr, "\nAVG\t %s\t %s\t %d",S->name[c],"avg", avg_c); - } - else if ( vstrstr (mode, "_MASTERSORT")) - { - char name[100]; - char *s; - s=vstrstr(mode, "_MASTERSORT"); - mode=substitute ( mode, "_", " "); - sscanf (s, " MASTERSORT%s", name); - - mode=substitute (mode, " ", "_"); - c=name_is_in_list ( name, S->name, S->nseq, 100); - - if ( c==-1) - { - fprintf ( stderr, "\nERROR: Sequence %s is not in the dataset [FATAL:%s]", name, PROGRAM); - myexit (EXIT_FAILURE); - } - } - - for ( a=0; anseq; a++) - { - avg[a][0]=a; - if ( a!=c)avg[a][1]=dm[c][a]; - else avg[a][1]=-1; - } - - sort_int ( avg, 2, 1, 0, S->nseq-1); - - new_order=declare_char ( S->nseq, 100); - sprintf (new_order[0], "%s", S->name[c]); - for ( a=1; anseq; a++) - { - sprintf ( new_order[a], "%s", S->name[avg[a][0]]); - - fprintf ( stderr, "\nTOP\t %s\t %s\t %d", S->name[c],new_order[a] , avg[a][1]); - } - - fprintf ( stderr, "\n"); - A=seq2aln (S, NULL,RM_GAP); - A=reorder_aln (A, new_order, A->nseq); - vfree ( buf); - free_double(dm, -1);free_int (avg, -1);free_char (new_order, -1); - } - else if ( vstrstr ( mode, "_TREESORT_")) - { - A=pavie_seq2pavie_msa (S, mat, mode); - } - else - { - fprintf ( stderr, "\nERROR: pavie_seq2sort <_IDSORT_ | _TREESORT_>"); - } - return A; - -} -NT_node pavie_seq2pavie_tree (Sequence *S, char *mat, char *mode) -{ - double **dm; - char *tree_name,*buf; - - - buf=vcat (mode,"_MATDIST_NOPRINT_"); - dm=pavie_seq2pavie_aln (S, mat,buf); - dist2nj_tree (dm,S->name, S->nseq,tree_name=vtmpnam (NULL)); - - free_double(dm, -1);vfree (buf); - - return main_read_tree (tree_name); -} - -Alignment* pavie_seq2pavie_msa ( Sequence *S, char *mat_in, char *mode) -{ - Constraint_list *CL; - char **alp, *s; - Alignment *A; - NT_node **FT, T; - int a; - char mat[100]; - - - A=seq2aln (S, NULL, RM_GAP); - CL=declare_constraint_list (S, NULL, NULL, 0, NULL, NULL); - sprintf ( CL->dp_mode, "myers_miller_pair_wise"); - sprintf ( CL->tree_mode, "nj"); - sprintf ( CL->distance_matrix_mode, "idscore"); - CL=choose_extension_mode ("matrix", CL); - - if ( !is_matrix (mat_in)) - { - FILE *fp; - fp=vfopen ( mat_in, "r"); - fscanf (fp, "%s", mat); - vfclose (fp); - add_warning( stderr, "\nWarning: Multiple Channel Not Supported. Used First Channel Only for MSA [Matrix: %s][WARNING:%s]", mat, PROGRAM); - } - else - { - sprintf ( mat, "%s", mat_in); - } - - CL->M=read_matrice (mat); - CL->gop=0; - - alp=seq2pavie_alp (S, 1); - CL->gep=paviemat2gep(CL->M, alp[0]); - - - CL->pw_parameters_set=1; - CL->local_stderr=stderr; - - if ( vstrstr (mode, "_QUICKTREE_")) - { - FT=make_tree (A, CL, CL->gop, CL->gep,S, NULL,MAXIMISE); - T=FT[3][0]; - } - else if ( (s=vstrstr (mode, "_USETREE"))) - { - char fname[100]; - mode=substitute ( mode, "_", " "); - sscanf (s, " USETREE%s", fname); - mode=substitute (mode, " ", "_"); - T=main_read_tree (fname); - } - else - { - T=pavie_seq2pavie_tree ( S, mat_in, mode); - } - - for ( a=0; a< A->nseq; a++)ungap (A->seq_al[a]); - - tree_aln (T->left,T->right,A,(CL->S)->nseq, CL); - A=reorder_aln ( A,A->tree_order,A->nseq); - - return A; -} -/*********************************COPYRIGHT NOTICE**********************************/ -/*© Centro de Regulacio Genomica */ -/*and */ -/*Cedric Notredame */ -/*Tue Oct 27 10:12:26 WEST 2009. */ -/*All rights reserved.*/ -/*This file is part of T-COFFEE.*/ -/**/ -/* T-COFFEE is free software; you can redistribute it and/or modify*/ -/* it under the terms of the GNU General Public License as published by*/ -/* the Free Software Foundation; either version 2 of the License, or*/ -/* (at your option) any later version.*/ -/**/ -/* T-COFFEE is distributed in the hope that it will be useful,*/ -/* but WITHOUT ANY WARRANTY; without even the implied warranty of*/ -/* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the*/ -/* GNU General Public License for more details.*/ -/**/ -/* You should have received a copy of the GNU General Public License*/ -/* along with Foobar; if not, write to the Free Software*/ -/* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA*/ -/*............................................... |*/ -/* If you need some more information*/ -/* cedric.notredame@europe.com*/ -/*............................................... |*/ -/**/ -/**/ -/* */ -/*********************************COPYRIGHT NOTICE**********************************/ diff --git a/binaries/src/tcoffee/t_coffee_source/pb_util_read_seq_util.c b/binaries/src/tcoffee/t_coffee_source/pb_util_read_seq_util.c deleted file mode 100644 index f8c6c44..0000000 --- a/binaries/src/tcoffee/t_coffee_source/pb_util_read_seq_util.c +++ /dev/null @@ -1,374 +0,0 @@ -#include -#include -#include -#include -#include -#include - -#include "io_lib_header.h" -#include "util_lib_header.h" -#include "define_header.h" - - -/* -* Prototypes -*/ - - -void fatal(char *,...); -void error(char *,...); -void warning(char *,...); -char *rtrim(char *); -char *blank_to_(char *); /* DES change blanks to _ */ -char *upstr(char *); -char *lowstr(char *); -void getstr(char *,char *); -double getreal(char *,double,double,double); -int getint(char *,int,int,int); -void do_system(void); -Boolean linetype(char *,char *); -Boolean blankline(char *); -void get_path(char *,char *); - -/* -* ckalloc() -* -* Tries to allocate "bytes" bytes of memory. Exits program if failed. -* Return value: -* Generic pointer to the newly allocated memory. -*/ - -void *ckalloc(size_t bytes) -{ - register void *ret; - extern void *vcalloc (size_t nelem, size_t elsize); - - if( (ret = vcalloc(bytes, sizeof(char))) == NULL) -/* - if( (ret = vmalloc(bytes)) == NULL) -*/ - fatal("Out of memory\n"); - else - return ret; - return ret; -} - -/* -* ckvrealloc() -* -* Tries to vreallocate "bytes" bytes of memory. Exits program if failed. -* Return value: -* Generic pointer to the re-allocated memory. -*/ - -void *ckvrealloc(void *ptr, size_t bytes) -{ - register void *ret; - extern void *vrealloc (void *ptr, size_t size); - - if( (ret = vrealloc(ptr, bytes)) == NULL) - fatal("Out of memory\n"); - else - return ret; - return ret; -} - -/* -* ckfree() -* -* Tries to free memory allocated by ckalloc. -* Return value: -* None. -*/ - -void ckfree(void *ptr) -{ - vfree(ptr); -} - - -/* -* fatal() -* -* Prints error msg to stderr and exits. -* Variadic parameter list can be passed. -* -* Return values: -* none -*/ - -void fatal( char *msg,...) -{ - va_list ap; - - va_start(ap,msg); - fprintf(stderr,"\n\nFATAL ERROR: "); - vfprintf(stderr,msg,ap); - fprintf(stderr,"\n\n"); - va_end(ap); - myexit(EXIT_FAILURE); -} - -/* -* error() -* -* Prints error msg to stderr. -* Variadic parameter list can be passed. -* -* Return values: -* none -*/ - -void error( char *msg,...) -{ - va_list ap; - - va_start(ap,msg); - fprintf(stderr,"\n\nERROR: "); - vfprintf(stderr,msg,ap); - fprintf(stderr,"\n\n"); - va_end(ap); -} - -/* -* warning() -* -* Prints warning msg to stderr. -* Variadic parameter list can be passed. -* -* Return values: -* none -*/ - -void warning( char *msg,...) -{ - va_list ap; - - va_start(ap,msg); - fprintf(stderr,"\n\nWARNING: "); - vfprintf(stderr,msg,ap); - fprintf(stderr,"\n\n"); - va_end(ap); -} - - -/* -* rtrim() -* -* Removes trailing blanks from a string -* -* Return values: -* Pointer to the processed string -*/ - -char * rtrim(char *str) -{ - register int p; - - p = strlen(str) - 1; - - while ( isspace(str[p]) ) - p--; - - str[p + 1] = EOS; - - return str; -} - - -/* -* blank_to_() -* -* Replace blanks in a string with underscores -* -* Also replaces , ; : ( or ) with _ -* -* Return value: -* Pointer to the processed string -*/ - -char * blank_to_(char *str) -{ - int i,p; - - - p = strlen(str) - 1; - for(i=0;i<=p;i++) - { - if( strrchr(";,():",str[i]))str[i]='_'; - else if (isspace(str[i])); - } - return str; -} - - -/* -* upstr() -* -* Converts string str to uppercase. -* Return values: -* Pointer to the converted string. -*/ - -char * upstr(char *str) -{ - register char *s = str; - - while( (*s = toupper(*s)) ) - s++; - - return str; -} - -/* -* lowstr() -* -* Converts string str to lower case. -* Return values: -* Pointer to the converted string. -*/ - -char * lowstr(char *str) -{ - register char *s = str; - - while( (*s = tolower(*s)) ) - s++; - - return str; -} - -void getstr(char *instr,char *outstr) -{ - fprintf(stdout,"%s: ",instr); - fgets(outstr, 100, stdin); -} - -double getreal(char *instr,double minx,double maxx,double def) -{ - int status; - double ret; - char line[MAXLINE]; - - while(TRUE) { - fprintf(stdout,"%s (%.1lf-%.1lf) [%.1lf]: ",instr,minx,maxx,def); - fgets(line, MAXLINE, stdin); - status=sscanf(line,"%lf",&ret); - if(status == EOF) return def; - if(ret>maxx) { - fprintf(stderr,"ERROR: Max. value=%.1lf\n\n",maxx); - continue; - } - if(retmaxx) { - fprintf(stderr,"ERROR: Max. value=%d\n\n",(pint)maxx); - continue; - } - if(ret-1;--i) { - if(str[i]==DIRDELIM) { - i = -1; - break; - } - if(str[i]=='.') break; - } - if(i<0) - strcat(path,"."); - else - path[i+1]=EOS; -} -/*********************************COPYRIGHT NOTICE**********************************/ -/*© Centro de Regulacio Genomica */ -/*and */ -/*Cedric Notredame */ -/*Tue Oct 27 10:12:26 WEST 2009. */ -/*All rights reserved.*/ -/*This file is part of T-COFFEE.*/ -/**/ -/* T-COFFEE is free software; you can redistribute it and/or modify*/ -/* it under the terms of the GNU General Public License as published by*/ -/* the Free Software Foundation; either version 2 of the License, or*/ -/* (at your option) any later version.*/ -/**/ -/* T-COFFEE is distributed in the hope that it will be useful,*/ -/* but WITHOUT ANY WARRANTY; without even the implied warranty of*/ -/* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the*/ -/* GNU General Public License for more details.*/ -/**/ -/* You should have received a copy of the GNU General Public License*/ -/* along with Foobar; if not, write to the Free Software*/ -/* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA*/ -/*............................................... |*/ -/* If you need some more information*/ -/* cedric.notredame@europe.com*/ -/*............................................... |*/ -/**/ -/**/ -/* */ -/*********************************COPYRIGHT NOTICE**********************************/ diff --git a/binaries/src/tcoffee/t_coffee_source/pb_util_read_sequence.c b/binaries/src/tcoffee/t_coffee_source/pb_util_read_sequence.c deleted file mode 100644 index 8f684df..0000000 --- a/binaries/src/tcoffee/t_coffee_source/pb_util_read_sequence.c +++ /dev/null @@ -1,562 +0,0 @@ -/********* Sequence input routines for CLUSTAL W *******************/ -/* DES was here. FEB. 1994 */ -/* Now reads PILEUP/MSF and CLUSTAL alignment files */ - -#include -#include -#include -#include -#include "io_lib_header.h" -#include "util_lib_header.h" -#include "define_header.h" - -/* -* Prototypes -*/ - -extern Boolean linetype(char *,char *); -extern Boolean blankline(char *); -extern void warning(char *,...); -extern void error(char *,...); -extern char * rtrim(char *); -extern char * blank_to_(char *); -extern void getstr(char *,char *); - -void fill_chartab(void); - - -static void get_seq(char *,char *,int *,char *); -static void get_clustal_seq(char *,char *,int *,char *,int); -static void get_msf_seq(char *,char *,int *,char *,int); -static void check_infile(int *); - - - - -static int count_clustal_seqs(void); -static int count_msf_seqs(void); - -/* - * Global variables - */ - -static FILE *fin; - - -char *amino_acid_codes = "ABCDEFGHIKLMNPQRSTUVWXYZ-"; /* DES */ -char *nucleic_acid_order = "ACGTUN"; -static int seqFormat; -static char chartab[128]; - -void fill_chartab(void) /* Create translation and check table */ -{ - register int i; - register int c; - - - for(i=0;i<128;chartab[i++]=0); - for(i=0,c=0;c<=amino_acid_codes[i];i++) - chartab[c]=chartab[tolower(c)]=c; - -} - -static void get_msf_seq(char *sname,char *seq,int *len,char *tit,int seqno) -/* read the seqno_th. sequence from a PILEUP multiple alignment file */ -{ - static char *line; - int i,j,k; - unsigned char c; - if ( !line)line=vcalloc ( (MAXLINE+1), sizeof (char)); - - fseek(fin,0,0); /* start at the beginning */ - - *len=0; /* initialise length to zero */ - for(i=0;;i++) { - if(fgets(line,MAXLINE+1,fin)==NULL) return; /* read the title*/ - if(linetype(line,"/") ) break; /* lines...ignore*/ - } - - while (fgets(line,MAXLINE+1,fin) != NULL) { - if(!blankline(line)) { - - for(i=1;i') - break; /* EOL */ - - if( (c=chartab[c])) - {seq[++(*len)]=c; - } - } - if(*len == SEQ_MAX_LEN || c == '>') break; - } - break; -/**********************************************/ - case GDE: - - while(*line != '#' ||*line != '%' ) - fgets(line,MAXLINE+1,fin); - - - - - for (i=1;i<=MAXNAMES;i++) { - if (line[i] == '(' || line[i] == '\n') - { - i--; - break; - } - sname[i-1] = line[i]; - } - sname[i]=EOS; - offset=0; - if (sname[i-1] == '(') sscanf(&line[i],"%d",&offset); - else offset = 0; - for(i=MAXNAMES-1;i > 0;i--) - if(isspace(sname[i])) { - sname[i]=EOS; - break; - } - blank_to_(sname); - - - *tit=EOS; - - *len=0; - for (i=0;i SEQ_MAX_LEN) - { - error("Sequence too long. Maximum is %d",(pint)SEQ_MAX_LEN); - return 0; /* also return zero if too many */ - } - - - - for ( a=0; a') { /* no */ - seqFormat=(line[3] == ';')?PIR:PEARSON; /* distinguish PIR and Pearson */ - (*nseqs)++; - } - else if((*line == '"') || (*line == '%') || (*line == '#')) { - seqFormat=GDE; /* GDE format */ - if (*line == '%') { - (*nseqs)++; - - } - else if (*line == '#') { - (*nseqs)++; - - } - } - else { - seqFormat=UNKNOWN; - return; - } - - while(fgets(line,MAXLINE+1,fin) != NULL) { - switch(seqFormat) { - case EMBLSWISS: - if( linetype(line,"ID") ) - (*nseqs)++; - break; - case PIR: - case PEARSON: - if( *line == '>' ) - (*nseqs)++; - break; - case GDE: - if(( *line == '%' ) ) - (*nseqs)++; - else if (( *line == '#') ) - (*nseqs)++; - break; - case CLUSTAL: - *nseqs = count_clustal_seqs(); -/* DES */ /* fprintf(stdout,"\nnseqs = %d\n",(pint)*nseqs); */ - fseek(fin,0,0); - return; - break; - case MSF: - *nseqs = count_msf_seqs(); - fseek(fin,0,0); - return; - break; - case USER: - default: - break; - } - } - fseek(fin,0,0); -} - - -static int count_clustal_seqs(void) -/* count the number of sequences in a clustal alignment file */ -{ - static char *line; - int nseqs; - - if ( !line)line=vcalloc ( (MAXLINE+1), sizeof (char)); - - while (fgets(line,MAXLINE+1,fin) != NULL) { - if(!blankline(line)) break; /* Look for next non- */ - } /* blank line */ - nseqs = 1; - - while (fgets(line,MAXLINE+1,fin) != NULL) { - if(blankline(line)) return nseqs; - nseqs++; - } - - return 0; /* if you got to here-funny format/no seqs.*/ -} - -static int count_msf_seqs(void) -{ -/* count the number of sequences in a PILEUP alignment file */ - - static char *line; - int nseqs; - - if ( !line)line=vcalloc ( (MAXLINE+1), sizeof (char)); - - while (fgets(line,MAXLINE+1,fin) != NULL) { - if(linetype(line,"/")) break; - } - - while (fgets(line,MAXLINE+1,fin) != NULL) { - if(!blankline(line)) break; /* Look for next non- */ - } /* blank line */ - nseqs = 1; - - while (fgets(line,MAXLINE+1,fin) != NULL) { - if(blankline(line)) return nseqs; - nseqs++; - } - - return 0; /* if you got to here-funny format/no seqs.*/ -} - - - -/*********************************COPYRIGHT NOTICE**********************************/ -/*© Centro de Regulacio Genomica */ -/*and */ -/*Cedric Notredame */ -/*Tue Oct 27 10:12:26 WEST 2009. */ -/*All rights reserved.*/ -/*This file is part of T-COFFEE.*/ -/**/ -/* T-COFFEE is free software; you can redistribute it and/or modify*/ -/* it under the terms of the GNU General Public License as published by*/ -/* the Free Software Foundation; either version 2 of the License, or*/ -/* (at your option) any later version.*/ -/**/ -/* T-COFFEE is distributed in the hope that it will be useful,*/ -/* but WITHOUT ANY WARRANTY; without even the implied warranty of*/ -/* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the*/ -/* GNU General Public License for more details.*/ -/**/ -/* You should have received a copy of the GNU General Public License*/ -/* along with Foobar; if not, write to the Free Software*/ -/* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA*/ -/*............................................... |*/ -/* If you need some more information*/ -/* cedric.notredame@europe.com*/ -/*............................................... |*/ -/**/ -/**/ -/* */ -/*********************************COPYRIGHT NOTICE**********************************/ diff --git a/binaries/src/tcoffee/t_coffee_source/perl_header_lib.h b/binaries/src/tcoffee/t_coffee_source/perl_header_lib.h deleted file mode 100644 index 68711d6..0000000 --- a/binaries/src/tcoffee/t_coffee_source/perl_header_lib.h +++ /dev/null @@ -1,5955 +0,0 @@ -char *PerlScriptName[]={"rec_sum.pl","count.pl","p\ -rocess_list.pl","make_license.pl","CCsed.script","\ -msa2bootstrap.pl","t_coffee_dpa","t_coffee_dpa2","\ -tc_generic_method.pl","generic_method.tc_method","\ -clustalw_method.tc_method","extract_from_pdb","ins\ -tall.pl","clean_cache.pl","mocca","dalilite.pl","w\ -ublast.pl","blastpgp.pl","RNAplfold2tclib.pl","fas\ -ta_seq2RNAplfold_templatefile.pl","fasta_seq2hmmto\ -p_fasta.pl","fasta_seq2consan_aln.pl","clustalw_al\ -n2fasta_aln.pl","msf_aln2fasta_aln.pl","blast_aln2\ -fasta_aln.pl","blast_xml2fasta_aln.pl","fasta_aln2\ -fasta_aln_unique_name.pl","newick2name_list.pl","e\ -xcel2fasta.pl","any_file2unix_file.pl","EndList"};\ -char *PerlScriptFile[]={"use File::Copy;\nuse Env \ -qw(HOST);\nuse Env qw(HOME);\nuse Env qw(USER);\n$\ -x_field=0;\n$y_field=1;\n$interval=0;\n$file=\"std\ -in\";\n$print_avg=1;\n$print_sd=0;\n$print_sum=0;\\ -n$print_n=0;\nforeach $value ( @ARGV)\n {\n if \ -($value ne $ARGV[$np]) \n {\n ;\n }\n \ -elsif($value eq \"-print_all\")\n {\n $pri\ -nt_sd=$print_avg=$print_n=$print_sum=1;\n $np+\ -+;\n }\n elsif($value eq \"-print_sum\")\n \ - {\n $print_sum=1;\n $print_avg=0;\n $\ -np++;\n }\n elsif($value eq \"-print_n\")\n \ - {\n $print_n=1;\n $print_avg=0;\n $n\ -p++;\n }\n elsif($value eq \"-print_avg\")\n \ - {\n $print_avg=1;\n $print_avg=0;\n \ - $np++;\n }\n elsif($value eq \"-sd\")\n {\ -\n $print_sd=1;\n $print_avg=0;\n $np+\ -+;\n }\n elsif($value eq \"-h\")\n {\n \ - $header=1;\n $np++;\n }\n elsif ($value e\ -q \"-i\")\n {\n $interval= $ARGV[++$np];\n\ - $np++;\n }\n elsif ($value eq \"-r\")\ -\n {\n $min= $ARGV[++$np];\n $max= $AR\ -GV[++$np];\n $np++;\n }\n \n elsif ($v\ -alue eq \"-x\")\n {\n $x_field= $ARGV[++$n\ -p]-1;\n $np++;\n }\n elsif ($value eq \ -\"-y\")\n {\n \n while ($ARGV[$np+1]\ - && !($ARGV[$np+1]=~/\\-/))\n {\n $y_field[\ -$nyf++]=$ARGV[++$np]-1;\n $y_field_set=1;\n \ - }\n\n $np++;\n }\n elsif ($value eq \\ -"-file\")\n {\n $file= $ARGV[++$np];\n \ - $file_set=1;\n $np++;\n } \n el\ -sif ( $value eq \"h\" || $value eq \"-h\" || $val\ -ue eq \"-H\" || $value eq \"-help\" || $value eq \\ -"help\")\n {\n print STDOUT \"data_analyse: \ -Analyse and discretization of data\\n\";\n pri\ -nt STDOUT \" -file: ,.\\n\";\n print ST\ -DOUT \" -x: ,.......\ -........\\n\";\n print STDOUT \" \ --y: ,...............\\n\";\n print STDOUT \" -i:,...............\\n\";\n p\ -rint STDOUT \" -i:<0:only one interval>\\n\"\ -;\n print STDOUT \" -r:\\ -\n\";\n print STDOUT \" -sd: print stand\ -ard deviation on the Y\";\n print STDOUT \" \ - -h : print column header \\n\";\n exit (0\ -);\n }\n elsif ($value=~/-/)\n {\n print \\ -"$value is not a valid FLAG[FATAL]\\n\";\n exi\ -t (0);\n } \n elsif ($list eq \"\") \n {\n \ - $file=$ARGV[$np];\n $np++;\n }\n \n \n\ - }\n\n\n\n\n\nif ($file eq \"stdin\")\n {\n $\ -remove_file=1;\n $file=\"tmp$$\";\n open (F, \">$f\ -ile\");\n while ()\n {\n print F $_;\n }\ -\n close (F);\n \n ;}\n\n\nopen(F,$file);\n\nif (\ -$interval)\n {\n $interval_size=($max-$min)/$i\ -nterval;\n }\nwhile ()\n {\n $line=$_;\n \ - if (!/\\S/){next;}\n @list=($line=~/(\\S+)/g)\ -;\n \n if ($interval==0){$bin=0;}\n else{\ -$bin=int (($list[$x_field]-$min)/($interval_size))\ -;}\n\n \n if ($bin && $bin==$interval){$bin-\ --;}\n for ( $a=0; $a<$nyf; $a++)\n {\n $su\ -m{$a}{$bin}+=$list[$y_field[$a]];\n $sum2{$a}{$bin\ -}+=$list[$y_field[$a]]*$list[$y_field[$a]];\n $n{$\ -a}{$bin}++;\n }\n }\n\nif (!$interval){$inte\ -rval=1;}\nfor ( $a=0; $a<$interval; $a++)\n {\n \ - printf ( \"%3d %3d \", $interval_size*$a, $inter\ -val_size*($a+1));\n for ( $b=0; $b<$nyf; $b++) \ -\n {\n $i=$interval*$a;\n if ( $n{$b}{$a}==0)\ -\n {\n $avg=0;\n $sd=0;\n }\n else\n \ - {\n $avg=$sum{$b}{$a}/$n{$b}{$a};\n $sd=s\ -qrt($sum2{$b}{$a}*$n{$b}{$a}-$sum{$b}{$a}*$sum{$b}\ -{$a})/($n{$b}{$a}*$n{$b}{$a});\n }\n if ($print_\ -n) {printf \"%10.4f \", $n{$b}{$a};}\n if ($print_\ -sum){printf \"%10.4f \", $sum{$b}{$a};}\n if ($pri\ -nt_avg){printf \"%10.4f \", $avg}\n if ($print_sd)\ - {printf \"%10.4f \", $sd;}\n }\n printf (\ -\"\\n\");\n }\n\n\nif ( $remove_file){unlink $fil\ -e;}\n","use File::Copy;\nuse Env qw(HOST);\nuse En\ -v qw(HOME);\nuse Env qw(USER);\n\nforeach $v (@ARG\ -V){$cl.=$v;}\n\n\nif ( $cl=~/-k(\\d+)/){$k=$1;}\ne\ -lse {$k=1;}\nif ( $cl=~/-w(\\d+)/){$w=$1;}\nelse {\ -$w=-1;}\nif ( $cl=~/-p(\\d+)/){$p=$1;}\nelse {$p=-\ -1;}\n\nwhile ()\n {\n @l=($_=~/(\\S+)/g\ -);\n $v=$l[$k-1];\n if ( !$h{$v}){@ll=($v, @\ -ll);}\n \n if ( $w==-1)\n {$h{$v}++;}\n\ - else\n {$h{$v}+=$l[$w-1];}\n\n if ($p!\ -=-1){$print{$v}=$l[$p-1];}\n\n }\nforeach $v (@ll\ -)\n {\n print \"$v $print{$v} $h{$v}\\n\";\n \ -}\n","\nuse Env qw(HOST);\nuse Env qw(HOME);\nuse \ -Env qw(USER);\n$random_tag=int (rand 10000)+1;\n$u\ -nique_prefix=\"$$.$HOST.$random_tag\";\n$queue=\"d\ -istillery.and.mid\";\n$monitor=0;\n$stderr_file=\"\ -/dev/null\";\n$stdio_file=\"/dev/null\";\n$log_fil\ -e=\"/dev/null\";\n$pause_time=0;\n$max_sub_jobs=60\ -;\n$min_sub_jobs=30;\n$output_all=0;\n$var='\\$';\\ -n\nforeach $value ( @ARGV)\n {\n if ($value ne \ -$ARGV[$np]) \n {\n ;\n }\n elsif ($val\ -ue eq \"-max_sub_jobs\")\n {\n $max_sub_jo\ -bs= $ARGV[++$np];\n $np++;\n } \n elsi\ -f ($value eq \"-min_sub_jobs\" )\n {\n $mi\ -n_sub_jobs= $ARGV[++$np];\n $np++;\n }\ -\n elsif ($value eq \"-para\")\n {\n $para\ -=1;\n $monitor=1;\n $np++;\n }\n e\ -lsif ($value eq \"-monitor\") \n {\n $moni\ -tor=1;\n $np++;\n }\n elsif ($value eq \"-\ -no_monitor\") \n {\n $monitor=0;\n $np\ -++;\n }\n elsif ($value eq \"-queue\")\n {\ -\n $queue=$ARGV[++$np];\n $np++;\n } \\ -n elsif ($value eq \"-stderr_file\")\n {\n \ - $stderr_file=$ARGV[++$np];\n $np++;\n }\n\ - elsif ($value eq \"-stdio_file\")\n {\n $\ -stdio_file=$ARGV[++$np];\n $np++;\n }\n el\ -sif ($value eq \"-output_all\")\n {\n $out\ -put_all=1;\n $np++;\n }\n elsif ($value eq\ - \"-pause\") \n {\n $pause_time=$ARGV[++$n\ -p];\n $np++;\n }\n elsif ($value eq \"-log\ -\")\n {\n $log=1;\n \n \ -if ($ARGV[$np+1]=~/\\-\\S+/) \n {\n $\ -log_file=\"stderr\";\n }\n else \\ -n {\n $log_file=$ARGV[++$np]; \n +\ -+$np;\n \n }\n }\n elsif ( $valu\ -e eq \"-com\")\n {\n \n if (!$ARGV[$np+1]=~/\ -^\\'/) { $com=$ARGV[++$np];}\n else {$com=$ARGV[+\ -+$np];}\n\n $np++;\n }\n elsif ( $value e\ -q \"-check\")\n {\n \n if (!$ARGV[$np+1]\ -=~/^\\'/) { $check=$ARGV[++$np];}\n else {$che\ -ck=$ARGV[++$np];}\n $np++;\n }\n elsif ($com\ - eq \"\") \n {\n $com_set=1;\n $com=$A\ -RGV[$np];\n \n $np++;\n }\n elsif ($li\ -st eq \"\") \n {\n $list_set=1;\n $lis\ -t=$ARGV[$np];\n $np++;\n }\n elsif ( $var_\ -set eq \"\")\n {\n $var_set=1;\n $var=\ -$ARGV[$np];\n $np++;\n }\n }\n\n\n\n\nif (\ - $com eq \"\"){print \"You Need to Provide a Comma\ -nd [FATAL]\\n\";\n die;\n }\n\n\n\nif (\ -$list_set==0) \n {\n $x= int (rand 100000)+1\ -;\n $tmp_file_name=\"tmp_file_$x\";\n open (\ - TMP, \">$tmp_file_name\");\n while ()\n\ - {\n print TMP $_;\n }\n close (TMP);\ -\n open (F, $tmp_file_name);\n }\nelse \n \ - {\n open (F, $list);\n }\n\nif ($para==0) \\ -n {\n\n @tc_list= ;\n close (F); \n \ - \n foreach $val(@tc_list) \n {\n \ - \n \n \n $loc_com=$com;\n \ - if ($check){$loc_check=$check;}\n \n \ -@i_val=($val=~/([^\\s]+)/g);\n \n if (\ - $#i_val==0)\n {\n if ($check){$loc_check=~s/$\ -var/$i_val[0]/g;}\n $loc_com=~s/$var/$i_val[0]/\ -g;\n }\n else\n {\n for ($n=1; $n<=$#i_\ -val+1;$n++ )\n {\n \n $sub=\"$v\ -ar$n\";\n \n $loc_com=~s/$sub/$i_val\ -[$n-1]/g;\n if ($check){$loc_check=~s/$var/\ -$i_val[0]/g;}\n }\n }\n if ( $check &&\ - -e $loc_check)\n {\n print STDERR \"skipping \ -$loc_com...\\n\";\n }\n else\n {\n sy\ -stem \"$loc_com\";\n }\n }\n exit;\n }\\ -n\nelsif ($para==1) \n {\n print STDERR \"do\ - parallel execution of: \\\"$com $list\\\"\\n\";\n\ - \n if ($log==1) \n {\n if ($log_file eq \"s\ -tdout\" || $log_file eq \"stderr\" ) \n {\n $log\ -_file=\"\";\n }\n\n else \n {\n s\ -ystem \"echo LOG FILE> $log_file\";\n \n \ -}\n }\n else \n {\n open ( OUT, \">/dev/null\")\ -;\n }\n \n \n $id=0;\n $n_sub=0;\n whi\ -le ($val=) \n { \n $job_log[$i\ -d]=\"$HOME/tmp/$unique_prefix.$id.log_file\";\n \ - \n $job=$unique_prefix.\"_$id\";\n open \ -(JOB, \">$job\");\n \n $loc_com=$com;\n \ - chop $val;\n\n $loc_com=~s/\\$/$val/g;\n \n\ - print JOB \"#!/bin/csh\\n\";\n print JOB \ -\"#\\$ -cwd\\n\";\n print JOB \"#\\$ -N $uniqu\ -e_prefix\\n\";\n if ($queue && !($queue eq \" \ -\")) {print JOB \"#\\$ -l $queue\\n\";}\n prin\ -t JOB \"#\\n\"; \n print JOB \"$loc\ -_com\\n\";\n print JOB \"echo FINISHED >> $jo\ -b_log[$id]\\n\";\n print JOB \"pwd\\n\";\n \ - \n close (JOB);\n if ( $output_all==1)\n \ - {\n system \"qsub $job > $unique_prefix\"; \n \ - }\n else\n {system \"qsub $job -e $st\ -derr_file -o $stdio_file >$unique_prefix\"; \ - \n } \n\n\n\n print STDERR \"$id: $o\ -utput_all\\n\";\n $n_sub++;\n if ( $max_su\ -b_jobs && $n_sub==$max_sub_jobs) \n {\n $n_sub=m\ -onitor_process($min_sub_jobs,@job_log); \n \n \ - } \n \n unlink $unique_prefi\ -x;\n sleep $pause_time;\n $id++;\n }\n\ -\n close (OUT);\n close (F);\n\n print ST\ -DERR \"Your $id Jobs Have Been Submited (NAME=$uni\ -que_prefix)\\n\";\n monitor_process (0, @job_lo\ -g);\n foreach $file(@job_log) {if (-e $file) {u\ -nlink($file);}}\n \n }\n\nsub monitor_proces\ -s ( @job_list)\n {\n my (@job_list)=@_;\n \ - my $min_sub_jobs=shift (@job_list);\n my $n_su\ -b_jobs;\n my $finished;\n my $n=0;\n\n $n\ -_sub_jobs=-1;\n $finished=0;\n print STDERR \ -\"\\nMonitor Batch: [$min_sub_jobs]\";\n \n \ - while (!$finished && (($n_sub_jobs>$min_sub_job\ -s)|| $n_sub_jobs==-1) ) \n {\n $finished=1;\n $n_s\ -ub_jobs=0;\n $n=0;\n foreach $file (@job_list)\n \ - {\n \n if (-e $file){;}\n else \n {\\ -n $finished=0; $n_sub_jobs++;\n }\ -\n }\n system \"sleep 1\";\n }\n \ - \n return $n_sub_jobs;\n }\n \n \nif \ -($tmp_file_name){unlink($tmp_file_name);}\n","\n\n\ -foreach ($np=0; $np<=$#ARGV; $np++)\n {\n $v\ -alue=$ARGV[$np];\n\n if ($value eq \"-file\")\n\ - {\n $file= $ARGV[++$np];\n }\n \ -elsif ($value eq \"-type\")\n {\n $typ\ -e= $ARGV[++$np];\n }\n elsif ($value eq \"\ --institute\")\n {\n $institute= $ARGV[\ -++$np];\n }\n elsif ($value eq \"-author\"\ -)\n {\n $author= $ARGV[++$np];\n \ -}\n elsif ($value eq \"-date\")\n {\n \ - $date= $ARGV[++$np];\n }\n elsif ($val\ -ue eq \"-program\")\n {\n $program= $A\ -RGV[++$np];\n }\n elsif ($value eq \"-emai\ -l\")\n {\n $email= $ARGV[++$np];\n \ - }\n else\n {\n print \"$value is an unko\ -wn argument[FATAL]\\n\";\n exit (1);\n }\n }\ -\n\n\n\nopen F, $file || die;\nprint $INSTITUTE;\n\ -if ( $type eq \"c\"){print \"/********************\ -*************COPYRIGHT NOTICE*********************\ -*************/\\n\";}\nif ( $type eq \"perl\"){pri\ -nt \"#################################COPYRIGHT NO\ -TICE#################################/\\n\";}\nif \ -( $type eq \"txt\"){print \"----------------------\ -------------COPYRIGHT NOTICE----------------------\ ------------/\\n\";}\n\n\nwhile ()\n {\n s/\\$\ -INSTITUTE/$institute/g;\n s/\\$AUTHOR/$author/g;\\ -n s/\\$DATE/$date/g;\n s/\\$PROGRAM/$program/g; \ - \n s/\\$EMAIL/$email/g; \n if ( $type eq \"txt\ -\"){print $_;}\n elsif ($type eq \"c\"){chop $_; \ -print \"\\/*$_*\\/\\n\";}\n elsif ($type eq \"per\ -l\"){print \"\\#$_\";}\n}\nclose (F);\nif ( $type \ -eq \"c\"){print \"/*******************************\ -**COPYRIGHT NOTICE********************************\ -**/\\n\";}\nif ( $type eq \"perl\"){print \"######\ -###########################COPYRIGHT NOTICE#######\ -##########################/\\n\";}\nif ( $type eq \ -\"txt\"){print \"---------------------------------\ --COPYRIGHT NOTICE---------------------------------\ -/\\n\";}\n\n","\nwhile (<>) \n {\n s/\\=cc/1234567\ -89/g;\n s/\\bcc/\\$\\(CC\\)/g;\n s/123456789/\\=cc\ -/g;\n print $_;\n }\n\n","$version=\"1.00\";\n$rse\ -ed= int(rand(100000))+1;\n\n\nif ( $#ARGV==-1)\n \ -{\n print \"msa2bootstrap -i -o -input -n -o -tmode -d\ -mode -alignpg -rtree -stype -recomput\ -e -system \";\n print \"\\n\\t-i: \ -input file, can be sequneces, msa, matrix, trees, \ -type is specified via -input\";\n print \"\\n\\\ -t-input: Type of input data\";\n print \"\\n\\t\ -\\tmsa: msa in fasta format\";\n print \"\\n\\t\ -\\tseq: compute an msa with -alignpg\";\n print\ - \"\\n\\t\\tmatrix: phylipp distance matrix fed di\ -rectly to method -tmode [caveat: tmode=nj or upgma\ -]\";\n print \"\\n\\t\\ttree: list of newick tr\ -ees directly fed to consence in order to generate \ -a bootstraped tree\";\n \n print \"\\n\\t-n:\ - number of bootstrap replicates\";\n print \"\\\ -n\\t-o: name of the output tree. Files are not ove\ -rwritten. Use -recompute to overwrite existing fil\ -e\";\n print \"\\n\\t-tmode: tree mode: nj|upgm\ -a|parsimony|ml\";\n print \"\\n\\t-dmode: dista\ -nce mode\";\n print \"\\n\\t-alignpg: program f\ -or aligning sequences (t_coffee=default)\";\n p\ -rint \"\\n\\t-rtree: replicate tree file (default:\ - no file)\";\n print \"\\n\\t-rmsa: replicate m\ -sa file (default: no file)\";\n print \"\\n\\t-\ -rmat: replicate matrix file (default: no file)\";\\ -n print \"\\n\\t-stype: sequence type: protein,\ - dna or cdna\";\n print \"\\n\\t-recompute: for\ -ce files to be overwritten\";\n print \"\\n\\t-\ -system: cygwin|unix\";\n \n\n \n &my_ex\ -it (EXIT_FAILURE);\n }\nforeach $arg (@ARGV){$com\ -mand.=\"$arg \";}\n\nprint \"CLINE: $command\\n\";\ -\n$threshold=100;\n$trim_msa=0;\n$stype=\"prot\";\\ -nprint \"msa2bootstrap \";\n\n$system=\"cygwin\";\\ -nif(($command=~/\\-system (\\S+)/))\n {\n $sys\ -tem=$1;\n if ( $system eq \"cygwin\")\n {\\ -n $exec_extension=\".exe\";\n }\n elsif ( \ -$system eq \"unix\")\n {\n $exec_extension=\"\ -\";\n print \"system=Unix\";die;\n }\n els\ -e\n {\n print \"msa2boostrap: -system=$system\ - is an unknown mode [FATAL]\\n\"; die;\n }\n \ - \n print \"-system $system \";\n }\nif(($co\ -mmand=~/\\-stype (\\S+)/))\n {\n $stype=$1;\n \ - }\nprint \"-stype=$stype \";\n\n\n\nif(($command=\ -~/\\-i (\\S+)/))\n {\n $msa=$1;\n print \"-\ -i $msa \";\n }\n\nif(($command=~/\\-rtree (\\S+)/\ -))\n {\n $rtree=$1;\n print \"-rtree=$rtree\ - \";\n }\n\nif(($command=~/\\-rmsa (\\S+)/))\n {\ -\n $rmsa=$1;\n }\nif(($command=~/\\-rmat (\\S+\ -)/))\n {\n $rmat=$1;\n }\n$input=\"seq\";\nif\ -(($command=~/\\-input (\\S+)/))\n {\n $input=$\ -1;\n }\nprint \"-input=$input \";\n\n$dmode=\"kim\ -ura\";\nif(($command=~/\\-dmode (\\S+)/))\n {\n \ - $dmode=$1;\n }\nprint \"-dmode=$dmode \";\n$ali\ -gnpg=\"muscle\";\nif(($command=~/\\-alignpg (\\S+)\ -/))\n {\n $alignpg=$1;\n }\nprint \"-alignpg=\ -$dmode \";\n\n$tmode=\"nj\";\nif(($command=~/\\-tm\ -ode (\\S+)/))\n {\n $tmode=$1;\n }\nprint \"-\ -tmode=$tmode \";\n$recompute=0;\nif(($command=~/\\\ --recompute/))\n {\n $recompute=1;\n print \\ -"-recompute \";\n }\n\n$out=$msa;\n$out=~s/\\..*/\ -/;\n$out.=\".bph\";\nif(($command=~/\\-o (\\S+)/))\ -\n {\n $out=$1;\n \n }\nprint \"-out=$out \ -\";\nif (-e $out && !$recompute)\n {\n print \\ -"\\nNo Computation Required $out already exists\\n\ -\";\n &my_exit (EXIT_SUCCESS);\n \n }\n\n$n\ -=100;\nif(($command=~/\\-n (\\d+)/))\n {\n $n=\ -$1;\n }\nprint \"-n=$n \";\n$seed=3;\nif(($comman\ -d=~/\\-s (\\d+)/))\n {\n $seed=$1;\n }\nprint\ - \"-s=$seed\";\n\nif(($command=~/\\-run_name (\\d+\ -)/))\n {\n $suffix=$1;\n }\nelse\n {\n $m\ -sa=~/([^.]+)/;\n $suffix=$1;\n }\nprint \"-run\ -_name=$suffix\";\n\n\nif ( $input eq \"seq\")\n {\ -\n $seq=$msa;\n $msa=\"$suffix.prot_msa\";\n\ - \n if ($stype eq \"cdna\")\n {\n $cdna\ -_seq=$seq;\n $clean_cdna_seq=&vtmpnam();\n $seq=&v\ -tmpnam();\n `t_coffee -other_pg seq_reformat -in $\ -cdna_seq -action +clean_cdna >$clean_cdna_seq`;\n \ -`t_coffee -other_pg seq_reformat -in $clean_cdna_s\ -eq -action +translate >$seq`;\n \n }\n\n i\ -f (!-e $msa || $recompute)\n {\n print \"\\n#\ -#### Compute an MSA With $alignpg\\n\";\n \n if \ -( $alignpg eq \"t_coffee\")\n {`$alignpg $seq -o\ -utfile=$msa >/dev/null 2>/dev/null`;}\n elsif ( $a\ -lignpg eq \"muscle\")\n {\n `$alignpg -in $s\ -eq > $msa 2>/dev/null`;\n }\n elsif ( $alignpg e\ -q \"clustalw\")\n {\n `$alignpg -infile=$seq\ - -outfile=$msa -quicktree >/dev/null 2>/dev/null`;\ -\n }\n elsif ( $align eq \"mafft\")\n {\n \ -`$alignpg $seq > $msa >/dev/null 2>/dev/null`;\n \ - }\n else\n {\n `$alignpg -in=$seq -outfile=\ -$msa`;\n }\n }\n if (!-e $msa)\n {\\ -n print \"\\nError: $alignpg Could Not produce the\ - MSA $msa [FATAL]\\n\";\n }\n\n if ($stype\ - eq \"cdna\")\n {\n $msa2=\"$suffix.cdna_msa\\ -";\n `t_coffee -other_pg seq_reformat -in $clean_c\ -dna_seq -in2 $msa -action +thread_dna_on_prot_aln \ --output fasta_aln >$msa2`;\n $msa=$msa2;\n }\ -\n \n $input=\"msa\";\n }\n\n\n\n$seqboot_o\ -=&vtmpnam();\n$seqboot_c=&vtmpnam();\n\n$protdist_\ -o=&vtmpnam();\n$protdist_c=&vtmpnam();\nif ( $inpu\ -t eq \"msa\")\n {\n if ($tmode eq \"nj\" || $t\ -mode eq \"upgma\"){$input=\"matrix\";}\n \n \ -$lmsa= &vtmpnam ();\n `t_coffee -other_pg seq_r\ -eformat -in $msa -output phylip_aln > $lmsa`;\n \ - \n if ( -e \"outfile\"){unlink (\"outfile\");}\ -\n # run seqboot\n \n if ( $n>1)\n {\n\ - print \"Run SeqBoot .....\";\n open (F, \">$seqbo\ -ot_c\");\n print F \"$lmsa\\nR\\n$n\\nY\\n$seed\\n\ -\";\n close (F);\n `seqboot$exec_extension < $seq\ -boot_c`;\n if ( -e \"outfile\"){ print \"[OK]\\n\"\ -;}\n else { print \"[FAILED]\\n\";&my_exit (EXIT_F\ -AILURE);}\n `mv outfile $seqboot_o`;\n }\n \ - else\n {\n `cp $lmsa $seqboot_o`;\n }\n\ -\n if ($rmsa){`cp $seqboot_o $rmsa`;}\n \n \ - if ($tmode eq \"nj\" || $tmode eq \"upgma\")\n \ - {\n if ( $stype eq \"prot\")\n {\n # run\ - protdist\n print \"Run Protdist [dmode=$dmode\ -]\";\n if ($dmode eq \"kimura\")\n {\n \ -$dmode=\"P\\nP\\nP\";\n }\n else\n \ - {\n print \"\\n$dmode is an unknown mode for Pro\ -tdist [FATAL:msa2bootstrap.pl]\\n\";\n &my_exit (\ -EXIT_FAILURE);\n }\n open (F, \">$protdi\ -st_c\");\n if ($n>1){print F \"$seqboot_o\\n$d\ -mode\\nM\\nD\\n$n\\nY\\n\";}\n else {printf F \ -\"$seqboot_o\\n$dmode\\nY\\n\";}\n close (F);\\ -n `cp $protdist_c pd_comm`;\n `protdist$ex\ -ec_extension < $protdist_c`;\n if ( -e \"outf\ -ile\"){ print \"[OK]\\n\";}\n else { print \"[\ -FAILED]\\n\";&my_exit (EXIT_FAILURE);}\n `mv o\ -utfile $protdist_o`;\n \n }\n elsif ( $stype eq\ - \"cdna\" || $stype eq \"dna\")\n {\n print \ -\"Run dnadist [dmode=default\";\n open (F, \">\ -$protdist_c\");\n if ($n>1){print F \"$seqboot\ -_o\\nM\\nD\\n$n\\nY\\n\";}\n else {printf F \"\ -$seqboot_o\\nY\\n\";}\n close (F);\n `prot\ -dist$exec_extension < $protdist_c`;\n if ( -e\ - \"outfile\"){ print \"[OK]\\n\";}\n else { pr\ -int \"[FAILED]\\n\";&my_exit (EXIT_FAILURE);}\n \ - `mv outfile $protdist_o`;\n }\n }\n }\ne\ -lsif ( $input eq \"matrix\")\n {\n $protdist_o\ -=&vtmpnam();\n print \"MSA: $msa\\n\";\n `cp\ - $msa $protdist_o`;\n $n=1;\n }\n\n\n\n\n\n$nb\ -_o=&vtmpnam();\n$nb_c=&vtmpnam();\nif ($input eq \\ -"matrix\" && $tmode ne \"parsimony\" && $tmode ne \ -\"ml\")\n {\n print \"Run neighbor [tmode=$tmo\ -de]\";\n\n if ($tmode eq \"nj\")\n {\n $tm\ -ode=\"\\nN\\nN\";\n }\n elsif ( $tmode eq \ -\"upgma\")\n {\n $tmode = \"\\nN\";\n }\\ -n else\n {\n print \"\\n ERROR: $tmode is \ -an unknown tree computation mode\\n\";\n &my_exit \ -(EXIT_FAILURE);\n }\n\n open (F, \">$nb_c\\ -");\n if ($n>1){print F \"$protdist_o$tmode\\nM\ -\\n$n\\n$seed\\nY\\n\";}\n else {print F \"$pro\ -tdist_o$tmode\\nY\\n\";}\n close (F);\n\n `n\ -eighbor$exec_extension < $nb_c`;\n if ( -e \"o\ -uttree\"){ print \"[Neighbor OK]\\n\";}\n else \ -{ print \"[FAILED]\\n\";&my_exit (EXIT_FAILURE);}\\ -n `mv outtree $nb_o`;\n unlink (\"outfile\")\ -;\n }\nelsif ($input eq \"msa\" && $tmode eq \"pa\ -rsimony\")\n {\n if ( -e \"outfile\"){unlink (\ -\"outfile\");}\n if ( -e \"outtree\"){unlink (\\ -"outtree\");}\n \n if ($stype eq \"prot\")\n\ - {\n print \"Run protpars [tmode=$tmode]\";\n\ - open (F, \">$nb_c\");\n if ($n>1){print F \"$seqb\ -oot_o\\nM\\nD\\n$n\\n$seed\\n10\\nY\\n\";}\n else \ -{print F \"$seqboot_o\\nY\\n\";}\n close (F);\n `p\ -rotpars$exec_extension < $nb_c`;\n }\n el\ -sif ( $stype eq \"dna\" || $stype eq \"cdna\")\n \ - {\n print \"Run dnapars [tmode=$tmode]\";\n op\ -en (F, \">$nb_c\");\n if ($n>1){print F \"$seqboot\ -_o\\nM\\nD\\n$n\\n$seed\\n10\\nY\\n\";}\n else {pr\ -int F \"$seqboot_o\\nY\\n\";}\n close (F);\n `dnap\ -ars$exec_extension < $nb_c`;\n }\n if ( -\ -e \"outtree\"){ print \"[OK]\\n\";}\n else { pr\ -int \"[FAILED]\\n\";&my_exit (EXIT_FAILURE);}\n \ - `mv outtree $nb_o`;\n unlink (\"outfile\");\n \ -}\nelsif ($input eq \"msa\" && $tmode eq \"ml\")\n\ - {\n if ( -e \"outfile\"){unlink (\"outfile\")\ -;}\n if ( -e \"outtree\"){unlink (\"outtree\");\ -}\n \n if ($stype eq \"prot\")\n {\n pr\ -int \"Error: ML impossible with Protein Sequences \ -[ERROR]\";\n &my_exit (EXIT_FAILURE);\n }\n \ - elsif ( $stype eq \"dna\" || $stype eq \"cdna\")\ -\n {\n print \"Run dnaml [tmode=$tmode]\";\n \ -open (F, \">$nb_c\");\n if ($n>1){print F \"$seqbo\ -ot_o\\nM\\nD\\n$n\\n$seed\\n10\\nY\\n\";}\n else {\ -print F \"$seqboot_o\\nY\\n\";}\n close (F);\n `dn\ -aml$exec_extension < $nb_c`;\n }\n if ( -\ -e \"outtree\"){ print \"[OK]\\n\";}\n else { pr\ -int \"[FAILED]\\n\";&my_exit (EXIT_FAILURE);}\n \ - `mv outtree $nb_o`;\n unlink (\"outfile\");\n \ -}\n\n\nelse\n {\n `cp $msa $nb_o`;\n $n=2;\\ -n }\n\nif ($rmsa && -e $seqboot_o){print \"\\nOut\ -put List of $n Replicate MSA: $rmsa\\n\";`cp $seqb\ -oot_o $rmsa`;}\nif ($rmat && -e $protdist_o){print\ - \"\\nOutput List of $n Replicate MATRICES: $rmat\\ -\n\";`cp $protdist_o $rmat`;}\nif ($rtree && -e $n\ -b_o){print \"\\nOutput List of $n Replicate TREES:\ - $rtree\\n\";`cp $nb_o $rtree`;}\n\n\n\n$con_o=&vt\ -mpnam();\n$con_c=&vtmpnam();\nif ($n >1)\n {\n \ - print \"Run Consense.....\";\n open (F, \">$co\ -n_c\");\n print F \"$nb_o\\nY\\n\";\n close \ -(F);\n `consense$exec_extension < $con_c`;\n \ - if ( -e \"outfile\"){ print \"[OK]\\n\";}\n e\ -lse { print \"[FAILED]\\n\";&my_exit (EXIT_FAILURE\ -);}\n `mv outtree $con_o`;\n unlink (\"outfi\ -le\");\n }\nelse\n {\n `cp $nb_o $con_o`;\n \ -}\n\n\n`cp $con_o $out`;\nif ( !-e $out)\n {\n \ - print \"Tree Computation failed [FAILED]\\n\";\n \ - &my_exit (EXIT_FAILURE);\n }\nelsif ($n>1)\n \ -{\n print \"\\nOutput Bootstrapped Tree: $out\\\ -n\";\n $avg=`t_coffee -other_pg seq_reformat -i\ -n $out -action +avg_bootstrap`;\n $avg=~s/\\n//\ -g;\n print \"$avg\\n\";\n }\nelse\n {\n pr\ -int \"\\nOutput Tree: $out\\n\";\n }\n\nopen (F, \ -\"$out\");\nwhile ()\n {\n \n $tree.=$_;\ -\n }\nclose (F);\n$tree=~s/\\n//g;\nprint \"BPH: \ -$tree\\n\";\n\n\n&my_exit (EXIT_SUCCESS);\n\nsub m\ -y_exit \n {\n my $m=@_[0];\n &clean_vtmpnam\ -();\n exit ($m);\n }\nsub vtmpnam \n {\n m\ -y $file;\n\n\n $ntmp++;\n $file=\"tmp4msa2bo\ -otstrap.$rseed.$$.$ntmp\";\n \n push (@tmpfi\ -le, $file);\n return $file;\n }\nsub clean_vtm\ -pnam \n {\n my $t;\n foreach $t (@tmpfile)\\ -n {\n if ( -e $t){unlink ($t)};\n }\n }\ -\n","use Env;\n$seq_reformat=\"t_coffee -other_pg \ -seq_reformat \";\n$VersionTag=\"1.00\";\n$step=1;\\ -n$unset=\"\";\n$scoreT1=$scoreT2=$nseqT=$dp_limit=\ -$unset;\n@tl=();\nchomp($tc_version=`t_coffee -ver\ -sion`);$tc_version=~s/PROGRAM: //;\n\n\nprint STDE\ -RR \"\\n******************************************\ -***********************\";\nprint STDERR \"\\n* \ - HIGH LEVEL PROGRAM: T-COFFEE_DPA Version $\ -VersionTag\";\nprint STDERR \"\\n* LOW \ -LEVEL PROGRAM: $tc_version \";\nprint STDERR \"\\n\ -**************************************************\ -***************\";\n\nif (!@ARGV)\n {\n print \ -\"t_coffee_dpa accepts every t_coffee_flag.\\nType\ - t_coffee to obtain a list\\n\";\n print \"Requ\ -ires $TC_VERSION\\n\";\n print \"Requires \";\n\ - print \"t_coffee_dpa specific flags:\\n\";\n \ - print \"\\t-dpa_master_aln....................Ma\ -ster alignment: provided OR computed\\n\";\n pr\ -int \"\\t-dpa_master_aln....................By def\ -ault, Computed with t_coffee -very_fast\\n\";\n \ - print \"\\t-dpa_master_aln=.............Use\ - file, (must be an aln in Fasta or ClustalW\\n\";\\ -n print \"\\t-dpa_master_aln=.........\ -.Compute aln with pg -in seq -out aln`\\n\";\n \ -print \"\\t-dpa_maxnseq.......................Maxi\ -mum number of sequences in subgroups\\n\";\n pr\ -int \"\\t-dpa_min_score1....................Minimu\ -m Id for two sequences to be grouped in ref_aln\\n\ -\";\n print \"\\t-dpa_min_score2...............\ -.....Minimum Id within a subgroup\\n\";\n print\ - \"\\t-dpa_debug.........................Keep Tmp \ -File (for debug purpose)\\n\\n\";\n \n exit \ -(0);\n }\nforeach $arg (@ARGV)\n {\n $arg_lis\ -t.=\" $arg\";\n }\n$arg_list=~s/[=,;]/ /g;\n\n\n(\ -$seq0, $arg_list)=&extract_val_from_arg_list(\"^\"\ -,$arg_list, \"SPLICE\",\"unset\");\n($seq1, $arg_l\ -ist)=&extract_val_from_arg_list(\"-seq\",$arg_list\ -, \"SPLICE\",\"unset\");\n($seq2, $arg_list)=&extr\ -act_val_from_arg_list(\"-in\",$arg_list, \"KEEP\",\ -\"unset\");\n($seq3, $arg_list)=&extract_val_from_\ -arg_list(\"-infile\",$arg_list, \"SPLICE\",\"unset\ -\");\n($prf, $arg_list)=&extract_val_from_arg_lis\ -t(\"-profile\",$arg_list, \"SPLICE\",\"unset\");\n\ -\n$gl{'Seq'}=$seq=&vtmpnam();#file containing all \ -the sequences\n\n #1-remove sequences from -in\n\ -if ( $arg_list =~/\\-in\\b/)\n {\n my $save, $\ -name;\n while($arg_list=~/\\-in\\b[^-]+(\\bS[\\\ -w.]+)/)\n {\n $name=$1;$name=~s/^.//;\n if ( \ -!-e $name){$save.=\" S$name \";}\n\n $arg_list=~s/\ -S$name/ /;\n }\n $arg_list=~s/\\-in\\b/\\-\ -in $save /;\n }\n #2-prepare \n\nif (!($arg_lis\ -t=~/\\-outorder/))\n {\n \n $output_cl .=\"\ - -outorder=$seq\";\n }\n@output_flag=(\"-output\"\ -,\"-outfile\", \"-run_name\", \"-outorder\"); \nfo\ -reach $v1 (@output_flag)\n {\n ($v2, $arg_list\ -)=&extract_val_from_arg_list($v1,$arg_list, \"SPLI\ -CE\",\"unset\");\n if ($v2 ne \"\")\n {\n\\ -n if ($v1 eq \"-run_name\"){$run_name=$v2;$output_\ -cl .=\" $v1 $v2 \";}\n elsif ( $v1 eq \"-outorder\\ -")\n {\n if ( $v2 eq \"input\"){$v2=$seq;}\n\ - $outorder=$v2;$output_cl .=\" $v1 $v2 \";\n \ - }\n else\n {\n $output_cl .=\" $v1 $v2 \";\\ -n }\n }\n }\n\n\n($dpa_master_aln, $arg_lis\ -t) =&extract_val_from_arg_list(\"-dpa_master_aln\\ -",$arg_list, \"SPLICE\", \"t_coffee\");\n$dpa_mast\ -er_aln=~s/\\s//g;\n($nseqT, $arg_list) =\ -&extract_val_from_arg_list(\"-dpa_maxnseq\",$arg_l\ -ist, \"SPLICE\", 30);\n($scoreT1, $arg_list) \ - =&extract_val_from_arg_list(\"-dpa_min_score1\"\ -,$arg_list, \"SPLICE\", 80);\n($scoreT2, $arg_list\ -) =&extract_val_from_arg_list(\"-dpa_min_s\ -core2\" ,$arg_list, \"SPLICE\", 30);\n($dpa_lim\ -it, $arg_list) =&extract_val_from_arg_list(\\ -"-dpa_limit\" ,$arg_list, \"SPLICE\", 0);\n\ -($dpa_delta_id, $arg_list) =&extract_val_from_a\ -rg_list(\"-dpa_delta_id\" ,$arg_list, \"SPL\ -ICE\", 1);\n($dpa_debug, $arg_list) =&extrac\ -t_val_from_arg_list(\"-dpa_debug\" ,$arg\ -_list, \"SPLICE\", 0);\n\n\n$in_seq=$seq0.\" \".$s\ -eq1.\" \".$seq2.\" \".$seq3;\n$in_prf=(($prf ne $u\ -nset)?\"$prf \":\"\");\n&exit_dpa (($in_seq eq \"\\ -" && $in_prf eq \"\")?1:0, \"ERROR: You did not Pr\ -ovide any sequences. Use the -seq flag [FATAL: t_c\ -offee_dpa]\\n\", EXIT_FAILURE);\n\n\nprint STDERR \ -\"\\nSTART DPA COMPUTATION\";\n\n\n\nif ($in_seq=~\ -/\\S+/)\n {\n \n print STDERR \"\\n Step $s\ -tep: Gather all the sequences into the tmp file: [\ -$seq]\";$step++; \n &my_system (\"t_coffee $in_\ -seq -convert -quiet -output fasta_seq -outfile=$se\ -q -maxnseq 0\");\n }\n\nif ( !-e $seq){$seq=\"\";\ -}\n\nif ($in_prf=~/\\S+/)\n {\n $seq_in_type=\\ -"profile\"; \n $seq.= $in_prf; \n }\nif ($seq \ -eq \"\"){ &exit_dpa (1, \"\\nERROR: No Sequence FO\ -und. Provide Sequences with the -seq flag [FATAL: \ -t_coffee_dpa]\", EXIT_FAILURE);}\n\n \n\nif ( $run\ -_name)\n {\n $suffix=$run_name;\n }\nelsif ($\ -in_seq=~/\\b(S[\\w.]+\\b)/)\n {\n my $suffix1,\ - $sufffix2;\n $suffix1=$suffix2=$1;\n $suffi\ -x2=~s/^S//;\n if ( -e $suffix1){$suffix=$suffix\ -1;}\n elsif ( -e $suffix2){$suffix=$suffix2;}\n\ - else\n {\n $suffix=&vtmpnam(); \n }\\ -n $suffix=~s/\\.\\w+//;\n }\n\nelse\n {\n \ -$suffix=&vtmpnam();\n }\n\n\nif (!$run_name){$out\ -put_cl.=\" -run_name $suffix \";}\n\n\n$gl{'Tree'}\ -=&seq2dpa_tree ($seq, \"$suffix.dpadnd\");\n\nprin\ -t STDERR \"\\n Step $step: Prepare guide tree: $gl\ -{'Tree'}\";$step++;\n\nprint STDERR \"\\n Step $st\ -ep: Identify and Align Closely Related Groups\";$s\ -tep++;\n%gl=&make_one_pass (0, $scoreT1,\"Align\",\ -%gl);\n\nprint STDERR \"\\n Step $step: Make Multi\ -ple Group Alignment\";$step++;\nwhile (!%gl ||$gl{\ -'Ng'}>$nseqT)\n {\n %gl=&make_one_pass ($nseqT\ -, $scoreT2,\"t_coffee\",%gl);\n if ( $gl{'Newgr\ -oups'}==0){$scoreT2--;} \n }\nprint STDERR \"\\ -\n Step $step: Make The Final Alignment\";$step++;\ -\n\n\n$arg_list .=$output_cl;\n\n\n%gl=&tree2group\ - (0,0, %gl);\n$gl{$gl{'0'}{'File'}}{'Output'}=\"\"\ -;\n$a=0;\n&align_groups (\"t_coffee\",'0', $arg_li\ -st, \" \", %gl);\n\n\n\nif ( !$dpa_keep_tmpfile){&\ -clean_tmp_file (@tl);}\n\n\n\nsub seq2dpa_tree \n \ - {\n my $seq=@_[0];\n my $newtree=@_[1];\n \ - my $aln=&vtmpnam ();\n\n &my_system (\"t_coff\ -ee -special_mode quickaln -in $seq -outfile $aln -\ -quiet\");\n &my_system (\"$seq_reformat -in $al\ -n -action +aln2tree +tree2dpatree -output newick >\ -$newtree\");\n return $newtree;\n } \nsub seq2\ -dpa_tree_old \n {\n my $aln=@_[0];\n my $ne\ -wtree=@_[1];\n \n \n &my_system(\"$seq_re\ -format -in $aln -action +seq2dpatree -output newic\ -k > $newtree\");\n return $newtree;\n }\nsub a\ -ln2dpa_tree \n {\n my $aln=@_[0];\n my $new\ -tree=&vtmpnam();\n \n &my_system(\"$seq_refo\ -rmat -in $aln -action +aln2tree +tree2dpatree -out\ -put newick > $newtree\");\n return $newtree;\n \ - }\nsub group_file2ngroups\n {\n my $file=@_[0\ -];\n my $n;\n \n open ( F, $file);\n w\ -hile ()\n {\n $n+=/\\>/;\n }\n clo\ -se (F);\n return $n;\n }\n\nsub make_one_pass\\ -n {\n my ($N, $ID,$pg, %gl)=@_;\n my $a;\n\\ -n %gl=&tree2group ($N,$ID,%gl);\n if (!$gl{'\ -Newgroups'}){return %gl;}\n else\n {\n for\ - ( $a=0; $a< $ng; $a++)\n {\n if ($gl{$gl{$a\ -}{'File'}}{'Ng'}>1){&display_group($a, %gl);}\n \ - &align_groups ($pg, $a, $arg_list, \" -quiet=qui\ -et \", %gl);\n }\n return %gl;\n }\n }\n\n\ -sub tree2group \n {\n my ($N, $ID, %gl)=@_;\n \ - my $prefix=&vtmpnam();\n my $group_file=&vtm\ -pnam();\n my $file;\n my $oldtree=&vtmpnam()\ -;\n my $n;\n my $tree;\n\n\n if ( $gl{'Ng\ -'}==1){return %gl;}\n $tree=$gl{'Tree'}; \n \ -\n #1 extract the groups\n &my_system (\"$se\ -q_reformat -in $tree -action +tree2group $N $ID $p\ -refix > $group_file\");\n $n=group_file2ngroups\ -($group_file);\n \n \n $gl{'Newgroups'}=1\ -;\n if ( $n==$gl{'Ng'})\n {\n $gl{'Newgrou\ -ps'}=0;\n return %gl;\n }\n $gl{'Iteration\ -'}++;\n $gl{'MaxNseq'}=$N;$gl{'MinID'}=$ID;\n \ - $gl{'GroupFile'}=$group_file;$gl{'Ng'}=$ng=0;\n \ - #2 Process the group list into the hash\n op\ -en (F, $group_file);\n while ()\n {\n $\ -gl{'File'}.=$_;\n if (/\\>/)\n {\n $line=$_;\ -\n $line=~s/\\>//;\n @list=($line=~/(\\S+)\ -/g);\n $file=$gl{$ng}{'File'}=shift @list;\n \ - $gl{$file}{'Output'}=$file;\n \n $gl{$f\ -ile}{'Ng'}=$#list+1;\n if ($gl{$file}{'Ng'}>1)\ -{ $gl{$file}{'Tlist'}=$gl{$file}{'Alist'}=\"(\";}\\ -n foreach $l (@list)\n {\n \n $gl{$file\ -}{'List'}.=\" $l \";\n \n if (!$gl{$l}{'Tlist'})\ -\n {\n $gl{$l}{'Tlist'}=\"$l\";\n $gl\ -{$l}{'Alist'}=\"$l\";\n $gl{$l}{'Nseq'}=1;\n \ - $gl{$l}{'Ng'}=1;\n }\n $gl{$file}{'Tlist'\ -}.=\"$gl{$l}{'Tlist'},\";\n $gl{$file}{'Alist'}.=\ -\"$gl{$l}{'Tlist'}|\";\n $gl{$file}{'Nseq'}+=$gl{\ -$l}{'Nseq'};\n }\n \n\n chop($gl{$fi\ -le}{'Tlist'});chop($gl{$file}{'Alist'});\n if \ -($gl{$file}{'Ng'}>1){$gl{$file}{'Tlist'}.=\")\"; $\ -gl{$file}{'Alist'}.=\");\";}\n $ng++;\n } \n\ - }\n $gl{'Ng'}=$ng;\n close (F);\n \\ -n #3 Update the old tree with the new groups\n \ - $gl{'Tree'}=&vtmpnam();\n &my_system (\"$seq\ -_reformat -in $tree -action +collapse_tree $group_\ -file -output newick > $gl{'Tree'}\");\n \n r\ -eturn %gl;\n }\n\nsub display_group \n {\n my\ - ($g,%gl)=@_;\n my $f;\n \n if ( $g==-1)\\ -n {\n print STDERR \"\\nIteration $gl{'Iterat\ -ion'} [MaxN=$gl{'MaxNseq'}][MinID=$gl{'MinID'}]\";\ -\n }\n else\n {\n\n $f=$gl{$g}{'File'\ -};\n $action=($gl{$f}{'Ng'}==1 || $gl{'Iteration'}\ -==1)?\"KEEP \":\"ALIGN \";\n print STDERR \ -\"\\n\\t[$action][MaxN=$gl{'MaxNseq'}][MinID=$gl{'\ -MinID'}][File $f][Nseq=$gl{$f}{'Nseq'}][Ngroups=$g\ -l{$f}{'Ng'}][$gl{$f}{'Alist'}]\";\n }\n }\n \ - \n\n\nsub align_groups\n {\n my ($pg, $g,\ - $arg, $extra_arg,%gl)=@_;\n my $f;\n my $Ou\ -tput,$Outflag;\n \n \n $f=$gl{$g}{'File'}\ -;\n $Output=($gl{$f}{'Output'});\n \n if \ -( $pg eq \"Align\")\n {\n if ( !-e $f)\n {\\ -n $command=\"$seq_reformat -in $gl{'Seq'} -ac\ -tion +extract_aln $gl{'GroupFile'}\";\n if ($g\ -l{$f}{'Ng'}>1)\n {\n &my_system ($command);\ -\n $command=\"t_coffee -special_mode quick_aln S\ -$f -outfile=$Output -quiet\";\n }\n }\n el\ -se \n {$command=\"\";}\n }\n elsif ( -e \ -$f)\n { \n $Outflag=($Output)?\"-outfile=$Out\ -put\":\"\";\n $command=\"$pg -infile $f $Outflag -\ -quiet stdout $arg $extra_arg -maxnseq 0 -convert -\ -quiet stdout\";\n }\n elsif ( $gl{$f}{'Ng'\ -}==1)\n {\n $action=($dpa_debug)?\"cp\":\"mv\\ -";\n $command=\"$action $gl{$f}{'List'} $Output\";\ -\n }\n else\n {\n $Outflag=($Output)?\ -\"-outfile=$Output\":\"\";\n $command=\"$pg -profi\ -le $gl{$f}{'List'} $Outflag $arg $extra_arg -maxns\ -eq 0\";\n }\n \n &my_system ($command);\ -\n return $outfile;\n }\n \nsub my_system \\ -n {\n my $command=@_[0];\n my $force=@_[1];\ -\n my $status;\n\n if ( $dpa_debug) {print S\ -TDERR \"\\nCOMMAND: $command\";}\n $status=syst\ -em ($command);\n\n if (!$force)\n {\n &e\ -xit_dpa (($status==1), \"Failed in Command:\\n$com\ -mand\\n[FATAL: t_coffee_dpa]\\n\", EXIT_FAILURE);\\ -n }\n \n return $status;\n }\n\nsub v\ -tmpnam\n {\n my $prefix=@_[0];\n my $tmp_fi\ -le_name;\n\n $tmp_prefix=($prefix)?$prefix:\"dp\ -a_tmp_file_$$\";\n \n $tmp_count++;\n $tmp\ -_file_name=\"$tmp_prefix\".\"$tmp_count\";\n $t\ -l[$#tl+1]=$tmp_file_name;\n return $tmp_file_na\ -me;\n }\n\nsub clean_tmp_file\n {\n\n my $lis\ -t;\n my $file;\n \n if ($dpa_debug){retur\ -n;}\n $list=vtmpnam();\n `ls -1 | grep $tmp_\ -prefix>$list`;\n \n open (F,$list);\n whi\ -le ( )\n {\n $file=$_;\n chop $file;\n if \ -( -e $file){unlink $file;}\n }\n close (F)\ -;\n unlink $list;\n }\n\n\nsub exit_dpa\n {\n\ - my $condition=@_[0];\n my $error_msg=@_[1];\n \ -my $exit_value=@_[2];\n if ( $condition)\n {\n\ - print \"$error_msg\\n\";\n exit ($exit_\ -value);\n }\n else\n {\n return;\n \ -}\n \n}\nsub extract_val_from_arg_list\n {\n \ -my $arg=@_[0];\n my $arg_list=@_[1];\n my $k\ -eep_flag=@_[2];\n my $default_value=@_[3];\n \ - my $val=\"\";\n \n #protect\n $arg_list=\ -~s/\\s-/ \\@/g;\n $arg=~s/-/\\@/g;\n \n #\ -search\n if ($arg eq \"^\")\n {\n $arg_lis\ -t=~/^([^@]*)/;\n $val=$1;\n }\n else\n \ - {$arg_list=~/$arg ([^@]*)/;$val=$1;}\n \n \ -#remove trailing spaces\n $val=~s/\\s*$//;\n \ - \n #remove the parsed sequence if needed\n \ -if (($val ne \"\") && $keep_flag ne \"KEEP\")\n \ - {\n if ( $arg eq \"^\"){$arg_list=~s/$val/ /;}\\ -n else {$arg_list=~s/($arg [^@]*)/ /;}\n }\n \ -\n #unprotect\n $arg_list=~s/\\@/-/g;\n $\ -arg=~s/\\@/-/g;\n \n if (($val eq \"\") && $\ -default_value ne \"unset\"){$val=$default_value;}\\ -n \n return $val, $arg_list;\n }\n$program=\ -\"T-COFFEE (Version_7.90)\";\\n\n","\n$DEBUG=1;\n$\ -dpa_nseq=10;\n$dpa_sim=0;\nif (!@ARGV)\n {\n `\ -t_coffee`;\n exit (0);\n }\nforeach $arg (@ARG\ -V)\n {\n $arg_list.=\" $arg\";\n }\n$max_nseq\ -=10;\n($seq0, $arg_list)=&extract_val_from_arg_lis\ -t(\"^\",$arg_list);\n($seq1, $arg_list)=&extract_v\ -al_from_arg_list(\"-seq\",$arg_list);\n($seq2, $ar\ -g_list)=&extract_val_from_arg_list(\"-in\",$arg_li\ -st, \"KEEP\");\n($seq3, $arg_list)=&extract_val_fr\ -om_arg_list(\"-infile\",$arg_list);\n$in_seq=$seq0\ -.\" \".$seq1.\" \".$seq2.\" \".$seq3;\n\n$seq=vtmp\ -nam();\n`t_coffee $in_seq -convert -output fasta_s\ -eq -outfile=$seq`;\n\n\n($dpa_nseq, $arg_list)=&ex\ -tract_val_from_arg_list(\"-dpa_nseq\",$arg_list);\\ -n($master_aln, $arg_list)=&extract_val_from_arg_li\ -st(\"-master_aln\",$arg_list);\n($sim_matrix, $arg\ -_list)=&extract_val_from_arg_list(\"-sim_matrix\",\ -$arg_list);\n($core_seq, $arg_list)=&extract_val_f\ -rom_arg_list(\"-core_seq\",$arg_list);\n($dpa_sim,\ - $arg_list)=&extract_val_from_arg_list(\"-dpa_sim\\ -",$arg_list);\n($run_name, $arg_list)=&extract_val\ -_from_arg_list(\"-run_name\",$arg_list);\n($output\ -, $arg_list)=&extract_val_from_arg_list(\"-output\\ -",$arg_list);\n\n\n\nif (!$sim_mat && !$master_aln\ -)#Compute the fast alignment\n {\n $ref_aln=vt\ -mpnam();\n `t_coffee -seq=$seq -very_fast -outf\ -ile=$ref_aln -quiet`;\n \n }\n\nif (!$sim_mat)\ -\n {\n $sim_mat=vtmpnam();\n `seq_reformat \ --in $ref_aln -output sim > $sim_mat`;\n }\n\nif (\ - !$core_seq)\n {\n $core_seq=vtmpnam();\n `\ -seq_reformat -in $ref_aln -action +trimTC N$max_ns\ -eq -output fasta_seq > $core_seq`;\n }\n@core_nam\ -e=`seq_reformat -in $core_seq -output name `; \n\n\ -@tot_name=`seq_reformat -in $seq -output name `;\n\ -\nforeach $s (@core_name){$s=~s/\\s//g;$hcore{$s}=\ -1;}\nforeach $s (@tot_name){$s=~s/\\s//g;}\nprint \ -STDERR \"T-Coffee_dpa:\\n\";\nprint STDERR \"\\tTO\ -TAL SEQ: @tot_name\\n\";\nprint STDERR \"\\tCHOSE\ -N SEQ: @core_name\\n\";\n\n\n\nopen (F, $sim_mat);\ -\nwhile ( )\n {\n @l=($_=~/(\\b[\\S]+\\b)/g\ -);\n if (($l[0] eq \"TOP\" || $l[0] eq \"BOT\")\ -)\n {\n $s1=$l[1];$s2=$l[2];$v=$l[3];\n if ($\ -hcore{$s1} && !$hcore{$s2})\n {\n if (!$hseq\ -{$s2}{\"sim\"} || $v>$hseq{$s2}{\"sim\"})\n \ -{\n $hseq{$s2}{\"sim\"}=$v;$hseq{$s2}{\"seq\"}=$s\ -1;\n }\n }\n }\n }\nclose (F);\nfore\ -ach $s (@tot_name)\n {\n\n if ( !$hseq{$s}{\"s\ -eq\"}){;}\n else\n {\n $s2=$hseq{$s}{\"seq\ -\"};\n $v=$hseq{$s}{\"sim\"};\n \n if ($v>$dpa_si\ -m)\n {\n $hseq{$s}{'used'}=1;\n $seq_lis\ -t{$s2}{$seq_list{$s2}{'nseq'}++}=$s;\n }\n \ -}\n }\nforeach $s (@core_name){$seq_list{$s}{$seq\ -_list{$s}{'nseq'}++}=$s;$hseq{$s}{'used'}=1;}\nfor\ -each $s (@tot_name){if (!$hseq{$s}{'used'}){$seq_l\ -ist{'unused'}{$seq_list{'unused'}{'nseq'}++}=$s;}}\ -\n\n\n$n=0;\nforeach $s (@core_name)\n {\n $ng\ -++;\n $n=$seq_list{$s}{'nseq'};\n for (@g_li\ -st=(), $a=0; $a<$n; $a++){@g_list=(@g_list,$seq_li\ -st{$s}{$a});}\n\n $g_seq=vtmpnam();\n $g_aln\ -=vtmpnam();\n \n print STDERR \"Group $ng: $\ -#g_list Seq: @g_list: \";\n \n \n `seq_re\ -format -in $seq -action +lower +keep_name +extract\ -_seq @g_list -output fasta_seq > $g_seq`;\n \n\ - \n if ( $#g_list==0)\n {\n print STDER\ -R \"[No aln]\\n\";\n $g_aln=$g_seq;\n }\n \ -elsif ($#g_list<$max_nseq) \n {\n print STDER\ -R \"[t_coffee]\\n\";\n `t_coffee $g_seq -outfile=$\ -g_aln -quiet $arg_list`;\n }\n else\n \ - {\n print STDERR \"[t_coffee_dpa]\\n\";\n `t_coff\ -ee_dpa2 $g_seq -outfile=$g_aln $arg_list -sim_matr\ -ix $sim_matrix -dpa_nseq $dpa_nseq`;\n }\n \ - @profile_list=(@profile_list, $g_aln);\n }\n\n\n\ -print \"UNUSED $seq_list{'unused'}{'nseq'}\";\n\ni\ -f ($seq_list{'unused'}{'nseq'})\n {\n $prf\ -=vtmpnam();\n \n `t_coffee -profile @pro\ -file_list $arg_list -outfile=$prf -quiet`;\n \ -$n=$seq_list{\"unused\"}{'nseq'};\n $new_seq=\ -vtmpnam();\n $new_prf=vtmpnam();\n for (\ -$a=0; $a<$n-1; $a++)\n {\n $s=$seq_list{\"unused\ -\"}{$a};\n print STDERR \"\\nADD Sequence $s\";\\ -n \n `seq_reformat -in $seq -action +lower +ke\ -ep_name +extract_seq $s -output fasta_seq > $new_\ -seq`;\n `t_coffee -profile $prf $new_seq $arg_li\ -st -outfile=$new_prf`;\n `cp $new_prf $prf`;\n }\ -\n $s=$seq_list{\"unused\"}{$a};\n `seq_\ -reformat -in $seq -action +lower +keep_name +extra\ -ct_seq $s -output fasta_seq > $new_seq`;\n @\ -profile_list=($prf, $new_seq);\n }\n \n \ - \nif ($run_name){$arg_list.=\" -run_name $run_nam\ -e\";}\nelse \n {\n $in_seq=~/([\\w-]+)/;\n \ -$arg_list.=\" -run_name $1\";\n }\nif ( $output){\ -$arg_list.=\" -output $output \";}\n\n`t_coffee -p\ -rofile @profile_list $arg_list`;\n\n\n&clean (@tmp\ -_file_list);\n\n\nsub vtmpnam\n {\n my $tmp_fi\ -le_name;\n $tmp_name_counter++;\n $tmp_file_\ -name=\"tmp_file_$tmp_name_counter\\_Pid$$\";\n \ -$tmp_file_list[$ntmp_file++]=$tmp_file_name;\n \ -return $tmp_file_name;\n }\nsub clean\n {\n my \ -@fl=@_;\n my $file;\n return;\n\n foreach $file\ - ( @fl)\n {\n if ( -e $file){unlink($file)\ -;}\n }\n}\nsub extract_val_from_arg_list\n {\n\ - my $arg=@_[0];\n my $arg_list=@_[1];\n m\ -y $keep_flag=@_[2];\n #protect\n $arg_list=~\ -s/\\s-/ \\@/g;\n $arg=~s/-/\\@/g;\n \n #s\ -earch\n if ($arg eq \"^\")\n {\n $arg_list\ -=~/^([^@]*)/;\n $val=$1;\n }\n else\n \ - {$arg_list=~/$arg ([^@]*)/;$val=$1;}\n \n #\ -remove the parsed sequence if needed\n if ($val\ - && $keep_flag ne \"KEEP\")\n {\n if ( $arg e\ -q \"^\"){$arg_list=~s/$val/ /;}\n else {$arg_list=\ -~s/($arg [^@]*)/ /;}\n }\n \n #unprotect\n\ - $arg_list=~s/\\@/-/g;\n $arg=~s/\\@/-/g;\n \ - \n return $val, $arg_list;\n }\n\n","use En\ -v;\n\n$BLAST_MAX_NRUNS=2;\n$EXIT_SUCCESS=0;\n$EXIT\ -_FAILURE=1;\n\nuse Cwd;\n$REF_EMAIL=\"\";\n\n\n$tm\ -p_dir=\"\";\n$init_dir=\"\";\n$program=\"tc_generi\ -c_method.pl\";\n\n$test=0;\nif ($test==1)\n {\n \ - $SERVER=\"NCBI\";\n $query=$ARGV[0];\n $hi\ -tf=$ARGV[1];\n %s=read_fasta_seq($query);\n \ -@sl=keys(%s);\n &blast_xml2profile (\"xx\", $s{\ -$sl[0]}{seq},$maxid,$minid,$mincov, $hitf);\n m\ -yexit ($EXIT_FAILURE);\n }\n\nforeach $v(@ARGV){$\ -cl.=\"$v \";}\n($mode)=&my_get_opt ( $cl, \"-mode=\ -\",1,0);\n\n($A)=(&my_get_opt ( $cl, \"-name1=\",0\ -,0));\n($B)=(&my_get_opt ( $cl, \"-name2=\",0,0));\ -\n($TMPDIR)=(&my_get_opt ( $cl, \"-tmpdir=\",0,0))\ -;\n($CACHE)=(&my_get_opt ( $cl, \"-cache=\",0,0));\ -\n($SERVER)=((&my_get_opt ( $cl, \"-server=\",0,0)\ -));\n($EMAIL)=((&my_get_opt ( $cl, \"-email=\",0,0\ -)));\n\nif (!$A){$A=\"A\";}\nif (!$B){$B=\"B\";}\n\ -\n\nif (!$TMPDIR)\n {\n $HOME=$ENV{HOME};\n \ - if ($ENV{TMP_4_TCOFFEE}){$TMPDIR=$ENV{TMP_4_TCOFF\ -EE};}\n else{$TMPDIR=\"$HOME/.t_coffee/tmp/\";}\ -\n }\nif ( ! -d $TMPDIR)\n {\n mkdir $TMPDIR;\ -\n }\nif ( ! -d $TMPDIR)\n {\n print \"ERROR:\ - Could not create temporary dir: $TMPDIR\\n\";\n \ - myexit ($EXIT_FAILURE);\n }\n\n$EMAIL=~s/XEMAIL\ -X/\\@/g;\nif (!$EMAIL)\n {\n if ($ENV{EMAIL_4_\ -TCOFFEE}){$EMAIL=$ENV{EMAIL_4_TCOFFEE};}\n elsi\ -f ($ENV{EMAIL}){$EMAIL=$ENV{EMAIL};}\n else {$E\ -MAIL=$REF_EMAIL;}\n }\n\n($maxid,$minid,$mincov)=\ -(&my_get_opt ( $cl, \"-maxid=\",0,0, \"-minid=\",0\ -,0,\"-mincov=\",0,0));\nif (!$cl=~/\\-maxid\\=/){$\ -maxid=95;}\nif (!$cl=~/\\-minid\\=/){$minid=35;}\n\ -if (!$cl=~/\\-mincov\\=/){$mincov=80;}\n\n\n\nif (\ -$mode eq \"seq_msa\")\n {\n &seq2msa($mode,&my\ -_get_opt ( $cl, \"-infile=\",1,1, \"-method=\",1,2\ -, \"-param=\",0,0, \"-outfile=\",1,0));\n }\n\nel\ -sif ( $mode eq \"thread_pair\")\n {\n &seq2thr\ -ead_pair($mode,&my_get_opt ( $cl, \"-infile=\",1,1\ -, \"-pdbfile1=\",1,1, \"-method=\",1,2,\"-param=\"\ -,0,0, \"-outfile=\",1,0, ));\n }\nelsif ( $mode e\ -q \"pdbid_pair\")\n {\n &seq2pdbid_pair($mode,\ -&my_get_opt ( $cl, \"-pdbfile1=\",1,0, \"-pdbfile2\ -=\",1,0, \"-method=\",1,2,\"-param=\",0,0, \"-outf\ -ile=\",1,0, ));\n }\nelsif ( $mode eq \"pdb_pair\\ -")\n {\n &seq2pdb_pair($mode,&my_get_opt ( $cl\ -, \"-pdbfile1=\",1,1, \"-pdbfile2=\",1,1, \"-metho\ -d=\",1,2,\"-param=\",0,0, \"-outfile=\",1,0, ));\n\ - }\nelsif ( $mode eq \"profile_pair\")\n {\n \ - &seq2profile_pair($mode,&my_get_opt ( $cl, \"-pro\ -file1=\",1,1, \"-profile2=\",1,1, \"-method=\",1,2\ -,\"-param=\",0,0, \"-outfile=\",1,0, ));\n }\nels\ -if ( $mode eq \"pdb_template\")\n {\n &blast2p\ -db_template ($mode,&my_get_opt ( $cl, \"-infile=\"\ -,1,1, \"-database=\",1,0, \"-method=\",1,0, \"-out\ -file=\",1,0));\n }\nelsif ( $mode eq \"profile_te\ -mplate\")\n {\n &psiblast2profile_template ($m\ -ode,&my_get_opt ( $cl, \"-infile=\",1,1, \"-databa\ -se=\",1,0, \"-method=\",1,0, \"-outfile=\",1,0));\\ -n }\nelsif ( $mode eq \"psiprofile_template\")\n \ - {\n &psiblast2profile_template ($mode,&my_get_\ -opt ( $cl, \"-infile=\",1,1, \"-database=\",1,0, \\ -"-method=\",1,0, \"-outfile=\",1,0));\n }\nelsif \ -( $mode eq \"RNA_template\")\n {\n &seq2RNA_te\ -mplate ($mode,&my_get_opt ( $cl, \"-infile=\",1,1,\ - \"-outfile=\",1,0));\n }\nelsif ( $mode eq \"tm_\ -template\")\n {\n &seq2tm_template ($mode,&my_\ -get_opt ( $cl, \"-infile=\",1,1,\"-arch=\",1,1,\"-\ -psv=\",1,1, \"-outfile=\",1,0,));\n }\nelsif ( $m\ -ode eq \"psitm_template\")\n {\n &seq2tm_templ\ -ate ($mode,&my_get_opt ( $cl, \"-infile=\",1,1,\"-\ -arch=\",1,1,\"-psv=\",1,1, \"-outfile=\",1,0,));\n\ - }\nelsif ( $mode eq \"ssp_template\")\n {\n \ -&seq2ssp_template ($mode,&my_get_opt ( $cl, \"-inf\ -ile=\",1,1,\"-seq=\",1,1,\"-obs=\",1,1, \"-outfile\ -=\",1,0));\n }\nelsif ( $mode eq \"psissp_templat\ -e\")\n {\n &seq2ssp_template ($mode,&my_get_op\ -t ( $cl, \"-infile=\",1,1,\"-seq=\",1,1,\"-obs=\",\ -1,1, \"-outfile=\",1,0));\n }\nelsif ( $mode eq \\ -"rna_pair\")\n{\n &seq2rna_pair($mode,&my_get_o\ -pt ( $cl, \"-pdbfile1=\",1,1, \"-pdbfile2=\",1,1, \ -\"-method=\",1,2,\"-param=\",0,0, \"-outfile=\",1,\ -0, ));\n}elsif ( $mode eq \"calc_rna_template\")\n\ -{\n &calc_rna_template($mode,&my_get_opt ( $cl,\ - \"-infile=\",1,1,\"-pdbfile=\",1,1, \"-outfile=\"\ -,1,0));\n}\nelse\n {\n print STDERR \"$mode is\ - an unknown mode of tc_generic_method.pl [FATAL]\\\ -n\";\n }\nmyexit ($EXIT_SUCCESS);\nsub seq2ssp_te\ -mplate\n {\n my ($mode, $infile,$gor_seq,$gor_ob\ -s,$outfile)=@_;\n my %s, %h;\n my $result;\n my\ - (@profiles);\n &set_temporary_dir (\"set\",$infi\ -le,\"seq.pep\");\n %s=read_fasta_seq (\"seq.pep\"\ -);\n\n \n open (R, \">result.aln\");\n \n #pri\ -nt stdout \"\\n\";\n foreach $seq (keys(%s))\n \ - {\n \n open (F, \">seqfile\");\n $\ -s{$seq}{seq}=uc$s{$seq}{seq};\n print (F \">$\ -s{$seq}{name}\\n$s{$seq}{seq}\\n\");\n close \ -(F);\n $lib_name=\"$s{$seq}{name}.ssp\";\n \ - $lib_name=&clean_file_name ($lib_name);\n \ -\n if ($mode eq \"ssp_template\"){&seq2gor_pr\ -ediction ($s{$seq}{name},$s{$seq}{seq}, \"seqfile\\ -", $lib_name,$gor_seq, $gor_obs);}\n elsif ($\ -mode eq \"psissp_template\")\n {\n &seq2msa_gor_\ -prediction ($s{$seq}{name},$s{$seq}{seq},\"seqfile\ -\", $lib_name,$gor_seq, $gor_obs);\n }\n \n \ - if ( !-e $lib_name)\n {\n print STDERR (\"GORI\ -V failed to compute the secondary structure of $s{\ -$seq}{name} [FATAL:$mode/$method/$program]\\n\");\\ -n myexit ($EXIT_FAILURE);\n }\n else\n {\n \ - print stdout \"\\tProcess: >$s{$seq}{name} _E_ $\ -lib_name \\n\";\n print R \">$s{$seq}{name} _E_ \ -$lib_name\\n\";\n }\n unshift (@profiles, $li\ -b_name);\n }\n close (R);\n &set_temporary_di\ -r (\"unset\",$mode, $method,\"result.aln\",$outfil\ -e, @profiles);\n}\n\nsub seq2tm_template\n {\n m\ -y ($mode, $infile,$arch,$psv,$outfile)=@_;\n my %\ -s, %h;\n my $result;\n my (@profiles);\n &set_t\ -emporary_dir (\"set\",$infile,\"seq.pep\");\n %s=\ -read_fasta_seq (\"seq.pep\");\n\n \n open (R, \"\ ->result.aln\");\n \n #print stdout \"\\n\";\n f\ -oreach $seq (keys(%s))\n {\n open (F, \">s\ -eqfile\");\n print (F \">$s{$seq}{name}\\n$s{\ -$seq}{seq}\\n\");\n close (F);\n $lib_na\ -me=\"$s{$seq}{name}.tmp\";\n $lib_name=&clean\ -_file_name ($lib_name);\n\n if ($mode eq \"tm\ -_template\")\n {\n &safe_system (\"t_coffee -oth\ -er_pg fasta_seq2hmmtop_fasta.pl -in=seqfile -out=$\ -lib_name -arch=$arch -psv=$psv\");\n }\n elsi\ -f ( $mode eq \"psitm_template\")\n {\n &seq2msa_\ -tm_prediction ($s{$seq}{name},$s{$seq}{seq},\"seqf\ -ile\", $lib_name,$arch, $psv);\n }\n if ( !-e\ - $lib_name)\n {\n print STDERR (\"RNAplfold fail\ -ed to compute the secondary structure of $s{$seq}{\ -name} [FATAL:$mode/$method/$program]\\n\");\n my\ -exit ($EXIT_FAILURE);\n }\n else\n {\n prin\ -t stdout \"\\tProcess: >$s{$seq}{name} _T_ $lib_na\ -me\\n\";\n print R \">$s{$seq}{name} _T_ $lib_na\ -me\\n\";\n }\n unshift (@profiles, $lib_name)\ -;\n }\n close (R);\n &set_temporary_dir (\"un\ -set\",$mode, $method,\"result.aln\",$outfile, @pro\ -files);\n}\n\nsub seq2RNA_template\n {\n my ($mo\ -de, $infile,$outfile)=@_;\n my %s, %h, ;\n my $r\ -esult;\n my (@profiles);\n &set_temporary_dir (\\ -"set\",$infile,\"seq.pep\");\n %s=read_fasta_seq \ -(\"seq.pep\");\n\n \n open (R, \">result.aln\");\ -\n \n #print stdout \"\\n\";\n foreach $seq (ke\ -ys(%s))\n {\n open (F, \">seqfile\");\n \ - print (F \">$s{$seq}{name}\\n$s{$seq}{seq}\\n\"\ -);\n close (F);\n $lib_name=\"$s{$seq}{n\ -ame}.rfold\";\n $lib_name=&clean_file_name ($\ -lib_name);\n &safe_system (\"t_coffee -other_\ -pg RNAplfold2tclib.pl -in=seqfile -out=$lib_name\"\ -);\n \n if ( !-e $lib_name)\n {\n prin\ -t STDERR (\"RNAplfold failed to compute the second\ -ary structure of $s{$seq}{name} [FATAL:$mode/$meth\ -od/$program]\\n\");\n myexit ($EXIT_FAILURE);\n \ -}\n else\n {\n print stdout \"\\tProcess: >\ -$s{$seq}{name} _F_ $lib_name\\n\";\n print R \">\ -$s{$seq}{name} _F_ $lib_name\\n\";\n }\n unsh\ -ift (@profiles, $lib_name);\n }\n close (R);\n\ - &set_temporary_dir (\"unset\",$mode, $method,\"r\ -esult.aln\",$outfile, @profiles);\n}\n\nsub psibla\ -st2profile_template \n {\n my ($mode, $infile, $\ -db, $method, $outfile)=@_;\n my %s, %h, ;\n my (\ -$result,$psiblast_output,$profile_name,@profiles);\ -\n \n &set_temporary_dir (\"set\",$infile,\"seq.\ -pep\");\n %s=read_fasta_seq (\"seq.pep\");\n ope\ -n (R, \">result.aln\");\n \n #print stdout \"\\n\ -\";\n foreach $seq (keys(%s))\n {\n open \ -(F, \">seqfile\");\n print (F \">$A\\n$s{$seq\ -}{seq}\\n\");\n close (F);\n $psiblast_o\ -utput=&run_blast ($s{$seq}{name},$method, $db, \"s\ -eqfile\",\"outfile\");\n if ( -e $psiblast_ou\ -tput)\n {\n %profile=blast_xml2profile($s{$seq}{\ -name}, $s{$seq}{seq},$maxid, $minid,$mincov,$psibl\ -ast_output);\n unlink ($psiblast_output);\n \n\ - $profile_name=\"$s{$seq}{name}.prf\";\n $prof\ -ile_name=&clean_file_name ($profile_name);\n uns\ -hift (@profiles, $profile_name);\n output_profil\ -e ($profile_name, %profile);\n print stdout \"\\\ -tProcess: >$s{$seq}{name} _R_ $profile_name [$prof\ -ile{n} Seq.] [$SERVER/blast/$db][$CACHE_STATUS]\\n\ -\";\n print R \">$s{$seq}{name} _R_ $profile_nam\ -e\\n\";\n }\n }\n close (R);\n &set_temporary\ -_dir (\"unset\",$mode, $method,\"result.aln\",$out\ -file, @profiles);\n}\n\nsub blast2pdb_template \n \ - {\n my ($mode, $infile, $db, $method, $outfile)=\ -@_;\n my %s, %h, ;\n my ($result,$blast_output);\ -\n &set_temporary_dir (\"set\",$infile,\"seq.pep\\ -");\n %s=read_fasta_seq (\"seq.pep\");\n open (R\ -, \">result.aln\");\n \n \n #print stdout \"\\n\\ -";\n foreach $seq (keys(%s))\n {\n open (\ -F, \">seqfile\");\n print (F \">$A\\n$s{$seq}\ -{seq}\\n\");\n close (F);\n \n $bla\ -st_output=&run_blast ($s{$seq}{name},$method, $db,\ - \"seqfile\",\"outfile\");\n %p=blast_xml2pro\ -file($s{$seq}{name}, $s{$seq}{seq},$maxid, $minid,\ -$mincov,$blast_output);\n unlink ($blast_outp\ -ut);\n if ($p{n}>1)\n {\n $pdbid=id2pdbid($\ -p{1}{identifyer});\n if ( length ($pdbid)>5){$pd\ -bid=id2pdbid($p{1}{definition});}\n \n print R\ - \">$s{$seq}{name} _P_ $pdbid\\n\";\n print stdo\ -ut \"\\tProcess: >$s{$seq}{name} _P_ $pdbid [$SERV\ -ER/blast/$db][$CACHE_STATUS]\\n\";\n }\n else\ -\n {\n print R \">$s{$seq}{name}\\n\";\n print\ - stdout \"\\tProcess: >$s{$seq}{name} _P_ No Templ\ -ate Found [$SERVER/blast/$db][$CACHE_STATUS]\\n\";\ -\n }\n }\n close (R);\n &set_temporary_dir (\\ -"unset\",$mode, $method,\"result.aln\",$outfile);\\ -n}\nsub blast_msa\n {\n my ($infile,$outfile)=\ -@_;\n my ($a, %seq);\n %s1=&read_fasta_seq (\ -$infile);\n foreach $s (keys (%s1))\n {\n \ -$i=$s1{$s}{order};\n $s{$i}{name}=$s;\n $s{$i}{seq\ -}=$s1{$s}{seq};\n $s{$i}{len}=length( $s{$i}{seq})\ -;\n $s{n}++;\n }\n `formatdb -i $infile`;\\ -n `blastpgp -i $infile -d $infile -m7 -j4 > io`\ -;\n &set_blast_type (\"io\");\n \n %FB=&x\ -ml2tag_list (\"io\", \"BlastOutput\");\n \n \ -open (F, \">$outfile\");\n print F \"! TC_LIB_F\ -ORMAT_01\\n\";\n print F \"$s{n}\\n\";\n for\ - ( $a=0; $a<$s{n}; $a++)\n {\n print F \"$s{$\ -a}{name} $s{$a}{len} $s{$a}{seq}\\n\";\n }\n \ - for ( $a=0; $a<$FB{n}; $a++)\n {\n %p=blas\ -t_xml2profile ($s{$a}{name}, $s{$a}{seq},100, 0, 0\ -, $FB{$a}{body});\n for ($b=1; $b<$p{n}; $b++)\n \ - {\n my $l=length ($p{$b}{Qseq});\n my $hi\ -t=$p{$b}{definition};\n my $Qstart=$p{$b}{Qsta\ -rt};\n my $Hstart=$p{$b}{Hstart};\n my $id\ -entity=$p{$b}{identity};\n my @lrQ=split (//,$\ -p{$b}{Qseq});\n my @lrH=split (//,$p{$b}{Hseq}\ -);\n my $i= $s1{$s{$a}{name}}{order}+1;\n \ -my $j= $s1{$hit}{order}+1;\n #if ( $j==$i){nex\ -t;}\n printf F \"# %d %d\\n\", $i, $j;\n #\ - print F \"\\n$p{$b}{Qseq} ($Qstart)\\n$p{$b}{Hs\ -eq} ($Hstart)\";\n for ($c=0; $c<$l; $c++)\n \ - {\n my $rQ=$lrQ[$c];\n my $rH=$lrH[$c];\n \ -my $n=0;\n \n if ($rQ ne \"-\"){$n++, $Qstart++;\ -}\n if ($rH ne \"-\"){$n++; $Hstart++;}\n \n if\ - ( $n==2)\n {\n printf F \"\\t%d %d %d\\n\\ -", $Qstart-1, $Hstart-1,$identity;\n }\n \ -}\n }\n }\n print F \"! SEQ_1_TO_N\\n\";\ -\n close (F);\n return $output;\n \n }\n\n\ -sub seq2msa\n {\n my ($mode, $infile, $method,\ - $param, $outfile)=@_;\n &set_temporary_dir (\"\ -set\",$infile,\"seq.pep\");\n $param.=\" >/dev/\ -null 2>&1 \";\n \n #make sure test.pep is in\ - FASTA\n &safe_system (\"t_coffee -other_pg seq\ -_reformat -in seq.pep -output fasta_seq > x\");\n \ - `mv x seq.pep`;\n \n if ( $method eq \"bl\ -astpgp\")\n {\n &blast_msa (\"seq.pep\", \"re\ -sult.aln\");\n }\n elsif ( $method eq \"mu\ -scle\")\n {\n `muscle -in seq.pep -out result\ -.aln $param`;\n }\n elsif ( $method eq \"p\ -robcons\")\n {\n `probcons seq.pep >result.al\ -n 2>/dev/null`;\n }\n elsif ( $method eq \\ -"mafft\")\n {\n `mafft --quiet --localpair --\ -maxiterate 1000 seq.pep> result.aln 2>/dev/null`\\ -n }\n elsif ( $method=~/prank/)\n {\n\ - `$method -d=seq.pep -o=result.aln -quiet 2>/dev/n\ -ull`;\n `mv result.aln.1.fas result.aln`;\n }\ -\n else\n {\n `$method -infile=seq.pep -ou\ -tfile=result.aln`;\n }\n \n &set_tempor\ -ary_dir (\"unset\",$mode, $method,\"result.aln\",$\ -outfile);\n myexit ($EXIT_SUCCESS);\n }\n\nsub\ - seq2thread_pair\n {\n my ($mode, $infile, $pd\ -bfile1, $method, $param, $outfile)=@_;\n &set_t\ -emporary_dir (\"set\",$infile,\"seq.pep\",$pdbfile\ -1,\"struc.pdb\");\n if ($method eq \"fugueali\"\ -)\n {\n #Env Variable that need to be defined\ - for Fugue\n if (!$ENV{FUGUE_LIB_LIST}){$ENV{FUGUE\ -_LIB_LIST}=\"DUMMY\";}\n if (!$ENV{HOMSTRAD_PATH})\ - {$ENV{HOMSTRAD_PATH}=\"DUMMY\";}\n if (!$ENV{HOM\ -S_PATH}){$ENV{HOMS_PATH}=\"DUMMY\";}\n \n `joy str\ -uc.pdb >x 2>x`;\n &check_file(\"struc.tem\", \"Joy\ - failed [FATAL:$program/$method]\");\n `melody -t \ -struc.tem >x 2>x`;\n &check_file(\"struc.tem\", \"\ -Melody failed [FATAL:$program/$method]\");\n `fugu\ -eali -seq seq.pep -prf struc.fug -print > tmp_resu\ -lt.aln`;\n \n &check_file(\"tmp_result.aln\", \"Fu\ -gue failed [FATAL:$program/$method]\");\n &safe_sy\ -stem (\"t_coffee -other_pg seq_reformat -in tmp_re\ -sult.aln -output fasta_aln >result.aln\");\n \ -}\n elsif ( $method eq \"t_coffee\")\n {\n\ - &safe_system (\"t_coffee -in Pstruc.pdb Sseq.pep \ -Mslow_pair -outfile result.aln -quiet\");\n }\ -\n else\n {\n &safe_system (\"$method -inf\ -ile=seq.pep -pdbfile1=struc.pdb -outfile=result.al\ -n $param>x 2>x\");\n }\n &set_temporary_di\ -r (\"unset\",$mode,$method,\"result.aln\",$outfile\ -);\n myexit ($EXIT_SUCCESS);\n }\nsub seq2pdbi\ -d_pair\n {\n my ($mode, $pdbfile1, $pdbfile2, \ -$method, $param, $outfile)=@_;\n my ($name);\n\\ -n \n &set_temporary_dir (\"set\");\n $nam\ -e=$pdbfile1.\" \".$pdbfile2;\n\n if ( &cache\ -_file(\"GET\",\"\",\"$name\",\"$method\",\"dali\",\ -$outfile,\"EBI\"))\n {return $outfile;}\n \ -else\n {\n if ($method eq \"dalilite\")\n {\ -\n $pdbfile1=~/(....)(.)/;\n $id1=$1; $c1=\ -$2;\n \n $pdbfile2=~/(....)(.)/;\n $id\ -2=$1; $c2=$2;\n \n $command=\"t_coffee -ot\ -her_pg dalilite.pl --pdb1 $id1 --chainid1 $c1 --pd\ -b2 $id2 --chainid2 $c2 --email=$EMAIL >dali_stder\ -r 2>dali_stderr\";\n $dali=`$command`;\n \\ -n open (F, \"dali_stderr\");\n while ()\ -\n {\n if ( /JobId: dalilite-(\\S+)/)\n {\\ -n $jobid=$1;\n }\n }\n close (F);\n \ - unlink (\"dali_stderr\");\n \n $output\ -1=\"dalilite-$jobid.txt\";\n if ( -e $output1)\ -\n {\n unlink ($output1);\n &url2file (\"h\ -ttp://www.ebi.ac.uk/Tools/es/cgi-bin/jobresults.cg\ -i/dalilite/dalilite-$jobid/aln.html\", \"output2\"\ -);\n \n if ( -e \"output2\")\n {\n my ($\ -seq1, $seq2);\n $seq1=$seq2=\"\";\n \n \ - open (F, \"output2\");\n while ()\n \ - {\n $l=$_;\n if ( $l=~/Query\\s+(\\S+)/)\\ -n {\n $seq1.=$1;\n }\n elsif ( $l=\ -~/Sbjct\\s+(\\S+)/)\n {\n $seq2.=$1;\n \ - }\n }\n close (F);\n unlink (\\ -"output2\");\n if ($seq1 ne \"\" && $seq2 ne \ -\"\")\n {\n $output3=\">$A\\n$seq1\\n>$B\\ -\n$seq2\\n\";\n $output3=~s/\\./-/g;\n open (F\ -, \">result.aln\");\n print F \"$output3\";\n \ -close (F);\n }\n }\n }\n }\n \ - }\n &cache_file(\"SET\",\"\",\"$name\",\"$met\ -hod\",\"dali\",\"result.aln\",\"EBI\");\n &set_\ -temporary_dir (\"unset\",$mode, $method, \"result.\ -aln\",$outfile);\n myexit ($EXIT_SUCCESS);\n }\ -\nsub seq2pdb_pair\n {\n my ($mode, $pdbfile1,\ - $pdbfile2, $method, $param, $outfile)=@_;\n \n\ - &set_temporary_dir (\"set\",$pdbfile1,\"pdb1.p\ -db\",$pdbfile2,\"pdb2.pdb\");\n if ($method eq \ -\"t_coffee\")\n {\n &safe_system (\"t_coffee \ --in Ppdb1.pdb Ppdb2.pdb -quiet -outfile=result.aln\ -\");\n }\n elsif ( $method eq \"TMalign\")\ -\n {\n if ( &safe_system (\"TMalign pdb1.pdb \ -pdb2.pdb >tmp1\")==$EXIT_SUCCESS)\n {\n `tai\ -l -4 tmp1 > tmp2`;\n \n open (F, \"tmp2\")\ -;\n while ()\n {\n unshift(@l, $_);\\ -n }\n close (F);\n open (F, \">resul\ -t.aln\");\n $l[3]=~s/[^a-zA-Z0-9-]/\\-/g;\n \ - $l[1]=~s/[^a-zA-Z0-9-]/\\-/g;\n print F \">$\ -A\\n$l[3]\\n>$B\\n$l[1]\\n\";\n close (F);\n \ - }\n else\n {\n print \"ERROR: TMalign faile\ -d to align the considered structures[tc_generic_me\ -thod.pl]\\n\";\n `rm result.aln >/dev/null 2>/\ -dev/null`;\n }\n }\n elsif ( $method eq \ -\"mustang\")\n {\n if ( &safe_system (\"musta\ -ng -i pdb1.pdb pdb2.pdb -F fasta >/dev/null 2>/dev\ -/null\")==$EXIT_SUCCESS)\n {\n `mv results.a\ -fasta result.aln`;\n }\n else\n {\n print \ -\"ERROR: mustang failed to align the considered st\ -ructures[tc_generic_method.pl]\\n\";\n `rm res\ -ult.aln >/dev/null 2>/dev/null`;\n }\n }\n \ - else\n {\n if ( &safe_system (\"$method -p\ -dbfile1=pdb1.pep -pdbfile2=pdb2.pdb -outfile=resul\ -t.aln $param>x 2>x\")==$EXIT_SUCCESS)\n {\n \ -`mv results.afasta result.aln`;\n }\n else\n {\ -\n print \"ERROR: $method failed to align the \ -considered structures[tc_generic_method.pl]\\n\";\\ -n `rm result.aln >/dev/null 2>/dev/null`;\n \ -}\n }\n &set_temporary_dir (\"unset\",$mod\ -e, $method, \"result.aln\",$outfile);\n myexit \ -($EXIT_SUCCESS);\n }\n\nsub seq2profile_pair\n {\ -\n my ($mode, $profile1, $profile2, $method, $p\ -aram, $outfile)=@_;\n \n \n if ($method e\ -q \"clustalw\")\n {\n &set_temporary_dir (\"s\ -et\",$profile1,\"prf1.aln\",$profile2,\"prf2.aln\"\ -);\n `clustalw -profile1=prf1.aln -profile2=prf2.a\ -ln -outfile=result.aln`;\n &set_temporary_dir (\"u\ -nset\",$mode, $method, \"result.aln\",$outfile);\n\ - }\n elsif ( $method eq \"hhalign\")\n \ - {\n hhalign ( $profile1,$profile2,$outfile,$para\ -m);\n }\n else\n {\n \n `$method -pro\ -file1=prf1.aln -profile2=prf2.aln -outfile=result.\ -aln $param>x 2>x`;\n }\n \n myexit ($EXI\ -T_SUCCESS);\n }\n\nsub pg_is_installed\n {\n \ -my @ml=@_;\n my $r, $p, $m;\n my $supported=\ -0;\n \n my $p=shift (@ml);\n $r=`which $p\ - 2>/dev/null`;\n if ($r eq \"\"){return 0;}\n \ - else {return 1;}\n }\nsub check_pg_is_installed\ -\n {\n my @ml=@_;\n my $r=&pg_is_installed \ -(@ml);\n if (!$r)\n {\n print STDERR \"\\n\ -Program $p Supported but Not Installed on your sys\ -tem [FATAL:tc_generic_method]\\n\";\n myexit ($EXI\ -T_FAILURE);\n }\n else\n {\n return 1\ -;\n }\n }\nsub set_temporary_dir\n {\n m\ -y @list=@_;\n my $dir_mode, $a, $mode, $method;\ -\n\n $dir_mode=shift (@list);\n\n \n if (\ - $dir_mode eq \"set\")\n {\n $initial_dir=cwd\ -();\n if ( !$tmp_dir)\n {\n srand;\n $ra\ -nd=rand (100000);\n $tmp_dir=\"$TMPDIR/tmp4tco\ -ffee_profile_pair_dir_$$_P_$rand\";\n }\n if ( !\ --d $tm_dir)\n {\n `mkdir $tmp_dir`;\n }\n \ -\n for ( $a=0; $a<=$#list; $a+=2)\n {\n `cp\ - $list[$a] $tmp_dir/$list[$a+1]`;\n }\n chdi\ -r $tmp_dir;\n }\n elsif ( $dir_mode eq \"u\ -nset\")\n {\n $mode=shift (@list);\n $method=\ -shift (@list);\n \n if (!-e $list[0])\n {\n \ -print STDERR (\"Program $method failed to produce \ -$list[1] [FATAL:$mode/$method/$program]\\n\");\n \ - myexit ($EXIT_FAILURE);\n }\n else\n {\n \ - chdir $initial_dir;\n # `t_coffee -other_pg \ -seq_reformat -in $tmp_dir/$list[0] -output fasta_a\ -ln -out $tmp_dir/result2.aln`;\n `cp $tmp_dir/\ -$list[0] $tmp_dir/result2.aln`;\n if ( $list[1\ -] eq \"stdout\")\n {\n open (F, \"$tmp_dir/\ -result2.aln\");\n while (){print $_;}close(F);\ -\n }\n else\n {\n `mv $tmp_dir/re\ -sult2.aln $list[1]`;\n }\n shift (@list)\ -; shift (@list);\n foreach $f (@list)\n \ -{\n `mv $tmp_dir/$f .`;\n }\n }\n }\\ -n }\nsub clean_dir\n {\n my $dir=@_[0];\n \ -if ( !-d $dir){return ;}\n elsif (!($dir=~/tmp/\ -)){return ;}#safety check 1\n elsif (($dir=~/\\\ -*/)){return ;}#safety check 2\n else\n {\n\ - `rm -rf $dir`;\n }\n return;\n }\n\nsub \ -myexit\n {\n my $code=@_[0];\n &clean_dir (\ -$tmp_dir);\n exit ($code);\n }\n\nsub my_get_o\ -pt\n {\n my @list=@_;\n my $cl, $a, $argv, \ -@argl;\n \n @argl=();\n $cl=shift @list;\\ -n for ( $a=0; $a<=$#list; $a+=3)\n {\n $op\ -tion=$list[$a];\n $optional=$list[$a+1];\n $status\ -=$list[$a+2];\n $argv=\"\";\n if ($cl=~/$option(\\\ -S+)/){$argv=$1;}\n @argl=(@argl,$argv);\n \n \n #$\ -optional:0=>optional\n #$optional:1=>must be set\n\ - #$status: 0=>no requirement\n #$status: 1=>must b\ -e an existing file\n #$status: 2=>must be an insta\ -lled package\n \n\n if ($optional==0){;}\n elsif (\ - $optional==1 && $argv eq \"\")\n {\n print \ -STDERR \"ERROR: Option $option must be set [FATAL:\ -$program/$mode/$method]\\n\";\n myexit ($EXIT_\ -FAILURE);\n }\n if ($status==0){;}\n elsif ($sta\ -tus ==1 && $argv ne \"\" && !-e $argv)\n {\n \ - print STDERR \"ERROR: File $argv must exist [FATA\ -L:$program/$mode/$method]\\n\";\n myexit ($EXI\ -T_FAILURE);\n }\n elsif ( $status==2 && $argv ne\ - \"\" && &check_pg_is_installed ($argv)==0)\n {\\ -n print STDERR \"ERROR: $argv is not installed\ - [FATAL:$program/$mode/$method]\\n\";\n myexit\ - ($EXIT_FAILURE);\n }\n }\n\n return @ar\ -gl;\n }\n\nsub check_file \n {\n my ($file,\ - $msg)=@_;\n\n if ( !-e $file)\n {\n print\ - \"\\n$msg\\n\";\n myexit ($EXIT_FAILURE);\n \ -}\n }\nsub hhalign\n {\n my ($aln1, $aln2, \ -$outfile, $param)=@_;\n my $h1, $h2;\n \n \ - $h{0}{index}=0;\n $h{1}{index}=1;\n \n $\ -h{0}{aln}=$aln1;\n $h{1}{aln}=$aln2;\n\n \n\n\ - %{$h{0}}=aln2psi_profile (%{$h{0}});\n %{$h\ -{1}}=aln2psi_profile (%{$h{1}});\n\n $param=~s/\ -#S/ /g;\n $param=~s/#M/\\-/g;\n $param=~s/#E\ -/\\=/g;\n \n\n \n $command=\"hhalign -i $\ -h{0}{a3m} -t $h{1}{a3m} -tc $outfile.tmp -rank 1 -\ -mapt 0 $param\";\n `$command`;\n \n # `hha\ -lign -i $h{0}{a3m} -t $h{1}{a3m} -tc $outfile.tmp \ --rank 1 -mapt 0 -gapf 0.8 -gapg 0.8`;\n \n\n \ - # To run global use the following\n \n open\ - (I, \"$outfile.tmp\");\n open (O, \">$outfile\\ -");\n $h{0}{cons}=s/\\./x/g;\n $h{1}{cons}=s\ -/\\./x/g;\n\n print O \"! TC_LIB_FORMAT_01\\n2\\ -\n$h{0}{name} $h{0}{len} $h{0}{seq}\\n$h{1}{name} \ -$h{1}{len} $h{1}{seq}\\n#1 2\\n\";\n \n whil\ -e ()\n {\n if (/(\\d+)\\s+(\\d+)\\s+(\\d+)\ -/)\n {\n print O \"\\t$h{0}{$1}\\t$h{1}{$2}\\ -\t$3\\n\";\n }\n }\n print O \"! SEQ_1_T\ -O_N\\n\";\n\n close (O);\n close (I);\n }\n\ -\nsub aln2psi_profile\n {\n my (%h)=@_;\n m\ -y ($aln,$i,$hv, $a, @c, $n);\n \n $i=$h{index\ -};\n $aln=$h{aln};\n\n `cp $aln $$.hhh_aln`;\ -\n $command=\"t_coffee -other_pg seq_reformat -\ -in $aln -output hasch\";\n $hv=`$command`;chomp\ - ($hv);\n \n $h{a2m}=\"$tmp/$hv.tmp4hhpred.a\ -2m\";\n $h{a3m}=\"$tmp/$hv.tmp4hhpred.a3m\";\n \ - if ( -e $h{a3m}){;}\n else\n {\n `hhcon\ -sensus -M 50 -i $h{aln} -oa2m $h{a2m}`;\n if (!-e\ - $h{a2m})\n {\n print STDERR \"Program tc_ge\ -neric_method.pl FAILED to run:\\n\\thhconsensus -\ -M 50 -i $h{aln} -oa2m $h{a2m}\";\n myexit ($EX\ -IT_FAILURE);\n }\n \n `hhconsensus -M 50 -i $h{\ -aln} -oa3m $h{a3m}`;\n if (!-e $h{a3m})\n {\n \ - print STDERR \"Program tc_generic_method.pl FAIL\ -ED to run:\\n\\thhconsensus -M 50 -i $h{aln} -oa3\ -m $h{a3m}\";\n myexit ($EXIT_FAILURE);\n }\n\ - `buildali.pl $h{a3m} -n 1`;\n }\n \\ -n \n $h{a2m_seq}=`head -n 2 $h{a2m} | grep -\ -v \">\"`;chomp ($h{a2m_seq});\n $h{a3m_seq}=`he\ -ad -n 2 $h{a3m} | grep -v \">\"`;chomp ($h{a3m_seq\ -});\n $h{cons}=$h{a2m_seq};\n $h{seq}=`head \ --n 2 $h{aln} | grep -v \">\"`;chomp ($h{seq});\n \ - \n \n\n @c=split (//, $h{cons});\n $h{l\ -en}=$#c+1;\n for ($n=0,$a=0, $b=0; $a<$h{len};$\ -a++)\n {\n if ( $c[$a]=~/[A-Z]/)\n {\n \ -$h{++$n}=++$b;\n\n }\n elsif ( $c[$a]=~/[a-z\\.]\ -/)\n {\n ++$b;\n }\n }\n \n $na\ -me=`head -n 2 $h{aln} | grep \">\"`;\n $name=~/\ -\\>(\\S+)/;\n $h{name}=$1;\n \n `cp $h{a2\ -m} $i.a2m`;\n `cp $h{a3m} $i.a3m`;\n `cp $h{\ -aln} $i.hh_aln`;\n \n return %h;\n }\n\nsub\ - read_fasta_seq \n {\n my $f=@_[0];\n my %h\ -seq;\n my (@seq, @com, @name);\n my ($a, $s,\ -$nseq);\n\n open (F, $f);\n while ()\n \ - {\n $s.=$_;\n }\n close (F);\n\n \n \ - @name=($s=~/>(\\S*).*\\n[^>]*/g);\n \n @s\ -eq =($s=~/>.*.*\\n([^>]*)/g);\n @com =($s=~/>\\\ -S*(.*)\\n([^>]*)/g);\n\n \n $nseq=$#name+1;\\ -n \n for ($a=0; $a<$nseq; $a++)\n {\n m\ -y $s;\n my $n=$name[$a];\n $hseq{$n}{name}=$n;\n $\ -seq[$a]=~s/[^A-Za-z]//g;\n $hseq{$n}{order}=$a;\n \ -$hseq{$n}{seq}=$seq[$a];\n $hseq{$n}{com}=$com[$a]\ -;\n \n }\n return %hseq;\n }\n\nsub file_\ -contains \n {\n my ($file, $tag, $max)=(@_);\n\ - my ($n);\n $n=0;\n \n if ( !-e $file \ -&& ($file =~/$tag/)) {return 1;}\n elsif ( !-e \ -$file){return 0;}\n else \n {\n open (FC, \ -\"$file\");\n while ( )\n {\n if ( ($_=~\ -/$tag/))\n {\n close (FC);\n return 1;\n \ - }\n elsif ($max && $n>$max)\n {\n \ -close (FC);\n return 0;\n }\n $n++;\n \ - }\n }\n close (FC);\n return 0;\n }\n\ - \n \nsub file2string\n {\n my $f=@_[0];\ -\n my $string, $l;\n open (F,\"$f\");\n w\ -hile ()\n {\n\n $l=$_;\n #chomp ($l);\n $s\ -tring.=$l;\n }\n close (F);\n $string=~\ -s/\\r\\n//g;\n $string=~s/\\n//g;\n return $\ -string;\n }\n\n\nsub my_get_opt\n {\n my @lis\ -t=@_;\n my $cl, $a, $argv, @argl;\n \n @a\ -rgl=();\n $cl=shift @list;\n for ( $a=0; $a<\ -=$#list; $a+=3)\n {\n $option=$list[$a];\n $o\ -ptional=$list[$a+1];\n $status=$list[$a+2];\n $arg\ -v=\"\";\n if ($cl=~/$option(\\S+)/){$argv=$1;}\n @\ -argl=(@argl,$argv);\n \n \n #$optional:0=>optional\ -\n #$optional:1=>must be set\n #$status: 0=>no req\ -uirement\n #$status: 1=>must be an existing file\n\ - #$status: 2=>must be an installed package\n \n\n \ -if ($optional==0){;}\n elsif ( $optional==1 && $ar\ -gv eq \"\")\n {\n print STDERR \"ERROR: Opti\ -on $option must be set [FATAL:$program/$mode/$meth\ -od]\\n\";\n myexit ($EXIT_FAILURE);\n }\n if\ - ($status==0){;}\n elsif ($status ==1 && $argv ne \ -\"\" && !-e $argv)\n {\n print STDERR \"ERRO\ -R: File $argv must exist [FATAL:$program/$mode/$me\ -thod]\\n\";\n myexit ($EXIT_FAILURE);\n }\n \ -elsif ( $status==2 && $argv ne \"\" && &check_pg_i\ -s_installed ($argv)==0)\n {\n print STDERR \\ -"ERROR: $argv is not installed [FATAL:$program/$mo\ -de/$method]\\n\";\n myexit ($EXIT_FAILURE);\n \ - }\n }\n\n return @argl;\n }\n\nsub ta\ -g2value \n {\n \n my $tag=(@_[0]);\n my \ -$word=(@_[1]);\n my $return;\n \n $tag=~/\ -$word=\"([^\"]+)\"/;\n $return=$1;\n return \ -$return;\n }\n \nsub hit_tag2pdbid\n {\n \ - my $tag=(@_[0]);\n my $pdbid;\n \n $t\ -ag=~/id=\"(\\S+)\"/;\n $pdbid=$1;\n $pdbid=~\ -s/_//;\n return $pdbid;\n }\nsub id2pdbid \n \ -{\n my $in=@_[0];\n my $id;\n \n $in=~\ -/(\\S+)/;\n $id=$in;\n \n if ($id =~/pdb/\ -)\n {\n $id=~/pdb(.*)/;\n $id=$1;\n }\n \ - $id=~s/[|��_]//g;\n return $id;\n }\nsu\ -b set_blast_type \n {\n my $file =@_[0];\n \ -if (&file_contains ($file,\"EBIApplicationResult\"\ -,100)){$BLAST_TYPE=\"EBI\";}\n elsif (&file_con\ -tains ($file,\"NCBI_BlastOutput\",100)) {$BLAST_TY\ -PE=\"NCBI\";}\n else\n {\n $BLAST_TYPE=\"\\ -";\n }\n return $BLAST_TYPE;\n }\nsub bla\ -st_xml2profile \n {\n my ($name,$seq,$maxid, $\ -minid, $mincov, $file)=(@_);\n my (%p, $a, $str\ -ing, $n);\n \n\n\n if ($BLAST_TYPE eq \"EBI\\ -" || &file_contains ($file,\"EBIApplicationResult\\ -",100)){%p=ebi_blast_xml2profile(@_);}\n elsif \ -($BLAST_TYPE eq \"NCBI\" || &file_contains ($file,\ -\"NCBI_BlastOutput\",100)){%p=ncbi_blast_xml2profi\ -le(@_);}\n else \n {\n print \"***********\ -* ERROR: Blast Returned an unknown XML Format ****\ -******************\";\n myexit ($EXIT_FAILURE);\n \ - }\n for ($a=0; $a<$p{n}; $a++)\n {\n \ -my $name=$p{$a}{name};\n $p{$name}{seq}=$p{$a}{seq\ -};\n }\n return %p;\n }\nsub ncbi_blast_x\ -ml2profile \n {\n my ($name,$seq,$maxid, $mini\ -d, $mincov, $string)=(@_);\n my ($L,$l, $a,$b,$\ -c,$d,$nhits,@identifyerL);\n \n \n $seq=~\ -s/[^a-zA-Z]//g;\n $L=length ($seq);\n \n \ -%hit=&xml2tag_list ($string, \"Hit\");\n \n \ -\n for ($nhits=0,$a=0; $a<$hit{n}; $a++)\n \ - {\n my ($ldb,$id, $identity, $expectation, $start\ -, $end, $coverage, $r);\n my (%ID,%DE,%HSP);\n \n \ -$ldb=\"\";\n\n %ID=&xml2tag_list ($hit{$a}{body}, \ -\"Hit_id\");\n $identifyer=$ID{0}{body};\n \n %DE=\ -&xml2tag_list ($hit{$a}{body}, \"Hit_def\");\n $de\ -finition=$DE{0}{body};\n \n %HSP=&xml2tag_list ($h\ -it{$a}{body}, \"Hsp\");\n for ($b=0; $b<$HSP{n}; $\ -b++)\n {\n my (%START,%END,%E,%I,%Q,%M);\n\n\ - \n %START=&xml2tag_list ($HSP{$b}{body}, \"H\ -sp_query-from\");\n %HSTART=&xml2tag_list ($HS\ -P{$b}{body}, \"Hsp_hit-from\");\n \n %LEN=\ - &xml2tag_list ($HSP{$b}{body}, \"Hsp_align-len\"\ -);\n %END= &xml2tag_list ($HSP{$b}{body}, \"H\ -sp_query-to\");\n %HEND= &xml2tag_list ($HSP{\ -$b}{body}, \"Hsp_hit-to\");\n %E=&xml2tag_list\ - ($HSP{$b}{body}, \"Hsp_evalue\");\n %I=&x\ -ml2tag_list ($HSP{$b}{body}, \"Hsp_identity\")\ -;\n %Q=&xml2tag_list ($HSP{$b}{body}, \"Hs\ -p_qseq\");\n %M=&xml2tag_list ($HSP{$b}{bo\ -dy}, \"Hsp_hseq\");\n \n for ($e=0; $e<$Q{\ -n}; $e++)\n\n {\n $qs=$Q{$e}{body};\n $ms=\ -$M{$e}{body};\n \n $expectation=$E{$e}{body};\n \ - $identity=($LEN{$e}{body}==0)?0:$I{$e}{body}/$LEN\ -{$e}{body}*100;\n $start=$START{$e}{body};\n $en\ -d=$END{$e}{body};\n $Hstart=$HSTART{$e}{body};\n \ - $Hend=$HEND{$e}{body};\n \n $coverage=(($end-$st\ -art)*100)/$L;\n\n \n if ($identity>$maxid || $ide\ -ntity<$minid || $coverage<$mincov){next;}\n @lr1=\ -(split (//,$qs));\n @lr2=(split (//,$ms));\n $l=\ -$#lr1+1;\n for ($c=0;$c<$L;$c++){$p[$nhits][$c]=\\ -"-\";}\n for ($d=0,$c=0; $c<$l; $c++)\n {\n \ - $r=$lr1[$c];\n if ( $r=~/[A-Za-z]/)\n \ - {\n \n $p[$nhits][$d + $start-1]=$lr2[$c];\\ -n $d++;\n }\n }\n $Qseq[$nhits]=$qs;\\ -n $Hseq[$nhits]=$ms;\n $QstartL[$nhits]=$start;\\ -n $HstartL[$nhits]=$Hstart;\n $identityL[$nhits]\ -=$identity;\n $endL[$nhits]=$end;\n $definitionL\ -[$nhits]=$definition;\n $identifyerL[$nhits]=$ide\ -ntifyer;\n $comment[$nhits]=\"$ldb|$identifyer [E\ -val=$expectation][id=$identity%][start=$Hstart end\ -=$Hend]\";\n $nhits++;\n }\n }\n }\n\ - \n $profile{n}=0;\n $profile{$profile{n}\ -}{name}=$name;\n $profile{$profile{n}}{seq}=$se\ -q;\n $profile {n}++;\n \n for ($a=0; $a<$\ -nhits; $a++)\n {\n $n=$a+1;\n \n $profile{$n}\ -{name}=\"$name\\_$a\";\n $profile{$n}{seq}=\"\";\n\ - $profile{$n}{Qseq}=$Qseq[$a];\n $profile{$n}{Hseq\ -}=$Hseq[$a];\n $profile{$n}{Qstart}=$QstartL[$a];\\ -n $profile{$n}{Hstart}=$HstartL[$a];\n $profile{$n\ -}{identity}=$identityL[$a];\n $profile{$n}{definit\ -ion}=$definitionL[$a];\n $profile{$n}{identifyer}=\ -$identifyerL[$a];\n $profile{$n}{comment}=$comment\ -[$a];\n for ($b=0; $b<$L; $b++)\n {\n if ($p\ -[$a][$b])\n {\n $profile{$n}{seq}.=$p[$a][$\ -b];\n }\n else\n {\n $profile{$n}\ -{seq}.=\"-\";\n }\n }\n }\n \n \ -$profile{n}=$nhits+1;\n return %profile;\n }\n\ -sub ebi_blast_xml2profile \n {\n my ($name,$se\ -q,$maxid, $minid, $mincov, $string)=(@_);\n my \ -($L,$l, $a,$b,$c,$d,$nhits,@identifyerL,$identifye\ -r);\n \n\n \n $seq=~s/[^a-zA-Z]//g;\n \ -$L=length ($seq);\n %hit=&xml2tag_list ($string\ -, \"hit\");\n \n for ($nhits=0,$a=0; $a<$hit\ -{n}; $a++)\n {\n my ($ldb,$id, $identity, $ex\ -pectation, $start, $end, $coverage, $r);\n my (%Q,\ -%M,%E,%I);\n \n $ldb=&tag2value ($hit{$a}{open}, \\ -"database\");\n $identifyer=&tag2value ($hit{$a}{o\ -pen}, \"id\");\n\n $description=&tag2value ($hit{$\ -a}{open}, \"description\");\n \n %Q=&xml2tag_list \ -($hit{$a}{body}, \"querySeq\");\n %M=&xml2tag_list\ - ($hit{$a}{body}, \"matchSeq\");\n %E=&xml2tag_lis\ -t ($hit{$a}{body}, \"expectation\");\n %I=&xml2tag\ -_list ($hit{$a}{body}, \"identity\");\n \n\n for (\ -$b=0; $b<$Q{n}; $b++)\n {\n\n $qs=$Q{$b}{bod\ -y};\n $ms=$M{$b}{body};\n \n $expectat\ -ion=$E{$b}{body};\n $identity=$I{$b}{body};\n \ - \n \n $start=&tag2value ($Q{$b}{o\ -pen}, \"start\");\n $end=&tag2value ($Q{$b}{op\ -en}, \"end\");\n $startM=&tag2value ($M{$b}{op\ -en}, \"start\");\n $endM=&tag2value ($M{$b}{op\ -en}, \"end\");\n $coverage=(($end-$start)*100)\ -/$L;\n \n # print \"$id: ID: $identity COV:\ - $coverage [$start $end]\\n\";\n \n \n \ - if ($identity>$maxid || $identity<$minid || $cove\ -rage<$mincov){next;}\n # print \"KEEP\\n\";\n\\ -n \n @lr1=(split (//,$qs));\n @lr2=(sp\ -lit (//,$ms));\n $l=$#lr1+1;\n for ($c=0;$\ -c<$L;$c++){$p[$nhits][$c]=\"-\";}\n for ($d=0,\ -$c=0; $c<$l; $c++)\n {\n $r=$lr1[$c];\n if\ - ( $r=~/[A-Za-z]/)\n {\n \n $p[$nhits\ -][$d + $start-1]=$lr2[$c];\n $d++;\n }\n \ - }\n \n \n $identifyerL[$nhits]=$ide\ -ntifyer;\n $comment[$nhits]=\"$ldb|$identifyer\ - [Eval=$expectation][id=$identity%][start=$startM \ -end=$endM]\";\n $nhits++;\n }\n }\n \ -\n $profile{n}=0;\n $profile{$profile{n}}{na\ -me}=$name;\n $profile{$profile{n}}{seq}=$seq;\n\ - $profile {n}++;\n \n for ($a=0; $a<$nhit\ -s; $a++)\n {\n $n=$a+1;\n $profile{$n}{name}=\ -\"$name\\_$a\";\n $profile{$n}{seq}=\"\";\n $profi\ -le{$n}{identifyer}=$identifyerL[$a];\n \n $profile\ -{$n}{comment}=$comment[$a];\n for ($b=0; $b<$L; $b\ -++)\n {\n if ($p[$a][$b])\n {\n $prof\ -ile{$n}{seq}.=$p[$a][$b];\n }\n else\n \ - {\n $profile{$n}{seq}.=\"-\";\n }\n \ -}\n }\n $profile{n}=$nhits+1;\n \n r\ -eturn %profile;\n }\nsub output_profile\n {\n \ - my ($name,%profile)=(@_);\n my ($a);\n open\ - (P, \">$name\");\n for ($a=0; $a<$profile{n}; \ -$a++)\n {\n print P \">$profile{$a}{name} $pr\ -ofile{$a}{comment}\\n$profile{$a}{seq}\\n\";\n \ - }\n close (P);\n return;\n }\nsub blast_x\ -ml2hit_list\n {\n my $string=(@_[0]);\n ret\ -urn &xml2tag_list ($string, \"hit\");\n }\nsub xm\ -l2tag_list \n {\n my ($string_in,$tag)=@_;\n \ - my $tag_in, $tag_out;\n my %tag;\n \n \ -if (-e $string_in)\n {\n $string=&file2string\ - ($string_in);\n }\n else\n {\n $stri\ -ng=$string_in;\n }\n $tag_in1=\"<$tag \";\\ -n $tag_in2=\"<$tag>\";\n $tag_out=\"/$tag>\"\ -;\n $string=~s/>/>##1/g;\n $string=~s//g;\n @l=($string=~/(\\<[^>]+\\>)/g);\n $t\ -ag{n}=0;\n $in=0;$n=-1;\n \n \n\n foreach $\ -t (@l)\n {\n\n $t=~s/<#//;\n $t=~s/#>//;\n \n\ - if ( $t=~/$tag_in1/ || $t=~/$tag_in2/)\n {\n \\ -n $in=1;\n $tag{$tag{n}}{open}=$t;\n $\ -n++;\n \n }\n elsif ($t=~/$tag_out/)\n {\n\ - \n\n $tag{$tag{n}}{close}=$t;\n $tag{\ -n}++;\n $in=0;\n }\n elsif ($in)\n {\n \ -\n $tag{$tag{n}}{body}.=$t;\n }\n }\n \ -\n return %tag;\n }\n\n\nsub seq2gor_predictio\ -n \n {\n my ($name, $seq,$infile, $outfile, $g\ -or_seq, $gor_obs)=(@_);\n my ($l);\n \n `\ -gorIV -prd $infile -seq $gor_seq -obs $gor_obs > g\ -or_tmp`;\n open (GR, \">$outfile\");\n open \ -(OG, \"gor_tmp\");\n\n while ()\n {\n \ -\n $l=$_;\n if ($l=~/\\>/){print GR \"$l\";}\n els\ -if ( $l=~/Predicted Sec. Struct./)\n {\n $l=\ -~s/Predicted Sec. Struct\\.//;\n print GR \"$l\ -\";\n }\n }\n close (GR);\n close (OG\ -);\n return;\n }\nsub seq2msa_tm_prediction \n\ - {\n my ($name, $seq,$infile, $outfile, $arch,\ - $psv)=(@_);\n my (%p,%gseq,%R, $blast_output, \ -%s, $l);\n \n $blast_output=&run_blast ($nam\ -e,\"blastp\", \"uniprot\", $infile, \"outfile\");\\ -n \n \n %p=blast_xml2profile($name,$seq,$\ -maxid, $minid,$mincov,$blast_output);\n \n \\ -n open (F, \">tm_input\");\n for ($a=0; $a<$\ -p{n}; $a++)\n {\n my $s;\n \n $s=$p{$a}{seq};\ -\n $s=uc($s);\n print F \">$p{$a}{name}\\n$s\\n\";\ -\n #print stdout \">$p{$a}{name}\\n$s\\n\";\n \ - }\n close (F);\n print \"\\tPSITM: kept $p\ -{n} Homologues for Sequence $p{0}{name}\\n\";\n \ - &safe_system (\"t_coffee -other_pg fasta_seq2hmmt\ -op_fasta.pl -in=tm_input -out=tm_output -arch=$arc\ -h -psv=$psv\");\n unlink (\"tm_input\");\n %\ -gs=read_fasta_seq(\"tm_output\");\n foreach $s \ -(keys(%gs))\n {\n my (@list, $seq, @plist, @p\ -seq, $L, $PL);\n \n \n #Prediction\n $seq=$gs{$s}{\ -seq};\n $seq=uc($seq);\n $L=length($seq);\n @list=\ -split //, $seq;\n \n #Original Profile Sequence\n \ -$pseq=$p{$s}{seq};\n $pseq=uc($pseq);\n $PL=length\ -($pseq);\n @plist=split //, $pseq;\n \n for ($c=0,\ -$b=0; $b<$PL; $b++)\n {\n my $r=$plist[$b];\\ -n if($r ne \"-\" && $r ne \"X\")\n {\n \ -$r=$plist[$b]=$list[$c++];\n }\n }\n \n if\ - ($c!=$L)\n {\n print \"ERROR: Could Not Thr\ -ead the Prediction Back [FATAL:tc_generic_method.p\ -l]\\n\";\n myexit ($EXIT_FAILURE);\n }\n for\ - ($b=0;$b<$PL; $b++)\n {\n my $r=$plist[$b];\ -\n if ( $r ne \"-\" && $r ne \"X\")\n {\\ -n $R{$b}{$r}++;\n }\n }\n }\n $L=l\ -ength ($p{0}{seq});\n open (R2, \">$outfile\");\ -\n print R2 \">$name\\n\";\n \n for ($a=0\ -; $a<$L; $a++)\n {\n \n my ($v,$v_max,$r,$r_m\ -ax, @rl);\n \n $v=$v_max=0;\n @rl=keys (%{$R{$a}})\ -;\n foreach $r (@rl)\n {\n\n $v=$R{$a}{$r};\\ -n if ($v>=$v_max)\n {\n $v_max=$v;\n $\ -r_max=$r;\n }\n }\n print R2 \"$r_max\";\n\ - }\n print R2 \"\\n\";\n close (R2);\n \ - return;\n }\nsub seq2msa_gor_prediction \n {\\ -n my ($name, $seq,$infile, $outfile, $gor_seq, \ -$gor_obs)=(@_);\n my (%p,%gseq,%R, $blast_outpu\ -t, %s, $l);\n \n \n $blast_output=&run_bl\ -ast ($name,\"blastp\", \"uniprot\", $infile, \"out\ -file\");\n %p=blast_xml2profile($name,$seq,$max\ -id, $minid,$mincov,$blast_output);\n \n open\ - (F, \">gor_input\");\n for ($a=0; $a<$p{n}; $a\ -++)\n {\n my $s;\n \n $s=$p{$a}{seq};\n $s=~s\ -/\\-//g;\n $s=~s/X//g;\n \n $s=uc($s);\n print F \\ -">$p{$a}{name}\\n$s\\n\";\n }\n close (F);\ -\n print \"\\tPSIGOR: kept $p{n} Homologues fo\ -r Sequence $p{0}{name}\\n\";\n \n `gorIV -pr\ -d gor_input -seq $gor_seq -obs $gor_obs > gor_tmp`\ -;\n unlink (\"gor_input\");\n \n open (GR\ -, \">gor_output\");\n open (OG, \"gor_tmp\");\n\ - \n while ()\n {\n \n my $l;\n $l=$\ -_;\n \n if ($l=~/\\>/){print GR \"$l\";}\n elsif (\ - $l=~/Predicted Sec. Struct./)\n {\n $l=~s/P\ -redicted Sec. Struct\\.//;\n print GR \"$l\";\\ -n }\n }\n close (GR);\n close (OG);\n\ - \n\n %gs=read_fasta_seq(\"gor_output\");\n \ - foreach $s (keys(%gs))\n {\n my (@list, $\ -seq, @plist, @pseq, $L, $PL);\n \n \n #Prediction\\ -n $seq=$gs{$s}{seq};\n $seq=uc($seq);\n $L=length(\ -$seq);\n @list=split //, $seq;\n \n #Original Prof\ -ile Sequence\n $pseq=$p{$s}{seq};\n $pseq=uc($pseq\ -);\n $PL=length($pseq);\n @plist=split //, $pseq;\\ -n \n $tseq=\"\";\n for ($c=0,$b=0; $b<$PL; $b++)\n\ - {\n my $r=$plist[$b];\n if($r ne \"-\" \ -&& $r ne \"X\")\n {\n $r=$plist[$b]=$list[$\ -c++];\n }\n $tseq.=$r;\n }\n \n if ($c\ -!=$L)\n {\n print \"ERROR: Could Not Thread \ -the Prediction Back [FATAL:tc_generic_method.pl]\\\ -n\";\n print \"SEQ:$seq\\nPSEQ:$pseq\\nTSEQ:$t\ -seq\";\n \n myexit ($EXIT_FAILURE);\n }\\ -n for ($b=0;$b<$PL; $b++)\n {\n my $r=$plist\ -[$b];\n if ( $r ne \"-\" && $r ne \"X\")\n \ - {\n $R{$b}{$r}++;\n }\n }\n }\n \ -\n $L=length ($p{0}{seq});\n open (R2, \">$o\ -utfile\");\n print R2 \">$name\\n\";\n \n \ - for ($a=0; $a<$L; $a++)\n {\n \n my ($v,$v_m\ -ax,$r,$r_max, @rl);\n \n $v=$v_max=0;\n @rl=keys (\ -%{$R{$a}});\n foreach $r (@rl)\n {\n\n $v=$R\ -{$a}{$r};\n if ($v>=$v_max)\n {\n $v_ma\ -x=$v;\n $r_max=$r;\n }\n }\n print R2 \"$\ -r_max\";\n }\n print R2 \"\\n\";\n clos\ -e (R2);\n return;\n }\n\nsub run_blast\n {\n \ - my ($name, $method, $db,$infile, $outfile, $run\ -)=(@_);\n if (!$run){$run=1;}\n \n \n \ -if (&cache_file(\"GET\",$infile,$name,$method,$db,\ -$outfile,$SERVER)){return $outfile;}\n else\n \ - {\n \n if ( $SERVER eq \"EBI\")\n {\n $c\ -l_method=$method;\n if ($cl_method =~/wu/)\n \ - {\n $cl_method=~s/wu//;\n if ( $cl_method e\ -q \"psiblast\")\n {\n print STDERR \"\\n**\ -*************WARNING: PSI BLAST cannot be used wit\ -h the NCBI BLAST Client. Use server=EBI Or server=\ -LOCAL. blastp will be used instead***********\\n\"\ -;\n $cl_method=\"blastp\";\n }\n \n $com\ -mand=\"t_coffee -other_pg wublast.pl --email $EMAI\ -L $infile -D $db -p $cl_method --outfile $outfile \ --o xml>/dev/null 2>/dev/null\";\n &safe_system ( \ -$command);\n if (-e \"$outfile.xml\") {`mv $outfi\ -le.xml $outfile`;}\n }\n else\n {\\ -n if ($cl_method eq \"psiblast\"){$cl_method =\"b\ -lastp -j5\";}\n \n $command=\"t_coffee -other_pg\ - blastpgp.pl --email $EMAIL $infile -d $db --outfi\ -le $outfile -p $cl_method --mode PSI-Blast>/dev/nu\ -ll 2>/dev/null\";\n &safe_system ( $command);\n \ -\n if (-e \"$outfile.xml\") {`mv $outfile.xml $ou\ -tfile`;}\n }\n }\n elsif ($SERVER eq \"NCB\ -I\")\n {\n if ($db eq \"uniprot\"){$cl_db=\"\ -nr\";}\n else {$cl_db=$db;}\n \n if ( \ -$method eq \"psiblast\")\n {\n print STDERR\ - \"\\n***************WARNING: PSI BLAST cannot be \ -used with the NCBI BLAST Client. Use server=EBI Or\ - server=LOCAL. blastp will be used instead********\ -***\\n\";\n $cl_method=\"blastp\";\n }\n \ - else\n {\n $cl_method=$method;\n }\\ -n $command=\"blastcl3 -p $cl_method -d $cl_db \ --i $infile -o $outfile -m 7\";\n &mysystem ($c\ -ommand);\n }\n elsif ($SERVER =~/CLIENT_(.*)/)\n\ - {\n my $client=$1;\n $command=\"$client\ - -p $method -d $db -i $infile -o $outfile -m 7\";\\ -n &mysystem ($command);\n }\n elsif ( $SERVE\ -R eq \"LOCAL_blastall\")\n {\n if ($method e\ -q \"blastp\")\n {\n $command=\"blastall -d \ -$db -i $infile -o $outfile -m7 -p blastp\";\n \ - }\n &mysystem ($command);\n }\n elsif ( $S\ -ERVER eq \"LOCAL\")\n {\n\n if ($ENV{\"BLAST\ -_DB_DIR\"})\n {\n $x=$ENV{\"BLAST_DB_DIR\"}\ -;\n $cl_db=\"$x$db\";\n }\n else\n \ - {\n $cl_db=$db;\n }\n \n if ($met\ -hod eq \"blastp\")\n {\n $command=\"blastpg\ -p -d $cl_db -i $infile -o $outfile -m7 -j1\";\n \ - }\n elsif ($method eq \"psiblast\")\n \ - {\n $command=\"blastpgp -d $cl_db -i $infile -o\ - $outfile -m7 -j5\";\n }\n elsif ($method e\ -q \"blastn\")\n {\n $command=\"blastall -p blas\ -tn -d $cl_db -i $infile -o $outfile -m7 -W6\";\n \ -} \n &mysystem ($command);\n }\n else\n {\\ -n print (\"*************** ERROR: $SERVER is a\ -n Unknown Server***********\");\n }\n \n if ( !-\ -e $outfile)\n {\n \n if ( $run==$BLAST_M\ -AX_NRUNS)\n {\n print STDERR \"COM: $comman\ -d\\n\";\n print STDERR (\"BLAST failed against $n\ -ame [FATAL:$mode/$method/$program]\\n\");\n if (\ - $SERVER eq \"EBI\" && !($method=~/wu/))\n {\n \ - print STDERR (\"Try WuBlast instead\");\n \ - return run_blast ($name,\"wublastp\", $db,$infil\ -e, $outfile);\n }\n }\n else\n \ -{\n print STDERR \"(Blast for $name failed [$com\ -mand][Attempt $run/$BLAST_MAX_NRUNS] [Try again]\\\ -n\";\n return run_blast ($name, $method, $db,$inf\ -ile, $outfile, $run+1);\n }\n }\n\n &cache\ -_file(\"SET\",$infile,$name,$method,$db,$outfile,$\ -SERVER);\n return $outfile;\n }\n }\nsub mys\ -ystem \n {\n my $command=@_[0];\n my $count\ -=0;\n my $r;\n \n while (($r=&safe_system\ -($command))!=$EXIT_SUCCESS && $count<5)\n {\n\ - print \"\\nCOMMAND $command Failed. Will try agai\ -n\\n\";\n $count++;\n }\n return $r;\n }\\ -nsub cache_file\n {\n my ($cache_mode,$infile,\ -$name,$method,$db, $outfile,$server)=(@_);\n my\ - $cache_file;\n #Protect names so that they can\ - be turned into legal filenames\n $name=&clean_\ -file_name ($name);\n\n if ($db=~/\\//)\n {\ -\n $db=~/([^\\/]+)$/;\n $db=$1;\n }\n $cac\ -he_file_sh=\"$name.$method.$db.$server.tmp\";\n \ - $cache_file=\"$CACHE/$name.$method.$db.$server.tm\ -p\";\n \n if ($infile ne \"\")\n {\n $c\ -ache_file_infile_sh=\"$name.$method.$db.$server.in\ -file.tmp\";\n $cache_file_infile=\"$CACHE/$name.$m\ -ethod.$db.$server.infile.tmp\";\n }\n \n \ - if ($cache_mode eq \"GET\")\n {\n if ($CACH\ -E eq \"\" || $CACHE eq \"no\" || $CACHE eq \"ignor\ -e\" || $CACHE eq \"local\" || $CACHE eq \"update\\ -"){return 0;}\n elsif ( !-d $CACHE)\n {\n pr\ -int STDERR \"ERROR: Cache Dir: $CACHE Does not Exi\ -st\";\n return 0;\n }\n else\n {\n if \ -( -e $cache_file && &fasta_file1_eq_fasta_file2($i\ -nfile,$cache_file_infile)==1)\n {\n `cp $ca\ -che_file $outfile`;\n $CACHE_STATUS=\"READ CACHE\\ -";\n return 1;\n }\n }\n }\n elsi\ -f ($cache_mode eq \"SET\")\n {\n if ($CACHE e\ -q \"\" || $CACHE eq \"no\" || $CACHE eq \"ignore\"\ - || $CACHE eq \"local\" || $CACHE eq \"update\"){\ -return 0;}\n elsif ( !-d $CACHE)\n {\n print\ - STDERR \"ERROR: Cache Dir: $CACHE Does not Exist\\ -";\n return 0;\n }\n elsif (-e $outfile)\n \ - {\n `cp $outfile $cache_file`;\n if ($cac\ -he_file_infile ne \"\"){ `cp $infile $cache_file_i\ -nfile`;}\n\n #functions for updating the cache\ -\n #`t_coffee -other_pg clean_cache.pl -file $\ -cache_file_sh -dir $CACHE`;\n #`t_coffee -othe\ -r_pg clean_cache.pl -file $cache_file_infile_sh -d\ -ir $CACHE`;\n return 1;\n }\n }\n $C\ -ACHE_STATUS=\"COMPUTE CACHE\";\n return 0;\n }\ -\nsub file1_eq_file2\n {\n my ($f1, $f2)=@_;\n\ - if ( $f1 eq \"\"){return 1;}\n elsif ( $f2 \ -eq \"\"){return 1;}\n elsif ( !-e $f1){return 0\ -;}\n elsif ( !-e $f2){return 0;}\n elsif ($f\ -1 eq \"\" || $f2 eq \"\" || `diff $f1 $f2` eq \"\"\ -){return 1;}\n \n return 0;\n }\nsub clean_\ -file_name \n {\n my $name=@_[0];\n \n $n\ -ame=~s/[^A-Za-z1-9.-]/_/g;\n return $name;\n }\ -\nsub url2file\n {\n my ($address, $out)=(@_);\ -\n \n if (&pg_is_installed (\"wget\"))\n {\n\ - return &safe_system (\"wget $address -O$out >/d\ -ev/null 2>/dev/null\");\n }\n elsif (&pg_is_ins\ -talled (\"curl\"))\n {\n return &safe_system \ -(\"curl $address -o$out >/dev/null 2>/dev/null\");\ -\n }\n else\n {\n print stderr \"ERRO\ -R: neither curl nor wget are installed. Imnpossibl\ -e to fectch remote file [FATAL]\\n\";\n exit ($EXI\ -T_FAILURE);\n }\n }\nsub fasta_file1_eq_fast\ -a_file2\n {\n my ($f1, $f2)=@_;\n my (%s1, \ -%s2);\n my @names;\n %s1=read_fasta_seq (%f1\ -);\n %s2=read_fasta_seq (%f2);\n\n @names=(k\ -eys (%s1));\n \n foreach $n (keys(%s1))\n \ - {\n if ($s1{$n}{seq} ne $s2{$n}{seq}){return 0;\ -}\n } \n \n foreach $n (keys(%s2))\n \ - {\n if ($s1{$n}{seq} ne $s2{$n}{seq}){return 0;\ -}\n }\n return 1;\n }\n \nsub safe_system\ - \n{\n my $com=@_[0];\n my $pid;\n my $status;\\ -n if ($com eq \"\"){return 1;}\n\n\n if (($pid =\ - fork ()) < 0){return (-1);}\n if ($pid == 0)\n \ - {\n exec ($com);\n }\n else\n {\n \ -$PIDCHILD=$pid;\n }\n \n waitpid ($pid,WTER\ -MSIG);\n return $?; #contains the status of the e\ -xit\n}\nEND {\n kill ($PIDCHILD);\n}\n\n\nsub rea\ -d_template_file\n{\n my $pdb_templates = @_[0];\n \ -open (TEMP, \"<$pdb_templates\");\n my %temp_h;\n \ -while ()\n{\n $line = $_;\n $line =~/(\\S\ -+)\\s(\\S+)/;\n $temp_h{$1}= $2;\n}\n close(TEMP\ -);\n return %temp_h;\n}\n\nsub calc_rna_template\n\ -{\n my ($mode, $infile, $pdbfile, $outfile)=@_;\n \ -my %s, %h ;\n my $result;\n my (@profiles);\n &set\ -_temporary_dir (\"set\",$infile,\"seq.pep\");\n %s\ -=read_fasta_seq (\"seq.pep\");\n \n %pdb_template_\ -h = &read_template_file($pdbfile);\n my $pdb_chain\ -;\n open (R, \">result.aln\");\n\n\n #print stdout\ - \"\\n\";\n foreach $seq (keys(%s))\n {\n if ($pd\ -b_template_h{$seq} eq \"\")\n {\n next;\n }\n \ - open (F, \">seqfile\");\n print (F \">$s{$seq}{n\ -ame}\\n$s{$seq}{seq}\\n\");\n close (F);\n $pdb_\ -chain = $pdb_template_h{$seq};\n $lib_name=\"$s{$\ -seq}{name}.rfold\";\n $lib_name=&clean_file_name \ -($lib_name);\n safe_system (\"t_coffee -other_pg \ -RNAplfold2tclib.pl -in=seqfile -out=$lib_name\");\\ -n \n safe_system (\"secondary_struc.py seqfile \ -$CACHE$pdb_chain $lib_name\");\n \n if ( !-e $l\ -ib_name)\n {\n print STDERR (\"RNAplfold failed\ - to compute the secondary structure of $s{$seq}{na\ -me} [FATAL:$mode/$method/$program]\\n\");\n myex\ -it ($EXIT_FAILURE);\n }\n else\n {\n print st\ -dout \"\\tProcess: >$s{$seq}{name} _F_ $lib_name\\\ -n\";\n print R \">$s{$seq}{name} _F_ $lib_name\\\ -n\";\n }\n unshift (@profiles, $lib_name);\n }\n\ - close (R);\n &set_temporary_dir (\"unset\",$mode,\ - $method,\"result.aln\",$outfile, @profiles);\n}\n\ -\n\n\nsub seq2rna_pair{\n my ($mode, $pdbfile1, $p\ -dbfile2, $method, $param, $outfile)=@_;\n\n if ($m\ -ethod eq \"runsara.py\")\n {\n open(TMP,\"<$pdbfi\ -le1\");\n my $count = 0;\n my $line;\n while (<\ -TMP>)\n {\n $line = $_;\n if ($count ==1)\n \ - {\n last;\n }\n $count += 1;\n }\n \n my\ - $y = length($line);\n\n $chain1 = substr($line,l\ -ength($line)-3,1);\n close TMP;\n open(TMP,\"<$p\ -dbfile2\");\n my $count = 0;\n while ()\n \ -{\n $line = $_;\n if ($count ==1)\n {\n l\ -ast;\n }\n $count += 1;\n }\n $chain2 = subs\ -tr($line,length($line)-3,1);\n close TMP;\n \n \ -\n system(\"runsara.py $pdbfile1 $chain1 $pdbfile\ -2 $chain2 -s -o tmp >/dev/null 2>/dev/null\");\n \ -open(TMP,\"$outfile\") or die\ - \"cannot open the $outfile file:$!\\n\";\n\n my \ -$switch = 0;\n my $seqNum = 0;\n foreach my $lin\ -e ()\n {\n next unless ($line=~/SARAALI/);\ -\n if ($line=~/>/)\n {\n $switch =0;\n p\ -rint OUT \">seq$seqNum\\n\";\n $seqNum++; \n\ - }\n if ($switch < 2){\n $switch++;\n ne\ -xt;\n }\n \n if ($line =~/REMARK\\s+SARAALI\\s\ -+([^\\*]+)\\*/)\n {\n my $string = $1;\n p\ -rint OUT \"$string\\n\";\n }\n }\n close TMP; \ -\n close OUT;\n }\n}$program=\"T-COFFEE (Version_\ -8.07)\";\\n\n","*TC_METHOD_FORMAT_01\n************\ -******generic_method.tc_method*************\n*\n* \ - Incorporating new methods in T-Coffee\n* \ - Cedric Notredame 26/08/08\n*\n*****************\ -**************************************\n*This file\ - is a method file\n*Copy it and adapt it to your n\ -eed so that the method \n*you want to use can be i\ -ncorporated within T-Coffee\n*********************\ -**********************************\n* \ - USAGE *\n*******\ -************************************************\n\ -*This file is passed to t_coffee via -in:\n*\n* t_\ -coffee -in Mgeneric_method.method\n*\n* The method\ - is passed to the shell using the following\n*call\ -:\n*\n*\n*Conventions:\n* \n*: no_name <=>\ - Replaced with a space\n*:   <=> Replac\ -ed with a space\n*\n******************************\ -*************************\n* ALN_\ -MODE *\n****************\ -***************************************\n*pairwise\ - ->all Vs all (no self )[(n2-n)/2aln]\n*m_pairwi\ -se ->all Vs all (no self)[n^2-n]^2\n*s_pairwise ->\ -all Vs all (self): [n^2-n]/2 + n\n*multiple ->Al\ -l the sequences in one go\n*\nALN_MODE pairwise\n\ -*\n***********************************************\ -********\n* OUT_MODE \ - *\n*********************************\ -**********************\n* mode for the output:\n*E\ -xternal methods: \n* aln -> alignmnent File (Fasta\ - or ClustalW Format)\n* lib-> Lib file (TC_LIB_FOR\ -MAT_01)\n*Internal Methods:\n* fL -> Internal Func\ -tion returning a List (Librairie)\n* fA -> Interna\ -l Function returning an Alignmnent\n*\nOUT_MODE a\ -ln\n**********************************************\ -*********\n* SEQ_TYPE \ - *\n********************************\ -***********************\n*G: Genomic, S: Sequence,\ - P: PDB, R: Profile\n*Examples:\n*SEQTYPE S sequen\ -ces against sequences (default)\n*SEQTYPE S_P sequ\ -ence against structure\n*SEQTYPE P_P structure aga\ -inst structure\n*SEQTYPE PS mix of sequences and s\ -tructure \n*\nSEQ_TYPE S\n*\n\n*******************\ -************************************\n* \ - COMMAND LINE *\n*EXEC\ -UTABLE PARAM1 IN_FLAG OUT_FLAG PARAM *\ -\n************************************************\ -*******\n*****************************************\ -**************\n* EXECUTABLE \ - *\n***************************\ -****************************\n*name of the executa\ -ble\n*passed to the shell: executable\n* \nEXECUTA\ -BLE tc_generic_method.pl\n*\n*********************\ -**********************************\n* \ - IN_FLAG *\n******\ -*************************************************\\ -n*IN_FLAG\n*flag indicating the name of the in com\ -ing sequences\n*IN_FLAG S no_name ->no flag\n*IN_F\ -LAG S &bnsp-in&bnsp -> \" -in \"\n*\nIN_FLAG -inf\ -ile=\n*\n*****************************************\ -**************\n* OUT_FLAG \ - *\n***************************\ -****************************\n*OUT_FLAG\n*flag ind\ -icating the name of the out-coming data\n*same con\ -ventions as IN_FLAG\n*OUT_FLAG S no_name ->no flag\ -\n*if you want to redirect, pass the parameters vi\ -a PARAM1\n*set OUT_FLAG to >\n*\nOUT_FLAG -outfil\ -e=\n*\n*******************************************\ -************\n* PARAM_1 \ - *\n***************************\ -****************************\n*\n*Parameters sent to the EXECUTABLE and speci\ -fied *before* IN_FLAG \n*If there is more than 1 P\ -ARAM line, the lines are\n*concatenated\n*Command_\ -line: @EP@PARAM@-gapopen%e10%s-gapext%e20\n* %s wh\ -ite space\n* %e equal sign\n*\n*PARAM1 \n*\n*\n*\n\ -**************************************************\ -*****\n* PARAM_2 \ - *\n**********************************\ -*********************\n*\n*\ -Parameters sent to the EXECUTABLE and specified \n\ -*after* IN_FLAG and *before* OUT_FLAG\n*If there i\ -s more than 1 PARAM line, the lines are\n*concaten\ -ated\n*\n*PARAM1 \n*\n*\n*************************\ -******************************\n* \ - PARAM *\n***********\ -********************************************\n*\n*Parameters sent to the EXEC\ -UTABLE and specified *after* OUT_FLAG\n*If there i\ -s more than 1 PARAM line, the lines are\n*concaten\ -ated\n*\nPARAM -mode=seq_msa -method=clustalw\nPAR\ -AM -OUTORDER=INPUT -NEWTREE=core -align -gapopen\ -=-15\n*\n*****************************************\ -**************\n* END \ - *\n***************************\ -****************************\n","*TC_METHOD_FORMAT\ -_01\n***************clustalw_method.tc_method*****\ -****\nEXECUTABLE clustalw\nALN_MODE pairwise\nIN_\ -FLAG -INFILE=\nOUT_FLAG -OUTFILE=\nOUT_MODE aln\ -\nPARAM -gapopen=-10\nSEQ_TYPE S\n**************\ -***********************************\n","$VersionTa\ -g = \ - \ - 2.43;\n$SILENT=\\ -" >/dev/null 2>/dev/null\";\n$EXIT_SUCCESS=0;\n$EX\ -IT_FAILURE=1;\n$INTERNET=-1;\n\n\n\n\n\nuse FileHa\ -ndle;\nuse Env qw(HOME);\n\nmy %onelett_prot=&fill\ -_onelett_prot();\nmy %threelett_prot=&fill_threele\ -tt_prot();\nmy %onelett_RNA=&fill_onelett_RNA();\n\ -my %threelett_RNA=&fill_threelett_RNA();\nmy %onel\ -ett_DNA=&fill_onelett_DNA();\nmy %threelett_DNA=&f\ -ill_threelett_DNA();\n\n\n\n\n\nmy %onelett = (\n'\ -P' => \\%onelett_prot,\n'D' => \\%onelett_DNA,\n'R\ -' => \\%onelett_RNA\n);\n\n\nmy %threelett = (\n'P\ -' => \\%threelett_prot,\n'D' => \\%threelett_DNA,\\ -n'R' => \\%threelett_RNA\n);\n\n\n\n\n\n\n\nif($AR\ -GV[0]=~/help/ ||$ARGV[0]=~/man/ || $ARGV[0]=~/HELP\ -/ || $ARGV[0]=~/Man/ || $ARGV[0] eq \"-h\" || $AR\ -GV[0] eq \"-H\" )\n{die \"SYNTAX: extract_from_pd\ -b Version $VersionTag \n Minimum: [ext\ -ract_from_pdb file] \n OR \n [... | ex\ -tract_from_pdb]\n Flags (Default setting on the f\ -irst line)\n -version...................[Return\ -s the Version Number]\n -force..........\ -...........[Forces the file to be treated like a P\ -DB file]\n [R\ -egenerates the header and SEQRES fields]\n \ - -force_name................[Forces the file to \ -be named after name]]\n -infile.....file\ -...........[Flag can be omited]\n \ -[File must be pdb or fro pgm]\n \ - [File can also be compressed Z \ -or gz]\n [In \ -the case of a compressed file, you can omit the gz\ -|Z extension]\n -netfile................\ -...[File will be fetch from the net using wget]\n \ - [wget or curl\ - must be installed]\n \ - [ftp://ftp.gnu.org/pub/gnu/wget/]\n \ - [http://curl.haxx.\ -se/]\n [Must \ -also be used to retrieve the file from a local pdb\ - copy (cf netaddress)]\n -netaddress....\ -............[Address used for the retrieving the n\ -etfile]\n [ht\ -tp://www.rcsb.org/pdb/cgi/export.cgi/%%.pdb.gz?for\ -mat=PDB&pdbId=%%&compression=gz]\n \ - [http://www.expasy.ch/cgi-bi\ -n/get-pdb-entry.pl?%%]\n \ - [local -> will get the file from pdb_d\ -ir (see pdb_dir)]\n -netcompression.....\ -.......[Extension if the netfile comes compressed]\ -\n [gz]\n \ - -pdb_dir...................[address of the \ -repertory where the pdb is installed]\n \ - [Supports standard ftp \ -style installation OR every stru in DIR]\n \ - [Give the ..../pdb/s\ -tructure/ dir]\n \ - [If value omitted, the pg gets it from the env\ - variable PDB_DIR]\n -netcompression_pg.\ -........[gunzip]\n -is_pdb_name........n\ -ame...[Returns 1 if the name is a PDB ID, 0 otherw\ -ise]\n -get_pdb_chains.....name...[Retur\ -ns the list of chains corresponding to the entry]\\ -n -get_pdb_id.........name...[Returns th\ -e PDB id within the provided pdb file]\n \ - -get_fugue_name.....name...[Turns a name into a n\ -ame valid for fugue]\n \ - [Uses the netaddress to do so]\n -cha\ -in......FIRST..........[Extract the first chain on\ -ly]\n A B C..........[Extract Several chai\ -ns if needed]\n ALL............[Extract al\ -l the chains] \n -ligand.....ALL........\ -....[Extract the ligands in the chain (HETATM)]\n \ - ,[Extract All \ -the named lignds]\n -ligand_only...............\ -[Extract only the ligands]\n -ligand_lis\ -t...............[Extract the list of ligands]\n \ - -coor..........[Coordinates of the fr\ -agment to extract]\n [Omit end to \ -include the Cter]\n -num........absolute\ -.......[absolute: relative to the seq] \n \ - file...........[file: relative to fi\ -le]\n -num_out....new............[new: s\ -tart 1->L]\n old............\ -[old: keep the file coordinates]\n -dele\ -te........[Delete from residue start t\ -o residue end]\n -atom.......CA.............[At\ -oms to include, ALL for all of them]\n CA \ -O N.........[Indicate several atoms if needed]\n \ - -code.......3..............[Use the 1 letter cod\ -e or the 3 letters code]\n -mode.......raw.....\ -.......[Output original pdb file]\n \ - pdb............[Output something that look\ -s like pdb]\n fasta..........[Output the s\ -equences in fasta format]\n simple........\ -.[Output a format easy to parse in C ]\n \ - -seq_field..ATOM...........[Field used to extrac\ -t the sequence]\n SEQRES.........[Use the \ -complete sequence]\n -seq......................\ -.[Equivalent to -mode fasta]\n -model......1..\ -............[Chosen Model in an NMR file]\n \ - -nodiagnostic..............[Switches Error Mes\ -sages off]\n -debug.....................\ -[Sets the DEBUG ON]\n -no_remote_pdb_dir\ -.........[Do not look for a remote file]\n \ - -cache_pdb.................[Cache Value, defaul\ -t is $HOME/.t_coffee/cache, other values: NO<=> No\ - cache]\n\n Environement Variables\n \ - These variables can be set from the environement\ -\n Command line values with the correspo\ -nding flag superseed evironement value\n \ - NO_REMOTE_PDB_DIR..........[Prevents the program \ -from searching remote file: faster]\n PD\ -B_DIR....................[Indicates where PDB file\ - must be fetched (localy)]\n\n PROBLEMS: please c\ -ontact cedric.notredame\\@europe.com\\n\";\n exit\ - ($EXIT_SUCCESS);\n}\n\n$np=0;\n$n_para=$#ARGV;\n$\ -model=1;\n$pdb_dir=$ENV{'PDB_DIR'};if ($pdb_dir){$\ -pdb_dir.=\"/\";}\n$debug=$ENV{'DEBUG_EXTRACT_FROM_\ -PDB'};\n\n$no_remote_pdb_dir=$ENV{NO_REMOTE_PDB_DI\ -R};\n$HOME=$ENV{'HOME'};\nif ( $ENV{CACHE_4_TCOFFE\ -E})\n{$cache=$ENV{CACHE_4_TCOFFEE};}\nelse\n{\n \ - $cache=\"$HOME/.t_coffee/cache/\";\n}\n\n \n$ne\ -taddress=\"http://www.rcsb.org/pdb/files/%%.pdb.gz\ -\";\n$netcompression_pg=\"gunzip\";\n$netcompressi\ -on=\"gz\";\n\n foreach ($np=0; $np<=$n_para; $np+\ -+)\n{ \n $value=$ARGV[$np];\n \n i\ -f ($np==0 && !($value=~/^-.*/))\n{ \n $pdb_\ -file= $ARGV[$np];\n}\n elsif ( !($value=~/^-.*/\ -))\n{\n print \"@ARGV\";\n die;\n} \n \n els\ -if ($value eq \"-nodiagnostic\"){$nodiagnostic=1;}\ -\n elsif ($value eq \"-force\")\n{\n $force_pdb\ -=1;\n}\n elsif ($value eq \"-force_name\")\n{\n\ - $force_name=$ARGV[++$np];\n $force_pdb=1;\n}\n \ - \n elsif ($value eq \"-is_pdb_name\")\n{\n $pd\ -b_file= $ARGV[++$np];\n \n $is_pdb_name=1;\n \n} \\ -n elsif ($value eq \"-debug\")\n{\n $debug=1;\n\ -}\n elsif ($value eq \"-get_pdb_chains\")\n{\n \ -$pdb_file= $ARGV[++$np];\n $get_pdb_chains=1;\n}\n\ - elsif ($value eq \"-get_pdb_ligands\")\n{\n $g\ -et_pdb_ligands=1;\n}\n \n elsif ($value eq \\ -"-get_pdb_id\")\n{\n $pdb_file= $ARGV[++$np];\n $g\ -et_pdb_id=1;\n \n}\n \n elsif ( $value eq \"\ --get_fugue_name\")\n{\n $pdb_file= $ARGV[++$np];\n\ - $get_fugue_name=1;\n}\n elsif ( $value eq \"-i\ -nfile\")\n{\n $pdb_file= $ARGV[++$np];\n} \n\ - elsif ($value eq \"-netfile\")\n{\n $netfile=1\ -;\n if ( !($ARGV[$np+1]=~/^-.*/)){$pdb_file= $ARGV\ -[++$np];}\n}\n elsif ( $value eq \"-num\")\n{\\ -n $numbering= $ARGV[++$np];\n}\n elsif ( \ - $value eq \"-num_out\")\n{\n $numbering_out\ -= $ARGV[++$np];\n}\n elsif ( $value eq \"-netad\ -dress\")\n{\n $netadress=$ARGV[++$np];\n}\n \n\ - elsif ( $value eq \"-netcompression\")\n{\n $\ -netcompression=$ARGV[++$np];\n}\n elsif ( $valu\ -e eq \"-pdb_dir\")\n{\n if ( !($ARGV[$np+1]=~/^-.\ -*/)){$pdb_dir= \"$ARGV[++$np]/\";}\n}\n elsif \ -( $value eq \"-no_remote_pdb_dir\")\n{\n $no_remot\ -e_pdb_dir=1;\n if ( !($ARGV[$np+1]=~/^-.*/)){$pdb_\ -dir= \"$ARGV[++$np]/\";}\n}\n elsif ( $value eq\ - \"-cache\")\n{\n $cache=$ARGV[++$np];\n}\n \n \ - elsif ($value eq \"-netcompression_pg\")\n{\n \ - $netcompression_pg=$ARGV[++$np];\n}\n elsif (\ -$value eq \"-mode\")\n{\n $MODE=$ARGV[++$np]\ -;\n}\n\n elsif ( $value eq \"-model\")\n{\n \ - $model= $ARGV[++$np];\n}\n elsif ($value eq \ -\"-seq_field\" )\n{\n $seq_field= $ARGV[++$n\ -p];\n} \n elsif ($value eq \"-coor\" )\n{\n \ - $start= $ARGV[++$np];\n \n if (($ARGV[\ -$np+1] eq \"\") ||($ARGV[$np+1]=~/^-.*/)){$end=\"*\ -\";} \n else {$end= $ARGV[++$np];} \n \ - $coor_set=1;\n}\n elsif ($value eq \"-del\ -ete\" )\n{\n $delete_start= $ARGV[++$np];\n \ - $delete_end= $ARGV[++$np];\n $delete_s\ -et=1;\n}\n elsif ($value eq \"-code\")\n{\n \ - $code= $ARGV[++$np];\n}\n elsif ($value eq\ - \"-no_hetatm\")\n{\n $no_hetatm=1;\n}\n \ -elsif ($value eq \"-chain\")\n{\n while (!($\ -ARGV[$np+1] eq \"\") &&!($ARGV[$np+1]=~/^-.*/))\n{\ -\n ++$np;\n @c_chain=(@chain, $ARGV[$\ -np]);\n $hc_chain{$ARGV[$np]}=$#c_chain+1;\n\ -} \n}\n elsif ($value eq \"-atom\")\n\ -{\n\n while (!($ARGV[$np+1] eq \"\") && !($A\ -RGV[$np+1]=~/^-.*/))\n{\n ++$np;\n $at\ -om[$n_atom++]= $ARGV[$np];\n $atom_list{$AR\ -GV[$np]}=1; \n} \n \n}\n elsif ( $v\ -alue eq \"-unfold\")\n{\n $unfold=1;\n}\n elsif\ - ($value eq \"-seq\" ||$value eq \"-fasta\" )\n{\n\ - $MODE=\"fasta\";\n}\n elsif ( $value eq \ -\"-version\")\n{\n print STDERR \"\\nextract_from\ -_pdb: Version $VersionTag\\n\";\n &myexit ($EXIT_S\ -UCCESS);\n}\n elsif ( $value eq \"-ligand\")\n{\ -\n while (!($ARGV[$np+1] eq \"\") && !($ARGV[$np+1\ -]=~/^-.*/))\n{\n ++$np;\n $ligand=1;\n \ - $ligand_list{$ARGV[$np]}=1; \n} \n $hc_chai\ -n{'LIGAND'}=1;\n}\n elsif ( $value eq \"-ligand\ -_only\")\n{\n $ligand_only=1;\n}\n}\nif ( $debug)\\ -n{\n print STDERR \"\\n[DEBUG:extract_from_pdb]\ - NO_REMOTE_PDB_DIR: $no_remote_pdb_dir\\n\";\n \ -print STDERR \"\\n[DEBUG:extract_from_pdb] PDB_DIR\ -: $pdb_dir\\n\";\n}\n\nif ( $is_pdb_name)\n{\n \ -if (remote_is_pdb_name($pdb_file, $netaddress))\n{\ -\n print \"1\";\n}\n else\n{\n print \"0\";\n}\\ -n exit ($EXIT_SUCCESS);\n}\n \n\nif (!$force\ -_name)\n{\n $pdb_file=~/([^\\/]*)$/;\n $forc\ -e_name=$1;\n}\n\n$local_pdb_file=$pdb_file;\n\nif \ -( $debug){print STDERR \"\\n[DEBUG: extract_from_p\ -db] Scan For $local_pdb_file\\n\";}\n\n$mem=$no_re\ -mote_pdb_dir;\n$no_remote_pdb_dir=1;\n$tmp_pdb_fil\ -e=get_pdb_file ($local_pdb_file);\n\nif ( !-e $tmp\ -_pdb_file || $tmp_pdb_file eq \"\")\n{\n $local\ -_pdb_file=$pdb_file;\n ($local_pdb_file, $suffi\ -x_chain)=&pdb_name2name_and_chain($local_pdb_file)\ -;\n\n if ($local_pdb_file)\n{\n if ( $debug){pr\ -int STDERR \"\\nSplit $pdb_file into $local_pdb_fi\ -le and $suffix_chain \\n\";}\n $tmp_pdb_file=get_p\ -db_file ($local_pdb_file);\n if ( $tmp_pdb_file ne\ - \"\")\n{\n @c_chain=();\n @c_chain=($suff\ -ix_chain);\n %hc_chain=();\n $hc_chain{$su\ -ffix_chain}=1;\n}\n}\n}\n\n$no_remote_pdb_dir=$mem\ -;\nif ($no_remote_pdb_dir==0)\n{\n if ( !-e $tm\ -p_pdb_file || $tmp_pdb_file eq \"\")\n{\n \n $loca\ -l_pdb_file=$pdb_file;\n ($local_pdb_file, $suffix_\ -chain)=&pdb_name2name_and_chain($local_pdb_file);\\ -n if ($local_pdb_file)\n{\n if ( $debug){print\ - STDERR \"\\nSplit $pdb_file into $local_pdb_file \ -and $suffix_chain \\n\";}\n $tmp_pdb_file=get_\ -pdb_file ($local_pdb_file); \n if ( $tmp_pd\ -b_file ne \"\")\n{\n @c_chain=();\n @c_chain=($s\ -uffix_chain);\n %hc_chain=();\n $hc_chain{$suffi\ -x_chain}=1;\n}\n}\n}\n}\n\nif ( $debug){print STDE\ -RR \"\\n$pdb_file copied into ##$tmp_pdb_file##\\n\ -\";}\n\n\nif ( !-e $tmp_pdb_file || $tmp_pdb_file \ -eq \"\")\n{\n if ($is_pdb_name)\n{\n print \"0\ -\\n\"; exit ($EXIT_SUCCESS);\n}\n else\n{\n \n \ - print \"\\nEXTRACT_FROM_PDB: NO RESULT for $pdb_f\ -ile [FATAL:EXTRACT_FROM_PDB Version $VersionTag]\\\ -n\";\n &myexit ($EXIT_FAILURE); \n}\n}\n\n\n\n\ -\n%molecule_type=&pdbfile2chaintype($tmp_pdb_file)\ -;\n\n$pdb_id=&get_pdb_id ($tmp_pdb_file);\n\nif ( \ -$pdb_id eq \"\"){$pdb_id=$force_name;}\n\n@f_chain\ -=&get_chain_list ($tmp_pdb_file);\n\n\n\nif ( $get\ -_pdb_chains)\n{\n print \"@f_chain\\n\";\n &\ -myexit ($EXIT_SUCCESS);\n}\nif ( $get_pdb_ligands)\ -\n{\n %complete_ligand_list=&get_ligand_list ($\ -tmp_pdb_file);\n print $complete_ligand_list{\"\ -result\"};\n &myexit ($EXIT_SUCCESS);\n}\n\nels\ -if ( $get_pdb_id ||$get_fugue_name )\n{\n if \ - (@c_chain && $c_chain[0] eq \"FIRST\"){$pdb_id=$p\ -db_id.$f_chain[0];}\n elsif (@c_chain && $c_cha\ -in[0] ne \" \"){$pdb_id=$pdb_id.$c_chain[0];}\n \ - \n print \"$pdb_id\\n\";\n &myexit ($EXIT_S\ -UCCESS);\n \n}\nelsif ( $is_pdb_name)\n{\n p\ -rintf \"1\\n\";\n &myexit ($EXIT_SUCCESS);\n}\n\ - \n\n$structure_file=vtmpnam();\n\n\n$INFILE=vfop\ -en (\"$tmp_pdb_file\", \"r\"); \n$TMP=vfopen (\"$s\ -tructure_file\", \"w\");\n\n$print_model=1;\n$in_m\ -odel=0;\nwhile ( <$INFILE>)\n{\n $line=$_;\n if \ -($line =~/^MODEL\\s*(\\d*)/)\n{\n if ($1==$model)\\ -n{\n $in_model=1;\n $print_model=1;\n \ -$is_nmr=1;\n}\n elsif ( $in_model==0)\n{\n $pr\ -int_model=0;\n}\n elsif ( $in_model==1)\n{\n l\ -ast;\n}\n}\n if ($print_model){print $TMP $line;}\ - \n\n}\nclose ($TMP);\nclose ($INFILE);\n\n \n\n \ - if ($numbering eq \"\"){$numbering=\"absolute\";}\ -\n if ($numbering_out eq \"\"){$numbering_out=\"n\ -ew\";}\n\n if ( $delete_set && $coor_set) {die \"\ --delete and -coor are mutually exclusive, sorry\\n\ -\";}\n if ( $n_atom==0){$atom_list[$n_atom++]=\"A\ -LL\";$atom_list{$atom_list[0]}=1;}\n if ( $seq_fi\ -eld eq \"\"){$seq_field=\"ATOM\";}\n \n if ( $MO\ -DE eq \"\"){$MODE=\"pdb\";}\n elsif ( $MODE eq \"\ -simple\" && $code==0){$code=1;}\n\n if ( $code==0\ -){$code=3;}\n\n\nif ($f_chain[0] eq \" \"){$hc_cha\ -in{' '}=1;$c_chain[0]=\" \";}\nelsif (!@c_chain){$\ -hc_chain{FIRST}=1;$c_chain[0]=\"FIRST\";}#make sur\ -e the first chain is taken by default\n\nif ($h\ -c_chain{ALL}) \n{\n @c_chain=@f_chain;\n \ - foreach $e (@c_chain){$hc_chain{$e}=1;}\n}\nelsif\ -($hc_chain{FIRST})\n{\n @c_chain=($f_chain[0]);\n \ -$hc_chain{$f_chain[0]}=1;\n}\n\n\n$MAIN_HOM_CODE=&\ -get_main_hom_code ($structure_file);\n$INFILE=vfop\ -en ($structure_file, \"r\");\n\n\nif ( $MODE eq \"\ -raw_pdb\" || $MODE eq \"raw\")\n{\n while (<$IN\ -FILE>)\n{ print \"$_\";}\n close ( $INFILE);\n \ - &myexit($EXIT_SUCCESS);\n} \nif ( $MODE eq \\ -"raw4fugue\" )\n{\n while (<$INFILE>)\n{ \n $l=\ -$_;\n if ($l=~/^SEQRES/)\n{\n \n $c= subst\ -r($l,11,1);\n if ($hc_chain {$c}){print \"$l\"\ -;}\n}\n elsif ( $l=~/^ATOM/)\n{\n $c=substr($l\ -,21,1);\n if ($hc_chain {$c}){print \"$l\";}\n\ -}\n}\n close ( $INFILE);\n &myexit($EXIT_SUC\ -CESS);\n} \n\nif ( $MODE eq \"pdb\")\n{\n\n \ -$read_header=0;\n while (<$INFILE>) \n{\n $\ -line=$_;\n if ($line =~ /^HEADER/){print \"\ -$line\";$read_header=1;}\n}\n close ($INFILE);\\ -n\n if (!$read_header)\n{\n print \"HEADER U\ -NKNOWN 00-JAN-00 \ - $force_name\\n\";\n}\n\n $INFILE=vfopen ($stru\ -cture_file, \"r\");\n \n print \"COMPND 1 \ -CHAIN:\";\n $last=pop(@c_chain);\n foreach $\ -c ( @c_chain){ print \" $c,\";}\n if ( $last eq\ - \" \"){print \" NULL;\\n\";}\n else \n{\n \ - print \" $last;\\n\";\n}\n @c_chain=(@c_chain,\ - $last);\n \n print \"REMARK Output of the p\ -rogram extract_from_pdb (Version $VersionTag)\\n\"\ -;\n print \"REMARK Legal PDB format not Guarant\ -eed\\n\";\n print \"REMARK This format is not m\ -eant to be used in place of the PDB format\\n\";\n\ - print \"REMARK The header refers to the origin\ -al entry\\n\";\n print \"REMARK The sequence fr\ -om the original file has been taken in the field: \ -$seq_field\\n\";\n print \"REMARK extract_from_\ -pdb, 2001, 2002, 2003, 2004, 2005 2006 (c) CNRS an\ -d Cedric Notredame\\n\"; \n if ( $coor_set)\n\ -{\n print \"REMARK Partial chain: Start $sta\ -rt End $end\\n\";\n}\n if ( $is_nmr)\n{\n \ - print \"REMARK NMR structure: MODEL $model\\n\";\\ -n}\n \n if ( $n_atom!=0)\n{\n print \"R\ -EMARK Contains Coordinates of: \";\n foreach\ - $a (@atom){print \"$a \";}\n print \"\\n\";\ -\n} \n}\n\n\n\n\nmy $residue_index = -999;\nmy $o\ -ld_c = \"TemporaryChain\";\n\nwhile (<$INFILE>) \n\ -{\n $line=$_;\n\n\n if ($line =~ /^SEQRES/)\n{\n\n\ - @field=/(\\S*)\\s*/g;\n \n \n $c= substr($_,11,\ -1);\n\n \n $l=$#field;\n for ($a=4; $a<$#field \ -;)\n{\n if (!$onelett{$molecule_type{$c}}->{$fie\ -ld[$a]})\n{\n splice @field, $a, 1;\n}\n else\ - \n{\n $a++;\n}\n}\n \n if ( $c ne $in_chain)\\ -n{\n $pdb_chain_list[$n_pdb_chains]=$c;\n $pdb\ -_chain_len [$n_pdb_chains]=$len;\n $in_chain=$c;\ -\n $n_pdb_chains++;\n}\n \n for ( $a=4; $a<$#fi\ -eld;$a++)\n{\n @{$complete_seq{$c}}->[$complete_\ -seq_len{$c}++]=$field[$a];\n}\n}\n elsif ( $lin\ -e=~/^ATOM/ || ($line=~/^HETATM/ && &is_aa(substr($\ -line,17,3),substr($line,21,1)) && !$no_hetatm))\n{\ -\n\n \n $RAW_AT_ID=$AT_ID=substr($line,12,4);\\ -n $RES_ID=&is_aa(substr($line,17,3),substr($line,2\ -1,1));\n $CHAIN=substr($line,21,1);\n\n $RES_NO\ -=substr($line,22,4);\n $HOM_CODE=substr ($line, 26\ -, 1);\n $TEMP=substr($line,60,6);\n \n $TEMP=~s/\\\ -s//g;\n $AT_ID=~s/\\s//g;\n $RES_ID=~s/\\s/\ -/g;\n $RES_NO=~s/\\s//g;\n \n if ( $HOM_CO\ -DE ne $MAIN_HOM_CODE){next;}\n elsif ( $already_re\ -ad2{$CHAIN}{$RES_ID}{$AT_ID}{$RES_NO}){next;}\n el\ -se{$already_read2{$CHAIN}{$RES_ID}{$AT_ID}{$RES_NO\ -}=1;}\n \n \n if ($coor_set && $numbering eq \"fil\ -e\" && $residue_index ne $RES_NO)\n{\n \n \ -if ( $RES_NO<=$start){$real_start{$CHAIN}++;}\n \ - if ( $RES_NO<=$end){$real_end{$CHAIN}++;}\n}\n e\ -lsif ($numbering eq \"absolute\")\n{\n $real_s\ -tart{$CHAIN}=$start;\n $real_end{$CHAIN}=$end;\ -\n}\n\n $KEY=\"ALL\";\n if ( $CHAIN \ -ne $in_atom_chain)\n{\n \n $pdb_atom_chain_l\ -ist[$n_pdb_atom_chains]=$c;\n $pdb_atom_chain_le\ -n [$n_pdb_atom_chains]=$len;\n $in_atom_chain=$c\ -;\n $n_pdb_atom_chains++;\n}\n \n if ( $residue_\ -index ne $RES_NO)\n{\n $residue_index = $RES_\ -NO;\n @{$atom_seq{$CHAIN}}->[$atom_seq_len{$C\ -HAIN}++]=$RES_ID;;\n}\n}\n\n}\nclose ($INFILE);\n\\ -n\n\n\n\n\n$INFILE=vfopen ($structure_file, \"r\")\ -;\nforeach $c (@c_chain)\n{\n if ( $seq_field e\ -q \"SEQRES\"){@pdb_seq=@{$complete_seq{$c}};}\n el\ -sif ( $seq_field eq \"ATOM\") {@pdb_seq=@{$atom_s\ -eq{$c}};}\n \n\n $full_length=$l=$#pdb_seq+1;\n \\ -n if ( $real_end{$c}==\"*\"){$real_end{$c}=$full_l\ -ength;}\n if ( $coor_set)\n{ \n\n if ( $real\ -_end{$c} < $l){splice @pdb_seq, $real_end{$c}, $l;\ -}\n if ( $real_start{$c} < $l){splice @pdb_seq,\ - 0, $real_start{$c}-1;} \n $l=$#pdb_seq;\\ -n}\n\n elsif ( $delete_set)\n{\n splice @pdb_se\ -q, $delete_start, $delete_end-$delete_start+1;\n \ - $l=$#pdb_seq;\n}\n \n $new_fasta_name=\"$pdb_id$\ -c\";\n if ( $coor_set)\n{\n if ( $n_pdb_chains=\ -=0){$new_fasta_name=\"$new_fasta_name$c\";}\n $\ -new_fasta_name= $new_fasta_name.\"\\_$start\\_$end\ -\";\n}\n \n if ( $MODE eq \"pdb\")\n{\n $nl=\ -1;\n $n=0;\n \n foreach $res ( @pdb_seq)\\ -n {\n if ( !$n)\n {\n \n printf \"SEQRES %2\ -d %1s %4d \", $nl,$c, $l;\n $nl++;\n }\n $\ -res=~s/\\s//g;\n \n if ($code==1){ print\ -f \"%3s \",$onelett{$molecule_type{$c}}->{$res};}\\ -n elsif ($code==3){ printf \"%3s \",$res};\n\ - \n $n++; \n if ( $n==13){$n=0;p\ -rint \"\\n\";}\n}\n if ( $n!=0){print \"\\n\"; $\ -n=0;}\n}\n elsif ( $MODE eq \"simple\")\n{\n pri\ -nt \"# SIMPLE_PDB_FORMAT\\n\";\n if ( $new_fasta\ -_name eq \" \"){$new_fasta_name=\"dummy_name\";}\n\ - print \">$new_fasta_name\\n\";\n\n foreach $r\ -es ( @pdb_seq)\n{\n print \"$onelett{$molecu\ -le_type{$c}}->{$res}\";\n}\n print \"\\n\";\n}\n\ - elsif ( $MODE eq \"fasta\")\n{\n $n=0;\n prin\ -t \">$new_fasta_name\\n\";\n \n foreach $res (\ - @pdb_seq)\n{\n\n print \"$onelett{$molecule\ -_type{$c}}->{$res}\";\n $n++;\n \ - if ( $n==60){print \"\\n\"; $n=0;}\n}\n print \\ -"\\n\"; \n}\n}\n\nif ( $MODE eq \"fasta\")\n{\n \ - &myexit($EXIT_SUCCESS);\n \n}\n\n \n $charcou\ -nt=0;\n $inchain=\"BEGIN\";\n $n=0;\n while (<$\ -INFILE>) \n{\n $line=$_;\n \n if ($line \ -=~/^ATOM/ || ($line=~/^HETATM/))\n{\n $line_head\ -er=\"UNKNWN\";\n $RES_ID=substr($line,17,3);\n $ch\ -ain = substr($line,21,1);\n\n if ($line =~/^ATOM/)\ -\n{\n $line_header=\"ATOM\";\n $RES_ID=(&i\ -s_aa($RES_ID,$chain))?&is_aa($RES_ID,$chain):$RES_\ -ID;\n}\n elsif ($line=~/^HETATM/ && ($ligand_list \ -{$RES_ID} ||$ligand_list {'ALL'} || !&is_aa($RES_I\ -D,$chain)))\n{\n $line_header=\"HETATM\";\n}\n\ - elsif ($line=~/^HETATM/ && (&is_aa($RES_ID,$chain\ -) && !$no_hetatm))\n{\n $line_header=\"ATOM\";\ -\n $RES_ID=&is_aa($RES_ID,$chain);\n}\n else\n\ -{\n next;\n}\n\n \n\n $X=substr($line,30,8); \ - \n $Y=substr($line,38,8);\n $Z=substr($line,46,\ -8);\n $TEMP=substr($line,60,6);\n \n $RAW_AT_ID=$A\ -T_ID=substr($line,12,4);\n $CHAIN=substr($line,21,\ -1);\n $RES_NO=substr($line,22,4);\n $HOM_CODE=subs\ -tr ($line, 26, 1);\n \n $X=~s/\\s//g;\n $Y=~s/\\s/\ -/g;\n $Z=~s/\\s//g;\n $TEMP=~s/\\s//g;\n \n $AT_ID\ -=~s/\\s//g;\n $RES_ID=~s/\\s//g;\n $RES_NO=~s/\\s/\ -/g;\n\n \n if ( $HOM_CODE ne $MAIN_HOM_CODE){next;\ -}\n elsif ( $already_read{$CHAIN}{$RES_ID}{$AT_ID}\ -{$RES_NO}){next;}\n else{$already_read{$CHAIN}{$RE\ -S_ID}{$AT_ID}{$RES_NO}=1;}\n \n $KEY=\"ALL\";\n\n \ - if ( $RES_NO ==0){$start_at_zero=1;}\n\n $RE\ -S_NO+=$start_at_zero; \n \n if ( $current_chain\ - ne $CHAIN)\n{\n $current_chain=$CHAIN;\n \ -$pos=$current_residue=0;\n $offset=($coor_set)\ -?($real_start{$CHAIN}-1):0;\n if ( $seq_fie\ -ld eq \"SEQRES\"){@ref_seq=@{$complete_seq{$CHAIN}\ -};}\n elsif ( $seq_field eq \"ATOM\") {@ref_s\ -eq=@{$atom_seq{$CHAIN}};}\n}\n \n if ($current_res\ -idue != $RES_NO)\n{\n $current_residue=$RES_NO\ -;\n if ( $seq_field eq \"SEQRES\"){$pos=$cu\ -rrent_residue;}\n elsif ( $seq_field eq \"ATOM\ -\"){$pos++;}\n}\n \n \n if ($n_atom==0 || $atom_li\ -st{$AT_ID}==1 || $atom_list{$KEY}==1)\n{ \n \\ -n $do_it=(!@c_chain || $hc_chain{$CHAIN} ||$hc\ -_chain{'LIGAND'} );\n \n $do_it= ($do_it==\ -1) && ($coor_set==0 ||($pos>=$real_start{$CHAIN} &\ -& $pos<=$real_end{$CHAIN}));\n $do_it= ($do_it\ -==1) && ($delete_set==0 || $pos<$delete_start ||$p\ -os>$delete_end );\n if ($ligand==0 && $line_he\ -ader eq \"HETATM\" ){$do_it=0;}\n if ($ligand_\ -only==1 && $line_header eq \"ATOM\" ){$do_it=0;}\n\ - if ($ligand==1 && $line_header eq \"HETATM\" \ -&& $ligand_list{$RES_ID}==0 && $ligand_list{\"ALL\\ -"}==0){$do_it=0;} \n \n \n if ( $do_it\ -)\n{\n $n++;\n $out_pos=$pos;\n \n if ( \ -$delete_set)\n{\n if ( $out_pos< $delete_start)\ -{;}\n else {$offset=$delete_end-$delete_start;}\ -\n} \n \n if ( $numbering_out \ -eq \"new\"){$out_pos-=$offset;}\n elsif ( $\ -numbering_out eq \"old\"){$out_pos=$RES_NO;}\n \ - \n \n \n if ( $code==1){$R\ -ES_ID=$onelett{$molecule_type{$c}}->{$RES_ID};}\n \ - \n if ($unfold)\n{\n $unfolded_x+=5\ -;\n $X=$unfolded_x;\n $Y=0;\n $Z=0;\n \ - $float=1;\n}\n else\n{\n $float=3;\\ -n}\n\n if ( $MODE eq \"pdb\")\n{\n prin\ -tf \"%-6s%5d %-4s %3s %s%4d %8.3f%8.3f%8.3f 1.\ -00 %5.2f\\n\",$line_header, $n, $RAW_AT_ID,$RES_ID\ -,$CHAIN,$out_pos, $X, $Y, $Z,$TEMP; \n}\n \ - elsif ( $MODE eq \"simple\")\n{\n if ( $RES\ -_ID eq \"\"){$RES_ID=\"X\";}\n printf \"%-6s %5\ -s %s %2s %4d %8.3f %8.3f %8.3f\\n\",$line_heade\ -r, $AT_ID, $RES_ID,($CHAIN eq\"\" || $CHAIN eq \" \ -\")?\"A\":$CHAIN,$out_pos, $X, $Y, $Z,$TEMP;\n}\n\\ -n}\n}\n}\n}\nprint \"\\n\";\nclose($INFILE);\n\n\n\ -if ( $error ne \"\") \n{$error=$error.\"\\nDiagnos\ -tic: SEQRES and the residues in ATOM are probab\ -ly Incompatible\\n\";\n $error=$error. \"Recom\ -endation: Rerun with '-fix 1' in order to ignore t\ -he SEQRES sequences\\n\";\n}\nif (!$nodiagnostic){\ -print STDERR $error;}\n&myexit ( $EXIT_SUCCESS);\n\ -\nsub remote_is_pdb_name \n{\n my $in=@_[0];\n \ - my $netaddress=@_[1];\n my $ref_file, $pdb;\\ -n my $value;\n\n if ( $in=~/[^\\w\\d\\:\\_]/\ -){return 0;}\n \n \n $ref_file=\"$cache/p\ -db_entry_type.txt\";\n \n if ( !-e $ref_file\ - || (-M $ref_file)>2 || -z $ref_file)\n{\n &url2fi\ -le(\"ftp://ftp.wwpdb.org/pub/pdb/derived_data/pdb_\ -entry_type.txt\", $ref_file);\n}\n \n $pdb=subs\ -tr ($in,0, 4);\n \n \n $value=`grep -c $p\ -db $ref_file`;\n return $value;\n}\n \nsub\ - is_pdb_file\n{\n my @arg=@_;\n\n if ( !-e $\ -arg[0]){return 0;}\n \n $F=vfopen ($arg[0], \ -\"r\");\n while ( <$F>)\n{\n if (/^HEADER/)\n{\\ -n close $F;\n return 1;\n}\n elsif ( /^SEQ\ -RES/)\n{\n close $F;\n return 1;\n}\n elsi\ -f ( /^ATOM/)\n{\n close $F;\n return 1;\n}\ -\n}\n return 0;\n}\nsub get_pdb_id\n{\n my $\ -header_file=@_[0];\n my $id;\n \n\n $F=vf\ -open (\"$header_file\", \"r\");\n\n while ( <$F\ ->)\n{\n \n if ( /HEADER/)\n{\n $id=substr($_,6\ -2,4 );\n return $id;\n}\n}\n close ($F);\n \ - \n return \"\";\n}\n\nsub get_ligand_list\n{\ -\n my $pdb_file=@_[0];\n my $chain;\n my \ -$ligand;\n my %complete_ligand_list;\n \n\n \ - $F=vfopen ($pdb_file, \"r\");\n while ( <$F>\ -)\n{\n if ( /^HETATM/)\n{\n $line=$_;\n $c\ -hain=substr($line,21,1);\n $ligand=substr($lin\ -e,17,3);\n \n if (!$complete_ligand_list{$\ -chain}{$ligand})\n{\n \n $complete_ligand_list{\\ -"result\"}.=\"CHAIN $chain LIGAND $ligand\\n\";\n \ - $complete_ligand_list{$chain}{$ligand}=1;\n}\n}\n\ -}\n close ($F);\n return %complete_ligand_li\ -st;\n}\n\nsub get_chain_list \n{\n my $header_f\ -ile;\n my @chain_list;\n my @list;\n my $\ -n_chains;\n my %chain_hasch;\n my $pdb_file=\ -@_[0];\n my $c;\n my %hasch;\n my $chain;\ -\n \n \n $F=vfopen ($pdb_file, \"r\");\n \ - while ( <$F>)\n{\n\n\n if (/SEQRES\\s+\\d+\\s+(\\\ -S+)/)\n{\n $chain = substr($_,11,1);$chain=~s/\ -\\s//g;if ( $chain eq \"\"){$chain=\" \";}\n \n\ - \n if ($chain && !$hasch{$chain}){$hasch{\ -$chain}=1;push @chain_list, $chain;}\n}\n if (/^AT\ -OM/ || /^HETATM/)\n{\n $chain = substr($_,21,1\ -); $chain=~s/\\s//g;if ( $chain eq \"\"){$chain=\"\ - \";}\n if ($chain && !$hasch{$chain}){$hasch{\ -$chain}=1;push @chain_list, $chain;}\n}\n}\n \n\ - \n close ($F);\n if (!@chain_list)\n{\n @\ -chain_list=(\"A\");\n}\n return @chain_list;\n}\ -\n\nsub token_is_in_list\n{\n\n my @list=@_;\n \ - my $a;\n \n for ($a=1; $a<=$#list; $a++)\\ -n{\n if ( $list[$a] eq $list[0]){return $a;}\n}\n}\ -\n\nsub pdb_name2name_and_chain \n{\n my $pdb_f\ -ile=@_[0];\n my $pdb_file_in;\n my @array;\n\ - my $chain;\n my $c;\n\n $pdb_file_in=$pd\ -b_file;\n\n $pdb_file=~/^(.{4})/;$pdb_id=$1;\n \ - @array=($pdb_file=~/([\\w])/g);\n \n \n $c\ -hain=uc ($array[4]);\n $chain=($chain eq \"\")?\ -\"FIRST\":$chain;\n \n return ( $pdb_id, $ch\ -ain);\n\n if ( $#array==3){return ($pdb_id, \"F\ -IRST\");}\n elsif ( $#array<4){ return ($pdb_id\ -, \"\");}\n else {return ( $pdb_id, $chain);}\n\ - \n \n \n}\nsub get_main_hom_code \n{\n\ - my $pdb_file=@_[0];\n my %hom, $n, $best, $\ -best_h;\n open (F, $pdb_file);\n while ()\ -\n{\n if ( $_=~/^ATOM/)\n{\n $h=substr ($_,26,\ - 1);\n $n=++$hom{$h};\n if ($n>$best)\n{\n\ - $best=$n;\n $best_h=$h;\n}\n}\n}\n close (F)\ -;\n return $best_h;\n}\n\n\nsub get_pdb_file \n\ -{\n my ($pdb_file_in)=(@_);\n my $result;\n \ - my @letter;\n my @chain;\n my $v;\n my\ - $pdb_file=$pdb_file_in;\n\n $pdb_file=($pdb_fi\ -le_in=~/\\S+_S_(\\S+)/)?$1:$pdb_file_in;\n\n if\ - ($no_remote_pdb_dir==0)\n{\n $no_remote_pdb_dir=1\ -;\n $result=get_pdb_file3 ($pdb_file);\n $no_remot\ -e_pdb_dir=0;\n if ( $result){return $result;}\n el\ -se\n{\n \n lc ($pdb_file);\n $result=g\ -et_pdb_file3($pdb_file);\n return $result;\n}\ -\n}\n else\n{\n return get_pdb_file3 ($pdb_file\ -);\n}\n \n}\n\nsub get_pdb_file3 \n{\n my $p\ -db_file_in=@_[0];\n my $result;\n my @letter\ -;\n my @chain;\n my $lcfile;\n my $ucfile\ -;\n my $pdb_file=$pdb_file_in;\n \n $lcfi\ -le=lc $pdb_file;\n $ucfile=uc $pdb_file;\n\n \ - if ( ($result=get_pdb_file2 ($pdb_file))){return \ -$result;}\n \n\n if ($lcfile ne $pdb_file &&\ - ($result=get_pdb_file2 ($lcfile))){return $result\ -;}\n if ($ucfile ne $pdb_file && ($result=get_p\ -db_file2 ($ucfile))){return $result;}\n \n \n\ - \n return \"\";\n}\nsub get_pdb_file2\n{\n \ - my $pdb_file=@_[0];\n my $return_value;\n \ - \n $return_value=\"\";\n \n if ( ($resul\ -t=get_pdb_file1 ($pdb_file))){$return_value=$resul\ -t;}\n elsif ( !($pdb_file=~/\\.pdb/) && !($pdb_\ -file=~/\\.PDB/))\n{\n if ( ($result=get_pdb_file1 \ -(\"$pdb_file.pdb\"))){$return_value=$result;}\n el\ -sif ( ($result=get_pdb_file1 (\"$pdb_file.PDB\")))\ -{$return_value=$result;}\n\n elsif ( ($result=get_\ -pdb_file1 (\"pdb$pdb_file.pdb\"))){$return_value=$\ -result;} \n elsif ( ($result=get_pdb_file1 (\"pdb$\ -pdb_file.PDB\"))){$return_value=$result;}\n elsif \ -( ($result=get_pdb_file1 (\"PDB$pdb_file.PDB\"))){\ -$return_value=$result;}\n elsif ( ($result=get_pdb\ -_file1 (\"PDB$pdb_file.pdb\"))){$return_value=$res\ -ult;}\n \n \n elsif ( ($result=get_pdb_file1 (\"$p\ -db_file.ent\"))){$return_value=$result;}\n elsif (\ - ($result=get_pdb_file1 (\"pdb$pdb_file.ent\"))){$\ -return_value=$result;}\n elsif ( ($result=get_pdb_\ -file1 (\"PDB$pdb_file.ent\"))){$return_value=$resu\ -lt;}\n\n elsif ( ($result=get_pdb_file1 (\"$pdb_fi\ -le.ENT\"))){$return_value=$result;}\n elsif ( ($re\ -sult=get_pdb_file1 (\"pdb$pdb_file.ENT\"))){$retur\ -n_value=$result;}\n elsif ( ($result=get_pdb_file1\ - (\"PDB$pdb_file.ENT\"))){$return_value=$result;}\\ -n \n \n \n}\n return $return_value;\n}\n \ns\ -ub get_pdb_file1\n{\n my ($pdb_file)=(@_);\n \ - my $return_value;\n \n\n $return_value=\"\"\ -;\n if ( ($result=get_pdb_file0 ($pdb_file))){$\ -return_value=$result;}\n elsif ( ($result=get_p\ -db_file0 (\"$pdb_file.Z\"))){$return_value=$result\ -;}\n elsif ( ($result=get_pdb_file0 (\"$pdb_fil\ -e.gz\"))){$return_value=$result;}\n elsif ( ($r\ -esult=get_pdb_file0 (\"$pdb_file.GZ\"))){$return_v\ -alue=$result;}\n return $return_value;\n}\nsub \ -get_pdb_file0 \n{ \n my ($pdb_file, $attempt)=(\ -@_);\n my $pdb_file=@_[0];\n my $tmp_pdb_fil\ -e; \n my $return_value;\n\n if ( !$attemp\ -t){$attempt=1;}\n \n $local_pdb_file=\"$pdb_\ -file\";\n if ( $local_pdb_file eq \"\")\n{\n $t\ -mp_pdb_file=vtmpnam();\n open F, \">$tmp_pdb_file\\ -";\n \n while (){print F \"$_\";}\n close (\ -F);\n \n if (-e $tmp_pdb_file && &is_pdb_file ( $l\ -ocal_pdb_file))\n{return $tmp_pdb_file;}\n}\n\n \ - $local_pdb_file=\"$pdb_file\";\n &debug_print \ -(\"\\nTry access local file: $local_pdb_file\");\n\ - \n $local_pdb_file=&check_pdb_file4compress\ -ion ($local_pdb_file);\n if ( -e $local_pdb_fil\ -e && (&is_pdb_file ($local_pdb_file) || $force_pdb\ -))\n{\n &debug_print ( \"\\n\\tIs in Current Dir\"\ -);\n $tmp_pdb_file=vtmpnam();\n `cp $local_pdb_fil\ -e $tmp_pdb_file`;\n return $tmp_pdb_file;\n}\n \ -else\n{\n &debug_print (\"\\n\\tFile Not in Curren\ -t Dir\");\n}\n\n if ($pdb_file=~/^pdb/||$pdb_fi\ -le=~/^PDB/){$pdb_div=substr ($pdb_file, 4, 2);}\n \ - else\n{\n $pdb_div=substr ($pdb_file, 1, 2);\\ -n}\n $local_pdb_file=\"$pdb_dir/$pdb_div/$pdb_f\ -ile\";\n $local_pdb_file=&check_pdb_file4compre\ -ssion ( $local_pdb_file);\n &debug_print (\"\\n\ -Try access file From PDB_DIR: $local_pdb_file\");\\ -n if ($pdb_dir && -e $local_pdb_file && &is_pdb\ -_file ($local_pdb_file))\n{\n &debug_print ( \"\\n\ -\\tIs in Local PDB DIR\");\n $tmp_pdb_file=vtmpnam\ -();\n `cp $local_pdb_file $tmp_pdb_file`;\n return\ - $tmp_pdb_file;\n}\n\n $local_pdb_file=\"$pdb_d\ -ir/$pdb_file\";\n $local_pdb_file=&check_pdb_fi\ -le4compression ( $local_pdb_file);\n &debug_pri\ -nt (\"\\nTry access file From PDB_DIR: local_pdb_f\ -ile\");\n if ($pdb_dir && -e $local_pdb_file &&\ - &is_pdb_file ($local_pdb_file))\n{\n &debug_print\ - ( \"\\n\\tIs in Local PDB DIR\");\n $tmp_pdb_file\ -=vtmpnam();\n `cp $local_pdb_file $tmp_pdb_file`;\\ -n return $tmp_pdb_file;\n}\n\n $local_pdb_file=\ -\"$pdb_dir$pdb_file\";\n $local_pdb_file=&check\ -_pdb_file4compression ( $local_pdb_file);\n &de\ -bug_print (\"\\nTry access file From PDB_DIR: $loc\ -al_pdb_file\");\n if ($pdb_dir && -e $local_pdb\ -_file && &is_pdb_file ($local_pdb_file))\n{\n &deb\ -ug_print ( \"\\n\\tIs in Local PDB DIR\");\n $tmp_\ -pdb_file=vtmpnam();\n `cp $local_pdb_file $tmp_pdb\ -_file`;\n return $tmp_pdb_file;\n}\n else\n{&de\ -bug_print ( \"\\n\\tNot In Local Pdb Dir\");}\n\n \ - if ($cache ne \"NO\" && $cache ne \"no\")\n{\n\\ -n $local_pdb_file=\"$cache/$pdb_file\";\n $local_p\ -db_file=&check_pdb_file4compression ( $local_pdb_f\ -ile);\n &debug_print(\"\\nTry access file From Cac\ -he: $local_pdb_file\");\n if (-e $local_pdb_file &\ -& &is_pdb_file ($local_pdb_file))\n{\n &debug_\ -print ( \"\\n\\tIs in T-Coffee Cache\");\n $tm\ -p_pdb_file=vtmpnam();\n `cp $local_pdb_file $t\ -mp_pdb_file`;\n return $tmp_pdb_file;\n}\n els\ -e{&debug_print ( \"\\n\\tNot in Cache Dir\");}\n}\\ -n\n if (!$no_remote_pdb_dir) \n{\n\n my $return\ -_value=\"\";\n if ( &remote_is_pdb_name ($pdb_file\ -, $netaddress)==1)\n{\n &debug_print (\"\\n***\ -**************************************************\ -**\\nTry Remote Access for $pdb_file\");\n $tm\ -p_pdb_file=vtmpnam();\n $netcommand=$netaddres\ -s;\n $netcommand=~s/%%/$pdb_file/g;\n &url\ -2file(\"$netcommand\", \"$tmp_pdb_file.$netcompres\ -sion\");\n &debug_print(\"\\nREMOTE: $netcomma\ -nd\\n\");\n \n $compressed_tmp_file_name=\"\ -$tmp_pdb_file.$netcompression\";\n \n if (\ -$netcompression && -B $compressed_tmp_file_name)\n\ -{\n my $r;\n &debug_print (\"\\n\\tFile Found Re\ -motely\");\n if (($r=safe_system ( \"$netcompress\ -ion_pg $compressed_tmp_file_name\")!=$EXIT_SUCCESS\ -) && $attempts<5)\n{\n &debug_print (\"\\n\\t\ -Proper Download Failed Try again\");\n unlink\ - $compressed_tmp_file_name;\n print \"\\nFail\ -ed to Download $compressed_tmp_file_name. New Atte\ -mpt $attempt/5\\n\";\n return &get_pdb_file0(\ -$pdb_file, $attempt+1);\n}\n elsif ($r== $EXIT_SU\ -CCESS)\n{\n &debug_print (\"\\n\\tProper Down\ -load Succeeded \");\n $return_value=$tmp_pdb_\ -file;\n}\n else\n{\n &debug_print (\"\\n\\tP\ -roper Download Failed \");\n &debug_print (\"\ -\\nFile Not Found Remotely\");\n unlink $comp\ -ressed_tmp_file_name;\n}\n}\n else\n{\n &debu\ -g_print (\"\\nFile Not Found Remotely\");\n unlin\ -k $compressed_tmp_file_name;\n}\n if ($cache n\ -e \"no\" && $cache ne \"update\" && -e $return_val\ -ue)\n{\n `cp $return_value $cache/$pdb_file.pdb`;\ -\n}\n}\n &debug_print (\"\\nRemote Download Finish\ -ed\");\n return $return_value;\n}\n return \"\"\ -;\n}\n\nsub check_pdb_file4compression \n{\n my\ - $file=@_[0];\n my $tmp;\n my $r;\n \n \ - $tmp=&vtmpnam();\n if (-e $tmp){unlink $tmp;}\\ -n \n $file=~s/\\/\\//\\//g;\n if (-B $\ -file && ($file=~/\\.Z/)) {`cp $file $tmp.Z`;`rm $t\ -mp`;`gunzip $tmp.Z $SILENT`;$r=$tmp;}\n elsif (\ --B $file && ($file=~/\\.gz/)){`cp $file $tmp.gz`;`\ -gunzip $tmp.gz $SILENT`;return $r=$tmp;}\n elsi\ -f (-B $file ){`cp $file $tmp.gz`;`gunzip $tmp.gz $\ -SILENT`;$r=$tmp;}\n elsif ( -e $file ) {$r= $fi\ -le;}\n elsif ( -e \"$file.gz\" ){ `cp $file.gz \ -$tmp.gz`;`gunzip $tmp.gz $SILENT`;$r=$tmp;} \ - \n elsif ( -e \"$file.Z\") {`cp $file.Z $tmp.\ -Z`; `gunzip $tmp.Z $SILENT`;$r=$tmp;}\n else {\ -$r= $file;}\n\n if ( -e \"$tmp.Z\"){unlink \"$t\ -mp.Z\";}\n if ( -e \"$tmp.gz\"){unlink \"$tmp.g\ -z\";}\n \n return $r;\n \n}\n\nsub vtmpna\ -m\n{\n my $tmp_file_name;\n $tmp_name_counte\ -r++;\n $tmp_file_name=\"tmp_file_for_extract_fr\ -om_pdb$$.$tmp_name_counter\";\n $tmp_file_list[\ -$ntmp_file++]=$tmp_file_name;\n if ( -e $tmp_fi\ -le_name) {return &vtmpnam ();}\n else {return $\ -tmp_file_name;}\n}\n\n\n\n \n\n\nsub safe_syste\ -m \n{\n my $com=@_[0];\n my $pid;\n my $status;\ -\n if ($com eq \"\"){return 1;}\n if (($pid = fo\ -rk ()) < 0){return (-1);}\n if ($pid == 0)\n{\n \ - exec ($com);\n}\n else\n{\n $PIDCHILD=$pid;\n\ -}\n \n waitpid ($pid,WTERMSIG);\n return $?; #c\ -ontains the status of the exit\n}\nEND {\n kill (\ -$PIDCHILD);\n clean(@tmp_file_list);\n}\nsub myex\ -it\n{\n my $exit_status=@_[0];\n \n &clea\ -n(@tmp_file_list);\n exit ( $exit_status);\n}\n\ -\nsub clean\n{\n my @fl=@_;\n my $file;\n \n\n \ - if ( $debug){print STDERR \"remove $#fl files\\n\ -\";}\n\n foreach $file ( @fl)\n{\n if (-e $f\ -ile)\n{\n if ( $debug){print STDERR \"\\n$file [\ -NOT DELETED]\";}\n else{unlink ($file);} \n}\n}\\ -n}\nsub vfopen \n{\n my $file=@_[0];\n my $m\ -ode=@_[1];\n my $tmp;\n my $F = new FileHand\ -le;\n \n \n $tmp=$file;\n \n \n if \ -( $mode eq \"r\" && !-e $file){ die \"Cannot open \ -file $file [FATAL: EXTRACT_FROM_PDB]\\n\";}\n e\ -lsif ($mode eq \"w\"){$tmp=\">$file\";}\n elsif\ - ($mode eq \"a\"){$tmp=\">>$file\";}\n \n \n\ - open ($F,$tmp);\n return $F;\n}\nsub debug_\ -print\n{\n my $message =@_[0];\n if ($debug)\ -{print STDERR \"NO_REMOTE_PDB_DIR: $no_remote_pdb_\ -dir - $message [DEBUG:extract_from_pdb]\";}\n r\ -eturn;\n}\nsub is_aa \n{\n my ($aa, $chain) =@_\ -;\n\n my $one;\n my $trhee;\n \n if ( \ -$onelett{$molecule_type{$chain}}->{$aa} eq 'X' || \ -!$onelett{$molecule_type{$chain}}->{$aa} ){return \ -'';}\n else\n{\n $one=$onelett{$molecule_type{$\ -chain}}->{$aa};\n\n $three=$threelett{$molecule_ty\ -pe{$chain}}->{$one};\n \n\n return $three;\n}\n\n}\ -\nsub pg_is_installed\n{\n my @ml=@_;\n my $\ -r, $p, $m;\n my $supported=0;\n \n my $p=\ -shift (@ml);\n $r=`which $p 2>/dev/null`;\n \ -if ($r eq \"\"){return 0;}\n else {return 1;}\n\ -}\nsub check_pg_is_installed\n{\n my @ml=@_;\n \ - my $r=&pg_is_installed (@ml);\n if (!$r)\n{\\ -n print STDERR \"\\nProgram $p Supported but Not I\ -nstalled on your system [FATAL:tc_generic_method]\\ -\n\";\n myexit ($EXIT_FAILURE);\n}\n else\n{\n \ -return 1;\n}\n}\n\nsub url2file\n{\n my ($addre\ -ss, $out, $wget_arg, $curl_arg)=(@_);\n my ($pg\ -, $flag, $r, $arg, $count);\n \n if ( $INTER\ -NET==-1)\n{\n $INTERNET=1;\n $INTERNET=&check_inte\ -rnet_connection();\n}\n if ($INTERNET==0){exit(\ -$EXIT_FAILURE);}\n \n if (&pg_is_installed (\\ -"wget\")) {$pg=\"wget\"; $flag=\"-O\";$arg=$wget\ -_arg;}\n elsif (&pg_is_installed (\"curl\")){$p\ -g=\"curl\"; $flag=\"-o\";$arg=$curl_arg;}\n els\ -e\n{\n print stderr \"ERROR: neither curl nor wget\ - are installed. Imnpossible to tectch remote file \ -[FATAL]\\n\";\n exit ($EXIT_FAILURE);\n}\n \n \ - $r=safe_system (\"$pg $flag$out $address >/dev/nu\ -ll 2>/dev/null\");\n if ($r!=$EXIT_SUCCESS)\n{\\ -n unlink $file;\n}\n return $r;\n}\n\n\n\nsub c\ -heck_internet_connection\n{\n my $internet;\n \ - \n if ( -e \"x\"){unlink (\"x\");}\n url2f\ -ile(\"www.google.com\", \"x\");\n \n if ( !-\ -e \"x\" || -s \"x\" < 10){$internet=0;}\n else \ -{$internet=1;}\n if (-e \"x\"){unlink \"x\";}\n\ - return $internet;\n}\n\n\n\nsub pdbfile2chaint\ -ype\n {\n my $file=@_[0];\n my %ct;\n my\ - $F;\n \n $F=vfopen ($file, \"r\");\n whi\ -le (<$F>)\n {\n my $line=$_;\n if ($line =~/^\ -ATOM/)\n {\n my $C=substr($line,21,1);\n \ - if (!$ct{$C})\n { \n my $r=substr($line,17\ -,3);\n $r=~s/\\s+//;\n if (length ($r)==1){$ct{$\ -C}=\"R\";}\n elsif (length ($r)==2){$ct{$C}=\"D\"\ -;}\n elsif (length ($r)==3){$ct{$C}=\"P\";}\n el\ -se \n {\n print \"ERROR: Could not read RE\ -S_ID field in file $file [FATAL:extract_from_pdb]\\ -\n\";die;\n }\n }\n }\n }\n clo\ -se ($F);\n return %ct;\n }\n \n \n\n\n\ns\ -ub fill_threelett_RNA\n{\n\n my %threelett=(\n 'A'\ -, ' A',\n 'T', ' T',\n 'U', ' U',\n 'C', ' C',\ -\n 'G', ' G',\n 'I', ' I', #Inosine\n );\n \n re\ -turn %threelett;\n\n}\n\n\nsub fill_onelett_RNA\n{\ -\n my %onelett=(\n ' A' => 'A',\n ' T' => 'T',\ -\n ' U' => 'U',\n ' C' => 'C',\n ' G' => 'G',\n\ - 'CSL' => 'X',\n 'UMS' => 'X',\n ' I' => 'I',\n '\ -A' => 'A',\n 'T' => 'T',\n 'U' => 'U',\n 'C' => 'C\ -',\n 'G' => 'G',\n 'I' => 'I',\n );\n\n return %on\ -elett;\n\n}\n\n\nsub fill_onelett_DNA\n{\n my %o\ -nelett=(\n ' DA', 'A',\n ' DT', 'T',\n ' DC', 'C',\ -\n ' DG', 'G',\n 'DA', 'A',\n 'DT', 'T',\n 'DC', '\ -C',\n 'DG', 'G',\n );\n\n return %onelett;\n\n}\n\\ -nsub fill_threelett_DNA\n{\n\n my %threelett=(\n '\ -A', ' DA',\n 'T', ' DT',\n 'C', ' DC',\n 'G', ' DG\ -',\n );\n\n return %threelett;\n\n}\n\n\n\n\nsub f\ -ill_threelett_prot\n{ \n my %threelett;\n\n %th\ -reelett=(\n'A', 'ALA',\n'C', 'CYS',\n'D', 'ASP',\n\ -'E', 'GLU',\n'F', 'PHE',\n'G', 'GLY',\n'H', 'HIS',\ -\n'I', 'ILE',\n'K', 'LYS',\n'L', 'LEU',\n'N', 'ASN\ -',\n'M', 'MET',\n'P', 'PRO',\n'Q', 'GLN',\n'R', 'A\ -RG',\n'S', 'SER',\n'T', 'THR',\n'V', 'VAL',\n'W', \ -'TRP',\n'Y', 'TYR',\n);\n\nreturn %threelett;\n\n\\ -n}\n\nsub fill_onelett_prot\n{\n my %onelett;\n\ - \n %onelett=(\n\n'10A', 'X',\n'11O', 'X',\n\ -'12A', 'X',\n'13P', 'X',\n'13R', 'X',\n'13S', 'X',\ -\n'14W', 'X',\n'15P', 'X',\n'16A', 'X',\n'16G', 'X\ -',\n'1AN', 'X',\n'1AP', 'X',\n'1AR', 'X',\n'1BH', \ -'X',\n'1BO', 'X',\n'1C5', 'X',\n'1CU', 'X',\n'1DA'\ -, 'X',\n'1GL', 'X',\n'1GN', 'X',\n'1IN', 'X',\n'1L\ -U', 'L',\n'1MA', 'X',\n'1MC', 'X',\n'1MG', 'X',\n'\ -1MZ', 'X',\n'1NA', 'X',\n'1NB', 'X',\n'1NI', 'X',\\ -n'1PA', 'A',\n'1PC', 'X',\n'1PE', 'X',\n'1PG', 'X'\ -,\n'1PI', 'A',\n'1PM', 'X',\n'1PN', 'X',\n'1PU', '\ -X',\n'1PY', 'X',\n'1UN', 'X',\n'24T', 'X',\n'25T',\ - 'X',\n'26P', 'X',\n'2AB', 'X',\n'2AM', 'X',\n'2AN\ -', 'X',\n'2AP', 'X',\n'2AR', 'X',\n'2AS', 'D',\n'2\ -BL', 'X',\n'2BM', 'X',\n'2CP', 'X',\n'2DA', 'X',\n\ -'2DG', 'X',\n'2DP', 'X',\n'2DT', 'X',\n'2EP', 'X',\ -\n'2EZ', 'X',\n'2FG', 'X',\n'2FL', 'X',\n'2FP', 'X\ -',\n'2FU', 'X',\n'2GL', 'X',\n'2GP', 'X',\n'2HP', \ -'X',\n'2IB', 'X',\n'2IP', 'X',\n'2LU', 'L',\n'2MA'\ -, 'X',\n'2MD', 'X',\n'2ME', 'X',\n'2MG', 'X',\n'2M\ -L', 'L',\n'2MO', 'X',\n'2MR', 'R',\n'2MU', 'X',\n'\ -2MZ', 'X',\n'2NO', 'X',\n'2NP', 'X',\n'2OG', 'X',\\ -n'2PA', 'X',\n'2PC', 'X',\n'2PE', 'X',\n'2PG', 'X'\ -,\n'2PH', 'X',\n'2PI', 'X',\n'2PL', 'X',\n'2PP', '\ -X',\n'2PU', 'X',\n'2SI', 'X',\n'2TB', 'X',\n'34C',\ - 'X',\n'35G', 'X',\n'3AA', 'X',\n'3AD', 'X',\n'3AH\ -', 'H',\n'3AN', 'X',\n'3AP', 'X',\n'3AT', 'X',\n'3\ -BT', 'X',\n'3CH', 'X',\n'3CN', 'X',\n'3CO', 'X',\n\ -'3CP', 'X',\n'3DR', 'X',\n'3EP', 'X',\n'3FM', 'X',\ -\n'3GA', 'X',\n'3GP', 'X',\n'3HB', 'X',\n'3HC', 'X\ -',\n'3HP', 'X',\n'3IB', 'X',\n'3ID', 'X',\n'3IN', \ -'X',\n'3MA', 'X',\n'3MB', 'X',\n'3MC', 'X',\n'3MD'\ -, 'D',\n'3MF', 'X',\n'3MP', 'X',\n'3MT', 'X',\n'3O\ -L', 'X',\n'3PA', 'X',\n'3PG', 'X',\n'3PO', 'X',\n'\ -3PP', 'X',\n'3PY', 'X',\n'49A', 'X',\n'4AB', 'X',\\ -n'4AM', 'X',\n'4AN', 'X',\n'4AP', 'X',\n'4BA', 'X'\ -,\n'4BT', 'X',\n'4CA', 'X',\n'4CO', 'X',\n'4HP', '\ -X',\n'4IP', 'X',\n'4MO', 'X',\n'4MV', 'X',\n'4MZ',\ - 'X',\n'4NC', 'X',\n'4NP', 'X',\n'4OX', 'X',\n'4PB\ -', 'X',\n'4PN', 'X',\n'4PP', 'X',\n'4SC', 'X',\n'4\ -SU', 'X',\n'4TB', 'X',\n'55C', 'X',\n'5AD', 'X',\n\ -'5AN', 'X',\n'5AT', 'X',\n'5CM', 'X',\n'5GP', 'X',\ -\n'5HP', 'E',\n'5HT', 'X',\n'5IT', 'X',\n'5IU', 'X\ -',\n'5MB', 'X',\n'5MC', 'X',\n'5MD', 'X',\n'5MP', \ -'X',\n'5MU', 'X',\n'5NC', 'X',\n'5OB', 'X',\n'5PA'\ -, 'X',\n'5PV', 'X',\n'6AB', 'X',\n'6CT', 'X',\n'6H\ -A', 'X',\n'6HC', 'X',\n'6HG', 'X',\n'6HT', 'X',\n'\ -6IN', 'X',\n'6MO', 'X',\n'6MP', 'X',\n'6PG', 'X',\\ -n'6WO', 'X',\n'70U', 'X',\n'7DG', 'X',\n'7HP', 'X'\ -,\n'7I2', 'X',\n'7MG', 'X',\n'7MQ', 'X',\n'7NI', '\ -X',\n'87Y', 'X',\n'8AD', 'X',\n'8BR', 'X',\n'8IG',\ - 'X',\n'8IN', 'X',\n'8OG', 'X',\n'95A', 'X',\n'9AD\ -', 'X',\n'9AM', 'X',\n'9AP', 'X',\n'9DG', 'X',\n'9\ -DI', 'X',\n'9HX', 'X',\n'9OH', 'X',\n'9TA', 'X',\n\ -'A12', 'X',\n'A15', 'X',\n'A23', 'X',\n'A24', 'X',\ -\n'A26', 'X',\n'A2G', 'X',\n'A2P', 'X',\n'A32', 'X\ -',\n'A3P', 'X',\n'A4P', 'X',\n'A5P', 'X',\n'A70', \ -'X',\n'A76', 'X',\n'A77', 'X',\n'A78', 'X',\n'A79'\ -, 'X',\n'A80', 'X',\n'A85', 'X',\n'A88', 'X',\n'A9\ -A', 'X',\n'AA3', 'X',\n'AA4', 'X',\n'AA6', 'X',\n'\ -AAA', 'X',\n'AAB', 'X',\n'AAC', 'X',\n'AAE', 'X',\\ -n'AAG', 'R',\n'AAH', 'X',\n'AAM', 'X',\n'AAN', 'X'\ -,\n'AAP', 'X',\n'AAR', 'R',\n'AAS', 'X',\n'AAT', '\ -X',\n'ABA', 'X',\n'ABC', 'X',\n'ABD', 'X',\n'ABE',\ - 'X',\n'ABH', 'X',\n'ABI', 'X',\n'ABK', 'X',\n'ABM\ -', 'X',\n'ABN', 'X',\n'ABP', 'X',\n'ABR', 'X',\n'A\ -BS', 'X',\n'ABU', 'X',\n'AC1', 'X',\n'AC2', 'X',\n\ -'ACA', 'X',\n'ACB', 'D',\n'ACC', 'C',\n'ACD', 'X',\ -\n'ACE', 'X',\n'ACH', 'X',\n'ACI', 'X',\n'ACL', 'R\ -',\n'ACM', 'X',\n'ACN', 'X',\n'ACO', 'X',\n'ACP', \ -'X',\n'ACQ', 'X',\n'ACR', 'X',\n'ACS', 'X',\n'ACT'\ -, 'X',\n'ACV', 'V',\n'ACX', 'X',\n'ACY', 'X',\n'AD\ -2', 'X',\n'AD3', 'X',\n'ADC', 'X',\n'ADD', 'X',\n'\ -ADE', 'X',\n'ADH', 'X',\n'ADI', 'X',\n'ADM', 'X',\\ -n'ADN', 'X',\n'ADP', 'X',\n'ADQ', 'X',\n'ADR', 'X'\ -,\n'ADS', 'X',\n'ADT', 'X',\n'ADU', 'X',\n'ADW', '\ -X',\n'ADX', 'X',\n'AE2', 'X',\n'AEA', 'X',\n'AEB',\ - 'X',\n'AEI', 'D',\n'AEN', 'X',\n'AET', 'T',\n'AF1\ -', 'X',\n'AF3', 'X',\n'AFA', 'D',\n'AFP', 'X',\n'A\ -G7', 'X',\n'AGB', 'X',\n'AGF', 'X',\n'AGL', 'X',\n\ -'AGM', 'R',\n'AGN', 'X',\n'AGP', 'X',\n'AGS', 'X',\ -\n'AGU', 'X',\n'AH0', 'X',\n'AH1', 'X',\n'AHA', 'X\ -',\n'AHB', 'D',\n'AHC', 'X',\n'AHF', 'X',\n'AHG', \ -'X',\n'AHH', 'X',\n'AHM', 'X',\n'AHO', 'X',\n'AHP'\ -, 'X',\n'AHS', 'X',\n'AHT', 'Y',\n'AHU', 'X',\n'AH\ -X', 'X',\n'AI1', 'X',\n'AI2', 'X',\n'AIB', 'X',\n'\ -AIC', 'X',\n'AIM', 'X',\n'AIP', 'X',\n'AIQ', 'X',\\ -n'AIR', 'X',\n'AJ3', 'X',\n'AKB', 'X',\n'AKG', 'X'\ -,\n'AKR', 'X',\n'AL1', 'X',\n'AL2', 'X',\n'AL3', '\ -X',\n'AL4', 'X',\n'AL5', 'X',\n'AL6', 'X',\n'AL7',\ - 'X',\n'AL8', 'X',\n'AL9', 'X',\n'ALA', 'A',\n'ALB\ -', 'X',\n'ALC', 'X',\n'ALD', 'L',\n'ALE', 'X',\n'A\ -LF', 'X',\n'ALG', 'X',\n'ALL', 'X',\n'ALM', 'A',\n\ -'ALN', 'A',\n'ALO', 'T',\n'ALP', 'X',\n'ALQ', 'X',\ -\n'ALR', 'X',\n'ALS', 'X',\n'ALT', 'A',\n'ALY', 'K\ -',\n'ALZ', 'X',\n'AMA', 'X',\n'AMB', 'X',\n'AMC', \ -'X',\n'AMD', 'X',\n'AMG', 'X',\n'AMH', 'X',\n'AMI'\ -, 'X',\n'AML', 'X',\n'AMN', 'X',\n'AMO', 'X',\n'AM\ -P', 'X',\n'AMQ', 'X',\n'AMR', 'X',\n'AMS', 'X',\n'\ -AMT', 'X',\n'AMU', 'X',\n'AMW', 'X',\n'AMX', 'X',\\ -n'AMY', 'X',\n'ANA', 'X',\n'ANB', 'X',\n'ANC', 'X'\ -,\n'AND', 'X',\n'ANE', 'X',\n'ANI', 'X',\n'ANL', '\ -X',\n'ANO', 'X',\n'ANP', 'X',\n'ANS', 'X',\n'ANT',\ - 'X',\n'AOE', 'X',\n'AOP', 'X',\n'AP1', 'X',\n'AP2\ -', 'X',\n'AP3', 'X',\n'AP4', 'X',\n'AP5', 'X',\n'A\ -P6', 'X',\n'APA', 'X',\n'APB', 'X',\n'APC', 'X',\n\ -'APE', 'F',\n'APF', 'X',\n'APG', 'X',\n'APH', 'A',\ -\n'API', 'X',\n'APL', 'X',\n'APM', 'X',\n'APN', 'G\ -',\n'APP', 'X',\n'APQ', 'X',\n'APR', 'X',\n'APS', \ -'X',\n'APT', 'X',\n'APU', 'X',\n'APX', 'X',\n'APY'\ -, 'X',\n'APZ', 'X',\n'AQS', 'X',\n'AR1', 'X',\n'AR\ -2', 'X',\n'ARA', 'X',\n'ARB', 'X',\n'ARC', 'X',\n'\ -ARD', 'X',\n'ARG', 'R',\n'ARH', 'X',\n'ARI', 'X',\\ -n'ARM', 'R',\n'ARN', 'X',\n'ARO', 'R',\n'ARP', 'X'\ -,\n'ARQ', 'X',\n'ARS', 'X',\n'AS1', 'R',\n'AS2', '\ -X',\n'ASA', 'D',\n'ASB', 'D',\n'ASC', 'X',\n'ASD',\ - 'X',\n'ASE', 'X',\n'ASF', 'X',\n'ASI', 'X',\n'ASK\ -', 'D',\n'ASL', 'X',\n'ASM', 'N',\n'ASO', 'X',\n'A\ -SP', 'D',\n'ASQ', 'X',\n'ASU', 'X',\n'ATA', 'X',\n\ -'ATC', 'X',\n'ATD', 'X',\n'ATF', 'X',\n'ATG', 'X',\ -\n'ATH', 'X',\n'ATM', 'X',\n'ATO', 'X',\n'ATP', 'X\ -',\n'ATQ', 'X',\n'ATR', 'X',\n'ATT', 'X',\n'ATY', \ -'X',\n'ATZ', 'X',\n'AUC', 'X',\n'AUR', 'X',\n'AVG'\ -, 'X',\n'AXP', 'X',\n'AYA', 'A',\n'AZ2', 'X',\n'AZ\ -A', 'X',\n'AZC', 'X',\n'AZD', 'X',\n'AZE', 'X',\n'\ -AZI', 'X',\n'AZL', 'X',\n'AZM', 'X',\n'AZR', 'X',\\ -n'AZT', 'X',\n'B12', 'X',\n'B1F', 'F',\n'B2A', 'A'\ -,\n'B2F', 'F',\n'B2I', 'I',\n'B2V', 'V',\n'B3I', '\ -X',\n'B3P', 'X',\n'B7G', 'X',\n'B96', 'X',\n'B9A',\ - 'X',\n'BA1', 'X',\n'BAA', 'X',\n'BAB', 'X',\n'BAC\ -', 'X',\n'BAF', 'X',\n'BAH', 'X',\n'BAI', 'X',\n'B\ -AK', 'X',\n'BAL', 'A',\n'BAM', 'X',\n'BAO', 'X',\n\ -'BAP', 'X',\n'BAR', 'X',\n'BAS', 'X',\n'BAT', 'F',\ -\n'BAY', 'X',\n'BAZ', 'X',\n'BB1', 'X',\n'BB2', 'X\ -',\n'BBA', 'X',\n'BBH', 'X',\n'BBS', 'X',\n'BBT', \ -'X',\n'BBZ', 'X',\n'BCA', 'X',\n'BCB', 'X',\n'BCC'\ -, 'X',\n'BCD', 'X',\n'BCL', 'X',\n'BCN', 'X',\n'BC\ -R', 'X',\n'BCS', 'C',\n'BCT', 'X',\n'BCY', 'X',\n'\ -BCZ', 'X',\n'BDA', 'X',\n'BDG', 'X',\n'BDK', 'X',\\ -n'BDM', 'X',\n'BDN', 'X',\n'BDS', 'X',\n'BE1', 'X'\ -,\n'BE2', 'X',\n'BEA', 'X',\n'BEF', 'X',\n'BEN', '\ -X',\n'BEO', 'X',\n'BEP', 'X',\n'BER', 'X',\n'BES',\ - 'X',\n'BET', 'X',\n'BEZ', 'X',\n'BF2', 'X',\n'BFA\ -', 'X',\n'BFD', 'X',\n'BFP', 'X',\n'BFS', 'X',\n'B\ -FU', 'X',\n'BG6', 'X',\n'BGF', 'X',\n'BGG', 'X',\n\ -'BGL', 'X',\n'BGN', 'X',\n'BGP', 'X',\n'BGX', 'X',\ -\n'BH4', 'X',\n'BHA', 'X',\n'BHC', 'X',\n'BHD', 'D\ -',\n'BHO', 'X',\n'BHS', 'X',\n'BIC', 'X',\n'BIN', \ -'X',\n'BIO', 'X',\n'BIP', 'X',\n'BIS', 'X',\n'BIZ'\ -, 'X',\n'BJH', 'X',\n'BJI', 'X',\n'BJP', 'X',\n'BL\ -A', 'X',\n'BLB', 'X',\n'BLE', 'L',\n'BLG', 'P',\n'\ -BLI', 'X',\n'BLM', 'X',\n'BLV', 'X',\n'BLY', 'K',\\ -n'BM1', 'X',\n'BM2', 'X',\n'BM5', 'X',\n'BM9', 'X'\ -,\n'BMA', 'X',\n'BMD', 'X',\n'BME', 'X',\n'BMP', '\ -X',\n'BMQ', 'X',\n'BMS', 'X',\n'BMT', 'T',\n'BMU',\ - 'X',\n'BMY', 'X',\n'BMZ', 'X',\n'BNA', 'X',\n'BNG\ -', 'X',\n'BNI', 'X',\n'BNN', 'F',\n'BNO', 'L',\n'B\ -NS', 'X',\n'BNZ', 'X',\n'BO3', 'X',\n'BO4', 'X',\n\ -'BOC', 'X',\n'BOG', 'X',\n'BOM', 'X',\n'BOT', 'X',\ -\n'BOX', 'X',\n'BOZ', 'X',\n'BPA', 'X',\n'BPB', 'X\ -',\n'BPD', 'X',\n'BPG', 'X',\n'BPH', 'X',\n'BPI', \ -'X',\n'BPJ', 'X',\n'BPM', 'X',\n'BPN', 'X',\n'BPO'\ -, 'X',\n'BPP', 'X',\n'BPT', 'X',\n'BPY', 'X',\n'BR\ -B', 'X',\n'BRC', 'X',\n'BRE', 'X',\n'BRI', 'X',\n'\ -BRL', 'X',\n'BRM', 'X',\n'BRN', 'X',\n'BRO', 'X',\\ -n'BRS', 'X',\n'BRU', 'X',\n'BRZ', 'X',\n'BSB', 'X'\ -,\n'BSI', 'X',\n'BSP', 'X',\n'BT1', 'X',\n'BT2', '\ -X',\n'BT3', 'X',\n'BTA', 'L',\n'BTB', 'X',\n'BTC',\ - 'C',\n'BTD', 'X',\n'BTN', 'X',\n'BTP', 'X',\n'BTR\ -', 'W',\n'BU1', 'X',\n'BUA', 'X',\n'BUB', 'X',\n'B\ -UC', 'X',\n'BUG', 'X',\n'BUL', 'X',\n'BUM', 'X',\n\ -'BUQ', 'X',\n'BUT', 'X',\n'BVD', 'X',\n'BX3', 'X',\ -\n'BYS', 'X',\n'BZ1', 'X',\n'BZA', 'X',\n'BZB', 'X\ -',\n'BZC', 'X',\n'BZD', 'X',\n'BZF', 'X',\n'BZI', \ -'X',\n'BZM', 'X',\n'BZO', 'X',\n'BZP', 'X',\n'BZQ'\ -, 'X',\n'BZS', 'X',\n'BZT', 'X',\n'C02', 'X',\n'C1\ -1', 'X',\n'C1O', 'X',\n'C20', 'X',\n'C24', 'X',\n'\ -C2F', 'X',\n'C2O', 'X',\n'C2P', 'X',\n'C3M', 'X',\\ -n'C3P', 'X',\n'C3X', 'X',\n'C48', 'X',\n'C4M', 'X'\ -,\n'C4X', 'X',\n'C5C', 'X',\n'C5M', 'X',\n'C5P', '\ -X',\n'C5X', 'X',\n'C60', 'X',\n'C6C', 'X',\n'C6M',\ - 'X',\n'C78', 'X',\n'C8E', 'X',\n'CA3', 'X',\n'CA5\ -', 'X',\n'CAA', 'X',\n'CAB', 'X',\n'CAC', 'X',\n'C\ -AD', 'X',\n'CAF', 'C',\n'CAG', 'X',\n'CAH', 'X',\n\ -'CAL', 'X',\n'CAM', 'X',\n'CAN', 'X',\n'CAO', 'X',\ -\n'CAP', 'X',\n'CAQ', 'X',\n'CAR', 'X',\n'CAS', 'C\ -',\n'CAT', 'X',\n'CAV', 'X',\n'CAY', 'C',\n'CAZ', \ -'X',\n'CB3', 'X',\n'CB4', 'X',\n'CBA', 'X',\n'CBD'\ -, 'X',\n'CBG', 'X',\n'CBI', 'X',\n'CBL', 'X',\n'CB\ -M', 'X',\n'CBN', 'X',\n'CBO', 'X',\n'CBP', 'X',\n'\ -CBS', 'X',\n'CBX', 'X',\n'CBZ', 'X',\n'CC0', 'X',\\ -n'CC1', 'X',\n'CCC', 'X',\n'CCH', 'X',\n'CCI', 'X'\ -,\n'CCM', 'X',\n'CCN', 'X',\n'CCO', 'X',\n'CCP', '\ -X',\n'CCR', 'X',\n'CCS', 'C',\n'CCV', 'X',\n'CCY',\ - 'X',\n'CD1', 'X',\n'CDC', 'X',\n'CDE', 'X',\n'CDF\ -', 'X',\n'CDI', 'X',\n'CDL', 'X',\n'CDM', 'X',\n'C\ -DP', 'X',\n'CDR', 'X',\n'CDU', 'X',\n'CE1', 'X',\n\ -'CEA', 'C',\n'CEB', 'X',\n'CEC', 'X',\n'CED', 'X',\ -\n'CEF', 'X',\n'CEH', 'X',\n'CEM', 'X',\n'CEO', 'X\ -',\n'CEP', 'X',\n'CEQ', 'X',\n'CER', 'X',\n'CES', \ -'G',\n'CET', 'X',\n'CFC', 'X',\n'CFF', 'X',\n'CFM'\ -, 'X',\n'CFO', 'X',\n'CFP', 'X',\n'CFS', 'X',\n'CF\ -X', 'X',\n'CGN', 'X',\n'CGP', 'X',\n'CGS', 'X',\n'\ -CGU', 'E',\n'CH2', 'X',\n'CH3', 'X',\n'CHA', 'X',\\ -n'CHB', 'X',\n'CHD', 'X',\n'CHF', 'X',\n'CHG', 'G'\ -,\n'CHI', 'X',\n'CHN', 'X',\n'CHO', 'X',\n'CHP', '\ -G',\n'CHR', 'X',\n'CHS', 'F',\n'CHT', 'X',\n'CHX',\ - 'X',\n'CIC', 'X',\n'CIN', 'X',\n'CIP', 'X',\n'CIR\ -', 'X',\n'CIT', 'X',\n'CIU', 'X',\n'CKI', 'X',\n'C\ -L1', 'X',\n'CL2', 'X',\n'CLA', 'X',\n'CLB', 'A',\n\ -'CLC', 'S',\n'CLD', 'A',\n'CLE', 'L',\n'CLF', 'X',\ -\n'CLK', 'S',\n'CLL', 'X',\n'CLM', 'X',\n'CLN', 'X\ -',\n'CLO', 'X',\n'CLP', 'X',\n'CLQ', 'X',\n'CLR', \ -'X',\n'CLS', 'X',\n'CLT', 'X',\n'CLX', 'X',\n'CLY'\ -, 'X',\n'CMA', 'R',\n'CMC', 'X',\n'CMD', 'X',\n'CM\ -E', 'C',\n'CMG', 'X',\n'CMK', 'X',\n'CMN', 'X',\n'\ -CMO', 'X',\n'CMP', 'X',\n'CMR', 'X',\n'CMS', 'X',\\ -n'CMT', 'C',\n'CMX', 'X',\n'CNA', 'X',\n'CNC', 'X'\ -,\n'CND', 'X',\n'CNH', 'X',\n'CNM', 'X',\n'CNN', '\ -X',\n'CNO', 'X',\n'CNP', 'X',\n'CO2', 'X',\n'CO3',\ - 'X',\n'CO5', 'X',\n'CO8', 'X',\n'COA', 'X',\n'COB\ -', 'X',\n'COC', 'X',\n'COD', 'X',\n'COE', 'X',\n'C\ -OF', 'X',\n'COH', 'X',\n'COI', 'X',\n'COJ', 'X',\n\ -'COL', 'X',\n'COM', 'X',\n'CON', 'X',\n'COP', 'X',\ -\n'COR', 'X',\n'COS', 'X',\n'COT', 'X',\n'COY', 'X\ -',\n'CP1', 'G',\n'CP2', 'X',\n'CP4', 'X',\n'CPA', \ -'X',\n'CPB', 'X',\n'CPC', 'X',\n'CPD', 'X',\n'CPG'\ -, 'X',\n'CPH', 'X',\n'CPI', 'X',\n'CPM', 'X',\n'CP\ -N', 'G',\n'CPO', 'X',\n'CPP', 'X',\n'CPQ', 'X',\n'\ -CPR', 'X',\n'CPS', 'X',\n'CPT', 'X',\n'CPU', 'X',\\ -n'CPV', 'X',\n'CPY', 'X',\n'CR1', 'X',\n'CR6', 'X'\ -,\n'CRA', 'X',\n'CRB', 'X',\n'CRC', 'X',\n'CRG', '\ -X',\n'CRH', 'X',\n'CRO', 'T',\n'CRP', 'X',\n'CRQ',\ - 'X',\n'CRS', 'X',\n'CRT', 'X',\n'CRY', 'X',\n'CSA\ -', 'C',\n'CSB', 'X',\n'CSD', 'C',\n'CSE', 'C',\n'C\ -SH', 'X',\n'CSI', 'X',\n'CSN', 'X',\n'CSO', 'C',\n\ -'CSP', 'C',\n'CSR', 'C',\n'CSS', 'C',\n'CST', 'X',\ -\n'CSW', 'C',\n'CSX', 'C',\n'CSY', 'X',\n'CSZ', 'C\ -',\n'CT3', 'X',\n'CTA', 'X',\n'CTB', 'X',\n'CTC', \ -'X',\n'CTD', 'X',\n'CTH', 'T',\n'CTO', 'X',\n'CTP'\ -, 'X',\n'CTR', 'X',\n'CTS', 'X',\n'CTT', 'X',\n'CT\ -Y', 'X',\n'CTZ', 'X',\n'CU1', 'X',\n'CUA', 'X',\n'\ -CUC', 'X',\n'CUL', 'X',\n'CUO', 'X',\n'CUZ', 'X',\\ -n'CVI', 'X',\n'CXF', 'X',\n'CXL', 'X',\n'CXM', 'M'\ -,\n'CXN', 'X',\n'CXP', 'X',\n'CXS', 'X',\n'CY1', '\ -C',\n'CY3', 'X',\n'CYB', 'X',\n'CYC', 'X',\n'CYF',\ - 'C',\n'CYG', 'C',\n'CYH', 'X',\n'CYL', 'X',\n'CYM\ -', 'C',\n'CYN', 'X',\n'CYO', 'X',\n'CYP', 'X',\n'C\ -YQ', 'C',\n'CYS', 'C',\n'CYU', 'X',\n'CYY', 'X',\n\ -'CYZ', 'X',\n'CZH', 'X',\n'CZZ', 'C',\n'D12', 'X',\ -\n'D13', 'X',\n'D16', 'X',\n'D18', 'X',\n'D19', 'X\ -',\n'D1P', 'X',\n'D24', 'X',\n'D34', 'X',\n'D35', \ -'X',\n'D4D', 'X',\n'D4T', 'X',\n'D6G', 'X',\n'DA2'\ -, 'R',\n'DA3', 'X',\n'DA6', 'X',\n'DA7', 'X',\n'DA\ -A', 'X',\n'DAB', 'X',\n'DAC', 'X',\n'DAD', 'X',\n'\ -DAE', 'X',\n'DAF', 'X',\n'DAG', 'X',\n'DAH', 'A',\\ -n'DAJ', 'X',\n'DAK', 'X',\n'DAL', 'A',\n'DAM', 'A'\ -,\n'DAN', 'X',\n'DAO', 'X',\n'DAP', 'X',\n'DAQ', '\ -X',\n'DAR', 'R',\n'DAS', 'D',\n'DAT', 'X',\n'DAU',\ - 'X',\n'DAV', 'X',\n'DBA', 'X',\n'DBD', 'X',\n'DBF\ -', 'X',\n'DBG', 'X',\n'DBI', 'X',\n'DBV', 'X',\n'D\ -BY', 'Y',\n'DCA', 'X',\n'DCB', 'X',\n'DCE', 'X',\n\ -'DCF', 'X',\n'DCG', 'X',\n'DCH', 'X',\n'DCI', 'I',\ -\n'DCL', 'X',\n'DCM', 'X',\n'DCP', 'X',\n'DCS', 'X\ -',\n'DCT', 'X',\n'DCY', 'C',\n'DCZ', 'X',\n'DDA', \ -'X',\n'DDB', 'X',\n'DDC', 'X',\n'DDF', 'X',\n'DDG'\ -, 'X',\n'DDH', 'X',\n'DDL', 'X',\n'DDM', 'X',\n'DD\ -O', 'L',\n'DDP', 'X',\n'DDQ', 'X',\n'DDT', 'Y',\n'\ -DDU', 'X',\n'DEA', 'X',\n'DEB', 'X',\n'DEC', 'X',\\ -n'DEF', 'X',\n'DEL', 'X',\n'DEM', 'X',\n'DEN', 'X'\ -,\n'DEP', 'X',\n'DEQ', 'X',\n'DES', 'X',\n'DET', '\ -X',\n'DFC', 'X',\n'DFG', 'X',\n'DFI', 'X',\n'DFL',\ - 'X',\n'DFO', 'X',\n'DFP', 'X',\n'DFR', 'X',\n'DFT\ -', 'X',\n'DFV', 'X',\n'DFX', 'X',\n'DG2', 'X',\n'D\ -G3', 'X',\n'DG6', 'X',\n'DGA', 'X',\n'DGD', 'X',\n\ -'DGG', 'X',\n'DGL', 'E',\n'DGN', 'Q',\n'DGP', 'X',\ -\n'DGT', 'X',\n'DGX', 'X',\n'DH2', 'X',\n'DHA', 'A\ -',\n'DHB', 'X',\n'DHC', 'X',\n'DHD', 'X',\n'DHE', \ -'X',\n'DHF', 'X',\n'DHG', 'X',\n'DHI', 'H',\n'DHL'\ -, 'X',\n'DHM', 'X',\n'DHN', 'V',\n'DHP', 'X',\n'DH\ -Q', 'X',\n'DHR', 'X',\n'DHS', 'X',\n'DHT', 'X',\n'\ -DHU', 'X',\n'DHY', 'X',\n'DHZ', 'X',\n'DI2', 'X',\\ -n'DI3', 'G',\n'DI4', 'X',\n'DI5', 'X',\n'DIA', 'X'\ -,\n'DIC', 'X',\n'DIF', 'X',\n'DIG', 'X',\n'DII', '\ -X',\n'DIL', 'I',\n'DIM', 'X',\n'DIO', 'X',\n'DIP',\ - 'X',\n'DIQ', 'X',\n'DIS', 'X',\n'DIT', 'X',\n'DIV\ -', 'V',\n'DIX', 'X',\n'DIY', 'X',\n'DKA', 'X',\n'D\ -LA', 'X',\n'DLE', 'L',\n'DLF', 'X',\n'DLS', 'K',\n\ -'DLY', 'K',\n'DM1', 'X',\n'DM2', 'X',\n'DM3', 'X',\ -\n'DM4', 'X',\n'DM5', 'X',\n'DM6', 'X',\n'DM7', 'X\ -',\n'DM8', 'X',\n'DM9', 'X',\n'DMA', 'X',\n'DMB', \ -'X',\n'DMC', 'X',\n'DMD', 'X',\n'DME', 'X',\n'DMF'\ -, 'X',\n'DMG', 'G',\n'DMH', 'N',\n'DMI', 'X',\n'DM\ -J', 'X',\n'DML', 'X',\n'DMM', 'X',\n'DMN', 'X',\n'\ -DMO', 'X',\n'DMP', 'X',\n'DMQ', 'X',\n'DMR', 'X',\\ -n'DMS', 'X',\n'DMT', 'X',\n'DMV', 'X',\n'DMY', 'X'\ -,\n'DNC', 'X',\n'DND', 'X',\n'DNH', 'X',\n'DNJ', '\ -X',\n'DNN', 'X',\n'DNP', 'X',\n'DNQ', 'X',\n'DNR',\ - 'X',\n'DO2', 'X',\n'DO3', 'X',\n'DOA', 'X',\n'DOB\ -', 'X',\n'DOC', 'X',\n'DOH', 'D',\n'DOM', 'X',\n'D\ -OS', 'X',\n'DOX', 'X',\n'DP5', 'X',\n'DP7', 'X',\n\ -'DPA', 'X',\n'DPC', 'X',\n'DPD', 'X',\n'DPE', 'X',\ -\n'DPG', 'X',\n'DPH', 'F',\n'DPM', 'X',\n'DPN', 'F\ -',\n'DPO', 'X',\n'DPP', 'X',\n'DPR', 'P',\n'DPS', \ -'X',\n'DPT', 'X',\n'DPX', 'X',\n'DPY', 'X',\n'DPZ'\ -, 'X',\n'DQH', 'X',\n'DQN', 'X',\n'DR1', 'X',\n'DR\ -B', 'X',\n'DRC', 'X',\n'DRI', 'X',\n'DRP', 'X',\n'\ -DRT', 'X',\n'DRU', 'X',\n'DSA', 'X',\n'DSB', 'X',\\ -n'DSC', 'X',\n'DSD', 'X',\n'DSE', 'S',\n'DSI', 'X'\ -,\n'DSN', 'S',\n'DSP', 'D',\n'DSR', 'X',\n'DSS', '\ -X',\n'DSX', 'X',\n'DSY', 'X',\n'DTB', 'X',\n'DTD',\ - 'X',\n'DTH', 'T',\n'DTN', 'X',\n'DTO', 'X',\n'DTP\ -', 'X',\n'DTQ', 'X',\n'DTR', 'W',\n'DTT', 'X',\n'D\ -TY', 'Y',\n'DUD', 'X',\n'DUO', 'X',\n'DUR', 'X',\n\ -'DUT', 'X',\n'DVA', 'V',\n'DVR', 'X',\n'DX9', 'X',\ -\n'DXA', 'X',\n'DXB', 'X',\n'DXC', 'X',\n'DXG', 'X\ -',\n'DXX', 'X',\n'DZF', 'X',\n'E09', 'X',\n'E20', \ -'X',\n'E2P', 'X',\n'E3G', 'X',\n'E4N', 'X',\n'E4P'\ -, 'X',\n'E64', 'X',\n'E6C', 'X',\n'E96', 'X',\n'E9\ -7', 'X',\n'EA2', 'X',\n'EAA', 'X',\n'EAP', 'X',\n'\ -EBP', 'X',\n'EBW', 'X',\n'ECO', 'X',\n'EDA', 'X',\\ -n'EDC', 'X',\n'EDE', 'X',\n'EDO', 'X',\n'EDR', 'X'\ -,\n'EEB', 'X',\n'EEE', 'X',\n'EFC', 'X',\n'EFZ', '\ -X',\n'EG1', 'X',\n'EG2', 'X',\n'EG3', 'X',\n'EGC',\ - 'X',\n'EGL', 'X',\n'EHP', 'A',\n'EIC', 'X',\n'EJT\ -', 'X',\n'ELA', 'X',\n'EMB', 'X',\n'EMC', 'X',\n'E\ -MD', 'X',\n'EMM', 'X',\n'EMO', 'X',\n'EMP', 'X',\n\ -'EMR', 'X',\n'ENA', 'X',\n'ENC', 'X',\n'ENH', 'X',\ -\n'ENO', 'X',\n'ENP', 'X',\n'EOA', 'X',\n'EOH', 'X\ -',\n'EOT', 'X',\n'EOX', 'X',\n'EPA', 'X',\n'EPE', \ -'X',\n'EPH', 'X',\n'EPI', 'X',\n'EPN', 'X',\n'EPO'\ -, 'X',\n'EPT', 'X',\n'EPU', 'X',\n'EPX', 'X',\n'EP\ -Y', 'X',\n'EQI', 'X',\n'EQP', 'X',\n'EQU', 'X',\n'\ -ERG', 'X',\n'ERI', 'X',\n'ERY', 'X',\n'ESC', 'X',\\ -n'ESD', 'X',\n'ESI', 'X',\n'ESO', 'X',\n'ESP', 'X'\ -,\n'EST', 'X',\n'ESX', 'X',\n'ETA', 'X',\n'ETC', '\ -X',\n'ETD', 'X',\n'ETF', 'X',\n'ETH', 'X',\n'ETI',\ - 'X',\n'ETN', 'X',\n'ETO', 'X',\n'ETP', 'X',\n'ETR\ -', 'X',\n'ETS', 'X',\n'ETY', 'X',\n'EU3', 'X',\n'E\ -UG', 'X',\n'EYS', 'C',\n'F09', 'X',\n'F2B', 'X',\n\ -'F3S', 'X',\n'F42', 'X',\n'F43', 'X',\n'F4S', 'X',\ -\n'F6B', 'X',\n'F6P', 'X',\n'F89', 'X',\n'FA1', 'X\ -',\n'FA5', 'F',\n'FAA', 'X',\n'FAB', 'X',\n'FAC', \ -'X',\n'FAD', 'X',\n'FAF', 'X',\n'FAG', 'X',\n'FAM'\ -, 'X',\n'FAR', 'X',\n'FAS', 'X',\n'FAT', 'X',\n'FB\ -A', 'X',\n'FBE', 'X',\n'FBI', 'X',\n'FBP', 'X',\n'\ -FBQ', 'X',\n'FBS', 'X',\n'FBT', 'X',\n'FBU', 'X',\\ -n'FCA', 'X',\n'FCB', 'X',\n'FCI', 'X',\n'FCN', 'X'\ -,\n'FCO', 'X',\n'FCR', 'X',\n'FCT', 'X',\n'FCX', '\ -X',\n'FCY', 'C',\n'FD1', 'F',\n'FD2', 'F',\n'FD3',\ - 'F',\n'FD4', 'F',\n'FDA', 'X',\n'FDC', 'X',\n'FDI\ -', 'X',\n'FDP', 'X',\n'FDS', 'X',\n'FE2', 'X',\n'F\ -EA', 'X',\n'FEL', 'X',\n'FEM', 'X',\n'FEN', 'X',\n\ -'FEO', 'X',\n'FEP', 'X',\n'FER', 'X',\n'FES', 'X',\ -\n'FFB', 'X',\n'FFC', 'X',\n'FFF', 'X',\n'FFO', 'X\ -',\n'FGL', 'G',\n'FHB', 'X',\n'FHC', 'X',\n'FHP', \ -'X',\n'FHU', 'X',\n'FID', 'X',\n'FII', 'X',\n'FIP'\ -, 'X',\n'FK5', 'X',\n'FKA', 'X',\n'FKI', 'X',\n'FK\ -P', 'X',\n'FL2', 'X',\n'FL9', 'X',\n'FLA', 'A',\n'\ -FLC', 'X',\n'FLD', 'X',\n'FLE', 'L',\n'FLF', 'X',\\ -n'FLO', 'X',\n'FLP', 'X',\n'FLT', 'Y',\n'FLU', 'X'\ -,\n'FLX', 'X',\n'FM1', 'X',\n'FM2', 'X',\n'FMA', '\ -X',\n'FMB', 'X',\n'FMC', 'X',\n'FME', 'M',\n'FMN',\ - 'X',\n'FMP', 'X',\n'FMR', 'X',\n'FMS', 'X',\n'FMT\ -', 'X',\n'FNE', 'X',\n'FNP', 'X',\n'FNS', 'X',\n'F\ -OC', 'X',\n'FOE', 'X',\n'FOG', 'F',\n'FOH', 'X',\n\ -'FOK', 'X',\n'FOL', 'X',\n'FON', 'X',\n'FOP', 'X',\ -\n'FOR', 'X',\n'FOS', 'X',\n'FPA', 'X',\n'FPC', 'X\ -',\n'FPI', 'X',\n'FPO', 'X',\n'FPP', 'X',\n'FPT', \ -'X',\n'FQP', 'X',\n'FRA', 'X',\n'FRD', 'F',\n'FRU'\ -, 'X',\n'FS3', 'X',\n'FS4', 'X',\n'FSB', 'X',\n'FS\ -O', 'X',\n'FSX', 'X',\n'FTC', 'X',\n'FTP', 'X',\n'\ -FTR', 'W',\n'FTT', 'X',\n'FTY', 'Y',\n'FUA', 'X',\\ -n'FUC', 'X',\n'FUM', 'X',\n'FUP', 'X',\n'FVF', 'X'\ -,\n'FXP', 'X',\n'FXV', 'X',\n'FYA', 'F',\n'G16', '\ -X',\n'G1P', 'X',\n'G20', 'X',\n'G21', 'X',\n'G23',\ - 'X',\n'G26', 'X',\n'G28', 'X',\n'G2F', 'X',\n'G37\ -', 'X',\n'G39', 'X',\n'G3H', 'X',\n'G3P', 'X',\n'G\ -4D', 'X',\n'G6D', 'X',\n'G6P', 'X',\n'G6Q', 'X',\n\ -'G7M', 'X',\n'GA2', 'X',\n'GAA', 'X',\n'GAB', 'X',\ -\n'GAC', 'X',\n'GAI', 'X',\n'GAL', 'X',\n'GAM', 'X\ -',\n'GAN', 'X',\n'GAO', 'X',\n'GAP', 'X',\n'GAR', \ -'G',\n'GAS', 'X',\n'GAT', 'X',\n'GBC', 'X',\n'GBI'\ -, 'X',\n'GBP', 'X',\n'GBS', 'X',\n'GBX', 'X',\n'GC\ -4', 'X',\n'GCA', 'X',\n'GCD', 'X',\n'GCG', 'G',\n'\ -GCH', 'G',\n'GCK', 'X',\n'GCL', 'X',\n'GCM', 'X',\\ -n'GCN', 'X',\n'GCO', 'X',\n'GCP', 'X',\n'GCR', 'X'\ -,\n'GCS', 'X',\n'GCU', 'X',\n'GD3', 'X',\n'GDB', '\ -X',\n'GDM', 'X',\n'GDN', 'X',\n'GDP', 'X',\n'GDS',\ - 'X',\n'GDU', 'X',\n'GE1', 'X',\n'GE2', 'X',\n'GE3\ -', 'X',\n'GEA', 'X',\n'GEL', 'X',\n'GEM', 'X',\n'G\ -EN', 'X',\n'GEP', 'X',\n'GER', 'X',\n'GFP', 'X',\n\ -'GGB', 'X',\n'GGL', 'E',\n'GGP', 'X',\n'GHP', 'G',\ -\n'GIP', 'X',\n'GIS', 'X',\n'GKR', 'X',\n'GL2', 'X\ -',\n'GL3', 'G',\n'GL4', 'X',\n'GL5', 'X',\n'GL7', \ -'X',\n'GL9', 'X',\n'GLA', 'X',\n'GLB', 'X',\n'GLC'\ -, 'X',\n'GLD', 'X',\n'GLE', 'X',\n'GLF', 'X',\n'GL\ -G', 'X',\n'GLH', 'Q',\n'GLI', 'X',\n'GLL', 'X',\n'\ -GLM', 'G',\n'GLN', 'Q',\n'GLO', 'X',\n'GLP', 'X',\\ -n'GLR', 'X',\n'GLS', 'X',\n'GLT', 'X',\n'GLU', 'E'\ -,\n'GLV', 'X',\n'GLW', 'X',\n'GLY', 'G',\n'GLZ', '\ -X',\n'GM1', 'X',\n'GMA', 'X',\n'GMC', 'X',\n'GMH',\ - 'X',\n'GMP', 'X',\n'GMY', 'X',\n'GN7', 'X',\n'GNA\ -', 'X',\n'GNB', 'X',\n'GNH', 'X',\n'GNP', 'X',\n'G\ -NT', 'X',\n'GOA', 'X',\n'GOL', 'X',\n'GOX', 'X',\n\ -'GP1', 'X',\n'GP3', 'X',\n'GP4', 'X',\n'GP6', 'X',\ -\n'GP8', 'X',\n'GPB', 'E',\n'GPC', 'X',\n'GPE', 'X\ -',\n'GPG', 'X',\n'GPI', 'X',\n'GPJ', 'X',\n'GPL', \ -'K',\n'GPM', 'X',\n'GPN', 'G',\n'GPP', 'X',\n'GPR'\ -, 'X',\n'GPS', 'X',\n'GPX', 'X',\n'GR1', 'X',\n'GR\ -3', 'X',\n'GR4', 'X',\n'GSA', 'X',\n'GSB', 'X',\n'\ -GSC', 'G',\n'GSE', 'S',\n'GSH', 'X',\n'GSP', 'X',\\ -n'GSR', 'X',\n'GSS', 'X',\n'GT9', 'C',\n'GTA', 'X'\ -,\n'GTB', 'X',\n'GTD', 'X',\n'GTE', 'X',\n'GTH', '\ -T',\n'GTN', 'X',\n'GTO', 'X',\n'GTP', 'X',\n'GTR',\ - 'X',\n'GTS', 'X',\n'GTT', 'X',\n'GTX', 'X',\n'GTZ\ -', 'X',\n'GU7', 'X',\n'GUA', 'X',\n'GUD', 'X',\n'G\ -UM', 'X',\n'GUN', 'X',\n'GUP', 'X',\n'GUR', 'X',\n\ -'GW3', 'X',\n'GZZ', 'X',\n'H2B', 'X',\n'H2P', 'H',\ -\n'H2S', 'X',\n'H2U', 'X',\n'H4B', 'X',\n'H5M', 'P\ -',\n'H5P', 'X',\n'HAA', 'X',\n'HAB', 'X',\n'HAC', \ -'A',\n'HAD', 'X',\n'HAE', 'X',\n'HAG', 'X',\n'HAI'\ -, 'X',\n'HAM', 'X',\n'HAP', 'X',\n'HAQ', 'X',\n'HA\ -R', 'R',\n'HAS', 'X',\n'HAV', 'V',\n'HAX', 'X',\n'\ -HAZ', 'X',\n'HBA', 'X',\n'HBC', 'X',\n'HBD', 'X',\\ -n'HBI', 'X',\n'HBO', 'X',\n'HBU', 'X',\n'HBY', 'X'\ -,\n'HC0', 'X',\n'HC1', 'X',\n'HC4', 'X',\n'HCA', '\ -X',\n'HCC', 'X',\n'HCI', 'X',\n'HCS', 'X',\n'HDA',\ - 'X',\n'HDD', 'X',\n'HDF', 'X',\n'HDN', 'X',\n'HDS\ -', 'X',\n'HDZ', 'X',\n'HE1', 'X',\n'HE6', 'X',\n'H\ -EA', 'X',\n'HEB', 'X',\n'HEC', 'X',\n'HED', 'X',\n\ -'HEE', 'X',\n'HEF', 'X',\n'HEG', 'X',\n'HEM', 'X',\ -\n'HEN', 'X',\n'HEO', 'X',\n'HEP', 'X',\n'HEU', 'X\ -',\n'HEV', 'X',\n'HEX', 'X',\n'HEZ', 'X',\n'HF1', \ -'X',\n'HFA', 'X',\n'HFP', 'X',\n'HGA', 'Q',\n'HGB'\ -, 'X',\n'HGC', 'X',\n'HGI', 'X',\n'HGU', 'X',\n'HH\ -O', 'X',\n'HHP', 'X',\n'HIB', 'X',\n'HIC', 'H',\n'\ -HII', 'X',\n'HIN', 'X',\n'HIO', 'X',\n'HIP', 'H',\\ -n'HIS', 'H',\n'HLE', 'X',\n'HLT', 'X',\n'HMA', 'A'\ -,\n'HMB', 'X',\n'HMC', 'X',\n'HMD', 'X',\n'HMF', '\ -A',\n'HMG', 'X',\n'HMH', 'X',\n'HMI', 'L',\n'HMM',\ - 'X',\n'HMN', 'X',\n'HMO', 'X',\n'HMP', 'X',\n'HMR\ -', 'R',\n'HNI', 'X',\n'HNP', 'X',\n'HOA', 'X',\n'H\ -OE', 'X',\n'HOH', 'X',\n'HOM', 'X',\n'HOP', 'X',\n\ -'HOQ', 'X',\n'HP1', 'A',\n'HP2', 'A',\n'HP3', 'X',\ -\n'HPA', 'X',\n'HPB', 'X',\n'HPC', 'X',\n'HPD', 'X\ -',\n'HPE', 'A',\n'HPG', 'X',\n'HPH', 'F',\n'HPP', \ -'X',\n'HPQ', 'F',\n'HPR', 'X',\n'HPT', 'X',\n'HPY'\ -, 'X',\n'HQO', 'X',\n'HQQ', 'X',\n'HQU', 'X',\n'HR\ -G', 'R',\n'HRI', 'X',\n'HSA', 'X',\n'HSE', 'S',\n'\ -HSF', 'X',\n'HSM', 'X',\n'HSO', 'H',\n'HSP', 'X',\\ -n'HT1', 'X',\n'HT2', 'X',\n'HTA', 'X',\n'HTL', 'X'\ -,\n'HTO', 'X',\n'HTP', 'X',\n'HTR', 'W',\n'HUP', '\ -X',\n'HUX', 'X',\n'HV5', 'A',\n'HV7', 'X',\n'HV8',\ - 'X',\n'HXA', 'X',\n'HXC', 'X',\n'HXP', 'X',\n'HY1\ -', 'X',\n'HYA', 'X',\n'HYB', 'X',\n'HYD', 'X',\n'H\ -YG', 'X',\n'HYP', 'P',\n'I06', 'X',\n'I10', 'X',\n\ -'I11', 'X',\n'I17', 'X',\n'I2P', 'X',\n'I3N', 'X',\ -\n'I3P', 'X',\n'I40', 'X',\n'I48', 'X',\n'I4B', 'X\ -',\n'I52', 'X',\n'I5P', 'X',\n'I84', 'G',\n'IAG', \ -'G',\n'IAS', 'X',\n'IB2', 'X',\n'IBB', 'X',\n'IBP'\ -, 'X',\n'IBR', 'X',\n'IBS', 'X',\n'IBZ', 'X',\n'IC\ -1', 'X',\n'ICA', 'X',\n'ICI', 'X',\n'ICL', 'X',\n'\ -ICP', 'X',\n'ICT', 'X',\n'ICU', 'X',\n'ID2', 'X',\\ -n'IDC', 'X',\n'IDG', 'X',\n'IDH', 'X',\n'IDM', 'X'\ -,\n'IDO', 'X',\n'IDP', 'X',\n'IDR', 'X',\n'IDS', '\ -X',\n'IDT', 'X',\n'IDU', 'X',\n'IFG', 'X',\n'IFP',\ - 'X',\n'IGL', 'X',\n'IGN', 'X',\n'IGP', 'X',\n'IGU\ -', 'X',\n'IH1', 'X',\n'IH2', 'X',\n'IH3', 'X',\n'I\ -HB', 'X',\n'IHN', 'X',\n'IHP', 'X',\n'IIC', 'X',\n\ -'IIL', 'I',\n'IIP', 'X',\n'IK2', 'X',\n'IKT', 'X',\ -\n'ILA', 'I',\n'ILE', 'I',\n'ILG', 'X',\n'ILO', 'X\ -',\n'ILX', 'I',\n'IM1', 'X',\n'IM2', 'X',\n'IMC', \ -'X',\n'IMD', 'X',\n'IME', 'X',\n'IMF', 'X',\n'IMG'\ -, 'X',\n'IMH', 'X',\n'IMI', 'X',\n'IML', 'I',\n'IM\ -M', 'X',\n'IMN', 'X',\n'IMO', 'X',\n'IMP', 'X',\n'\ -IMR', 'X',\n'IMU', 'X',\n'IN0', 'D',\n'IN1', 'R',\\ -n'IN2', 'K',\n'IN3', 'L',\n'IN4', 'X',\n'IN5', 'A'\ -,\n'IN6', 'L',\n'IN7', 'X',\n'IN8', 'X',\n'IN9', '\ -X',\n'INA', 'L',\n'INB', 'X',\n'INC', 'X',\n'IND',\ - 'X',\n'INE', 'X',\n'INF', 'F',\n'ING', 'F',\n'INH\ -', 'R',\n'INI', 'X',\n'INJ', 'X',\n'INK', 'X',\n'I\ -NL', 'X',\n'INM', 'X',\n'INN', 'A',\n'INO', 'X',\n\ -'INP', 'X',\n'INQ', 'X',\n'INR', 'X',\n'INS', 'X',\ -\n'INT', 'V',\n'INU', 'X',\n'INV', 'X',\n'INW', 'X\ -',\n'INX', 'X',\n'INY', 'X',\n'INZ', 'X',\n'IOA', \ -'X',\n'IOB', 'X',\n'IOC', 'X',\n'IOD', 'X',\n'IOE'\ -, 'X',\n'IOF', 'X',\n'IOH', 'X',\n'IOL', 'X',\n'IO\ -P', 'X',\n'IP1', 'X',\n'IP2', 'X',\n'IP3', 'X',\n'\ -IP4', 'X',\n'IPA', 'X',\n'IPB', 'X',\n'IPD', 'X',\\ -n'IPG', 'G',\n'IPH', 'X',\n'IPL', 'X',\n'IPM', 'X'\ -,\n'IPN', 'X',\n'IPO', 'F',\n'IPP', 'X',\n'IPS', '\ -X',\n'IPT', 'X',\n'IPU', 'X',\n'IPY', 'A',\n'IQB',\ - 'X',\n'IQP', 'X',\n'IQS', 'X',\n'IR3', 'X',\n'IRI\ -', 'X',\n'IRP', 'X',\n'ISA', 'X',\n'ISF', 'X',\n'I\ -SO', 'X',\n'ISP', 'X',\n'ISQ', 'X',\n'ISU', 'X',\n\ -'ITM', 'X',\n'ITP', 'X',\n'ITR', 'W',\n'ITS', 'X',\ -\n'ITU', 'X',\n'IU5', 'X',\n'IUM', 'X',\n'IUR', 'X\ -',\n'IVA', 'X',\n'IYG', 'G',\n'IYR', 'Y',\n'J77', \ -'X',\n'J78', 'X',\n'J80', 'X',\n'JE2', 'X',\n'JEN'\ -, 'X',\n'JST', 'X',\n'K21', 'X',\n'KAH', 'X',\n'KA\ -I', 'X',\n'KAM', 'X',\n'KAN', 'X',\n'KAP', 'X',\n'\ -KCP', 'X',\n'KCX', 'K',\n'KDO', 'X',\n'KEF', 'X',\\ -n'KET', 'X',\n'KGR', 'X',\n'KH1', 'X',\n'KIF', 'X'\ -,\n'KIV', 'V',\n'KNI', 'X',\n'KPH', 'K',\n'KTH', '\ -X',\n'KTN', 'X',\n'KTP', 'X',\n'KWT', 'X',\n'L04',\ - 'X',\n'L1P', 'X',\n'L24', 'E',\n'L2P', 'X',\n'L34\ -', 'E',\n'L37', 'E',\n'L3P', 'X',\n'L4P', 'X',\n'L\ -75', 'X',\n'LAC', 'X',\n'LAD', 'X',\n'LAK', 'X',\n\ -'LAM', 'X',\n'LAR', 'X',\n'LAT', 'X',\n'LAX', 'X',\ -\n'LCO', 'X',\n'LCP', 'X',\n'LCS', 'X',\n'LDA', 'X\ -',\n'LDO', 'L',\n'LDP', 'X',\n'LEA', 'X',\n'LEO', \ -'X',\n'LEU', 'L',\n'LG2', 'X',\n'LG6', 'X',\n'LGC'\ -, 'X',\n'LGP', 'X',\n'LHG', 'X',\n'LHY', 'F',\n'LI\ -1', 'X',\n'LIG', 'X',\n'LIL', 'X',\n'LIM', 'X',\n'\ -LIN', 'X',\n'LIO', 'X',\n'LIP', 'X',\n'LLA', 'X',\\ -n'LLP', 'K',\n'LLY', 'K',\n'LMG', 'X',\n'LML', 'X'\ -,\n'LMT', 'X',\n'LMU', 'X',\n'LMZ', 'X',\n'LNK', '\ -X',\n'LNL', 'X',\n'LNO', 'X',\n'LOF', 'X',\n'LOL',\ - 'L',\n'LOM', 'X',\n'LOR', 'X',\n'LOS', 'X',\n'LOV\ -', 'L',\n'LOX', 'X',\n'LP1', 'X',\n'LP2', 'R',\n'L\ -PA', 'X',\n'LPC', 'X',\n'LPF', 'X',\n'LPL', 'X',\n\ -'LPM', 'X',\n'LPP', 'X',\n'LRB', 'X',\n'LRU', 'X',\ -\n'LS1', 'X',\n'LS2', 'X',\n'LS3', 'X',\n'LS4', 'X\ -',\n'LS5', 'X',\n'LTA', 'X',\n'LTL', 'X',\n'LTR', \ -'W',\n'LUM', 'X',\n'LVS', 'L',\n'LXC', 'X',\n'LY2'\ -, 'X',\n'LY3', 'X',\n'LYA', 'X',\n'LYB', 'X',\n'LY\ -C', 'X',\n'LYD', 'X',\n'LYM', 'K',\n'LYN', 'X',\n'\ -LYS', 'K',\n'LYT', 'X',\n'LYW', 'X',\n'LYZ', 'K',\\ -n'M1A', 'X',\n'M1G', 'X',\n'M2G', 'X',\n'M3L', 'K'\ -,\n'M6P', 'X',\n'M6T', 'X',\n'M7G', 'X',\n'MA1', '\ -X',\n'MA2', 'X',\n'MA3', 'X',\n'MA4', 'X',\n'MA6',\ - 'X',\n'MAA', 'A',\n'MAB', 'X',\n'MAC', 'X',\n'MAE\ -', 'X',\n'MAG', 'X',\n'MAH', 'X',\n'MAI', 'R',\n'M\ -AK', 'X',\n'MAL', 'X',\n'MAM', 'X',\n'MAN', 'X',\n\ -'MAO', 'X',\n'MAP', 'X',\n'MAR', 'X',\n'MAS', 'X',\ -\n'MAT', 'X',\n'MAU', 'X',\n'MAZ', 'X',\n'MBA', 'X\ -',\n'MBD', 'X',\n'MBG', 'X',\n'MBH', 'X',\n'MBN', \ -'X',\n'MBO', 'X',\n'MBR', 'X',\n'MBS', 'X',\n'MBV'\ -, 'X',\n'MBZ', 'X',\n'MCA', 'X',\n'MCD', 'X',\n'MC\ -E', 'X',\n'MCG', 'G',\n'MCI', 'X',\n'MCN', 'X',\n'\ -MCP', 'X',\n'MCT', 'X',\n'MCY', 'X',\n'MD2', 'X',\\ -n'MDA', 'X',\n'MDC', 'X',\n'MDG', 'X',\n'MDH', 'X'\ -,\n'MDL', 'X',\n'MDM', 'X',\n'MDN', 'X',\n'MDP', '\ -X',\n'ME6', 'X',\n'MEB', 'X',\n'MEC', 'X',\n'MEL',\ - 'X',\n'MEN', 'N',\n'MEP', 'X',\n'MER', 'X',\n'MES\ -', 'X',\n'MET', 'M',\n'MEV', 'X',\n'MF2', 'X',\n'M\ -F3', 'M',\n'MFB', 'X',\n'MFD', 'X',\n'MFU', 'X',\n\ -'MG7', 'X',\n'MGA', 'X',\n'MGB', 'X',\n'MGD', 'X',\ -\n'MGG', 'R',\n'MGL', 'X',\n'MGN', 'Q',\n'MGO', 'X\ -',\n'MGP', 'X',\n'MGR', 'X',\n'MGS', 'X',\n'MGT', \ -'X',\n'MGU', 'X',\n'MGY', 'G',\n'MHB', 'X',\n'MHF'\ -, 'X',\n'MHL', 'L',\n'MHM', 'X',\n'MHO', 'M',\n'MH\ -S', 'H',\n'MHZ', 'X',\n'MIA', 'X',\n'MIC', 'X',\n'\ -MID', 'X',\n'MIL', 'X',\n'MIM', 'X',\n'MIN', 'G',\\ -n'MIP', 'X',\n'MIS', 'S',\n'MIT', 'X',\n'MJI', 'X'\ -,\n'MK1', 'X',\n'MKC', 'X',\n'MLA', 'X',\n'MLC', '\ -X',\n'MLE', 'L',\n'MLN', 'X',\n'MLT', 'X',\n'MLY',\ - 'K',\n'MLZ', 'K',\n'MM3', 'X',\n'MM4', 'X',\n'MMA\ -', 'X',\n'MMC', 'X',\n'MME', 'M',\n'MMO', 'R',\n'M\ -MP', 'X',\n'MMQ', 'X',\n'MMT', 'X',\n'MN1', 'X',\n\ -'MN2', 'X',\n'MN3', 'X',\n'MN5', 'X',\n'MN7', 'X',\ -\n'MN8', 'X',\n'MNA', 'X',\n'MNB', 'X',\n'MNC', 'X\ -',\n'MNG', 'X',\n'MNL', 'L',\n'MNO', 'X',\n'MNP', \ -'X',\n'MNQ', 'X',\n'MNS', 'X',\n'MNT', 'X',\n'MNV'\ -, 'V',\n'MO1', 'X',\n'MO2', 'X',\n'MO3', 'X',\n'MO\ -4', 'X',\n'MO5', 'X',\n'MO6', 'X',\n'MOA', 'X',\n'\ -MOB', 'X',\n'MOC', 'X',\n'MOE', 'X',\n'MOG', 'X',\\ -n'MOH', 'X',\n'MOL', 'X',\n'MOO', 'X',\n'MOP', 'X'\ -,\n'MOR', 'X',\n'MOS', 'X',\n'MOT', 'X',\n'MOX', '\ -X',\n'MP1', 'X',\n'MP3', 'X',\n'MPA', 'X',\n'MPB',\ - 'X',\n'MPC', 'X',\n'MPD', 'X',\n'MPG', 'X',\n'MPH\ -', 'M',\n'MPI', 'X',\n'MPJ', 'M',\n'MPL', 'X',\n'M\ -PN', 'X',\n'MPO', 'X',\n'MPP', 'X',\n'MPQ', 'G',\n\ -'MPR', 'X',\n'MPS', 'X',\n'MQ0', 'X',\n'MQ7', 'X',\ -\n'MQ8', 'X',\n'MQ9', 'X',\n'MQI', 'X',\n'MR2', 'X\ -',\n'MRC', 'X',\n'MRM', 'X',\n'MRP', 'X',\n'MS2', \ -'X',\n'MSA', 'X',\n'MSB', 'X',\n'MSD', 'X',\n'MSE'\ -, 'M',\n'MSF', 'X',\n'MSI', 'X',\n'MSO', 'M',\n'MS\ -Q', 'X',\n'MST', 'X',\n'MSU', 'X',\n'MTA', 'X',\n'\ -MTB', 'X',\n'MTC', 'X',\n'MTD', 'X',\n'MTE', 'X',\\ -n'MTF', 'X',\n'MTG', 'X',\n'MTO', 'X',\n'MTS', 'X'\ -,\n'MTT', 'X',\n'MTX', 'X',\n'MTY', 'Y',\n'MUG', '\ -X',\n'MUP', 'X',\n'MUR', 'X',\n'MVA', 'V',\n'MW1',\ - 'X',\n'MW2', 'X',\n'MXA', 'X',\n'MXY', 'X',\n'MYA\ -', 'X',\n'MYC', 'X',\n'MYG', 'X',\n'MYR', 'X',\n'M\ -YS', 'X',\n'MYT', 'X',\n'MZM', 'X',\n'N1T', 'X',\n\ -'N25', 'X',\n'N2B', 'X',\n'N3T', 'X',\n'N4B', 'X',\ -\n'NA2', 'X',\n'NA5', 'X',\n'NA6', 'X',\n'NAA', 'X\ -',\n'NAB', 'X',\n'NAC', 'X',\n'NAD', 'X',\n'NAE', \ -'X',\n'NAF', 'X',\n'NAG', 'X',\n'NAH', 'X',\n'NAI'\ -, 'X',\n'NAL', 'A',\n'NAM', 'A',\n'NAN', 'X',\n'NA\ -O', 'X',\n'NAP', 'X',\n'NAQ', 'X',\n'NAR', 'X',\n'\ -NAS', 'X',\n'NAU', 'X',\n'NAV', 'X',\n'NAW', 'X',\\ -n'NAX', 'X',\n'NAY', 'X',\n'NBA', 'X',\n'NBD', 'X'\ -,\n'NBE', 'X',\n'NBG', 'X',\n'NBN', 'X',\n'NBP', '\ -X',\n'NBS', 'X',\n'NBU', 'X',\n'NCA', 'X',\n'NCB',\ - 'A',\n'NCD', 'X',\n'NCH', 'X',\n'NCM', 'X',\n'NCN\ -', 'X',\n'NCO', 'X',\n'NCR', 'X',\n'NCS', 'X',\n'N\ -D4', 'X',\n'NDA', 'X',\n'NDC', 'X',\n'NDD', 'X',\n\ -'NDO', 'X',\n'NDP', 'X',\n'NDT', 'X',\n'NEA', 'X',\ -\n'NEB', 'X',\n'NED', 'X',\n'NEM', 'H',\n'NEN', 'X\ -',\n'NEO', 'X',\n'NEP', 'H',\n'NEQ', 'X',\n'NES', \ -'X',\n'NET', 'X',\n'NEV', 'X',\n'NFA', 'F',\n'NFE'\ -, 'X',\n'NFG', 'X',\n'NFP', 'X',\n'NFS', 'X',\n'NG\ -6', 'X',\n'NGA', 'X',\n'NGL', 'X',\n'NGM', 'X',\n'\ -NGO', 'X',\n'NGP', 'X',\n'NGT', 'X',\n'NGU', 'X',\\ -n'NH2', 'X',\n'NH3', 'X',\n'NH4', 'X',\n'NHD', 'X'\ -,\n'NHE', 'X',\n'NHM', 'X',\n'NHP', 'X',\n'NHR', '\ -X',\n'NHS', 'X',\n'NI1', 'X',\n'NI2', 'X',\n'NIC',\ - 'X',\n'NID', 'X',\n'NIK', 'X',\n'NIO', 'X',\n'NIP\ -', 'X',\n'NIT', 'X',\n'NIU', 'X',\n'NIY', 'Y',\n'N\ -LA', 'X',\n'NLE', 'L',\n'NLG', 'X',\n'NLN', 'L',\n\ -'NLP', 'L',\n'NM1', 'X',\n'NMA', 'A',\n'NMB', 'X',\ -\n'NMC', 'G',\n'NMD', 'X',\n'NME', 'X',\n'NMN', 'X\ -',\n'NMO', 'X',\n'NMQ', 'X',\n'NMX', 'X',\n'NMY', \ -'X',\n'NNH', 'R',\n'NNO', 'X',\n'NO2', 'X',\n'NO3'\ -, 'X',\n'NOA', 'X',\n'NOD', 'X',\n'NOJ', 'X',\n'NO\ -N', 'X',\n'NOP', 'X',\n'NOR', 'X',\n'NOS', 'X',\n'\ -NOV', 'X',\n'NOX', 'X',\n'NP3', 'X',\n'NPA', 'X',\\ -n'NPC', 'X',\n'NPD', 'X',\n'NPE', 'X',\n'NPF', 'X'\ -,\n'NPH', 'C',\n'NPI', 'X',\n'NPL', 'X',\n'NPN', '\ -X',\n'NPO', 'X',\n'NPP', 'X',\n'NPT', 'X',\n'NPY',\ - 'X',\n'NRG', 'R',\n'NRI', 'X',\n'NS1', 'X',\n'NS5\ -', 'X',\n'NSP', 'X',\n'NTA', 'X',\n'NTB', 'X',\n'N\ -TC', 'X',\n'NTH', 'X',\n'NTM', 'X',\n'NTP', 'X',\n\ -'NTS', 'X',\n'NTU', 'X',\n'NTZ', 'X',\n'NU1', 'X',\ -\n'NVA', 'V',\n'NVI', 'X',\n'NVP', 'X',\n'NW1', 'X\ -',\n'NYP', 'X',\n'O4M', 'X',\n'OAA', 'X',\n'OAI', \ -'X',\n'OAP', 'X',\n'OAR', 'X',\n'OAS', 'S',\n'OBA'\ -, 'X',\n'OBN', 'X',\n'OC1', 'X',\n'OC2', 'X',\n'OC\ -3', 'X',\n'OC4', 'X',\n'OC5', 'X',\n'OC6', 'X',\n'\ -OC7', 'X',\n'OCL', 'X',\n'OCM', 'X',\n'OCN', 'X',\\ -n'OCO', 'X',\n'OCP', 'X',\n'OCS', 'C',\n'OCT', 'X'\ -,\n'OCV', 'K',\n'OCY', 'C',\n'ODA', 'X',\n'ODS', '\ -X',\n'OES', 'X',\n'OET', 'X',\n'OF1', 'X',\n'OF2',\ - 'X',\n'OF3', 'X',\n'OFL', 'X',\n'OFO', 'X',\n'OHE\ -', 'X',\n'OHO', 'X',\n'OHT', 'X',\n'OIC', 'X',\n'O\ -IP', 'X',\n'OKA', 'X',\n'OLA', 'X',\n'OLE', 'X',\n\ -'OLI', 'X',\n'OLO', 'X',\n'OMB', 'X',\n'OMC', 'X',\ -\n'OMD', 'X',\n'OME', 'X',\n'OMG', 'X',\n'OMP', 'X\ -',\n'OMT', 'M',\n'OMU', 'X',\n'ONE', 'X',\n'ONL', \ -'L',\n'ONP', 'X',\n'OPA', 'X',\n'OPD', 'X',\n'OPE'\ -, 'X',\n'OPG', 'X',\n'OPH', 'X',\n'OPN', 'X',\n'OP\ -P', 'X',\n'OPR', 'R',\n'ORN', 'X',\n'ORO', 'X',\n'\ -ORP', 'X',\n'OSB', 'X',\n'OSS', 'X',\n'OTA', 'X',\\ -n'OTB', 'X',\n'OTE', 'X',\n'OTG', 'X',\n'OUT', 'X'\ -,\n'OVA', 'X',\n'OWQ', 'X',\n'OXA', 'X',\n'OXE', '\ -X',\n'OXI', 'X',\n'OXL', 'X',\n'OXM', 'X',\n'OXN',\ - 'X',\n'OXO', 'X',\n'OXP', 'X',\n'OXS', 'X',\n'OXY\ -', 'X',\n'P11', 'A',\n'P24', 'X',\n'P28', 'X',\n'P\ -2P', 'X',\n'P2U', 'X',\n'P3M', 'X',\n'P4C', 'X',\n\ -'P4P', 'X',\n'P5P', 'X',\n'P6G', 'X',\n'PA1', 'X',\ -\n'PA2', 'X',\n'PA3', 'X',\n'PA4', 'X',\n'PA5', 'X\ -',\n'PAA', 'X',\n'PAB', 'X',\n'PAC', 'X',\n'PAD', \ -'X',\n'PAE', 'X',\n'PAG', 'X',\n'PAH', 'X',\n'PAI'\ -, 'X',\n'PAL', 'D',\n'PAM', 'X',\n'PAN', 'X',\n'PA\ -O', 'X',\n'PAP', 'A',\n'PAQ', 'F',\n'PAR', 'X',\n'\ -PAS', 'X',\n'PAT', 'W',\n'PBA', 'X',\n'PBB', 'X',\\ -n'PBC', 'X',\n'PBF', 'F',\n'PBG', 'X',\n'PBI', 'X'\ -,\n'PBM', 'X',\n'PBN', 'X',\n'PBP', 'X',\n'PBR', '\ -X',\n'PBZ', 'X',\n'PC2', 'X',\n'PCA', 'E',\n'PCB',\ - 'X',\n'PCD', 'X',\n'PCE', 'X',\n'PCG', 'X',\n'PCH\ -', 'X',\n'PCL', 'X',\n'PCM', 'X',\n'PCP', 'X',\n'P\ -CR', 'X',\n'PCS', 'X',\n'PCU', 'X',\n'PCV', 'X',\n\ -'PCY', 'X',\n'PD1', 'X',\n'PDA', 'X',\n'PDC', 'X',\ -\n'PDD', 'A',\n'PDE', 'A',\n'PDI', 'X',\n'PDL', 'A\ -',\n'PDN', 'X',\n'PDO', 'X',\n'PDP', 'X',\n'PDT', \ -'X',\n'PDU', 'X',\n'PE2', 'X',\n'PE6', 'X',\n'PEA'\ -, 'X',\n'PEB', 'X',\n'PEC', 'X',\n'PED', 'X',\n'PE\ -E', 'X',\n'PEF', 'X',\n'PEG', 'X',\n'PEL', 'X',\n'\ -PEO', 'X',\n'PEP', 'X',\n'PEQ', 'X',\n'PER', 'X',\\ -n'PET', 'X',\n'PFB', 'X',\n'PFC', 'X',\n'PFG', 'X'\ -,\n'PFL', 'X',\n'PFM', 'X',\n'PFZ', 'X',\n'PG4', '\ -X',\n'PG5', 'X',\n'PG6', 'X',\n'PGA', 'X',\n'PGC',\ - 'X',\n'PGD', 'X',\n'PGE', 'X',\n'PGG', 'G',\n'PGH\ -', 'X',\n'PGL', 'X',\n'PGO', 'X',\n'PGP', 'X',\n'P\ -GQ', 'X',\n'PGR', 'X',\n'PGS', 'X',\n'PGU', 'X',\n\ -'PGX', 'X',\n'PGY', 'G',\n'PH1', 'X',\n'PH2', 'X',\ -\n'PH3', 'X',\n'PHA', 'F',\n'PHB', 'X',\n'PHC', 'X\ -',\n'PHD', 'X',\n'PHE', 'F',\n'PHG', 'X',\n'PHH', \ -'X',\n'PHI', 'F',\n'PHL', 'F',\n'PHM', 'X',\n'PHN'\ -, 'X',\n'PHO', 'X',\n'PHP', 'X',\n'PHQ', 'X',\n'PH\ -S', 'H',\n'PHT', 'X',\n'PHW', 'P',\n'PHY', 'X',\n'\ -PI1', 'X',\n'PI2', 'X',\n'PI3', 'X',\n'PI4', 'X',\\ -n'PI5', 'X',\n'PI6', 'X',\n'PI7', 'X',\n'PI8', 'X'\ -,\n'PI9', 'X',\n'PIA', 'X',\n'PIB', 'X',\n'PIC', '\ -X',\n'PID', 'X',\n'PIG', 'X',\n'PIH', 'X',\n'PIM',\ - 'X',\n'PIN', 'X',\n'PIO', 'X',\n'PIP', 'X',\n'PIQ\ -', 'X',\n'PIR', 'X',\n'PIV', 'X',\n'PKF', 'X',\n'P\ -L1', 'X',\n'PL9', 'X',\n'PLA', 'D',\n'PLC', 'X',\n\ -'PLE', 'L',\n'PLG', 'G',\n'PLH', 'X',\n'PLM', 'X',\ -\n'PLP', 'X',\n'PLS', 'S',\n'PLT', 'W',\n'PLU', 'L\ -',\n'PLY', 'X',\n'PMA', 'X',\n'PMB', 'X',\n'PMC', \ -'X',\n'PME', 'F',\n'PML', 'X',\n'PMM', 'X',\n'PMO'\ -, 'X',\n'PMP', 'X',\n'PMS', 'X',\n'PMY', 'X',\n'PN\ -2', 'X',\n'PNA', 'X',\n'PNB', 'X',\n'PNC', 'G',\n'\ -PND', 'X',\n'PNE', 'A',\n'PNF', 'X',\n'PNG', 'X',\\ -n'PNI', 'X',\n'PNL', 'X',\n'PNM', 'X',\n'PNN', 'X'\ -,\n'PNO', 'X',\n'PNP', 'X',\n'PNQ', 'X',\n'PNS', '\ -X',\n'PNT', 'X',\n'PNU', 'X',\n'PO2', 'X',\n'PO4',\ - 'X',\n'POB', 'X',\n'POC', 'X',\n'POL', 'X',\n'POM\ -', 'P',\n'PON', 'X',\n'POP', 'X',\n'POR', 'X',\n'P\ -OS', 'X',\n'PP1', 'X',\n'PP2', 'X',\n'PP3', 'A',\n\ -'PP4', 'X',\n'PP5', 'X',\n'PP6', 'X',\n'PP7', 'X',\ -\n'PP8', 'N',\n'PP9', 'X',\n'PPB', 'X',\n'PPC', 'X\ -',\n'PPD', 'X',\n'PPE', 'E',\n'PPG', 'X',\n'PPH', \ -'F',\n'PPI', 'X',\n'PPJ', 'V',\n'PPL', 'X',\n'PPM'\ -, 'X',\n'PPN', 'A',\n'PPO', 'X',\n'PPP', 'X',\n'PP\ -Q', 'X',\n'PPR', 'X',\n'PPS', 'X',\n'PPT', 'X',\n'\ -PPU', 'X',\n'PPX', 'F',\n'PPY', 'X',\n'PPZ', 'X',\\ -n'PQ0', 'X',\n'PQN', 'X',\n'PQQ', 'X',\n'PR1', 'X'\ -,\n'PR2', 'X',\n'PR3', 'X',\n'PRA', 'X',\n'PRB', '\ -X',\n'PRC', 'X',\n'PRD', 'X',\n'PRE', 'X',\n'PRF',\ - 'X',\n'PRH', 'X',\n'PRI', 'P',\n'PRL', 'X',\n'PRN\ -', 'X',\n'PRO', 'P',\n'PRP', 'X',\n'PRR', 'A',\n'P\ -RS', 'P',\n'PRZ', 'X',\n'PS0', 'X',\n'PSA', 'X',\n\ -'PSD', 'X',\n'PSE', 'X',\n'PSF', 'S',\n'PSG', 'X',\ -\n'PSI', 'X',\n'PSO', 'X',\n'PSQ', 'X',\n'PSS', 'X\ -',\n'PST', 'X',\n'PSU', 'X',\n'PT1', 'X',\n'PT3', \ -'X',\n'PTA', 'X',\n'PTC', 'X',\n'PTD', 'X',\n'PTE'\ -, 'X',\n'PTH', 'Y',\n'PTL', 'X',\n'PTM', 'Y',\n'PT\ -N', 'X',\n'PTO', 'X',\n'PTP', 'X',\n'PTR', 'Y',\n'\ -PTS', 'X',\n'PTT', 'X',\n'PTU', 'X',\n'PTY', 'X',\\ -n'PUA', 'X',\n'PUB', 'X',\n'PUR', 'X',\n'PUT', 'X'\ -,\n'PVA', 'X',\n'PVB', 'X',\n'PVH', 'H',\n'PVL', '\ -X',\n'PXA', 'X',\n'PXF', 'X',\n'PXG', 'X',\n'PXP',\ - 'X',\n'PXY', 'X',\n'PXZ', 'X',\n'PY2', 'X',\n'PY4\ -', 'X',\n'PY5', 'X',\n'PY6', 'X',\n'PYA', 'A',\n'P\ -YC', 'X',\n'PYD', 'X',\n'PYE', 'X',\n'PYL', 'X',\n\ -'PYM', 'X',\n'PYO', 'X',\n'PYP', 'X',\n'PYQ', 'X',\ -\n'PYR', 'X',\n'PYS', 'X',\n'PYT', 'X',\n'PYX', 'X\ -',\n'PYY', 'X',\n'PYZ', 'X',\n'PZQ', 'X',\n'Q82', \ -'X',\n'QNC', 'X',\n'QND', 'X',\n'QSI', 'Q',\n'QTR'\ -, 'X',\n'QUA', 'X',\n'QUE', 'X',\n'QUI', 'X',\n'QU\ -O', 'X',\n'R11', 'X',\n'R12', 'X',\n'R13', 'X',\n'\ -R18', 'X',\n'R1P', 'X',\n'R56', 'X',\n'R5P', 'X',\\ -n'RA2', 'X',\n'RAD', 'X',\n'RAI', 'X',\n'RAL', 'X'\ -,\n'RAM', 'X',\n'RAN', 'X',\n'RAP', 'X',\n'RBF', '\ -X',\n'RBU', 'X',\n'RCA', 'X',\n'RCL', 'X',\n'RCO',\ - 'X',\n'RDC', 'X',\n'RDF', 'W',\n'RE9', 'X',\n'REA\ -', 'X',\n'RED', 'K',\n'REO', 'X',\n'REP', 'X',\n'R\ -ET', 'X',\n'RFA', 'X',\n'RFB', 'X',\n'RFL', 'X',\n\ -'RFP', 'X',\n'RG1', 'X',\n'RGS', 'X',\n'RH1', 'X',\ -\n'RHA', 'X',\n'RHC', 'X',\n'RHD', 'X',\n'RHM', 'X\ -',\n'RHO', 'X',\n'RHQ', 'X',\n'RHS', 'X',\n'RIA', \ -'X',\n'RIB', 'X',\n'RIC', 'X',\n'RIF', 'X',\n'RIN'\ -, 'X',\n'RIP', 'X',\n'RIT', 'X',\n'RMB', 'X',\n'RM\ -N', 'X',\n'RMP', 'X',\n'RNG', 'X',\n'RNS', 'X',\n'\ -RNT', 'X',\n'RO2', 'X',\n'RO4', 'X',\n'ROC', 'N',\\ -n'ROI', 'X',\n'ROM', 'X',\n'RON', 'V',\n'ROP', 'X'\ -,\n'ROS', 'X',\n'ROX', 'X',\n'RPA', 'X',\n'RPD', '\ -X',\n'RPH', 'X',\n'RPL', 'X',\n'RPP', 'X',\n'RPR',\ - 'X',\n'RPX', 'X',\n'RQ3', 'X',\n'RR1', 'X',\n'RR6\ -', 'X',\n'RRS', 'X',\n'RS1', 'X',\n'RS2', 'X',\n'R\ -S7', 'X',\n'RSS', 'X',\n'RTA', 'X',\n'RTB', 'X',\n\ -'RTC', 'X',\n'RTL', 'X',\n'RUB', 'X',\n'RUN', 'X',\ -\n'RWJ', 'X',\n'RXP', 'X',\n'S02', 'X',\n'S11', 'X\ -',\n'S1H', 'S',\n'S27', 'X',\n'S2C', 'C',\n'S3P', \ -'X',\n'S4U', 'X',\n'S57', 'X',\n'S58', 'X',\n'S5H'\ -, 'X',\n'S6G', 'X',\n'S80', 'X',\n'SAA', 'X',\n'SA\ -B', 'X',\n'SAC', 'S',\n'SAD', 'X',\n'SAE', 'X',\n'\ -SAF', 'X',\n'SAH', 'C',\n'SAI', 'C',\n'SAL', 'X',\\ -n'SAM', 'M',\n'SAN', 'X',\n'SAP', 'X',\n'SAR', 'X'\ -,\n'SAS', 'X',\n'SB1', 'X',\n'SB2', 'X',\n'SB3', '\ -X',\n'SB4', 'X',\n'SB5', 'X',\n'SB6', 'X',\n'SBA',\ - 'L',\n'SBB', 'X',\n'SBD', 'A',\n'SBI', 'X',\n'SBL\ -', 'A',\n'SBN', 'X',\n'SBO', 'X',\n'SBR', 'X',\n'S\ -BS', 'X',\n'SBT', 'X',\n'SBU', 'X',\n'SBX', 'X',\n\ -'SC4', 'X',\n'SCA', 'X',\n'SCC', 'X',\n'SCD', 'X',\ -\n'SCH', 'C',\n'SCI', 'X',\n'SCL', 'X',\n'SCM', 'X\ -',\n'SCN', 'X',\n'SCO', 'X',\n'SCP', 'S',\n'SCR', \ -'X',\n'SCS', 'X',\n'SCV', 'C',\n'SCY', 'C',\n'SD8'\ -, 'X',\n'SDK', 'X',\n'SDZ', 'X',\n'SE4', 'X',\n'SE\ -A', 'X',\n'SEB', 'S',\n'SEC', 'X',\n'SEG', 'A',\n'\ -SEI', 'X',\n'SEL', 'S',\n'SEM', 'X',\n'SEO', 'X',\\ -n'SEP', 'S',\n'SER', 'S',\n'SES', 'X',\n'SET', 'S'\ -,\n'SEU', 'X',\n'SF4', 'X',\n'SFG', 'X',\n'SFN', '\ -X',\n'SFO', 'X',\n'SGA', 'X',\n'SGC', 'X',\n'SGL',\ - 'X',\n'SGM', 'X',\n'SGN', 'X',\n'SGP', 'X',\n'SHA\ -', 'X',\n'SHC', 'X',\n'SHF', 'X',\n'SHH', 'X',\n'S\ -HP', 'G',\n'SHR', 'E',\n'SHT', 'T',\n'SHU', 'X',\n\ -'SI2', 'X',\n'SIA', 'X',\n'SIF', 'X',\n'SIG', 'X',\ -\n'SIH', 'X',\n'SIM', 'X',\n'SIN', 'X',\n'SKD', 'X\ -',\n'SKF', 'X',\n'SLB', 'X',\n'SLE', 'X',\n'SLZ', \ -'K',\n'SMA', 'X',\n'SMC', 'C',\n'SME', 'M',\n'SML'\ -, 'X',\n'SMM', 'M',\n'SMN', 'X',\n'SMP', 'X',\n'SM\ -S', 'X',\n'SN1', 'X',\n'SN6', 'X',\n'SN7', 'X',\n'\ -SNC', 'C',\n'SNN', 'X',\n'SNP', 'X',\n'SO1', 'X',\\ -n'SO2', 'X',\n'SO3', 'X',\n'SO4', 'X',\n'SOA', 'X'\ -,\n'SOC', 'C',\n'SOM', 'X',\n'SOR', 'X',\n'SOT', '\ -X',\n'SOX', 'X',\n'SPA', 'X',\n'SPB', 'X',\n'SPC',\ - 'X',\n'SPD', 'X',\n'SPE', 'X',\n'SPG', 'X',\n'SPH\ -', 'X',\n'SPI', 'X',\n'SPK', 'X',\n'SPM', 'X',\n'S\ -PN', 'X',\n'SPO', 'X',\n'SPP', 'X',\n'SPS', 'X',\n\ -'SPY', 'X',\n'SQU', 'X',\n'SRA', 'X',\n'SRB', 'X',\ -\n'SRD', 'X',\n'SRL', 'X',\n'SRM', 'X',\n'SRS', 'X\ -',\n'SRY', 'X',\n'SSA', 'X',\n'SSB', 'X',\n'SSG', \ -'X',\n'SSP', 'X',\n'ST1', 'X',\n'ST2', 'X',\n'ST3'\ -, 'X',\n'ST4', 'X',\n'ST5', 'X',\n'ST6', 'X',\n'ST\ -A', 'X',\n'STB', 'X',\n'STE', 'X',\n'STG', 'X',\n'\ -STI', 'X',\n'STL', 'X',\n'STN', 'X',\n'STO', 'X',\\ -n'STP', 'X',\n'STR', 'X',\n'STU', 'X',\n'STY', 'Y'\ -,\n'SU1', 'X',\n'SU2', 'X',\n'SUC', 'X',\n'SUI', '\ -X',\n'SUL', 'X',\n'SUR', 'X',\n'SVA', 'S',\n'SWA',\ - 'X',\n'T16', 'X',\n'T19', 'X',\n'T23', 'X',\n'T29\ -', 'X',\n'T33', 'X',\n'T3P', 'X',\n'T42', 'A',\n'T\ -44', 'X',\n'T5A', 'X',\n'T6A', 'T',\n'T6P', 'X',\n\ -'T80', 'X',\n'T87', 'X',\n'TA1', 'X',\n'TAA', 'X',\ -\n'TAB', 'X',\n'TAC', 'X',\n'TAD', 'X',\n'TAF', 'X\ -',\n'TAM', 'X',\n'TAP', 'X',\n'TAR', 'X',\n'TAS', \ -'X',\n'TAU', 'X',\n'TAX', 'X',\n'TAZ', 'X',\n'TB9'\ -, 'X',\n'TBA', 'X',\n'TBD', 'X',\n'TBG', 'G',\n'TB\ -H', 'X',\n'TBM', 'T',\n'TBO', 'X',\n'TBP', 'X',\n'\ -TBR', 'X',\n'TBS', 'X',\n'TBT', 'X',\n'TBU', 'X',\\ -n'TBZ', 'X',\n'TC4', 'X',\n'TCA', 'X',\n'TCB', 'X'\ -,\n'TCH', 'X',\n'TCK', 'X',\n'TCL', 'X',\n'TCM', '\ -X',\n'TCN', 'X',\n'TCP', 'X',\n'TCR', 'W',\n'TCS',\ - 'X',\n'TCZ', 'X',\n'TDA', 'X',\n'TDB', 'X',\n'TDG\ -', 'X',\n'TDP', 'X',\n'TDR', 'X',\n'TDX', 'X',\n'T\ -EA', 'X',\n'TEM', 'X',\n'TEN', 'X',\n'TEO', 'X',\n\ -'TEP', 'X',\n'TER', 'X',\n'TES', 'X',\n'TET', 'X',\ -\n'TFA', 'X',\n'TFB', 'X',\n'TFH', 'X',\n'TFI', 'X\ -',\n'TFK', 'X',\n'TFP', 'X',\n'THA', 'X',\n'THB', \ -'X',\n'THC', 'T',\n'THD', 'X',\n'THE', 'X',\n'THF'\ -, 'X',\n'THJ', 'X',\n'THK', 'X',\n'THM', 'X',\n'TH\ -N', 'X',\n'THO', 'T',\n'THP', 'X',\n'THQ', 'X',\n'\ -THR', 'T',\n'THS', 'X',\n'THT', 'X',\n'THU', 'X',\\ -n'THX', 'X',\n'THZ', 'X',\n'TI1', 'X',\n'TI2', 'X'\ -,\n'TI3', 'P',\n'TIA', 'X',\n'TIH', 'A',\n'TK4', '\ -X',\n'TLA', 'X',\n'TLC', 'X',\n'TLM', 'X',\n'TLN',\ - 'X',\n'TLX', 'X',\n'TM5', 'X',\n'TM6', 'X',\n'TMA\ -', 'X',\n'TMB', 'T',\n'TMC', 'X',\n'TMD', 'T',\n'T\ -ME', 'X',\n'TMF', 'X',\n'TML', 'K',\n'TMM', 'X',\n\ -'TMN', 'X',\n'TMP', 'X',\n'TMQ', 'X',\n'TMR', 'X',\ -\n'TMT', 'X',\n'TMZ', 'X',\n'TNB', 'C',\n'TND', 'X\ -',\n'TNK', 'X',\n'TNP', 'X',\n'TNT', 'X',\n'TOA', \ -'X',\n'TOB', 'X',\n'TOC', 'X',\n'TOL', 'X',\n'TOP'\ -, 'X',\n'TOS', 'X',\n'TOT', 'X',\n'TP1', 'G',\n'TP\ -2', 'P',\n'TP3', 'E',\n'TP4', 'E',\n'TP7', 'T',\n'\ -TPA', 'X',\n'TPE', 'X',\n'TPF', 'X',\n'TPI', 'X',\\ -n'TPL', 'W',\n'TPM', 'X',\n'TPN', 'G',\n'TPO', 'T'\ -,\n'TPP', 'X',\n'TPQ', 'A',\n'TPR', 'P',\n'TPS', '\ -X',\n'TPT', 'X',\n'TPV', 'X',\n'TPX', 'X',\n'TPY',\ - 'X',\n'TQ3', 'X',\n'TQ4', 'X',\n'TQ5', 'X',\n'TQ6\ -', 'X',\n'TR1', 'X',\n'TRA', 'X',\n'TRB', 'X',\n'T\ -RC', 'X',\n'TRD', 'X',\n'TRE', 'X',\n'TRF', 'W',\n\ -'TRG', 'K',\n'TRH', 'X',\n'TRI', 'X',\n'TRJ', 'X',\ -\n'TRM', 'X',\n'TRN', 'W',\n'TRO', 'W',\n'TRP', 'W\ -',\n'TRQ', 'X',\n'TRS', 'X',\n'TRX', 'W',\n'TRZ', \ -'X',\n'TS2', 'X',\n'TS3', 'X',\n'TS4', 'X',\n'TS5'\ -, 'X',\n'TSA', 'X',\n'TSB', 'X',\n'TSI', 'X',\n'TS\ -M', 'X',\n'TSN', 'X',\n'TSP', 'X',\n'TSU', 'X',\n'\ -TTA', 'X',\n'TTE', 'X',\n'TTN', 'X',\n'TTO', 'X',\\ -n'TTP', 'X',\n'TTX', 'X',\n'TXL', 'X',\n'TYA', 'Y'\ -,\n'TYB', 'Y',\n'TYD', 'X',\n'TYI', 'Y',\n'TYL', '\ -X',\n'TYM', 'W',\n'TYN', 'Y',\n'TYQ', 'Y',\n'TYR',\ - 'Y',\n'TYS', 'Y',\n'TYV', 'X',\n'TYY', 'A',\n'TZB\ -', 'X',\n'TZC', 'X',\n'TZE', 'X',\n'TZL', 'X',\n'T\ -ZO', 'X',\n'TZP', 'X',\n'U01', 'X',\n'U02', 'X',\n\ -'U03', 'X',\n'U04', 'X',\n'U05', 'X',\n'U0E', 'X',\ -\n'U10', 'X',\n'U18', 'X',\n'U2G', 'X',\n'U3P', 'X\ -',\n'U49', 'X',\n'U55', 'X',\n'U5P', 'X',\n'U66', \ -'X',\n'U89', 'X',\n'U8U', 'X',\n'UAA', 'X',\n'UAG'\ -, 'A',\n'UAP', 'X',\n'UAR', 'X',\n'UC1', 'X',\n'UC\ -2', 'X',\n'UC3', 'X',\n'UC4', 'X',\n'UD1', 'X',\n'\ -UD2', 'X',\n'UDP', 'X',\n'UDX', 'X',\n'UFG', 'X',\\ -n'UFM', 'X',\n'UFP', 'X',\n'UGA', 'X',\n'UIN', 'X'\ -,\n'UKP', 'A',\n'UM3', 'X',\n'UMA', 'A',\n'UMG', '\ -X',\n'UMP', 'X',\n'UNA', 'X',\n'UND', 'X',\n'UNI',\ - 'X',\n'UNK', 'X',\n'UNN', 'X',\n'UNX', 'X',\n'UP5\ -', 'X',\n'UP6', 'X',\n'UPA', 'X',\n'UPF', 'X',\n'U\ -PG', 'X',\n'UPP', 'X',\n'UQ1', 'X',\n'UQ2', 'X',\n\ -'UQ6', 'X',\n'UR2', 'X',\n'URA', 'X',\n'URE', 'X',\ -\n'URF', 'X',\n'URI', 'X',\n'URS', 'X',\n'UTP', 'X\ -',\n'UVC', 'X',\n'UVW', 'X',\n'V35', 'X',\n'V36', \ -'X',\n'V4O', 'X',\n'V7O', 'X',\n'VAA', 'V',\n'VAC'\ -, 'X',\n'VAD', 'V',\n'VAF', 'V',\n'VAG', 'X',\n'VA\ -L', 'V',\n'VAN', 'X',\n'VAS', 'X',\n'VAX', 'X',\n'\ -VDX', 'X',\n'VDY', 'X',\n'VG1', 'X',\n'VIB', 'X',\\ -n'VIR', 'X',\n'VIT', 'X',\n'VK3', 'X',\n'VO3', 'X'\ -,\n'VO4', 'X',\n'VS1', 'F',\n'VS2', 'F',\n'VS3', '\ -F',\n'VS4', 'F',\n'VXA', 'X',\n'W01', 'X',\n'W02',\ - 'X',\n'W03', 'X',\n'W11', 'X',\n'W33', 'X',\n'W35\ -', 'X',\n'W42', 'X',\n'W43', 'X',\n'W54', 'X',\n'W\ -56', 'X',\n'W59', 'X',\n'W71', 'X',\n'W84', 'X',\n\ -'W8R', 'X',\n'W91', 'X',\n'WAY', 'X',\n'WCC', 'X',\ -\n'WO2', 'X',\n'WO4', 'X',\n'WRB', 'X',\n'WRR', 'X\ -',\n'WRS', 'X',\n'WW7', 'X',\n'X2F', 'X',\n'X7O', \ -'X',\n'XAA', 'X',\n'XAN', 'X',\n'XAO', 'X',\n'XBB'\ -, 'X',\n'XBP', 'X',\n'XDN', 'X',\n'XDP', 'X',\n'XI\ -F', 'X',\n'XIM', 'X',\n'XK2', 'X',\n'XL1', 'X',\n'\ -XLS', 'X',\n'XMP', 'X',\n'XN1', 'X',\n'XN2', 'X',\\ -n'XN3', 'X',\n'XUL', 'X',\n'XV6', 'X',\n'XYD', 'X'\ -,\n'XYH', 'X',\n'XYL', 'X',\n'XYP', 'X',\n'XYS', '\ -X',\n'YOF', 'Y',\n'YRR', 'X',\n'YT3', 'X',\n'YZ9',\ - 'X',\n'Z34', 'G',\n'Z5A', 'X',\n'ZAF', 'X',\n'ZAP\ -', 'X',\n'ZEB', 'X',\n'ZEN', 'X',\n'ZES', 'X',\n'Z\ -ID', 'X',\n'ZMR', 'X',\n'ZN3', 'X',\n'ZNH', 'X',\n\ -'ZNO', 'X',\n'ZO3', 'X',\n'ZPR', 'P',\n'ZRA', 'A',\ -\n'ZST', 'X',\n'ZYA', 'A',\n\n\n'ASN','N');\n} \n \ - \n\n\n","use Cwd;\nuse File::Path;\nuse FileHand\ -le;\nuse strict;\n\n\nour (%MODE, %PG, %ENV_SET, %\ -SUPPORTED_OS);\n\n\nour $EXIT_SUCCESS=0;\nour $EXI\ -T_FAILURE=1;\nour $INTERNET=0;\n\nour $CP=\"cp \";\ - #was causing a crash on MacOSX\nour $SILENT=\">/d\ -ev/null 2>/dev/null\";\nour $WEB_BASE=\"http://www\ -.tcoffee.org\";\nour $TCLINKDB_ADDRESS=\"$WEB_BASE\ -/Resources/tclinkdb.txt\";\nour $OS=get_os();\nour\ - $ROOT=&get_root();\nour $CD=cwd();\nour $CDIR=$CD\ -;\nour $HOME=$ENV{'HOME'};\nour $CXX=\"g++\";\nour\ - $CXXFLAGS=\"\";\n\nour $CPP=\"g++\";\nour $CPPFLA\ -GS=\"\";\n\nour $CC=\"gcc\";\nour $CFLAGS=\"\";\n\\ -nour $FC=\"f77\";\nour $FFLAGS=\"\";\n\nmy $instal\ -l=\"all\";\nmy $default_update_action=\"no_update\\ -";\nmy @required_applications=(\"wget_OR_curl\");\\ -nmy @smode=(\"all\", \"clean\", \"install\");\n\n&\ -initialize_PG();\n\nmy $cl=join( \" \", @ARGV);\ni\ -f ($#ARGV==-1 || ($cl=~/-h/) ||($cl=~/-H/) )\n {\\ -n print \"\\n!!!!!!! ./install t_coffee \ - --> installs t_coffee only\";\n print \\ -"\\n!!!!!!! ./install all --> in\ -stalls all the modes [mcoffee, expresso, psicoffee\ -,rcoffee..]\";\n print \"\\n!!!!!!! ./install \ - [mcoffee|rcoffee|..] --> installs the specified m\ -ode\";\n print \"\\n!!!!!!! ./install -h \ - --> print usage\\n\\n\";\n if ( \ -$#ARGV==-1){exit ($EXIT_FAILURE);}\n }\n \ni\ -f (($cl=~/-h/) ||($cl=~/-H/) )\n {\n my $m;\n \ - print \"\\n\\n!!!!!!! advanced mode\\n\";\n \ -foreach $m ((keys (%MODE)),@smode)\n {\n prin\ -t \"!!!!!!! ./install $m\\n\";\n }\n \ - \n print \"!!!!!!! ./install [target:package|m\ -ode|] [-update|-force|-exec=dir|-dis=dir|-root|-tc\ -linkdb=file|-] [CC=|FCC=|CXX=|CFLAGS=|CXXFLAGS=]\\\ -n\";\n print \"!!!!!!! ./install clean [remo\ -ves all executables]\\n\";\n print \"!!!!!!! ./\ -install [optional:target] -update [u\ -pdates package already installed]\\n\";\n print\ - \"!!!!!!! ./install [optional:target] -force \ - [Forces recompilation over everything]\\ -\n\";\n \n print \"!!!!!!! ./install [option\ -al:target] -root [You are running \ -as root]\\n\";\n print \"!!!!!!! ./install [opt\ -ional:target] -exec=/foo/bar/ [address for t\ -he T-Coffee executable]\\n\";\n print \"!!!!!!!\ - ./install [optional:target] -dis=/foo/bar/ \ - [Address where distributions should be stored]\\n\ -\";\n print \"!!!!!!! ./install [optional:targe\ -t] -tclinkdb=foo|update [file containing all the \ -packages to be installed]\\n\";\n print \"!!!!!\ -!! ./install [optional:target] -tclinkdb=foo|updat\ -e [file containing all the packages to be install\ -ed]\\n\";\n print \"!!!!!!! ./install [optional\ -:target] -clean [clean everything]\\ -\n\";\n print \"!!!!!!! ./install [optional:tar\ -get] -plugins [plugins directory]\\n\\ -";\n print \"!!!!!!! mode:\";\n foreach $m (\ -keys(%MODE)){print \"$m \";}\n print \"\\n\";\n\ - print \"!!!!!!! Packages:\";\n foreach $m (\ -keys (%PG)){print \"$m \";}\n print \"\\n\";\n \ - \n print \"\\n\\n\";\n exit ($EXIT_FAILUR\ -E);\n }\n\n\n\nmy (@argl)=($cl=~/(\\S+=[^=]+)\\s\\ -\w+=/g);\npush (@argl, ($cl=~/(\\S+=[^=]+\\S)\\s*$\ -/g));\n\nforeach $a (@argl)\n {\n if ( ($cl=~/\ -CXX=(.*)/)){$CXX=$1;}\n if ( ($cl=~/-CC=(.*)/ \ - )){$CC=$1;}\n if ( ($cl=~/-FC=(.*)/ )){$FC\ -=$1;}\n if ( ($cl=~/-CFLAGS=(.*)/)){$CFLAGS=$1;\ -}\n if ( ($cl=~/-CXXFLAGS=(.*)/)){$CXXFLAGS=$1;\ -}\n }\nour ($ROOT_INSTALL, $NO_QUESTION, $default\ -_update_action,$BINARIES_ONLY,$force, $default_upd\ -ate_action, $INSTALL_DIR, $PLUGINS_DIR, $DISTRIBUT\ -IONS,$tclinkdb, $proxy, $clean);\nif ( ($cl=~/-roo\ -t/)){$ROOT_INSTALL=1;}\nif ( ($cl=~/-no_question/)\ -){$NO_QUESTION=1;}\nif ( ($cl=~/-update/)){$defaul\ -t_update_action=\"update\";}\nif ( ($cl=~/-binarie\ -s/)){$BINARIES_ONLY=1;}\nif ( ($cl=~/-force/)){$fo\ -rce=1;$default_update_action=\"update\"}\nif ( ($c\ -l=~/-exec=\\s*(\\S+)/)){$INSTALL_DIR=$1;}\nif ( ($\ -cl=~/-plugins=\\s*(\\S+)/)){$PLUGINS_DIR=$1;}\nif \ -( ($cl=~/-dis=\\s*(\\S+)/)){$DISTRIBUTIONS=$1;}\n\\ -nif ( ($cl=~/-tclinkdb=\\s*(\\S+)/)){$tclinkdb=$1;\ -}\nif ( ($cl=~/-proxy=\\s*(\\S+)/)){$proxy=$1;}\ni\ -f ( ($cl=~/-clean/)){$clean=1;}\nif ($tclinkdb){&u\ -pdate_tclinkdb ($tclinkdb);}\n\nour $TCDIR=$ENV{DI\ -R_4_TCOFFEE};\nour $TCCACHE=$ENV{CACHE_4_TCOFFEE};\ -\nour $TCTMP=$ENV{CACHE_4_TCOFFEE};\nour $TCM=$ENV\ -{MCOFFEE_4_TCOFFEE};\nour $TCMETHODS=$ENV{METHODS_\ -4_TCOFFEE};\nour $TCPLUGINS=$ENV{PLUGINS_4_TCOFFEE\ -};\nour $PLUGINS_DIR=\"\";\nour $INSTALL_DIR=\"\";\ -\n\n&add_dir ($TCDIR=\"$HOME/.t_coffee\");\n&add_d\ -ir ($TCCACHE=\"$TCDIR/cache\");\n&add_dir ($TCTMP=\ -\"$CDIR/tmp\");\n&add_dir ($TCM=\"$TCDIR/mcoffee\"\ -);\n&add_dir ($TCMETHODS=\"$TCDIR/methods\");\n&ad\ -d_dir ($TCPLUGINS=\"$TCDIR/plugins/$OS\");\n\n\nou\ -r $BASE=\"$CD/bin\";\nour $BIN=\"$BASE/binaries/$O\ -S\";\nour $DOWNLOAD_DIR=\"$BASE/download\";\nour $\ -DOWNLOAD_FILE=\"$DOWNLOAD_DIR/files\";\nour $TMP=\\ -"$BASE/tmp\";\n\n&add_dir($BASE);\n&add_dir($BIN);\ -\n&add_dir($DOWNLOAD_DIR);\n&add_dir($DOWNLOAD_FIL\ -E);\nif (!$DISTRIBUTIONS){$DISTRIBUTIONS=\"$DOWNLO\ -AD_DIR/distributions\";}\n&add_dir ($DISTRIBUTIONS\ -);\n&add_dir ($TMP);\n\n\nif (!$PLUGINS_DIR && \ -!$ROOT_INSTALL){$PLUGINS_DIR=$TCPLUGINS;}\nelsif (\ -!$PLUGINS_DIR && $ROOT_INSTALL){$PLUGINS_DIR=\"/u\ -sr/local/bin/\";}\n\nif (!$INSTALL_DIR && !$ROO\ -T_INSTALL){$INSTALL_DIR=\"$HOME/bin/\";mkpath ($IN\ -STALL_DIR);}\nelsif (!$INSTALL_DIR && $ROOT_INSTA\ -LL){$INSTALL_DIR=\"/usr/local/bin/\";}\n\nif (-d \\ -"mcoffee\"){`cp mcoffee/* $TCM`;}\n\n\nour $ENV_FI\ -LE=\"$TCDIR/t_coffee_env\";\n&env_file2putenv ($EN\ -V_FILE);\n&set_proxy($proxy);\nmy ($target, $p, $r\ -);\n$target=$p;\n\nforeach $p ( ((keys (%PG)),(ke\ -ys(%MODE)),(@smode)) )\n {\n if ($ARGV[0] eq $\ -p && $target eq \"\"){$target=$p;}\n }\nif ($targ\ -et eq \"\"){exit ($EXIT_FAILURE);}\n\n\nforeach $r\ - (@required_applications)\n {\n my @app_list;\\ -n my $i;\n $i=0;\n \n @app_list=split \ -(/_OR_/, $r);\n foreach my $pg (@app_list)\n \ - {\n $i+=&pg_is_installed ($pg);\n }\n i\ -f ($i==0)\n {\n print \"One of the follo\ -wing packages must be installed to proceed: \";\n \ - foreach my $pg (@app_list)\n {\n print (\"$\ -pg \");\n }\n die;\n }\n }\n\n\n\n\n\n\n&\ -sign_license_ni();\n\n\n$PG{C}{compiler}=get_C_com\ -piler($CC);\n$PG{Fortran}{compiler}=get_F_compiler\ -($FC);\n$PG{CXX}{compiler}=$PG{CPP}{compiler}=$PG{\ -GPP}{compiler}=get_CXX_compiler($CXX);\nif ($CXXFL\ -AGS){$PG{CPP}{options}=$PG{GPP}{options}=$PG{CXX}{\ -options}=$CXXFLAGS;}\nif ($CFLAGS){$PG{C}{options}\ -=$CFLAGS;}\nforeach my $c (keys(%PG))\n {\n my\ - $arguments;\n if ($PG{$c}{compiler})\n {\\ -n $arguments=\"$PG{$c}{compiler_flag}=$PG{$c}{comp\ -iler} \";\n if ($PG{$c}{options})\n {\n $arg\ -uments.=\"$PG{$c}{options_flag}=$PG{$c}{options} \\ -";\n }\n $PG{$c}{arguments}=$arguments;\n }\ -\n }\n\nif ($PG{$target}){$PG{$target}{install}=1\ -;}\nelse\n {\n foreach my $pg (keys(%PG))\n \ - {\n if ( $target eq \"all\" || ($PG{$pg}{mode}=\ -~/$target/))\n {\n $PG{$pg} {install}=1;\n \ - }\n }\n }\n\nforeach my $pg (keys(%PG))\n \ -{\n if (!$PG{$pg}{update_action}){$PG{$pg}{upda\ -te_action}=$default_update_action;}\n elsif ($P\ -G{$pg}{update_action} eq \"never\"){$PG{$pg}{insta\ -ll}=0;}\n if ( $force && $PG{$pg}{install})\n \ - {\n `rm $BIN/$pg $BIN/$pg.exe $SILENT`;\n \ - }\n if ($PG{$pg}{update_action} eq \"update\" \ -&& $PG{$pg}{install}){$PG{$pg}{update}=1;}\n }\n\\ -nif (($target=~/clean/))\n {\n print \"-------\ - cleaning executables -----\\n\";\n `rm bin/* $\ -SILENT`;\n exit ($EXIT_SUCCESS);\n }\n\nif ( !\ -$PG{$target}){print \"------- Installing T-Coffee \ -Modes\\n\";}\n\nforeach my $m (keys(%MODE))\n {\n\ - if ( $target eq \"all\" || $target eq $m)\n \ - {\n print \"\\n------- The installer will now i\ -nstall the $m components $MODE{$m}{description}\\n\ -\";\n foreach my $pg (keys(%PG))\n {\n if ( \ -$PG{$pg}{mode} =~/$m/ && $PG{$pg}{install})\n \ - {\n if ($PG{$pg}{touched}){print \"------- $PG{\ -$pg}{dname}: already processed\\n\";}\n else {$PG\ -{$pg}{success}=&install_pg($pg);$PG{$pg}{touched}=\ -1;}\n }\n }\n }\n }\n\nif ( $PG{$tar\ -get}){print \"------- Installing Individual Packag\ -e\\n\";}\nforeach my $pg (keys (%PG))\n {\n \n\ - if ( $PG{$pg}{install} && !$PG{$pg}{touched})\\ -n {\n print \"\\n------- Install $pg\\n\";\n \ -$PG{$pg}{success}=&install_pg($pg);$PG{$pg}{touche\ -d}=1;\n }\n }\nprint \"------- Finishing The\ - installation\\n\";\nmy $final_report=&install ($I\ -NSTALL_DIR);\n\nprint \"\\n\";\nprint \"**********\ -**************************************************\ -*********\\n\";\nprint \"******** INS\ -TALLATION SUMMARY *****************\\n\";\ -\nprint \"****************************************\ -*****************************\\n\";\nprint \"-----\ --- SUMMARY package Installation:\\n\";\nforeach my\ - $pg (keys(%PG))\n {\n if ( $PG{$pg}{install})\ -\n {\n my $bin_status=($PG{$pg}{from_binary} \ -&& $PG{$pg}{success})?\"[from binary]\":\"\";\n if\ - ( $PG{$pg}{new} && !$PG{$pg}{old}) \ - {print \"*------ $PG{$pg}{dname}:\ - installed $bin_status\\n\"; $PG{$pg}{status}=1;}\\ -n elsif ( $PG{$pg}{new} && $PG{$pg}{old}) \ - {print \"*------ $PG{$pg}{dna\ -me}: updated $bin_status\\n\" ; $PG{$pg}{status}=\ -1;} \n elsif (!$PG{$pg}{new} && $PG{$pg}{old} &&\ - !$PG{$pg}{update}){print \"*------ $PG{$pg\ -}{dname}: previous\\n\" ; $PG{$pg}{status}=1;}\n e\ -lsif (!$PG{$pg}{new} && $PG{$pg}{old} && $PG{$p\ -g}{update}){print \"*------ $PG{$pg}{dname}\ -: failed update (previous installation available)\\ -\n\";$PG{$pg}{status}=0;}\n else \ - {print \"*\ ------- $PG{$pg}{dname}: failed installation\ -\";$PG{$pg}{status}=0;}\n }\n }\n\nif ( !$PG\ -{$target}){print \"*------ SUMMARY mode Installati\ -on:\\n\";}\nforeach my $m (keys(%MODE))\n {\n \ -if ( $target eq \"all\" || $target eq $m)\n {\ -\n my $succesful=1;\n foreach my $pg (keys(%PG))\n\ - {\n if (($PG{$pg}{mode}=~/$m/) && $PG{$pg}{\ -install} && $PG{$pg}{status}==0)\n {\n $suc\ -cesful=0;\n print \"*!!!!!! $PG{$pg}{dname}\ -: Missing\\n\";\n }\n }\n if ( $succesful)\ -\n {\n $MODE{$m}{status}=1;\n print \"*-\ ------ MODE $MODE{$m}{dname} SUCCESFULY insta\ -lled\\n\";\n }\n else\n {\n $MODE{$m}{stat\ -us}=0;\n print \"*!!!!!! MODE $MODE{$m}{\ -dname} UNSUCCESFULY installed\\n\";\n }\n }\ -\n }\n\nif ($clean==1 && ($BASE=~/install4tcoffee\ -/) ){print \"*------ Clean Installation Directory:\ - $BASE\\n\";`rm -rf $BASE`;}\nforeach my $pg (keys\ -(%PG)){if ($PG{$pg}{install} && $PG{$pg}{status}==\ -0){exit ($EXIT_FAILURE);}}\nexit ($EXIT_SUCCESS); \ - \n\nsub get_CXX_compiler\n {\n my $c=@_[0];\n\ - my (@clist)=(\"g++\");\n \n return get_c\ -ompil ($c, @clist);\n }\nsub get_C_compiler\n {\n\ - my $c=@_[0];\n my (@clist)=(\"gcc\", \"cc\"\ -, \"icc\");\n \n return get_compil ($c, @cli\ -st);\n }\n\nsub get_F_compiler\n {\n my ($c)=@\ -_[0];\n my @clist=(\"f77\", \"g77\", \"gfortran\ -\", \"ifort\");\n return get_compil ($c, @clist\ -);\n } \n \nsub get_compil\n {\n my ($f\ -av,@clist)=(@_);\n \n #return the first comp\ -iler found installed in the system. Check first th\ -e favorite\n foreach my $c ($fav,@clist)\n \ - {\n if (&pg_is_installed ($c)){return $c;}\n \ - }\n return \"\";\n }\nsub exit_if_pg_not_ins\ -talled\n {\n my (@arg)=(@_);\n \n foreac\ -h my $p (@arg)\n {\n if ( !&pg_is_installed (\ -$p))\n {\n print \"!!!!!!!! The $p utility m\ -ust be installed for this installation to proceed \ -[FATAL]\\n\";\n die;\n }\n }\n retur\ -n 1;\n }\nsub set_proxy\n {\n my ($proxy)=(@_\ -);\n my (@list,$p);\n \n @list= (\"HTTP_p\ -roxy\", \"http_proxy\", \"HTTP_PROXY\", \"ALL_prox\ -y\", \"all_proxy\",\"HTTP_proxy_4_TCOFFEE\",\"http\ -_proxy_4_TCOFFEE\");\n \n if (!$proxy)\n \ - {\n foreach my $p (@list)\n {\n if ( ($ENV\ -_SET{$p}) || $ENV{$p}){$proxy=$ENV{$p};}\n }\n \ - }\n foreach my $p(@list){$ENV{$p}=$proxy;}\\ -n }\n \nsub check_internet_connection\n {\n m\ -y $internet;\n \n if ( -e \"x\"){unlink (\"x\ -\");}\n if (&pg_is_installed (\"wget\"))\ -{`wget www.google.com -Ox >/dev/null 2>/dev/null`;\ -}\n elsif (&pg_is_installed (\"curl\")){`cu\ -rl www.google.com -ox >/dev/null 2>/dev/null`;}\n \ - else\n {\n printf stderr \"\\nERROR: No pg\ - for remote file fetching [wget or curl][FATAL]\\n\ -\";\n exit ($EXIT_FAILURE);\n }\n \n if\ - ( !-e \"x\" || -s \"x\" < 10){$internet=0;}\n \ -else {$internet=1;}\n if (-e \"x\"){unlink \"x\\ -";}\n return $internet;\n }\nsub url2file\n {\ -\n my ($cmd, $file,$wget_arg, $curl_arg)=(@_);\\ -n my ($exit,$flag, $pg, $arg);\n \n if ($\ -INTERNET || check_internet_connection ()){$INTERNE\ -T=1;}\n else\n {\n print STDERR \"ERROR: N\ -o Internet Connection [FATAL:install.pl]\\n\";\n e\ -xit ($EXIT_FAILURE);\n }\n \n if (&\ -pg_is_installed (\"wget\")){$pg=\"wget\"; $flag\ -=\"-O\";$arg=$wget_arg;}\n elsif (&pg_is_insta\ -lled (\"curl\")){$pg=\"curl\"; $flag=\"-o\";$ar\ -g=$curl_arg;}\n else\n {\n printf stderr \\ -"\\nERROR: No pg for remote file fetching [wget or\ - curl][FATAL]\\n\";\n exit ($EXIT_FAILURE);\n \ - }\n \n \n if (-e $file){unlink($file);}\\ -n $exit=system \"$pg $cmd $flag$file $arg\";\n \ - return $exit;\n }\n\nsub pg_is_installed\n {\\ -n my ($p, $dir)=(@_);\n my ($r,$m);\n my \ -($supported, $language, $compil);\n \n if ( \ -$PG{$p})\n {\n $language=$PG{$p}{language2};\\ -n $compil=$PG{$language}{compiler};\n }\n \ -\n if ( $compil eq \"CPAN\")\n {\n if ( sy\ -stem (\"perl -M$p -e 1\")==$EXIT_SUCCESS){return 1\ -;}\n else {return 0;}\n }\n elsif ($dir)\n\ - {\n if (-e \"$dir/$p\" || -e \"$dir/$p\\.exe\ -\"){return 1;}\n else {return 0;}\n }\n el\ -sif (-e \"$PLUGINS_DIR/$p\" || -e \"$PLUGINS_DIR/$\ -p.exe\"){return 1;}\n else\n {\n $r=`which\ - $p 2>/dev/null`;\n if ($r eq \"\"){return 0;}\n e\ -lse {return 1;}\n }\n return 0;\n }\nsub \ -install\n {\n my ($new_bin)=(@_);\n my ($co\ -pied, $report);\n\n \n if (!$ROOT_INSTALL)\n\ - {\n \n if (-e \"$BIN/t_coffee\"){`$CP $BIN/t\ -_coffee $INSTALL_DIR`};\n `cp $BIN/* $PLUGINS_DIR`\ -;\n $copied=1;\n }\n else\n {\n $copi\ -ed=&root_run (\"You must be root to finalize the i\ -nstallation\", \"$CP $BIN/* $INSTALL_DIR $SILENT\"\ -);\n }\n \n \n if ( !$copied)\n {\\ -n $report=\"*!!!!!! Installation unsuccesful.\ - The executables have been left in $BASE/bin\\n\";\ -\n }\n elsif ( $copied && $ROOT)\n {\n \ - $report=\"*------ Installation succesful. Your ex\ -ecutables have been copied in $new_bin and are on \ -your PATH\\n\";\n }\n elsif ( $copied && !$ROO\ -T)\n {\n $report= \"*!!!!!! T-Coffee and a\ -ssociated packages have been copied in: $new_bin\\\ -n\";\n $report.=\"*!!!!!! This address is NOT\ - in your PATH sytem variable\\n\";\n $report.\ -=\"*!!!!!! You can do so by adding the following l\ -ine in your ~/.bashrc file:\\n\";\n $report.=\ -\"*!!!!!! export PATH=$new_bin:\\$PATH\\n\";\n \ -}\n return $report;\n}\n\nsub sign_license_ni\n \ -{\n my $F=new FileHandle;\n open ($F, \"lice\ -nse.txt\");\n while (<$F>)\n {\n print \"$\ -_\";\n }\n close ($F);\n \n return;\\ -n }\n\nsub install_pg\n {\n my ($pg)=(@_);\n \ - my ($report, $previous, $language, $compiler, $\ -return);\n \n if (!$PG{$pg}{install}){return\ - 1;}\n \n $previous=&pg_is_installed ($pg);\\ -n \n if ($PG{$pg}{update_action} eq \"no_upd\ -ate\" && $previous)\n {\n $PG{$pg}{old}=1;\n \ -$PG{$pg}{new}=0;\n $return=1;\n }\n else\n\ - {\n $PG{$pg}{old}=$previous;\n \n if ($PG{$p\ -g} {language2} eq \"Perl\"){&install_perl_package \ -($pg);}\n elsif ($BINARIES_ONLY && &install_binary\ -_package ($pg)){$PG{$pg}{from_binary}=1;}\n elsif \ -(&install_source_package ($pg)){;}\n else \n {\n\ - \n if (!&supported_os($OS))\n {\n \ -print \"!!!!!!!! $pg compilation failed, binary un\ -supported for $OS\\n\"; \n }\n elsif (!(\ -$PG{$pg}{from_binary}=&install_binary_package ($pg\ -)))\n {\n print \"!!!!!!!! $pg compilation \ -and binary installation failed\\n\";\n }\n \ - }\n $PG{$pg}{new}=$return=&pg_is_installed ($pg,\ -$BIN);\n }\n\n \n return $return;\n }\\ -nsub install_perl_package\n {\n my ($pg)=(@_);\ -\n my ($report, $language, $compiler);\n \n \ - $language=$PG{$pg} {language2};\n $compiler=\ -$PG{$language}{compiler};\n \n if (!&pg_is_i\ -nstalled ($pg))\n {\n if ( $OS eq \"windows\"\ -){`perl -M$compiler -e 'install $pg'`;}\n elsif ( \ -$ROOT eq \"sudo\"){system (\"sudo perl -M$compiler\ - -e 'install $pg'\");}\n else {system (\"su root -\ -c perl -M$compiler -e 'install $pg'\");}\n }\\ -n return &pg_is_installed ($pg);\n }\n\n\n\nsu\ -b install_source_package\n {\n my ($pg)=(@_);\\ -n my ($report, $download, $arguments, $language\ -, $address, $name, $ext, $main_dir, $distrib);\n \ - my $wget_tmp=\"$TMP/wget.tmp\";\n my (@fl);\n\ - if ( -e \"$BIN/$pg\" || -e \"$BIN/$pg.exe\"){r\ -eturn 1;}\n \n if ($pg eq \"t_coffee\") {re\ -turn &install_t_coffee ($pg);}\n elsif ($pg e\ -q \"TMalign\"){return &install_TMalign ($pg);}\n\ - \n chdir $DISTRIBUTIONS;\n \n $downlo\ -ad=$PG{$pg}{source};\n \n if (($download =~/\ -tgz/))\n {\n ($address,$name,$ext)=($download\ -=~/(.+\\/)([^\\/]+)(\\.tgz)/);\n }\n elsif\ - (($download=~/tar\\.gz/))\n {\n ($address,$n\ -ame,$ext)=($download=~/(.+\\/)([^\\/]+)(\\.tar\\.g\ -z)/);\n }\n elsif (($download=~/tar/))\n \ - {\n ($address,$name,$ext)=($download=~/(.+\\/)\ -([^\\/]+)(\\.tar)/);\n }\n else\n {\n\ - ($address,$name)=($download=~/(.+\\/)([^\\/]+)/);\ -\n $ext=\"\";\n }\n $distrib=\"$name$ext\"\ -;\n \n if ( !-d $pg){mkdir $pg;}\n chdir \ -$pg;\n \n #get the distribution if available\\ -n if ( -e \"$DOWNLOAD_DIR/$distrib\")\n {\\ -n `$CP $DOWNLOAD_DIR/$distrib .`;\n }\n #U\ -NTAR and Prepare everything\n if (!-e \"$name.t\ -ar\" && !-e \"$name\")\n {\n &check_rm ($wget\ -_tmp);\n print \"\\n------- Downloading/Installing\ - $pg\\n\";\n if (!-e $distrib && &url2file (\"$dow\ -nload\", \"$wget_tmp\")==$EXIT_SUCCESS)\n {\n \ - \n `mv $wget_tmp $distrib`;\n `$CP $dist\ -rib $DOWNLOAD_DIR/`;\n }\n\n if (!-e $distrib)\n\ - {\n print \"!!!!!!! Download of $pg distrib\ -ution failed\\n\";\n print \"!!!!!!! Check Add\ -ress: $PG{$pg}{source}\\n\";\n return 0;\n }\ -\n print \"\\n------- unzipping/untaring $name\\n\\ -";\n if (($ext =~/z/))\n { \n &flush_command\ - (\"gunzip $name$ext\");\n \n }\n if (($ext \ -=~/tar/) || ($ext =~/tgz/))\n {\n &flush_com\ -mand(\"tar -xvf $name.tar\");\n }\n }\n \ -#Guess and enter the distribution directory\n @\ -fl=ls($p);\n foreach my $f (@fl)\n {\n if \ -(-d $f)\n {\n $main_dir=$f;\n }\n }\n\ - if (-d $main_dir)\n {chdir $main_dir;}\n \ -\n print \"\\n------- Compiling/Installing $pg\\ -\n\";\n `make clean $SILENT`;\n #sap\n if\ - ($pg eq \"sap\")\n {\n `rm *.o sap sap.exe \ -./util/aa/*.o ./util/wt/.o $SILENT`;\n &flush_com\ -mand (\"make $arguments sap\");\n &check_cp ($pg, \ -\"$BIN\");\n }\n elsif ($pg eq \"clustalw2\ -\")\n {\n &flush_command(\"./configure\");\n \ -&flush_command(\"make $arguments\");\n &check_cp (\ -\"./src/$pg\", \"$BIN\");\n \n }\n elsif (\ -$pg eq \"clustalw\")\n {\n &flush_command(\"m\ -ake $arguments clustalw\");\n `$CP $pg $BIN $SILEN\ -T`;\n }\n \n elsif ($pg eq \"mafft\")\n\ - {\n my $base=cwd();\n my $c;\n \n #compile c\ -ore\n mkpath (\"./mafft/bin\");\n mkpath (\"./maff\ -t/lib\");\n chdir \"$base/core\";\n `make clean $S\ -ILENT`;\n &flush_command (\"make $arguments\");\n \ -&flush_command (\"make install LIBDIR=../mafft/lib\ - BINDIR=../mafft/bin\");\n \n #compile extension\n\ - chdir \"$base/extensions\";\n `make clean $SILENT\ -`;\n &flush_command (\"make $arguments\");\n &flus\ -h_command (\"make install LIBDIR=../mafft/lib BIND\ -IR=../mafft/bin\");\n \n #put everything in mafft \ -and copy the coompiled stuff in bin\n chdir \"$bas\ -e\";\n if ($ROOT_INSTALL)\n {\n &root_run (\\ -"You Must be Roor to Install MAFFT\\n\", \"mkdir /\ -usr/local/mafft/;$CP mafft/lib/* /usr/local/mafft;\ -$CP mafft/lib/mafft* /usr/local/bin ;$CP mafft/bin\ -/mafft /usr/local/bin/; \");\n }\n else\n {\n \ - `$CP mafft/lib/* $BIN`;\n `$CP mafft/bin/\ -mafft $BIN`;\n }\n `tar -cvf mafft.tar mafft`;\\ -n `gzip mafft.tar`;\n `mv mafft.tar.gz $BIN`;\n \ - }\n elsif ( $pg eq \"dialign-tx\")\n {\\ -n my $f;\n my $base=cwd();\n\n chdir \"./source\";\ -\n &flush_command (\" make CPPFLAGS='-O3 -funroll-\ -loops' all\");\n \n chdir \"..\";\n &check_cp (\".\ -/source/$pg\", \"$BIN\");\n &check_cp (\"./source/\ -$pg\", \"$BIN/dialign-t\");\n }\n elsif ($\ -pg eq \"poa\")\n {\n &flush_command (\"make $\ -arguments poa\");\n &check_cp (\"$pg\", \"$BIN\");\ -\n }\n elsif ( $pg eq \"probcons\")\n \ - {\n `rm *.exe $SILENT`;\n &flush_command (\"make \ -$arguments probcons\");\n &check_cp(\"$pg\", \"$BI\ -N/$pg\");\n }\n elsif ( $pg eq \"probcons\\ -" || $pg eq \"probconsRNA\")\n {\n `rm *.exe \ -$SILENT`;\n &flush_command (\"make $arguments prob\ -cons\");\n &check_cp(\"probcons\", \"$BIN/$pg\");\\ -n }\n\n elsif ( $pg eq \"muscle\")\n \ - {\n `rm *.o muscle muscle.exe $SILENT`;\n &flush_\ -command (\"make $arguments all\");\n &check_cp(\"$\ -pg\", \"$BIN\");\n }\n elsif ( $pg eq \"pc\ -ma\")\n {\n &flush_command (\"make $arguments\ - pcma\");\n &check_cp(\"$pg\", \"$BIN\");\n }\ -\n elsif ($pg eq \"kalign\")\n {\n &flush_\ -command (\"./configure\");\n &flush_command(\"make\ - $arguments\");\n &check_cp (\"$pg\",$BIN);\n \ - }\n elsif ( $pg eq \"amap\")\n {\n chdir \ -\"align\";\n `make clean $SILENT`;\n &flush_comman\ -d (\"make $arguments all\");\n &check_cp (\"$pg\",\ - $BIN);\n }\n elsif ( $pg eq \"proda\")\n \ - {\n &flush_command (\"make $arguments all\");\ -\n &check_cp (\"$pg\", $BIN);\n }\n elsif \ -( $pg eq \"prank\")\n {\n &flush_command (\"m\ -ake $arguments all\");\n &check_cp (\"$pg\", $BIN)\ -;\n }\n elsif ( $pg eq \"mustang\")\n \ - {\n &flush_command (\"make $arguments all\");\n i\ -f ( $OS=~/windows/){&check_cp(\"./bin/MUSTANG_v.3\\ -", \"$BIN/mustang.exe\");}\n else {&check_cp(\"./b\ -in/MUSTANG_v.3\", \"$BIN/mustang\");}\n }\n \ - elsif ( $pg eq \"RNAplfold\")\n {\n &flush_\ -command(\"./configure\");\n &flush_command (\"make\ - $arguments all\");\n &check_cp(\"./Progs/RNAplfol\ -d\", \"$BIN\");\n }\n chdir $CDIR;\n re\ -turn &pg_is_installed ($pg, $BIN);\n }\n\nsub ins\ -tall_t_coffee\n {\n my ($pg)=(@_);\n my ($r\ -eport,$cflags, $arguments, $language, $compiler) ;\ -\n #1-Install T-Coffee\n chdir \"t_coffee_so\ -urce\";\n &flush_command (\"make clean\");\n \ - print \"\\n------- Compiling T-Coffee\\n\";\n \ -$language=$PG{$pg} {language2};\n $arguments=$P\ -G{$language}{arguments};\n if (!($arguments =~/\ -CFLAGS/)){$arguments .= \" CFLAGS=-O2 \";}\n\n \ -if ( $CC ne \"\"){&flush_command (\"make -i $argum\ -ents t_coffee\");}\n &check_cp ($pg, $BIN);\n \ - \n chdir $CDIR;\n return &pg_is_installed \ -($pg, $BIN);\n }\nsub install_TMalign\n {\n m\ -y ($pg)=(@_);\n my $report;\n chdir \"t_coff\ -ee_source\";\n print \"\\n------- Compiling TMa\ -lign\\n\";\n `rm TMalign TMalign.exe $SILENT`;\\ -n if ( $FC ne \"\"){&flush_command (\"make -i $\ -PG{Fortran}{arguments} TMalign\");}\n &check_cp\ - ($pg, $BIN);\n if ( !-e \"$BIN/$pg\" && pg_has\ -_binary_distrib ($pg))\n {\n print \"!!!!!!! \ -Compilation of $pg impossible. Will try to install\ - from binary\\n\";\n return &install_binary_packag\ -e ($pg);\n }\n chdir $CDIR;\n return &p\ -g_is_installed ($pg, $BIN);\n }\n\nsub pg_has_bin\ -ary_distrib\n {\n my ($pg)=(@_);\n if ($PG{\ -$pg}{windows}){return 1;}\n elsif ($PG{$pg}{osx\ -}){return 1;}\n elsif ($PG{$pg}{linux}){return \ -1;}\n return 0;\n }\nsub install_binary_packag\ -e\n {\n my ($pg)=(@_);\n my ($base,$report,\ -$name, $download, $arguments, $language, $dir);\n \ - my $isdir;\n &input_os();\n \n if (!&s\ -upported_os($OS)){return 0;}\n if ( $PG{$pg}{bi\ -nary}){$name=$PG{$pg}{binary};}\n else \n \ -{\n $name=$pg;\n if ( $OS eq \"windows\"){$name.=\\ -".exe\";}\n }\n \n $download=\"$WEB_BAS\ -E/Packages/Binaries/$OS/$name\";\n \n $base=\ -cwd();\n chdir $TMP;\n \n if (!-e $name)\\ -n {\n `rm x $SILENT`;\n if ( url2file(\"$down\ -load\",\"x\")==$EXIT_SUCCESS)\n {\n `mv x $n\ -ame`;\n }\n }\n \n if (!-e $name)\n \ - {\n print \"!!!!!!! $PG{$pg}{dname}: Download \ -of $pg binary failed\\n\";\n print \"!!!!!!! $PG{$\ -pg}{dname}: Check Address: $download\\n\";\n retur\ -n 0;\n }\n print \"\\n------- Installing $\ -pg\\n\";\n \n if ($name =~/tar\\.gz/)\n \ - {\n `gunzip $name`;\n `tar -xvf $pg.tar`;\n chdi\ -r $pg;\n if ( $pg eq \"mafft\")\n {\n if ($R\ -OOT_INSTALL)\n {\n &root_run (\"You Must be\ - Roor to Install MAFFT\\n\", \"$CP mafft/bin/* /us\ -r/local/mafft;mkdir /usr/local/mafft/; $CP mafft/l\ -ib/* /usr/local/bin/\");\n }\n else\n \ - {\n `$CP $TMP/$pg/bin/* $BIN $SILENT`;\n `$C\ -P $TMP/$pg/lib/* $BIN $SILENT`;\n }\n }\n \ -else\n {\n if (-e \"$TMP/$pg/data\"){`$CP $T\ -MP/$pg/data/* $TCM $SILENT`;}\n if (!($pg=~/\\\ -*/)){`rm -rf $pg`;}\n }\n }\n else\n \ - {\n &check_cp (\"$pg\", \"$BIN\");\n `chmod u+x \ -$BIN/$pg`; \n unlink ($pg);\n }\n chdir $b\ -ase;\n $PG{$pg}{from_binary}=1;\n return &pg\ -_is_installed ($pg, $BIN);\n }\n\nsub add_dir \n \ - {\n my $dir=@_[0];\n \n if (!-e $dir && \ -!-d $dir)\n {\n return mkpath ($dir);\n \ -}\n else\n {\n return 0;\n }\n }\nsu\ -b check_rm \n {\n my ($file)=(@_);\n \n \ -if ( -e $file)\n {\n return unlink($file);\n \ - }\n return 0;\n }\nsub check_cp\n {\n \ - my ($from, $to)=(@_);\n if ( !-e $from && -e \\ -"$from\\.exe\"){$from=\"$from\\.exe\";}\n if ( \ -!-e $from){return 0;}\n \n `$CP $from $t\ -o`;\n return 1;\n }\nsub check_file_list_exist\ -s \n {\n my ($base, @flist)=(@_);\n my $f;\\ -n\n foreach $f (@flist)\n {\n if ( !-e \"$\ -base/$f\"){return 0;}\n }\n return 1;\n }\ -\nsub ls\n {\n my $f=@_[0];\n my @fl;\n \ -chomp(@fl=`ls -1 $f`);\n return @fl;\n }\nsub \ -flush_command\n {\n my $command=@_[0];\n my\ - $F=new FileHandle;\n open ($F, \"$command|\");\ -\n while (<$F>){print \" --- $_\";}\n clo\ -se ($F);\n } \n\nsub input_installation_direct\ -ory\n {\n my $dir=@_[0];\n my $new;\n \n\ - print \"------- The current installation direc\ -tory is: [$dir]\\n\";\n print \"??????? Return \ -to keep the default or new value:\";\n \n if \ -($NO_QUESTION==0)\n {\n chomp ($new=);\ -\n while ( $new ne \"\" && !input_yes (\"You have \ -entered $new. Is this correct? ([y]/n):\"))\n {\\ -n print \"???????New installation directory:\"\ -;\n chomp ($new=);\n }\n $dir=($new e\ -q \"\")?$dir:$new;\n $dir=~s/\\/$//;\n }\n \ - \n if ( -d $dir){return $dir;}\n elsif (&ro\ -ot_run (\"You must be root to create $dir\",\"mkdi\ -r $dir\")==$EXIT_SUCCESS){return $dir;}\n else\\ -n {\n print \"!!!!!!! $dir could not be creat\ -ed\\n\";\n if ( $NO_QUESTION)\n {\n return \\ -"\";\n }\n elsif ( &input_yes (\"??????? Do you \ -want to provide a new directory([y]/n)?:\"))\n {\ -\n return input_installation_directory ($dir);\ -\n }\n else\n {\n return \"\";\n }\n \ - }\n \n }\nsub input_yes\n {\n my $questi\ -on =@_[0];\n my $answer;\n\n if ($NO_QUESTIO\ -N==1){return 1;}\n \n if ($question eq \"\")\ -{$question=\"??????? Do you wish to proceed ([y]/n\ -)?:\";}\n print $question;\n chomp($answer=l\ -c());\n if (($answer=~/^y/) || $answer e\ -q \"\"){return 1;}\n elsif ( ($answer=~/^n/)){r\ -eturn 0;}\n else\n {\n return input_yes($q\ -uestion);\n }\n }\nsub root_run\n {\n my\ - ($txt, $cmd)=(@_);\n \n if ( system ($cmd)=\ -=$EXIT_SUCCESS){return $EXIT_SUCCESS;}\n else \\ -n {\n print \"------- $txt\\n\";\n if ( $ROOT\ - eq \"sudo\"){return system (\"sudo $cmd\");}\n el\ -se {return system (\"su root -c \\\"$cmd\\\"\");}\\ -n }\n }\nsub get_root\n {\n if (&pg_is_i\ -nstalled (\"sudo\")){return \"sudo\";}\n else {\ -return \"su\";}\n }\n\nsub get_os\n {\n my $r\ -aw_os=`uname`;\n my $os;\n\n $raw_os=lc ($ra\ -w_os);\n \n if ($raw_os =~/cygwin/){$os=\"wi\ -ndows\";}\n elsif ($raw_os =~/linux/){$os=\"lin\ -ux\";}\n elsif ($raw_os =~/osx/){$os=\"macosx\"\ -;}\n elsif ($raw_os =~/darwin/){$os=\"macosx\";\ -}\n else\n {\n $os=$raw_os;\n }\n \ -return $os;\n }\nsub input_os\n {\n my $answe\ -r;\n if ($OS) {return $OS;}\n \n print \"\ -??????? which os do you use: [w]indows, [l]inux, [\ -m]acosx:?\";\n $answer=lc();\n\n if (\ -($answer=~/^m/)){$OS=\"macosx\";}\n elsif ( ($a\ -nswer=~/^w/)){$OS=\"windows\";}\n elsif ( ($ans\ -wer=~/^linux/)){$OS=\"linux\";}\n \n else\n \ - {\n return &input_os();\n }\n return \ -$OS;\n }\n\nsub supported_os\n {\n my ($os)=(\ -@_[0]);\n return $SUPPORTED_OS{$os};\n }\n \ -\n \n\n\nsub update_tclinkdb \n {\n my $fil\ -e =@_[0];\n my $name;\n my $F=new FileHandle\ -;\n my ($download, $address, $name, $l, $db);\n\ - \n if ( $file eq \"update\"){$file=$TCLINKD\ -B_ADDRESS;}\n \n if ( $file =~/http:\\/\\// \ -|| $file =~/ftp:\\/\\//)\n {\n ($address, $na\ -me)=($download=~/(.*)\\/([^\\/]+)$/);\n `rm x $SIL\ -ENT`;\n if (&url2file ($file,\"x\")==$EXIT_SUCCESS\ -)\n {\n print \"------- Susscessful upload o\ -f $name\";\n `mv x $name`;\n $file=$name;\\ -n }\n }\n open ($F, \"$file\");\n whi\ -le (<$F>)\n {\n my $l=$_;\n if (($l =~/^\\/\\\ -//) || ($db=~/^#/)){;}\n elsif ( !($l =~/\\w/)){;}\ -\n else\n {\n my @v=split (/\\s+/, $l);\n \ - if ( $l=~/^MODE/)\n {\n $MODE{$v[1]}{$v[2\ -]}=$v[3];\n }\n elsif ($l=~/^PG/)\n \ - {\n $PG{$v[1]}{$v[2]}=$v[3];\n }\n }\n \ - }\n close ($F);\n &post_process_PG();\n\ - return;\n }\n\n\n\nsub initialize_PG\n {\n \ - \n$PG{\"t_coffee\"}{\"4_TCOFFEE\"}=\"TCOFFEE\";\\ -n$PG{\"t_coffee\"}{\"type\"}=\"sequence_multiple_a\ -ligner\";\n$PG{\"t_coffee\"}{\"ADDRESS\"}=\"http:/\ -/www.tcoffee.org\";\n$PG{\"t_coffee\"}{\"language\\ -"}=\"C\";\n$PG{\"t_coffee\"}{\"language2\"}=\"C\";\ -\n$PG{\"t_coffee\"}{\"source\"}=\"http://www.tcoff\ -ee.org/Packages/T-COFFEE_distribution.tar.gz\";\n$\ -PG{\"t_coffee\"}{\"update_action\"}=\"always\";\n$\ -PG{\"t_coffee\"}{\"mode\"}=\"tcoffee,mcoffee,rcoff\ -ee,expresso,3dcoffee\";\n$PG{\"clustalw2\"}{\"4_TC\ -OFFEE\"}=\"CLUSTALW2\";\n$PG{\"clustalw2\"}{\"type\ -\"}=\"sequence_multiple_aligner\";\n$PG{\"clustalw\ -2\"}{\"ADDRESS\"}=\"http://www.clustal.org\";\n$PG\ -{\"clustalw2\"}{\"language\"}=\"C++\";\n$PG{\"clus\ -talw2\"}{\"language2\"}=\"CXX\";\n$PG{\"clustalw2\\ -"}{\"source\"}=\"http://www.clustal.org/download/2\ -.0.10/clustalw-2.0.10-src.tar.gz\";\n$PG{\"clustal\ -w2\"}{\"mode\"}=\"mcoffee,rcoffee\";\n$PG{\"clusta\ -lw\"}{\"4_TCOFFEE\"}=\"CLUSTALW\";\n$PG{\"clustalw\ -\"}{\"type\"}=\"sequence_multiple_aligner\";\n$PG{\ -\"clustalw\"}{\"ADDRESS\"}=\"http://www.clustal.or\ -g\";\n$PG{\"clustalw\"}{\"language\"}=\"C\";\n$PG{\ -\"clustalw\"}{\"language2\"}=\"C\";\n$PG{\"clustal\ -w\"}{\"source\"}=\"http://www.clustal.org/download\ -/1.X/ftp-igbmc.u-strasbg.fr/pub/ClustalW/clustalw1\ -.82.UNIX.tar.gz\";\n$PG{\"clustalw\"}{\"mode\"}=\"\ -mcoffee,rcoffee\";\n$PG{\"dialign-t\"}{\"4_TCOFFEE\ -\"}=\"DIALIGNT\";\n$PG{\"dialign-t\"}{\"type\"}=\"\ -sequence_multiple_aligner\";\n$PG{\"dialign-t\"}{\\ -"ADDRESS\"}=\"http://dialign-tx.gobics.de/\";\n$PG\ -{\"dialign-t\"}{\"DIR\"}=\"/usr/share/dialign-tx/\\ -";\n$PG{\"dialign-t\"}{\"language\"}=\"C\";\n$PG{\\ -"dialign-t\"}{\"language2\"}=\"C\";\n$PG{\"dialign\ --t\"}{\"source\"}=\"http://dialign-tx.gobics.de/DI\ -ALIGN-TX_1.0.1.tar.gz\";\n$PG{\"dialign-t\"}{\"mod\ -e\"}=\"mcoffee\";\n$PG{\"dialign-t\"}{\"binary\"}=\ -\"dialign-t\";\n$PG{\"dialign-tx\"}{\"4_TCOFFEE\"}\ -=\"DIALIGNTX\";\n$PG{\"dialign-tx\"}{\"type\"}=\"s\ -equence_multiple_aligner\";\n$PG{\"dialign-tx\"}{\\ -"ADDRESS\"}=\"http://dialign-tx.gobics.de/\";\n$PG\ -{\"dialign-tx\"}{\"DIR\"}=\"/usr/share/dialign-tx/\ -\";\n$PG{\"dialign-tx\"}{\"language\"}=\"C\";\n$PG\ -{\"dialign-tx\"}{\"language2\"}=\"C\";\n$PG{\"dial\ -ign-tx\"}{\"source\"}=\"http://dialign-tx.gobics.d\ -e/DIALIGN-TX_1.0.1.tar.gz\";\n$PG{\"dialign-tx\"}{\ -\"mode\"}=\"mcoffee\";\n$PG{\"dialign-tx\"}{\"bina\ -ry\"}=\"dialign-tx\";\n$PG{\"poa\"}{\"4_TCOFFEE\"}\ -=\"POA\";\n$PG{\"poa\"}{\"type\"}=\"sequence_multi\ -ple_aligner\";\n$PG{\"poa\"}{\"ADDRESS\"}=\"http:/\ -/www.bioinformatics.ucla.edu/poa/\";\n$PG{\"poa\"}\ -{\"language\"}=\"C\";\n$PG{\"poa\"}{\"language2\"}\ -=\"C\";\n$PG{\"poa\"}{\"source\"}=\"http://downloa\ -ds.sourceforge.net/poamsa/poaV2.tar.gz\";\n$PG{\"p\ -oa\"}{\"DIR\"}=\"/usr/share/\";\n$PG{\"poa\"}{\"FI\ -LE1\"}=\"blosum80.mat\";\n$PG{\"poa\"}{\"mode\"}=\\ -"mcoffee\";\n$PG{\"poa\"}{\"binary\"}=\"poa\";\n$P\ -G{\"probcons\"}{\"4_TCOFFEE\"}=\"PROBCONS\";\n$PG{\ -\"probcons\"}{\"type\"}=\"sequence_multiple_aligne\ -r\";\n$PG{\"probcons\"}{\"ADDRESS\"}=\"http://prob\ -cons.stanford.edu/\";\n$PG{\"probcons\"}{\"languag\ -e2\"}=\"CXX\";\n$PG{\"probcons\"}{\"language\"}=\"\ -C++\";\n$PG{\"probcons\"}{\"source\"}=\"http://pro\ -bcons.stanford.edu/probcons_v1_12.tar.gz\";\n$PG{\\ -"probcons\"}{\"mode\"}=\"mcoffee\";\n$PG{\"probcon\ -s\"}{\"binary\"}=\"probcons\";\n$PG{\"mafft\"}{\"4\ -_TCOFFEE\"}=\"MAFFT\";\n$PG{\"mafft\"}{\"type\"}=\\ -"sequence_multiple_aligner\";\n$PG{\"mafft\"}{\"AD\ -DRESS\"}=\"http://align.bmr.kyushu-u.ac.jp/mafft/o\ -nline/server/\";\n$PG{\"mafft\"}{\"language\"}=\"C\ -\";\n$PG{\"mafft\"}{\"language\"}=\"C\";\n$PG{\"ma\ -fft\"}{\"source\"}=\"http://align.bmr.kyushu-u.ac.\ -jp/mafft/software/mafft-6.603-with-extensions-src.\ -tgz\";\n$PG{\"mafft\"}{\"windows\"}=\"http://align\ -.bmr.kyushu-u.ac.jp/mafft/software/mafft-6.603-min\ -gw.tar\";\n$PG{\"mafft\"}{\"mode\"}=\"mcoffee,rcof\ -fee\";\n$PG{\"mafft\"}{\"binary\"}=\"mafft.tar.gz\\ -";\n$PG{\"muscle\"}{\"4_TCOFFEE\"}=\"MUSCLE\";\n$P\ -G{\"muscle\"}{\"type\"}=\"sequence_multiple_aligne\ -r\";\n$PG{\"muscle\"}{\"ADDRESS\"}=\"http://www.dr\ -ive5.com/muscle/\";\n$PG{\"muscle\"}{\"language\"}\ -=\"C++\";\n$PG{\"muscle\"}{\"language2\"}=\"GPP\";\ -\n$PG{\"muscle\"}{\"source\"}=\"http://www.drive5.\ -com/muscle/downloads3.6/muscle3.6_src.tar.gz\";\n$\ -PG{\"muscle\"}{\"windows\"}=\"http://www.drive5.co\ -m/muscle/downloads3.6/muscle3.6_win32.zip\";\n$PG{\ -\"muscle\"}{\"linux\"}=\"http://www.drive5.com/mus\ -cle/downloads3.6/muscle3.6_linux_ia32.tar.gz\";\n$\ -PG{\"muscle\"}{\"mode\"}=\"mcoffee,rcoffee\";\n$PG\ -{\"pcma\"}{\"4_TCOFFEE\"}=\"PCMA\";\n$PG{\"pcma\"}\ -{\"type\"}=\"sequence_multiple_aligner\";\n$PG{\"p\ -cma\"}{\"ADDRESS\"}=\"ftp://iole.swmed.edu/pub/PCM\ -A/\";\n$PG{\"pcma\"}{\"language\"}=\"C\";\n$PG{\"p\ -cma\"}{\"language2\"}=\"C\";\n$PG{\"pcma\"}{\"sour\ -ce\"}=\"ftp://iole.swmed.edu/pub/PCMA/pcma.tar.gz\\ -";\n$PG{\"pcma\"}{\"mode\"}=\"mcoffee\";\n$PG{\"ka\ -lign\"}{\"4_TCOFFEE\"}=\"KALIGN\";\n$PG{\"kalign\"\ -}{\"type\"}=\"sequence_multiple_aligner\";\n$PG{\"\ -kalign\"}{\"ADDRESS\"}=\"http://msa.cgb.ki.se\";\n\ -$PG{\"kalign\"}{\"language\"}=\"C\";\n$PG{\"kalign\ -\"}{\"language2\"}=\"C\";\n$PG{\"kalign\"}{\"sourc\ -e\"}=\"http://msa.cgb.ki.se/downloads/kalign/curre\ -nt.tar.gz\";\n$PG{\"kalign\"}{\"mode\"}=\"mcoffee\\ -";\n$PG{\"amap\"}{\"4_TCOFFEE\"}=\"AMAP\";\n$PG{\"\ -amap\"}{\"type\"}=\"sequence_multiple_aligner\";\n\ -$PG{\"amap\"}{\"ADDRESS\"}=\"http://bio.math.berke\ -ley.edu/amap/\";\n$PG{\"amap\"}{\"language\"}=\"C+\ -+\";\n$PG{\"amap\"}{\"language2\"}=\"CXX\";\n$PG{\\ -"amap\"}{\"source\"}=\"http://baboon.math.berkeley\ -.edu/amap/download/amap.2.2.tar.gz\";\n$PG{\"amap\\ -"}{\"mode\"}=\"mcoffee\";\n$PG{\"proda\"}{\"4_TCOF\ -FEE\"}=\"PRODA\";\n$PG{\"proda\"}{\"type\"}=\"sequ\ -ence_multiple_aligner\";\n$PG{\"proda\"}{\"ADDRESS\ -\"}=\"http://proda.stanford.edu\";\n$PG{\"proda\"}\ -{\"language\"}=\"C++\";\n$PG{\"proda\"}{\"language\ -2\"}=\"CXX\";\n$PG{\"proda\"}{\"source\"}=\"http:/\ -/proda.stanford.edu/proda_1_0.tar.gz\";\n$PG{\"pro\ -da\"}{\"mode\"}=\"mcoffee\";\n$PG{\"prank\"}{\"4_T\ -COFFEE\"}=\"PRANK\";\n$PG{\"prank\"}{\"type\"}=\"s\ -equence_multiple_aligner\";\n$PG{\"prank\"}{\"ADDR\ -ESS\"}=\"http://www.ebi.ac.uk/goldman-srv/prank/\"\ -;\n$PG{\"prank\"}{\"language\"}=\"C++\";\n$PG{\"pr\ -ank\"}{\"language2\"}=\"CXX\";\n$PG{\"prank\"}{\"s\ -ource\"}=\"http://www.ebi.ac.uk/goldman-srv/prank/\ -src/old/prank.src.081202.tgz\";\n$PG{\"prank\"}{\"\ -mode\"}=\"mcoffee\";\n$PG{\"sap\"}{\"4_TCOFFEE\"}=\ -\"SAP\";\n$PG{\"sap\"}{\"type\"}=\"structure_pairw\ -ise_aligner\";\n$PG{\"sap\"}{\"ADDRESS\"}=\"http:/\ -/mathbio.nimr.mrc.ac.uk/wiki/Software\";\n$PG{\"sa\ -p\"}{\"language\"}=\"C\";\n$PG{\"sap\"}{\"language\ -2\"}=\"C\";\n$PG{\"sap\"}{\"source\"}=\"http://www\ -.tcoffee.org/Packages/sap_distribution_TCC_0.6.tar\ -.gz\";\n$PG{\"sap\"}{\"mode\"}=\"expresso,3dcoffee\ -\";\n$PG{\"TMalign\"}{\"4_TCOFFEE\"}=\"TMALIGN\";\\ -n$PG{\"TMalign\"}{\"type\"}=\"structure_pairwise_a\ -ligner\";\n$PG{\"TMalign\"}{\"ADDRESS\"}=\"http://\ -zhang.bioinformatics.ku.edu/TM-align/TMalign.f\";\\ -n$PG{\"TMalign\"}{\"language\"}=\"Fortran\";\n$PG{\ -\"TMalign\"}{\"language2\"}=\"Fortran\";\n$PG{\"TM\ -align\"}{\"source\"}=\"http://zhang.bioinformatics\ -.ku.edu/TM-align/TMalign.f\";\n$PG{\"TMalign\"}{\"\ -linux\"}=\"http://zhang.bioinformatics.ku.edu/TM-a\ -lign/TMalign_32.gz\";\n$PG{\"TMalign\"}{\"mode\"}=\ -\"expresso,3dcoffee\";\n$PG{\"mustang\"}{\"4_TCOFF\ -EE\"}=\"MUSTANG\";\n$PG{\"mustang\"}{\"type\"}=\"s\ -tructure_pairwise_aligner\";\n$PG{\"mustang\"}{\"A\ -DDRESS\"}=\"http://www.cs.mu.oz.au/~arun/mustang\"\ -;\n$PG{\"mustang\"}{\"language\"}=\"C++\";\n$PG{\"\ -mustang\"}{\"language2\"}=\"CXX\";\n$PG{\"mustang\\ -"}{\"source\"}=\"http://www.cs.mu.oz.au/~arun/must\ -ang/mustang_v.3.tgz\";\n$PG{\"mustang\"}{\"mode\"}\ -=\"expresso,3dcoffee\";\n$PG{\"lsqman\"}{\"4_TCOFF\ -EE\"}=\"LSQMAN\";\n$PG{\"lsqman\"}{\"type\"}=\"str\ -ucture_pairwise_aligner\";\n$PG{\"lsqman\"}{\"ADDR\ -ESS\"}=\"empty\";\n$PG{\"lsqman\"}{\"language\"}=\\ -"empty\";\n$PG{\"lsqman\"}{\"language2\"}=\"empty\\ -";\n$PG{\"lsqman\"}{\"source\"}=\"empty\";\n$PG{\"\ -lsqman\"}{\"update_action\"}=\"never\";\n$PG{\"lsq\ -man\"}{\"mode\"}=\"expresso,3dcoffee\";\n$PG{\"ali\ -gn_pdb\"}{\"4_TCOFFEE\"}=\"ALIGN_PDB\";\n$PG{\"ali\ -gn_pdb\"}{\"type\"}=\"structure_pairwise_aligner\"\ -;\n$PG{\"align_pdb\"}{\"ADDRESS\"}=\"empty\";\n$PG\ -{\"align_pdb\"}{\"language\"}=\"empty\";\n$PG{\"al\ -ign_pdb\"}{\"language2\"}=\"empty\";\n$PG{\"align_\ -pdb\"}{\"source\"}=\"empty\";\n$PG{\"align_pdb\"}{\ -\"update_action\"}=\"never\";\n$PG{\"align_pdb\"}{\ -\"mode\"}=\"expresso,3dcoffee\";\n$PG{\"fugueali\"\ -}{\"4_TCOFFEE\"}=\"FUGUE\";\n$PG{\"fugueali\"}{\"t\ -ype\"}=\"structure_pairwise_aligner\";\n$PG{\"fugu\ -eali\"}{\"ADDRESS\"}=\"http://www-cryst.bioc.cam.a\ -c.uk/fugue/download.html\";\n$PG{\"fugueali\"}{\"l\ -anguage\"}=\"empty\";\n$PG{\"fugueali\"}{\"languag\ -e2\"}=\"empty\";\n$PG{\"fugueali\"}{\"source\"}=\"\ -empty\";\n$PG{\"fugueali\"}{\"update_action\"}=\"n\ -ever\";\n$PG{\"fugueali\"}{\"mode\"}=\"expresso,3d\ -coffee\";\n$PG{\"dalilite.pl\"}{\"4_TCOFFEE\"}=\"D\ -ALILITEc\";\n$PG{\"dalilite.pl\"}{\"type\"}=\"stru\ -cture_pairwise_aligner\";\n$PG{\"dalilite.pl\"}{\"\ -ADDRESS\"}=\"built_in\";\n$PG{\"dalilite.pl\"}{\"A\ -DDRESS2\"}=\"http://www.ebi.ac.uk/Tools/webservice\ -s/services/dalilite\";\n$PG{\"dalilite.pl\"}{\"lan\ -guage\"}=\"Perl\";\n$PG{\"dalilite.pl\"}{\"languag\ -e2\"}=\"Perl\";\n$PG{\"dalilite.pl\"}{\"source\"}=\ -\"empty\";\n$PG{\"dalilite.pl\"}{\"update_action\"\ -}=\"never\";\n$PG{\"dalilite.pl\"}{\"mode\"}=\"exp\ -resso,3dcoffee\";\n$PG{\"probconsRNA\"}{\"4_TCOFFE\ -E\"}=\"PROBCONSRNA\";\n$PG{\"probconsRNA\"}{\"type\ -\"}=\"RNA_multiple_aligner\";\n$PG{\"probconsRNA\"\ -}{\"ADDRESS\"}=\"http://probcons.stanford.edu/\";\\ -n$PG{\"probconsRNA\"}{\"language\"}=\"C++\";\n$PG{\ -\"probconsRNA\"}{\"language2\"}=\"CXX\";\n$PG{\"pr\ -obconsRNA\"}{\"source\"}=\"http://probcons.stanfor\ -d.edu/probconsRNA.tar.gz\";\n$PG{\"probconsRNA\"}{\ -\"mode\"}=\"mcoffee,rcoffee\";\n$PG{\"sfold\"}{\"4\ -_TCOFFEE\"}=\"CONSAN\";\n$PG{\"sfold\"}{\"type\"}=\ -\"RNA_pairwise_aligner\";\n$PG{\"sfold\"}{\"ADDRES\ -S\"}=\"http://selab.janelia.org/software/consan/\"\ -;\n$PG{\"sfold\"}{\"language\"}=\"empty\";\n$PG{\"\ -sfold\"}{\"language2\"}=\"empty\";\n$PG{\"sfold\"}\ -{\"source\"}=\"empty\";\n$PG{\"sfold\"}{\"update_a\ -ction\"}=\"never\";\n$PG{\"sfold\"}{\"mode\"}=\"rc\ -offee\";\n$PG{\"RNAplfold\"}{\"4_TCOFFEE\"}=\"RNAP\ -LFOLD\";\n$PG{\"RNAplfold\"}{\"type\"}=\"RNA_secon\ -darystructure_predictor\";\n$PG{\"RNAplfold\"}{\"A\ -DDRESS\"}=\"http://www.tbi.univie.ac.at/~ivo/RNA/\\ -";\n$PG{\"RNAplfold\"}{\"language\"}=\"C\";\n$PG{\\ -"RNAplfold\"}{\"language2\"}=\"C\";\n$PG{\"RNAplfo\ -ld\"}{\"source\"}=\"http://www.tbi.univie.ac.at/~i\ -vo/RNA/ViennaRNA-1.7.2.tar.gz\";\n$PG{\"RNAplfold\\ -"}{\"mode\"}=\"rcoffee\";\n$PG{\"hmmtop\"}{\"4_TCO\ -FFEE\"}=\"HMMTOP\";\n$PG{\"hmmtop\"}{\"type\"}=\"p\ -rotein_secondarystructure_predictor\";\n$PG{\"hmmt\ -op\"}{\"ADDRESS\"}=\"www.enzim.hu/hmmtop/\";\n$PG{\ -\"hmmtop\"}{\"language\"}=\"C\";\n$PG{\"hmmtop\"}{\ -\"language2\"}=\"C\";\n$PG{\"hmmtop\"}{\"source\"}\ -=\"empty\";\n$PG{\"hmmtop\"}{\"update_action\"}=\"\ -never\";\n$PG{\"hmmtop\"}{\"mode\"}=\"tcoffee\";\n\ -$PG{\"gorIV\"}{\"4_TCOFFEE\"}=\"GOR4\";\n$PG{\"gor\ -IV\"}{\"type\"}=\"protein_secondarystructure_predi\ -ctor\";\n$PG{\"gorIV\"}{\"ADDRESS\"}=\"http://mig.\ -jouy.inra.fr/logiciels/gorIV/\";\n$PG{\"gorIV\"}{\\ -"language\"}=\"C\";\n$PG{\"gorIV\"}{\"language2\"}\ -=\"C\";\n$PG{\"gorIV\"}{\"source\"}=\"http://mig.j\ -ouy.inra.fr/logiciels/gorIV/GOR_IV.tar.gz\";\n$PG{\ -\"gorIV\"}{\"update_action\"}=\"never\";\n$PG{\"go\ -rIV\"}{\"mode\"}=\"tcoffee\";\n$PG{\"wublast.pl\"}\ -{\"4_TCOFFEE\"}=\"EBIWUBLASTc\";\n$PG{\"wublast.pl\ -\"}{\"type\"}=\"protein_homology_predictor\";\n$PG\ -{\"wublast.pl\"}{\"ADDRESS\"}=\"built_in\";\n$PG{\\ -"wublast.pl\"}{\"ADDRESS2\"}=\"http://www.ebi.ac.u\ -k/Tools/webservices/services/wublast\";\n$PG{\"wub\ -last.pl\"}{\"language\"}=\"Perl\";\n$PG{\"wublast.\ -pl\"}{\"language2\"}=\"Perl\";\n$PG{\"wublast.pl\"\ -}{\"source\"}=\"empty\";\n$PG{\"wublast.pl\"}{\"up\ -date_action\"}=\"never\";\n$PG{\"wublast.pl\"}{\"m\ -ode\"}=\"psicoffee,expresso,3dcoffee\";\n$PG{\"bla\ -stpgp.pl\"}{\"4_TCOFFEE\"}=\"EBIBLASTPGPc\";\n$PG{\ -\"blastpgp.pl\"}{\"type\"}=\"protein_homology_pred\ -ictor\";\n$PG{\"blastpgp.pl\"}{\"ADDRESS\"}=\"buil\ -t_in\";\n$PG{\"blastpgp.pl\"}{\"ADDRESS2\"}=\"http\ -://www.ebi.ac.uk/Tools/webservices/services/blastp\ -gp\";\n$PG{\"blastpgp.pl\"}{\"language\"}=\"Perl\"\ -;\n$PG{\"blastpgp.pl\"}{\"language2\"}=\"Perl\";\n\ -$PG{\"blastpgp.pl\"}{\"source\"}=\"empty\";\n$PG{\\ -"blastpgp.pl\"}{\"update_action\"}=\"never\";\n$PG\ -{\"blastpgp.pl\"}{\"mode\"}=\"psicoffee,expresso,3\ -dcoffee\";\n$PG{\"blastcl3\"}{\"4_TCOFFEE\"}=\"NCB\ -IWEBBLAST\";\n$PG{\"blastcl3\"}{\"type\"}=\"protei\ -n_homology_predictor\";\n$PG{\"blastcl3\"}{\"ADDRE\ -SS\"}=\"ftp://ftp.ncbi.nih.gov/blast/executables/L\ -ATEST\";\n$PG{\"blastcl3\"}{\"language\"}=\"C\";\n\ -$PG{\"blastcl3\"}{\"language2\"}=\"C\";\n$PG{\"bla\ -stcl3\"}{\"source\"}=\"empty\";\n$PG{\"blastcl3\"}\ -{\"update_action\"}=\"never\";\n$PG{\"blastcl3\"}{\ -\"mode\"}=\"psicoffee,expresso,3dcoffee\";\n$PG{\"\ -blastpgp\"}{\"4_TCOFFEE\"}=\"NCBIBLAST\";\n$PG{\"b\ -lastpgp\"}{\"type\"}=\"protein_homology_predictor\\ -";\n$PG{\"blastpgp\"}{\"ADDRESS\"}=\"ftp://ftp.ncb\ -i.nih.gov/blast/executables/LATEST\";\n$PG{\"blast\ -pgp\"}{\"language\"}=\"C\";\n$PG{\"blastpgp\"}{\"l\ -anguage2\"}=\"C\";\n$PG{\"blastpgp\"}{\"source\"}=\ -\"empty\";\n$PG{\"blastpgp\"}{\"update_action\"}=\\ -"never\";\n$PG{\"blastpgp\"}{\"mode\"}=\"psicoffee\ -,expresso,3dcoffee\";\n$PG{\"SOAP::Lite\"}{\"4_TCO\ -FFEE\"}=\"SOAPLITE\";\n$PG{\"SOAP::Lite\"}{\"type\\ -"}=\"library\";\n$PG{\"SOAP::Lite\"}{\"ADDRESS\"}=\ -\"http://cpansearch.perl.org/src/MKUTTER/SOAP-Lite\ --0.710.08/Makefile.PL\";\n$PG{\"SOAP::Lite\"}{\"la\ -nguage\"}=\"Perl\";\n$PG{\"SOAP::Lite\"}{\"languag\ -e2\"}=\"Perl\";\n$PG{\"SOAP::Lite\"}{\"source\"}=\\ -"empty\";\n$PG{\"SOAP::Lite\"}{\"mode\"}=\"psicoff\ -ee,expresso,3dcoffee\";\n$MODE{\"tcoffee\"}{\"name\ -\"}=\"tcoffee\";\n$MODE{\"rcoffee\"}{\"name\"}=\"r\ -coffee\";\n$MODE{\"3dcoffee\"}{\"name\"}=\"3dcoffe\ -e\";\n$MODE{\"mcoffee\"}{\"name\"}=\"mcoffee\";\n$\ -MODE{\"expresso\"}{\"name\"}=\"expresso\";\n\n\n$P\ -G{C}{compiler}=\"gcc\";\n$PG{C}{compiler_flag}=\"C\ -C\";\n$PG{C}{options}=\"\";\n$PG{C}{options_flag}=\ -\"CFLAGS\";\n$PG{C}{type}=\"compiler\";\n\n$PG{\"C\ -XX\"}{compiler}=\"g++\";\n$PG{\"CXX\"}{compiler_fl\ -ag}=\"CXX\";\n$PG{\"CXX\"}{options}=\"\";\n$PG{\"C\ -XX\"}{options_flag}=\"CXXFLAGS\";\n$PG{CXX}{type}=\ -\"compiler\";\n\n$PG{\"CPP\"}{compiler}=\"g++\";\n\ -$PG{\"CPP\"}{compiler_flag}=\"CPP\";\n$PG{\"CPP\"}\ -{options}=\"\";\n$PG{\"CPP\"}{options_flag}=\"CPPF\ -LAGS\";\n$PG{CPP}{type}=\"compiler\";\n\n$PG{\"GPP\ -\"}{compiler}=\"g++\";\n$PG{\"GPP\"}{compiler_flag\ -}=\"GPP\";\n$PG{\"GPP\"}{options}=\"\";\n$PG{\"GPP\ -\"}{options_flag}=\"CFLAGS\";\n$PG{GPP}{type}=\"co\ -mpiler\";\n\n$PG{Fortran}{compiler}=\"g77\";\n$PG{\ -Fortran}{compiler_flag}=\"FCC\";\n$PG{Fortran}{typ\ -e}=\"compiler\";\n\n$PG{Perl}{compiler}=\"CPAN\";\\ -n$PG{Perl}{type}=\"compiler\";\n\n$SUPPORTED_OS{ma\ -cox}=\"Macintosh\";\n$SUPPORTED_OS{linux}=\"Linux\\ -";\n$SUPPORTED_OS{windows}=\"Cygwin\";\n\n\n\n$MOD\ -E{t_coffee}{description}=\" for regular multiple s\ -equence alignments\";\n$MODE{rcoffee} {description\ -}=\" for RNA multiple sequence alignments\";\n\n$M\ -ODE{psicoffee} {description}=\" for Homology Exten\ -ded multiple sequence alignments\";\n$MODE{express\ -o}{description}=\" for very accurate structure bas\ -ed multiple sequence alignments\";\n$MODE{\"3dcoff\ -ee\"}{description}=\" for multiple structure align\ -ments\";\n$MODE{mcoffee} {description}=\" for comb\ -ining alternative multiple sequence alignment pack\ -ages\\n------- into a unique meta-package. The ins\ -taller will upload several MSA packages and compil\ -e them\\n\n\";\n\n\n&post_process_PG();\nreturn;\n\ -}\n\nsub post_process_PG\n {\n my $p;\n \n \ - %PG=&name2dname (%PG);\n %MODE=&name2dname(%\ -MODE);\n foreach $p (keys(%PG)){if ( $PG{$p}{ty\ -pe} eq \"compiler\"){$PG{$p}{update_action}=\"neve\ -r\";}}\n \n }\n\nsub name2dname\n {\n my (\ -%L)=(@_);\n my ($l, $ml);\n \n foreach my\ - $pg (keys(%L))\n {\n $l=length ($pg);\n if (\ - $l>$ml){$ml=$l;}\n }\n $ml+=1;\n forea\ -ch my $pg (keys(%L))\n {\n my $name;\n $l=$ml\ --length ($pg);\n $name=$pg;\n for ( $b=0; $b<$l; $\ -b++)\n {\n $name .=\" \";\n }\n $L{$pg}{dn\ -ame}=$name;\n }\n return %L;\n }\n\nsub e\ -nv_file2putenv\n {\n my $f=@_[0];\n my $F=n\ -ew FileHandle;\n my $n;\n \n open ($F, \"\ -$f\");\n while (<$F>)\n {\n my $line=$_;\n\ - my($var, $value)=($_=~/(\\S+)\\=(\\S*)/);\n $ENV{\ -$var}=$value;\n $ENV_SET{$var}=1;\n $n++;\n }\ -\n close ($F);\n return $n;\n }\n\n","use E\ -nv;\nuse Cwd;\n@suffix=(\"tmp\", \"temp\", \"cache\ -\", \"t_coffee\", \"core\", \"tcoffee\");\n\nif ($\ -#ARGV==-1)\n {\n print \"clean_cache.pl -file \ - -dir= -size=\\n0: unlimited -1 always.\\nWill only clean dir\ -ectories matching:[\";\n foreach $k(@suffix){pr\ -int \"*$k* \";}\n print \"]\\n\";\n exit (EX\ -IT_FAILURE);\n }\n\n$cl=join (\" \",@ARGV);\nif (\ -($cl=~/\\-no_action/))\n {\n exit (EXIT_SUCCES\ -S);\n }\n\nif (($cl=~/\\-debug/))\n {\n $DEBU\ -G=1;\n }\nelse\n {\n $DEBUG=0;\n }\n\nif (($\ -cl=~/\\-dir=(\\S+)/))\n {\n $dir=$1;\n }\nels\ -e\n {\n $dir=\"./\";\n }\n\nif ($cl=~/\\-file\ -=(\\S+)/)\n {\n $file=$1;\n }\nelse\n {\n \ - $file=0;\n }\n\nif ($cl=~/\\-size=(\\S+)/)\n {\\ -n $max_size=$1;\n }\nelse\n {\n $max_size=\ -0;#unlimited\n }\nif ($cl=~/\\-force/)\n {\n \ -$force=1;\n }\nelse\n {\n $force=0;\n }\n\ni\ -f ($cl=~/\\-age=(\\S+)/)\n {\n $max_age=$1;\n \ - }\nelse\n {\n $max_age=0;#unlimited\n }\n\n$\ -max_size*=1000000;\nif ( ! -d $dir)\n {\n prin\ -t STDERR \"\\nCannot process $dir: does not exist \ -\\n\";\n exit (EXIT_FAILURE);\n }\n\nif ( !($d\ -ir=~/^\\//))\n {\n $base=cwd();\n $dir=\"$b\ -ase/$dir\";\n }\n\n$proceed=0;\nforeach $s (@suff\ -ix)\n {\n \n if (($dir=~/$s/)){$proceed=1;}\ -\n $s=uc ($s);\n if (($dir=~/$s/)){$proceed=\ -1;}\n }\nif ( $proceed==0)\n {\n print STDERR\ - \"Clean_cache.pl can only clean directories whose\ - absolute path name contains the following strings\ -:\";\n foreach $w (@suffix) {print STDERR \"$w \ -\";$w=lc($w); print STDERR \"$w \";}\n print ST\ -DERR \"\\nCannot process $dir\\n\";\n exit (EXI\ -T_FAILURE);\n }\n\n$name_file=\"$dir/name_file.tx\ -t\";\n$size_file=\"$dir/size_file.txt\";\nif ( $fo\ -rce){&create_ref_file ($dir,$name_file,$size_file)\ -;}\nif ($file){&add_file ($dir, $name_file, $size_\ -file, $file);}\n&clean_dir ($dir, $name_file, $siz\ -e_file, $max_size,$max_age);\nexit (EXIT_SUCCESS);\ -\n\nsub clean_dir \n {\n my ($dir, $name_file,\ - $size_file, $max_size, $max_age)=@_;\n my ($to\ -t_size, $size, $f, $s);\n\n \n $tot_size=&get_\ -tot_size ($dir, $name_file, $size_file);\n\n if\ - ( $tot_size<=$max_size){return ;}\n else {$max\ -_size/=2;}\n \n #recreate the name file in c\ -ase some temprary files have not been properly reg\ -istered\n &create_ref_file ($dir, $name_file, $\ -size_file, $max_age);\n \n $new_name_file=&vtm\ -pnam();\n open (R, \"$name_file\");\n open (\ -W, \">$new_name_file\");\n while ()\n {\ -\n my $line=$_;\n \n ($f, $s)=($line=~/(\\S+) (\\S\ -+)/);\n if ( !($f=~/\\S/)){next;}\n \n elsif ($max\ -_size && $tot_size>=$max_size && !($f=~/name_file/\ -))\n {\n remove ( \"$dir/$f\");\n $tot_s\ -ize-=$s;\n }\n elsif ( $max_age && -M(\"$dir/$f\\ -")>=$max_age)\n {\n remove ( \"$dir/$f\");\n\ - $tot_size-=$s;\n }\n else\n {\n print\ - W \"$f $s\\n\";\n }\n }\n close (R);\n \ - close (W);\n open (F, \">$size_file\");\n \ - print F \"$tot_size\";\n if ( -e $new_name_fil\ -e){`mv $new_name_file $name_file`;}\n close (F)\ -;\n }\nsub get_tot_size\n {\n my ($dir, $name\ -_file, $size_file)=@_;\n my $size;\n \n i\ -f ( !-d $dir){return 0;}\n if ( !-e $name_file)\ -\n {\n \n &create_ref_file ($dir, $name_file,\ - $size_file);\n }\n open (F, \"$size_file\\ -");\n $size=;\n close (F);\n chomp ($s\ -ize);\n return $size;\n }\nsub size \n {\n \ - my $f=@_[0];\n\n if ( !-d $f){return -s($f);}\\ -n else {return &dir2size($f);}\n }\nsub dir2si\ -ze\n {\n my $d=@_[0];\n my ($s, $f);\n \\ -n if ( !-d $d) {return 0;}\n \n foreach $\ -f (&dir2list ($d))\n {\n if ( -d $f){$s+=&dir\ -2size (\"$d/$f\");}\n else {$s+= -s \"$dir/$f\";}\\ -n }\n return $s;\n }\n\nsub remove \n {\\ -n my $file=@_[0];\n my ($f);\n \n debu\ -g_print( \"--- $file ---\\n\");\n if (($file eq\ - \".\") || ($file eq \"..\") || ($file=~/\\*/)){re\ -turn EXIT_FAILURE;}\n elsif ( !-d $file)\n \ - {\n debug_print (\"unlink $file\\n\");\n if (-e $\ -file){unlink ($file);}\n }\n elsif ( -d $f\ -ile)\n {\n debug_print (\"++++++++ $file ++++\ -+++\\n\");\n foreach $f (&dir2list($file))\n {\n\ - &remove (\"$file/$f\");\n }\n debug_print (\ -\"rmdir $file\\n\");\n rmdir $file;\n }\n \ -else\n {\n debug_print (\"????????? $file ???\ -?????\\n\");\n }\n return EXIT_SUCCESS;\n \ - }\n\nsub dir2list\n {\n my $dir=@_[0];\n m\ -y (@list1, @list2,@list3, $l);\n\n opendir (DIR\ -,$dir);\n @list1=readdir (DIR);\n closedir (\ -DIR);\n \n foreach $l (@list1)\n {\n if\ - ( $l ne \".\" && $l ne \"..\"){@list2=(@list2, $l\ -);}\n }\n @list3 = sort { (-M \"$dir/$list\ -2[$b]\") <=> (-M \"$dir/$list2[$a]\")} @list2;\n \ - return @list3;\n \n }\n\nsub debug_print\n \ -{\n \n if ($DEBUG==1){print @_;}\n \n }\\ -nsub create_ref_file\n {\n my ($dir,$name_file\ -,$size_file)=@_;\n my ($f, $s, $tot_size, @l);\\ -n \n if ( !-d $dir){return;}\n \n @l=&\ -dir2list ($dir);\n open (F, \">$name_file\");\n\ - foreach $f (@l)\n {\n $s=&size(\"$dir/$f\\ -");\n $tot_size+=$s;\n print F \"$f $s\\n\";\n \ - }\n &myecho ($tot_size, \">$size_file\");\n \ - close (F);\n }\nsub add_file \n {\n my ($di\ -r,$name_file,$size_file,$file)=@_;\n my ($s, $t\ -ot_size);\n \n if ( !-d $dir) {return;}\n \ - if ( !-e \"$dir/$file\" ) {return;}\n if ( !\ --e $name_file){&create_ref_file ($dir,$name_file,$\ -size_file);}\n \n $s=&size(\"$dir/$file\ -\");\n open (F, \">>$name_file\");\n print F\ - \"$file\\n\";\n close (F);\n\n $tot_size=&g\ -et_tot_size ($dir,$name_file,$size_file);\n $to\ -t_size+=$s;\n &myecho ($tot_size, \">$size_file\ -\");\n \n }\n \nsub myecho\n {\n my ($stri\ -ng, $file)=@_;\n open (ECHO, $file) || die;\n \ - print ECHO \"$string\";\n close (ECHO);\n }\\ -n \n \n \nsub vtmpnam\n {\n my $tmp_file_n\ -ame;\n $tmp_name_counter++;\n $tmp_file_name\ -=\"tmp_file_for_clean_cache_pdb$$.$tmp_name_counte\ -r\";\n $tmp_file_list[$ntmp_file++]=$tmp_file_n\ -ame;\n if ( -e $tmp_file_name) {return &vtmpnam\ - ();}\n else {return $tmp_file_name;}\n }\n","\ -\n$t_coffee=\"t_coffee\";\n\nforeach $value ( @ARG\ -V)\n {\n $seq_file=$seq_file.\" \".$value;\n \ -}\n\n$name=$ARGV[0];\n$name=~s/\\.[^\\.]*$//;\n$li\ -b_name=\"$name.mocca_lib\";\n$type=`t_coffee $seq_\ -file -get_type -quiet`;\nchop ($type);\n\nif ( $ty\ -pe eq \"PROTEIN\"){$lib_mode=\"lalign_rs_s_pair -l\ -align_n_top 20\";}\nelsif ( $type eq\"DNA\"){$lib_\ -mode=\"lalign_rs_s_dna_pair -lalign_n_top 40\";}\n\ -\nif ( !(-e $lib_name))\n {\n \n $command=\"$t\ -_coffee -mocca -seq_weight=no -cosmetic_penalty=0 \ --mocca_interactive -in $lib_mode -out_lib $lib_nam\ -e -infile $seq_file\";\n \n }\nelsif ( (-e $lib_\ -name))\n {\n $command=\"$t_coffee -mocca -seq_we\ -ight=no -cosmetic_penalty=0 -mocca_interactive -in\ - $lib_name -infile $seq_file\";\n \n }\n\nsystem\ - ($command);\n\nexit;\n\n","my $WSDL = 'http://www\ -.ebi.ac.uk/Tools/webservices/wsdl/WSDaliLite.wsdl'\ -;\n\nuse SOAP::Lite;\nuse Data::Dumper;\nuse Getop\ -t::Long qw(:config no_ignore_case bundling);\nuse \ -File::Basename;\n\nmy $checkInterval = 5;\n\nmy %p\ -arams=(\n 'async' => '1', # Use async mode and\ - simulate sync mode in client\n );\nGetOptions\ -(\n 'pdb1=s' => \\$params{'sequence1'},\n \ - 'chainid1=s' => \\$params{'chainid1'},\n 'pdb\ -2=s' => \\$params{'sequence2'},\n 'chainid2\ -=s' => \\$params{'chainid2'},\n \"help|h\" => \ -\\$help, # Usage info\n \"async|a\" => \\$asyn\ -c, # Asynchronous submission\n \"polljob\" => \ -\\$polljob, # Get results\n \"status\" => \\$s\ -tatus, # Get status\n \"jobid|j=s\" => \\$jobi\ -d, # JobId\n \"email|S=s\" => \\$params{email}\ -, # E-mail address\n \"trace\" => \\$trace\ -, # SOAP messages\n \"sequence=s\" => \\$sequen\ -ce, # Input PDB\n );\n\nmy $scriptName = basena\ -me($0, ());\nif($help) {\n &usage();\n exit(\ -0);\n}\n\nif($trace) {\n print \"Tracing active\ -\\n\";\n SOAP::Lite->import(+trace => 'debug');\ -\n}\n\nmy $soap = SOAP::Lite\n ->service($WSDL)\ -\n ->on_fault(sub {\n my $soap = shift;\\ -n my $res = shift;\n # Throw an exce\ -ption for all faults\n if(ref($res) eq '') \ -{\n die($res);\n } else {\n \ - die($res->faultstring);\n }\n \ - return new SOAP::SOM;\n }\n );\n\ -\nif( !($polljob || $status) &&\n !( defined($p\ -arams{'sequence1'}) && defined($params{'sequence2'\ -}) )\n ) {\n print STDERR 'Error: bad option\ - combination', \"\\n\";\n &usage();\n exit(1\ -);\n}\nelsif($polljob && defined($jobid)) {\n p\ -rint \"Getting results for job $jobid\\n\";\n g\ -etResults($jobid);\n}\nelsif($status && defined($j\ -obid)) {\n print STDERR \"Getting status for jo\ -b $jobid\\n\";\n my $result = $soap->checkStatu\ -s($jobid);\n print STDOUT \"$result\", \"\\n\";\ -\n if($result eq 'DONE') {\n print STDERR \"To \ -get results: $scriptName --polljob --jobid $jobid\\ -\n\";\n }\n}\nelse {\n if(-f $params{'sequen\ -ce1'}) {\n $params{'sequence1'} = read_file($param\ -s{'sequence1'});\n }\n if(-f $params{'sequen\ -ce2'}) {\n $params{'sequence2'} = read_file($param\ -s{'sequence2'});\n }\n\n my $jobid;\n my \ -$paramsData = SOAP::Data->name('params')->type(map\ -=>\\%params);\n # For SOAP::Lite 0.60 and earli\ -er parameters are passed directly\n if($SOAP::L\ -ite::VERSION eq '0.60' || $SOAP::Lite::VERSION =~ \ -/0\\.[1-5]/) {\n $jobid = $soap->runDaliLit\ -e($paramsData);\n }\n # For SOAP::Lite 0.69 \ -and later parameter handling is different, so pass\ -\n # undef's for templated params, and then pas\ -s the formatted args.\n else {\n $jobid \ -= $soap->runDaliLite(undef,\n $paramsData)\ -;\n }\n\n if (defined($async)) {\n print STD\ -OUT $jobid, \"\\n\";\n print STDERR \"To ch\ -eck status: $scriptName --status --jobid $jobid\\n\ -\";\n } else { # Synchronous mode\n prin\ -t STDERR \"JobId: $jobid\\n\";\n sleep 1;\n\ - getResults($jobid);\n }\n}\n\nsub clien\ -tPoll($) {\n my $jobid = shift;\n my $result\ - = 'PENDING';\n # Check status and wait if not \ -finished\n #print STDERR \"Checking status: $jo\ -bid\\n\";\n while($result eq 'RUNNING' || $resu\ -lt eq 'PENDING') {\n $result = $soap->check\ -Status($jobid);\n print STDERR \"$result\\n\ -\";\n if($result eq 'RUNNING' || $result eq\ - 'PENDING') {\n # Wait before polling a\ -gain.\n sleep $checkInterval;\n \ -}\n }\n}\n\nsub getResults($) {\n $jobid = s\ -hift;\n # Check status, and wait if not finishe\ -d\n clientPoll($jobid);\n # Use JobId if out\ -put file name is not defined\n unless(defined($\ -outfile)) {\n $outfile=$jobid;\n }\n \ -# Get list of data types\n my $resultTypes = $s\ -oap->getResults($jobid);\n # Get the data and w\ -rite it to a file\n if(defined($outformat)) { #\ - Specified data type\n my $selResultType;\n\ - foreach my $resultType (@$resultTypes) {\n\ - if($resultType->{type} eq $outformat) \ -{\n $selResultType = $resultType;\n\ - }\n }\n $res=$soap->poll\ -($jobid, $selResultType->{type});\n write_f\ -ile($outfile.'.'.$selResultType->{ext}, $res);\n \ - } else { # Data types available\n # Write\ - a file for each output type\n for my $resu\ -ltType (@$resultTypes){\n #print \"Gett\ -ing $resultType->{type}\\n\";\n $res=$s\ -oap->poll($jobid, $resultType->{type});\n \ - write_file($outfile.'.'.$resultType->{ext}, $re\ -s);\n }\n }\n}\n\nsub read_file($) {\n \ - my $filename = shift;\n open(FILE, $filename)\ -;\n my $content;\n my $buffer;\n while(sy\ -sread(FILE, $buffer, 1024)) {\n $content.= $buffer\ -;\n }\n close(FILE);\n return $content;\n\ -}\n\nsub write_file($$) {\n my ($tmp,$entity) =\ - @_;\n print STDERR \"Creating result file: \".\ -$tmp.\"\\n\";\n unless(open (FILE, \">$tmp\")) \ -{\n return 0;\n }\n syswrite(FILE, $entity);\ -\n close (FILE);\n return 1;\n}\n\nsub usage\ - {\n print STDERR < [options] pdbFile [--outfile \ -string]\n Returns: saves the results to disk\n\nA\ -synchronous job:\n\n Use this if you want to retr\ -ieve the results at a later time. The results \n \ -are stored for up to 24 hours. \n The asynchronou\ -s submission mode is recommended when users are su\ -bmitting \n batch jobs or large database searches\ - \n Usage: $scriptName --email --a\ -sync [options] pdbFile\n Returns: jobid\n\n Use \ -the jobid to query for the status of the job. \n \ -Usage: $scriptName --status --jobid \n Ret\ -urns: string indicating the status of the job:\n \ - DONE - job has finished\n RUNNING - job is ru\ -nning\n NOT_FOUND - job cannot be found\n ER\ -ROR - the jobs has encountered an error\n\n When \ -done, use the jobid to retrieve the status of the \ -job. \n Usage: $scriptName --polljob --jobid [--outfile string]\n\n[Help]\n\n For more det\ -ailed help information refer to\n http://www.ebi.\ -ac.uk/DaliLite/\nEOF\n;\n}\n","my $WSDL = 'http://\ -www.ebi.ac.uk/Tools/webservices/wsdl/WSWUBlast.wsd\ -l';\n\nuse strict;\nuse SOAP::Lite;\nuse Getopt::L\ -ong qw(:config no_ignore_case bundling);\nuse File\ -::Basename;\n\nmy $checkInterval = 15;\n\nmy $numO\ -pts = scalar(@ARGV);\nmy ($outfile, $outformat, $h\ -elp, $async, $polljob, $status, $ids, $jobid, $tra\ -ce, $sequence);\nmy %params= ( # Defaults\n \ -'async' => 1, # Force into async mode\n 'exp\ -' => 10.0, # E-value threshold\n 'numal' => \ -50, # Maximum number of alignments\n 'scores\ -' => 100, # Maximum number of scores\n \ -);\nGetOptions( # Map the options into variables\n\ - \"program|p=s\" => \\$params{program}, # B\ -LAST program\n \"database|D=s\" => \\$params\ -{database}, # Search database\n \"matrix|m=s\" \ - => \\$params{matrix}, # Scoring matrix\n \\ -"exp|E=f\" => \\$params{exp}, # E-value th\ -reshold\n \"echofilter|e\" => \\$params{echo\ -filter}, # Display filtered sequence\n \"filter\ -|f=s\" => \\$params{filter}, # Low complexity\ - filter name\n \"alignments|b=i\" => \\$params\ -{numal}, # Number of alignments\n \"scores|s=i\\ -" => \\$params{scores}, # Number of scores\n \ - \"sensitivity|S=s\" => \\$params{sensitivity}, \ -# Search sensitivity\n \"sort|t=s\" => \\\ -$params{sort}, # Sort hits by...\n \"stats|T=s\\ -" => \\$params{stats}, # Scoring statistic t\ -o use\n \"strand|d=s\" => \\$params{strand\ -}, # Strand to use in DNA vs. DNA search\n \"to\ -pcombon|c=i\" => \\$params{topcombon}, # Consist\ -ent sets of HSPs\n \"outfile=s\" => \\$ou\ -tfile, # Output file\n \"outformat|o=s\" => \\ -\$outformat, # Output format\n \"help|h\" \ - => \\$help, # Usage info\n \"async|a\" =\ -> \\$async, # Asynchronous mode\n \"polljob\" \ - => \\$polljob, # Get results\n \"status\" \ - => \\$status, # Get job status\n \"ids\" \ - => \\$ids, # Get ids from result\n \ -\"jobid|j=s\" => \\$jobid, # JobId\n \"em\ -ail=s\" => \\$params{email}, # E-mail addr\ -ess\n \"trace\" => \\$trace, # SOAP t\ -race\n \"sequence=s\" => \\$sequence, # Qu\ -ery sequence\n );\n\nmy $scriptName = basename(\ -$0, ());\nif($help || $numOpts == 0) {\n &usage\ -();\n exit(0);\n}\n\nif($trace){\n print STD\ -ERR \"Tracing active\\n\";\n SOAP::Lite->import\ -(+trace => 'debug');\n}\n\nmy $soap = SOAP::Lite\n\ - ->service($WSDL)\n ->proxy('http://localhos\ -t/',\n #proxy => ['http' => 'http://your.proxy.\ -server/'], # HTTP proxy\n timeout => 600, # HTT\ -P connection timeout\n )\n ->on_fault(sub { \ -# SOAP fault handler\n my $soap = shift;\n \ - my $res = shift;\n # Throw an except\ -ion for all faults\n if(ref($res) eq '') {\\ -n die($res);\n } else {\n \ - die($res->faultstring);\n }\n r\ -eturn new SOAP::SOM;\n }\n );\n\n\ -if( !($polljob || $status || $ids) &&\n !( defi\ -ned($ARGV[0]) || defined($sequence) )\n ) {\n \ - print STDERR 'Error: bad option combination', \"\ -\\n\";\n &usage();\n exit(1);\n}\nelsif($pol\ -ljob && defined($jobid)) {\n print \"Getting re\ -sults for job $jobid\\n\";\n getResults($jobid)\ -;\n}\nelsif($status && defined($jobid)) {\n pri\ -nt STDERR \"Getting status for job $jobid\\n\";\n \ - my $result = $soap->checkStatus($jobid);\n p\ -rint STDOUT \"$result\\n\";\n if($result eq 'DO\ -NE') {\n print STDERR \"To get results: $scriptNam\ -e --polljob --jobid $jobid\\n\";\n }\n} \nelsi\ -f($ids && defined($jobid)) {\n print STDERR \"G\ -etting ids from job $jobid\\n\";\n getIds($jobi\ -d);\n}\nelse {\n # Prepare input data\n my $\ -content;\n my (@contents) = ();\n if(-f $ARG\ -V[0] || $ARGV[0] eq '-') { \n $content={type=>'seq\ -uence',content=>read_file($ARGV[0])}; \n }\n \ - if($sequence) { \n if(-f $sequence || $sequence e\ -q '-') { \n $content={type=>'sequence',content\ -=>read_file($ARGV[0])}; \n } else {\n $content\ -={type=>'sequence',content=>$sequence};\n }\n }\ -\n push @contents, $content;\n\n # Submit th\ -e job\n my $paramsData = SOAP::Data->name('para\ -ms')->type(map=>\\%params);\n my $contentData =\ - SOAP::Data->name('content')->value(\\@contents);\\ -n # For SOAP::Lite 0.60 and earlier parameters \ -are passed directly\n if($SOAP::Lite::VERSION e\ -q '0.60' || $SOAP::Lite::VERSION =~ /0\\.[1-5]/) {\ -\n $jobid = $soap->runWUBlast($paramsData, \ -$contentData);\n }\n # For SOAP::Lite 0.69 a\ -nd later parameter handling is different, so pass\\ -n # undef's for templated params, and then pass\ - the formatted args.\n else {\n $jobid =\ - $soap->runWUBlast(undef, undef,\n $paramsDa\ -ta, $contentData);\n }\n\n # Asynchronous mo\ -de: output jobid and exit.\n if (defined($async\ -)) {\n print STDOUT $jobid, \"\\n\";\n prin\ -t STDERR \"To check status: $scriptName --status -\ --jobid $jobid\\n\";\n }\n # Synchronous mode\ -: try to get results\n else {\n print ST\ -DERR \"JobId: $jobid\\n\";\n sleep 1;\n \ - getResults($jobid);\n }\n}\n\nsub getIds($)\ - {\n my $jobid = shift;\n my $results = $soa\ -p->getIds($jobid);\n for my $result (@$results)\ -{\n print \"$result\\n\";\n }\n}\n\nsub clientP\ -oll($) {\n my $jobid = shift;\n my $result =\ - 'PENDING';\n # Check status and wait if not fi\ -nished\n while($result eq 'RUNNING' || $result \ -eq 'PENDING') {\n $result = $soap->checkSta\ -tus($jobid);\n print STDERR \"$result\\n\";\ -\n if($result eq 'RUNNING' || $result eq 'P\ -ENDING') {\n # Wait before polling agai\ -n.\n sleep $checkInterval;\n }\n\ - }\n}\n\nsub getResults($) {\n my $jobid = s\ -hift;\n my $res;\n # Check status, and wait \ -if not finished\n clientPoll($jobid);\n # Us\ -e JobId if output file name is not defined\n un\ -less(defined($outfile)) {\n $outfile=$jobid\ -;\n }\n # Get list of data types\n my $re\ -sultTypes = $soap->getResults($jobid);\n # Get \ -the data and write it to a file\n if(defined($o\ -utformat)) { # Specified data type\n if($outformat\ - eq 'xml') {$outformat = 'toolxml';}\n if($outform\ -at eq 'txt') {$outformat = 'tooloutput';}\n \ - my $selResultType;\n foreach my $resultTyp\ -e (@$resultTypes) {\n if($resultType->{\ -type} eq $outformat) {\n $selResult\ -Type = $resultType;\n }\n }\n \ - $res=$soap->poll($jobid, $selResultType->{typ\ -e});\n if($outfile eq '-') {\n write_file($ou\ -tfile, $res);\n } else {\n write_file($outfile\ -.'.'.$selResultType->{ext}, $res);\n }\n } else\ - { # Data types available\n # Write a file \ -for each output type\n for my $resultType (\ -@$resultTypes){\n #print STDERR \"Getti\ -ng $resultType->{type}\\n\";\n $res=$so\ -ap->poll($jobid, $resultType->{type});\n if($o\ -utfile eq '-') {\n write_file($outfile, $res);\n \ - } else {\n write_file($outfile.'.'.$resultTyp\ -e->{ext}, $res);\n }\n }\n }\n}\n\ns\ -ub read_file($) {\n my $filename = shift;\n \ -my ($content, $buffer);\n if($filename eq '-') \ -{\n while(sysread(STDIN, $buffer, 1024)) {\n $\ -content .= $buffer;\n }\n }\n else { # File\\ -n open(FILE, $filename) or die \"Error: unable to \ -open input file\";\n while(sysread(FILE, $buffer, \ -1024)) {\n $content .= $buffer;\n }\n close(FI\ -LE);\n }\n return $content;\n}\n\nsub write_\ -file($$) {\n my ($filename, $data) = @_;\n p\ -rint STDERR 'Creating result file: ' . $filename .\ - \"\\n\";\n if($filename eq '-') {\n print STDO\ -UT $data;\n }\n else {\n open(FILE, \">$file\ -name\") or die \"Error: unable to open output file\ -\";\n syswrite(FILE, $data);\n close(FILE);\n }\ -\n}\n\nsub usage {\n print STDERR < [options...] seqFile\ -\n Returns: saves the results to disk\n\nAsynchro\ -nous job:\n\n Use this if you want to retrieve th\ -e results at a later time. The results \n are sto\ -red for up to 24 hours. \n The asynchronous submi\ -ssion mode is recommended when users are submittin\ -g \n batch jobs or large database searches \n Us\ -age: $scriptName --async --email [o\ -ptions...] seqFile\n Returns : jobid\n\n Use the\ - jobid to query for the status of the job. \n Usa\ -ge: $scriptName --status --jobid \n Return\ -s : string indicating the status of the job:\n \ -DONE - job has finished\n RUNNING - job is runn\ -ing\n NOT_FOUND - job cannot be found\n ERRO\ -R - the jobs has encountered an error\n\n When do\ -ne, use the jobid to retrieve the status of the jo\ -b. \n Usage: $scriptName --polljob --jobid [--outfile string]\n Returns: saves the results\ - to disk\n\n[Help]\n\nFor more detailed help infor\ -mation refer to \nhttp://www.ebi.ac.uk/blast2/WU-B\ -last2_Help_frame.html\n \nEOF\n;\n}\n","\nmy $WSDL\ - = 'http://www.ebi.ac.uk/Tools/webservices/wsdl/WS\ -Blastpgp.wsdl';\n\nuse SOAP::Lite;\nuse Getopt::Lo\ -ng qw(:config no_ignore_case bundling);\nuse File:\ -:Basename;\n\nmy $checkInterval = 15;\n\nmy %param\ -s=(\n 'async' => '1', # Use async mode and sim\ -ulate sync mode in client\n );\nGetOptions(\n \ - \"mode=s\" => \\$params{mode}, # Sear\ -ch mode: PSI-Blast or PHI-Blast\n \"database|d=\ -s\" => \\$params{database}, # Database to sear\ -ch\n \"matrix|M=s\" => \\$params{matrix},\ -# Scoring maxtrix\n \"exp|e=f\" => \\$\ -params{exp}, # E-value\n \"expmulti|h=f\" =\ -> \\$params{expmulti}, # E-value\n \"filter|F=s\ -\" => \\$params{filter}, # Low complexity fi\ -lter\n \"dropoff|X=i\" => \\$params{dropof\ -f}, # Dropoff score\n \"finaldropoff|Z=i\" => \\ -\$params{finaldropoff}, # Final dropoff score\n \ - \"scores|v=i\" => \\$params{scores}, # Max \ -number of scores\n \"align=i\" => \\$p\ -arams{align}, # Alignment view\n \"startregion|\ -S=i\" => \\$params{startregion}, # Start of regio\ -n in query\n \"endregion|H=i\" => \\$params{\ -endregion}, # End of region in query\n \"maxpas\ -ses|j=i\" => \\$params{maxpasses}, # Number of \ -PSI iterations\n \"opengap|G=i\" => \\$par\ -ams{opengap}, # Gap open penalty\n \"extendgap|\ -E=i\" => \\$params{extendgap}, # Gap extension \ -penalty\n \"pattern=s\" => \\$params{pat\ -tern}, # PHI-BLAST pattern\n \"usagemode|p=s\" \ - => \\$params{usagemode}, # PHI-BLAST program\n \ - \"appxml=s\" => \\$params{appxml}, # Ap\ -plication XML\n \"sequence=s\" => \\$sequ\ -ence, # Query sequence\n \"help\" => \\$\ -help, # Usage info\n \"polljob\" => \\$p\ -olljob, # Get results\n \"status\" => \\\ -$status, # Get status\n \"ids\" =>\ - \\$ids, # Get ids from result\n \"jobid=s\" \ - => \\$jobid, # JobId\n \"outfile=s\" \ - => \\$outfile, # Output filename\n \"outfor\ -mat|o=s\" => \\$outformat, # Output file format\ -\n \"async|a\" => \\$async, # Async subm\ -ission\n \"email=s\" => \\$params{emai\ -l}, # User e-mail address\n \"trace\" \ - => \\$trace, # Show SOAP messages\n );\n\nmy \ -$scriptName = basename($0, ());\nif($help) {\n \ -&usage();\n exit(0);\n}\n\nif ($trace){\n pr\ -int \"Tracing active\\n\";\n SOAP::Lite->import\ -(+trace => 'debug');\n}\n\nmy $soap = SOAP::Lite\n\ - ->service($WSDL)\n ->on_fault(sub {\n \ - my $soap = shift;\n my $res = shift;\n \ - # Throw an exception for all faults\n \ -if(ref($res) eq '') {\n die($res);\n \ - } else {\n die($res->faultstring);\ -\n }\n return new SOAP::SOM;\n }\\ -n );\n\nif( !($polljob || $status ||\ - $ids) &&\n !( (defined($ARGV[0]) && -f $ARGV[0\ -]) || defined($sequence) )\n ) {\n print STD\ -ERR 'Error: bad option combination', \"\\n\";\n \ - &usage();\n exit(1);\n}\nelsif($polljob && def\ -ined($jobid)) {\n print \"Getting results for j\ -ob $jobid\\n\";\n getResults($jobid);\n}\nelsif\ -($status && defined($jobid)) {\n print STDERR \\ -"Getting status for job $jobid\\n\";\n my $resu\ -lt = $soap->checkStatus($jobid);\n print STDOUT\ - $result, \"\\n\";\n if($result eq 'DONE') {\n \ -print STDERR \"To get results: $scriptName --pollj\ -ob --jobid $jobid\\n\";\n }\n} \nelsif($ids &&\ - defined($jobid)) {\n print STDERR \"Getting id\ -s from job $jobid\\n\";\n getIds($jobid);\n}\ne\ -lse {\n if(-f $ARGV[0]) { \n $content={type=>'s\ -equence', content=>read_file($ARGV[0])}; \n }\n\ - if($sequence) { \n if(-f $sequence) {\n $c\ -ontent={type=>'sequence', content=>read_file($sequ\ -ence)}; \n } else {\n $content={type=>'sequenc\ -e', content=>$sequence};\n }\n }\n push @con\ -tent, $content;\n\n my $jobid;\n my $paramsD\ -ata = SOAP::Data->name('params')->type(map=>\\%par\ -ams);\n my $contentData = SOAP::Data->name('con\ -tent')->value(\\@content);\n # For SOAP::Lite 0\ -.60 and earlier parameters are passed directly\n \ - if($SOAP::Lite::VERSION eq '0.60' || $SOAP::Lite\ -::VERSION =~ /0\\.[1-5]/) {\n $jobid = $soa\ -p->runBlastpgp($paramsData, $contentData);\n }\\ -n # For SOAP::Lite 0.69 and later parameter han\ -dling is different, so pass\n # undef's for tem\ -plated params, and then pass the formatted args.\n\ - else {\n $jobid = $soap->runBlastpgp(un\ -def, undef,\n $paramsData, $contentData);\n\ - }\n\n if (defined($async)) {\n print STDOUT\ - $jobid, \"\\n\";\n print STDERR \"To check\ - status: $scriptName --status --jobid $jobid\\n\";\ -\n } else { # Synchronous mode\n print S\ -TDERR \"JobId: $jobid\\n\";\n sleep 1;\n \ - getResults($jobid);\n }\n}\n\nsub getIds($\ -) {\n $jobid = shift;\n my $results = $soap-\ ->getIds($jobid);\n for $result (@$results){\n p\ -rint \"$result\\n\";\n }\n}\n\nsub clientPoll($\ -) {\n my $jobid = shift;\n my $result = 'PEN\ -DING';\n # Check status and wait if not finishe\ -d\n #print STDERR \"Checking status: $jobid\\n\\ -";\n while($result eq 'RUNNING' || $result eq '\ -PENDING') {\n $result = $soap->checkStatus(\ -$jobid);\n print STDERR \"$result\\n\";\n \ - if($result eq 'RUNNING' || $result eq 'PENDI\ -NG') {\n # Wait before polling again.\n\ - sleep $checkInterval;\n }\n \ -}\n}\n\nsub getResults($) {\n $jobid = shift;\n\ - # Check status, and wait if not finished\n \ -clientPoll($jobid);\n # Use JobId if output fil\ -e name is not defined\n unless(defined($outfile\ -)) {\n $outfile=$jobid;\n }\n # Get l\ -ist of data types\n my $resultTypes = $soap->ge\ -tResults($jobid);\n # Get the data and write it\ - to a file\n if(defined($outformat)) { # Specif\ -ied data type\n my $selResultType;\n \ - foreach my $resultType (@$resultTypes) {\n \ - if($resultType->{type} eq $outformat) {\n \ - $selResultType = $resultType;\n \ - }\n }\n $res=$soap->poll($jobid\ -, $selResultType->{type});\n write_file($ou\ -tfile.'.'.$selResultType->{ext}, $res);\n } els\ -e { # Data types available\n # Write a file\ - for each output type\n for my $resultType \ -(@$resultTypes){\n #print \"Getting $re\ -sultType->{type}\\n\";\n $res=$soap->po\ -ll($jobid, $resultType->{type});\n writ\ -e_file($outfile.'.'.$resultType->{ext}, $res);\n \ - }\n }\n}\n\nsub read_file($) {\n my $f\ -ilename = shift;\n open(FILE, $filename);\n \ -my $content;\n my $buffer;\n while(sysread(F\ -ILE, $buffer, 1024)) {\n $content.= $buffer;\n \ -}\n close(FILE); \n return $content;\n}\n\n\ -sub write_file($$) {\n my ($tmp,$entity) = @_;\\ -n print STDERR \"Creating result file: \".$tmp.\ -\"\\n\";\n unless(open (FILE, \">$tmp\")) {\n r\ -eturn 0;\n }\n syswrite(FILE, $entity);\n \ - close (FILE);\n return 1;\n}\n\nsub usage {\n \ - print STDERR < [options...] seqfile\n Returns: saves\ - the results to disk\n\nAsynchronous job:\n\n Use\ - this if you want to retrieve the results at a lat\ -er time. The results\n are stored for up to 24 ho\ -urs.\n The asynchronous submission mode is recomm\ -ended when users are submitting\n batch jobs or l\ -arge database searches\n Usage: blastpgp.pl --ema\ -il --async [options...] seqFile\n Re\ -turns: jobid\n\n Use the jobid to query for the s\ -tatus of the job.\n Usage: blastpgp.pl --status -\ --jobid \n Returns: string indicating the s\ -tatus of the job\n DONE - job has finished\n \ - RUNNING - job is running\n NOT_FOUND - job can\ -not be found\n ERROR - the jobs has encountered\ - an error\n\n When done, use the jobid to retriev\ -e the results of the job.\n Usage: blastpgp.pl --\ -polljob --jobid [--outfile ]\n \ -Returns: saves the results to disk\nEOF\n;\n}\n","\ -\n\n\nmy $PROBTRESH = 0.3;# base pairs below this \ -prob threshold will be ignored\nmy $WEIGHT = 100.0\ -; # float!!\nmy $NUCALPH = \"ACGTUNRYMKSWHBVD\";\n\ -use vars qw($NUCALPH $WEIGHT);\n\nmy $myname = bas\ -ename($0);\n\nuse strict;\nuse warnings;\n\nuse Fi\ -le::Basename;\nuse Getopt::Long;\nuse File::Glob '\ -:glob';\nuse File::Spec;\nuse File::Temp qw/ tempf\ -ile tempdir /;\n\n\n\n\nsub tcoffeelib_header($;$)\ -\n{\n my ($nseq, $fd) = @_;\n if (! defined(\ -$fd)) {\n $fd = *STDOUT;\n }\n printf\ - $fd \"! TC_LIB_FORMAT_01\\n\";\n printf $fd \"\ -%d\\n\", $nseq;\n}\n\n\nsub tcoffeelib_header_adds\ -eq($$;$)\n{\n my ($id, $seq, $fd) = @_;\n if\ - (! defined($fd)) {\n $fd = *STDOUT;\n }\ -\n printf $fd \"%s %d %s\\n\", $id, length($seq\ -), $seq;\n}\n\n\nsub tcoffeelib_comment($;$)\n{\n \ - my ($comment, $fd) = @_;\n if (! defined($fd\ -)) {\n $fd = *STDOUT;\n }\n printf $f\ -d \"!\" . $comment . \"\\n\";\n}\n\n\nsub tcoffeel\ -ib_struct($$$;$)\n{\n my ($nseq, $len, $bpm, $f\ -d) = @_;\n\n if (! defined($fd)) {\n $fd\ - = *STDOUT;\n }\n\n # output basepair indice\ -s with fixed weight\n printf $fd \"#%d %d\\n\",\ - $nseq, $nseq;\n # output basepairs (only once)\ - and with unit-offset\n for (my $i=0; $i<$len; \ -$i++) {\n for (my $j=$i+1; $j<$len; $j++) {\ -\n if (! defined($bpm->[$i][$j])) {\n \ - print STDERR \"ERROR: \\$bpm->[$i][$\ -j] undefined\\n\";\n }\n if \ -($bpm->[$i][$j]>0) {\n print $fd $i\ -+1;\n print $fd \" \";\n \ - print $fd $j+1;\n print $fd \"\ - \" . $bpm->[$i][$j] . \"\\n\";\n }\n \ - }\n }\n}\n\n\nsub tcoffeelib_footer(;$)\n\ -{\n my ($fd) = @_;\n if (! defined($fd)) {\n\ - $fd = *STDOUT;\n }\n print $fd \"! S\ -EQ_1_TO_N\\n\";\n}\n\n\n \nsub plfold($$$)\n{ \ - \n my ($id, $seq, $probtresh) = @_;\n my (\ -@struct);# return\n my ($templ, $fhtmp, $fnamet\ -mp, $cmd, $ctr, $window_size);\n\n $templ = $my\ -name . \".\" . $id . \".pid-\" . $$ . \".XXXXXX\";\ -\n ($fhtmp, $fnametmp) = tempfile($templ, UNLIN\ -K => 1); \n print $fhtmp \">$id\\n$seq\\n\";\n\\ -n # --- init basepair array\n #\n for (my\ - $i=0; $i/dev/n\ -ull\";\n system($cmd);\n \n if ($? != 0) \ -{\n printf STDERR \"ERROR: RNAplfold ($cmd)\ - exited with error status %d\\n\", $? >> 8;\n \ - return;\n }\n #unlink($fnametmp);\n my\ - $fps = sprintf(\"%s_dp.ps\", $id); # check long n\ -ame\n \n if (! -s $fps) {\n {\n\n $fps \ -= sprintf(\"%s_dp.ps\", substr($id,0,12)); # check\ - short name\n if (! -s $fps)\n {\n die(\"co\ -uldn't find expected file $fps\\n\");\n return\ -;\n }\n }\n }\n\n \n # --- read ba\ -se pairs from created postscript\n #\n open(\ -FH, $fps);\n while (my $line = ) {\n \ - my ($nti, $ntj, $prob);\n chomp($line); \ - \n # line: bp bp sqrt-prob ubox\n \ - my @match = ($line =~ m/^([0-9]+) +([0-9]+) +([\ -0-9\\.]+) +ubox$/);\n if (scalar(@match)) {\ -\n $nti=$1;\n $ntj=$2;\n \ - $prob=$3*$3;# prob stored as square root\n\ -\n if ($prob>$probtresh) {\n \ - #printf STDERR \"\\$struct[$nti][$ntj] sqrtpr\ -ob=$3 prob=$prob > $probtresh\\n\";\n \ - $struct[$nti-1][$ntj-1] = $WEIGHT\n \ -}\n # store with zero-offset\n }\ -\n }\n close(FH);\n\n # remove or gzi pos\ -tscript\n #\n unlink($fps);\n #\n # or\ - gzip\n #$cmd = \"gzip -qf $fps\";\n #system\ -($cmd);\n #if ($? != 0) {\n # printf STDE\ -RR \"ERROR: gzip ($cmd) exited with error status %\ -d\\n\", $? >> 8;\n #}\n\n return \\@struct;\\ -n}\n\n\n\n\n\nsub rnaseqfmt($)\n{\n my ($seq) =\ - @_;\n # remove gaps\n $seq =~ s/-//g;\n \ -# uppercase RNA\n $seq = uc($seq);\n # T -> \ -U\n $seq =~ s/T/U/g;\n # check for invalid c\ -haraters\n $_ = $seq;\n s/[^$NUCALPH]//g;\n \ - return $_;\n}\n\n\n\n\nsub usage(;$)\n{ \n \ - my ($errmsg) = @_;\n if ($errmsg) {\n \ -print STDERR \"ERROR: $errmsg\\n\";\n }\n pr\ -int STDERR << \"EOF\";\n$myname:\n Creates a T-Cof\ -fee RNA structure library from RNAplfold predictio\ -n.\n See FIXME:citation\nUsage:\n $myname -in seq_\ -file -out tcoffee_lib\nEOF\n exit(1);\n}\n\nsub\ - read_fasta_seq \n {\n my $f=$_[0];\n my %h\ -seq;\n my (@seq, @com, @name);\n my ($a, $s,\ -$nseq);\n\n open (F, $f);\n while ()\n \ - {\n $s.=$_;\n }\n close (F);\n\n \n \ - @name=($s=~/>(\\S*).*\\n[^>]*/g);\n \n @s\ -eq =($s=~/>.*.*\\n([^>]*)/g);\n @com =($s=~/>(\\ -\S*)(.*)\\n([^>]*)/g);\n\n\n $nseq=$#name+1;\n \ - \n for ($a=0; $a<$nseq; $a++)\n {\n my $n\ -=$name[$a];\n my $s;\n $hseq{$n}{name}=$n;\n $s=$s\ -eq[$a];$s=~s/\\s//g;\n \n $hseq{$n}{seq}=$s;\n $hs\ -eq{$n}{com}=$com[$a];\n }\n return %hseq;\\ -n }\n\n\n\n\n\n\n\nmy $fmsq = \"\";\nmy $flib = \\ -"\";\nmy %OPTS;\nmy %seq;\nmy ($id, $nseq, $i);\nm\ -y @nl;\n\nGetOptions(\"in=s\" => \\$fmsq, \"out=s\\ -" => \\$flib);\n\nif (! -s $fmsq) {\n usage(\"e\ -mpty or non-existant file \\\"$fmsq\\\"\")\n}\nif \ -(length($flib)==0) {\n usage(\"empty out-filena\ -me\")\n}\n\n\n\n\n\n\n%seq=read_fasta_seq($fmsq);\\ -n\n\n@nl=keys(%seq);\n\n$nseq=$#nl+1;\nopen FD_LIB\ -, \">$flib\" or die \"can't open $flib!\";\ntcoffe\ -elib_header($nseq, *FD_LIB);\nforeach $id (keys (%\ -seq))\n {\n my ($seq, $fmtseq);\n \n $se\ -q = $seq{$id}{seq};\n \n $fmtseq = rnaseqfmt\ -($seq);# check here, formatting for folding import\ -ant later\n if (length($seq)!=length($fmtseq)) \ -{\n print STDERR \"ERROR: invalid sequence \ -$id is not an RNA sequence. read seq is: $seq\\n\"\ -;\n exit\n }\n \n tcoffeelib_head\ -er_addseq($id, uc($seq), *FD_LIB);\n }\ntcoffeeli\ -b_comment(\"generated by $myname on \" . localtime\ -(), *FD_LIB);\n\n\n\n$i=0;\nforeach $id (keys (%se\ -q))\n {\n my ($cleanid, $seq, $bpm);\n $seq\ -=$seq{$id}{seq};\n $cleanid = $id;\n $cleani\ -d =~ s,[/ ],_,g;# needed for rnaplfold\n $seq =\ - rnaseqfmt($seq);\n \n $bpm = plfold($cleani\ -d, rnaseqfmt($seq), $PROBTRESH); \n \n \ - tcoffeelib_struct($i+1, length($seq), $bpm, *FD_L\ -IB);\n $i++;\n}\n\n\ntcoffeelib_footer(*FD_LIB)\ -;\nclose FD_LIB;\nexit (0);\n\n","\n\n\n\n\n$cmd=j\ -oin ' ', @ARGV;\nif ($cmd=~/-infile=(\\S+)/){ $seq\ -file=$1;}\nif ($cmd=~/-outfile=(\\S+)/){ $libfile=\ -$1;}\n\n\n\n%s=read_fasta_seq ($seqfile);\n\nopen \ -(F, \">$libfile\");\nforeach $name (keys (%s))\n \ -{\n my $tclib=\"$name.RNAplfold_tclib\";\n p\ -rint (F \">$name _F_ $tclib\\n\");\n seq2RNAplf\ -old2tclib ($name, $s{$name}{seq}, $tclib);\n }\nc\ -lose (F);\nexit (EXIT_SUCCESS);\n\nsub seq2RNAplfo\ -ld2tclib\n {\n my ($name, $seq, $tclib)=@_;\n \ - my ($tmp);\n $n++;\n $tmp=\"tmp4seq2RNApl\ -fold_tclib.$$.$n.pep\";\n open (RF, \">$tmp\");\ -\n print (RF \">$name\\n$seq\\n\");\n close \ -(RF);\n \n system \"t_coffee -other_pg RNApl\ -fold2tclib.pl -in=$tmp -out=$tclib\";\n \n u\ -nlink ($tmp);\n return $tclib;\n }\n \n \ -\nsub read_fasta_seq \n {\n my $f=@_[0];\n \ -my %hseq;\n my (@seq, @com, @name);\n my ($a\ -, $s,$nseq);\n\n open (F, $f);\n while ()\ -\n {\n $s.=$_;\n }\n close (F);\n\n \ - \n @name=($s=~/>(\\S*).*\\n[^>]*/g);\n \n \ - @seq =($s=~/>.*.*\\n([^>]*)/g);\n @com =($s=\ -~/>\\S*(.*)\\n([^>]*)/g);\n\n \n $nseq=$#nam\ -e+1;\n \n for ($a=0; $a<$nseq; $a++)\n \ -{\n my $n=$name[$a];\n $hseq{$n}{name}=$n;\n $hseq\ -{$n}{seq}=$seq[$a];\n $hseq{$n}{com}=$com[$a];\n \ - }\n return %hseq;\n }\n","use Getopt::Long\ -;\nGetOptions(\"-in=s\" => \\$fmsq1, \"-out=s\" =>\ - \\$outfile, \"-arch=s\" => \\$arch,\"-psv=s\" => \ -\\$psv, \"-hmmtop_home=s\", \\$hmmtop_home );\nope\ -n (O, \">$outfile\");\n\nif (!$hmmtop_home){$hmmto\ -p_home=\"/home/notredame/packages/hmmtop/hmmtop_2.\ -1\";}\nif ($arch){$ENV{'HMMTOP_ARCH'}=$arch;}\nels\ -e {$ENV{'HMMTOP_ARCH'}=\"$hmmtop_home/hmmtop.arch\\ -";}\n\nif ($psv){$ENV{'HMMTOP_PSV'}=$psv;}\nelse{$\ -ENV{'HMMTOP_PSV'}=\"$hmmtop_home/hmmtop.psv\";}\n\\ -n$fmsq=\"seq2convert.$$.tmp\";\nsystem (\"t_coffee\ - -other_pg seq_reformat -in $fmsq1 -output fasta_s\ -eq > $fmsq\");\n%seq=read_fasta_seq($fmsq);\n\n$tm\ -pfile=\"fasta_seq2hmmtop_fasta.$$.tmp\";\nforeach \ -$s (keys (%seq))\n {\n \n open F, \">$tmpfi\ -le\";\n print F \">seq\\n$seq{$s}{seq}\\n\";\n \ - close F;\n\n $result=`hmmtop -if=$tmpfile -s\ -f=FAS -pl 2>/dev/null`;\n @r=($result=~/(.+)/g)\ -;\n foreach $l (@r)\n {\n \n if ($l=~/pred\ -(.*)/)\n {$p.=$1;}\n }\n \n $p=~s/\\s\ -//g;\n print O \">$seq{$s}{name}\\n$p\\n\";\n \ - $p=\"\";\n }\nunlink \"$tmpfile\";\nunlink \"$f\ -msq\";\nclose (O);\n\nsub read_fasta_seq \n {\n \ - my $f=$_[0];\n my %hseq;\n my (@seq, @com,\ - @name);\n my ($a, $s,$nseq);\n\n open (F, $\ -f);\n while ()\n {\n $s.=$_;\n }\n\ - close (F);\n\n \n @name=($s=~/>(.*).*\\n\ -[^>]*/g);\n \n @seq =($s=~/>.*.*\\n([^>]*)/g\ -);\n @com =($s=~/>.*(.*)\\n([^>]*)/g);\n\n\n \ - $nseq=$#name+1;\n \n \n for ($a=0; $a<$nse\ -q; $a++)\n {\n my $n=$name[$a];\n my $s;\n $h\ -seq{$n}{name}=$n;\n $s=$seq[$a];$s=~s/\\s//g;\n \n\ - $hseq{$n}{seq}=$s;\n $hseq{$n}{com}=$com[$a];\n \ - }\n return %hseq;\n }\n","\n\n\n\n\nmy $FM\ -ODEL =\"\"; \nmy $TMPDIR = \"/tmp\";\n\n\n\n\nmy $\ -NUCALPH = \"ACGTUNRYMKSWHBVD\";\nmy $PRIMNUCALPH =\ - \"ACGTUN\";\nuse vars qw($NUCALPH $PRIMNUCALPH $T\ -MPDIR);\n\n\nmy $errmsg;\nuse vars qw($errmsg);\n\\ -n\n\nuse Getopt::Long;\nuse Cwd;\nuse File::Basena\ -me;\nuse File::Temp qw/ tempfile tempdir /;\nuse F\ -ile::Copy;\nuse File::Path;\n\n\n\nsub usage(;$)\n\ -{\n my ($errmsg) = @_;\n my $myname = basena\ -me($0);\n\n if ($errmsg) {\n print STDER\ -R \"ERROR: $errmsg\\n\";\n }\n\n print STDER\ -R << \"EOF\";\n \n$myname: align two sequences \ -by means of consan\\'s sfold\nUsage:\n $myname -i \ -file -o file -d path\nOptions:\n -i|--in : pairwis\ -e input sequence file\n -o|--out: output alignment\ -\n -d|--directory containing data\n\nEOF\n}\n\nsub\ - read_stk_aln \n {\n my $f=$_[0];\n my ($se\ -q, $id);\n \n my %hseq;\n\n open (STK, \"\ -$f\");\n while ()\n {\n if ( /^#/ || \ -/^\\/\\// || /^\\s*$/){;}\n else\n {\n ($id,\ -$seq)=/(\\S+)\\s+(\\S+)/;\n $hseq{$id}{'seq'}.\ -=$seq;\n }\n }\n close (STK);\n retur\ -n %hseq;\n }\nsub read_fasta_seq \n {\n my $f\ -=$_[0];\n my %hseq;\n my (@seq, @com, @name)\ -;\n my ($a, $s,$nseq);\n\n open (F, $f);\n \ - while ()\n {\n $s.=$_;\n }\n clo\ -se (F);\n\n \n @name=($s=~/>(.*).*\\n[^>]*/g\ -);\n \n @seq =($s=~/>.*.*\\n([^>]*)/g);\n \ - @com =($s=~/>.*(.*)\\n([^>]*)/g);\n\n \n $n\ -seq=$#name+1;\n \n for ($a=0; $a<$nseq; $a++\ -)\n {\n my $n=$name[$a];\n $hseq{$n}{name}=$n\ -;\n $hseq{$n}{seq}=$seq[$a];\n $hseq{$n}{com}=$com\ -[$a];\n }\n return %hseq;\n }\n\n\n\nsub \ -sfold_parseoutput($$)\n{\n my ($frawout, $foutf\ -a) = @_;\n my %haln;\n my ($fstk, $cmd, $id)\ -;\n open FOUTFA, \">$foutfa\";\n \n $fstk\ - = $frawout . \".stk\";\n \n # first line of\ - raw out contains info\n # remaining stuff is s\ -tockholm formatted\n $cmd = \"sed -e '1d' $fraw\ -out\";\n system(\"$cmd > $fstk\");\n if ($? \ -!= 0) {\n $errmsg = \"command failed with e\ -xit status $?.\";\n $errmsg .= \"Command w\ -as \\\"$cmd\\\"\";\n return -1;\n }\n\n \ - # this gives an error message. just ignore it..\ -.\n %haln=read_stk_aln ( $fstk);\n foreach $\ -i (keys (%haln))\n {\n my $s;\n $s=$haln{$i}{\ -'seq'};\n $s =~ s/\\./-/g;\n print FOUTFA \">$i\\n\ -$s\\n\";\n }\n close FOUTFA;\n return 0\ -;\n}\n\n\n\n\nsub sfold_wrapper($$$$)\n{\n \n \ - my ($fs1, $fs2, $fmodel, $foutfa) = @_;\n \n\\ -n my ($cmd, $frawout, $ferrlog, $freadme, $ftim\ -elog, $fstk);\n\n # add basename($fmsqin) (unk\ -nown here!)\n $frawout = \"sfold.log\";\n $f\ -errlog = \"sfold.err\";\n $ftimelog = \"sfold.t\ -ime\";\n $freadme = \"sfold.README\";\n $fs\ -tk = \"sfold.stk\";\n \n # prepare execution\ -...\n #\n # ./tmp is essential for dswpalign\ -\n # otherwise you'll get a segfault\n mkdir\ - \"./tmp\";\n \n $cmd = \"sfold -m $fmodel $\ -fs1 $fs2\";\n open(FREADME,\">$freadme\");\n \ - print FREADME \"$cmd\\n\"; \n close(FREADME);\\ -n\n # and go\n #\n system(\"/usr/bin/time\ - -p -o $ftimelog $cmd >$frawout 2>$ferrlog\");\n \ - if ($? != 0) {\n $errmsg = \"command fail\ -ed with exit status $?\";\n $errmsg .= \"co\ -mmand was \\\"$cmd\\\". See \" . getcwd . \"\\n\";\ -\n return -1;\n }\n\n return sfold_pa\ -rseoutput($frawout, $foutfa);\n}\n\n\n\n\n\n\n\nmy\ - ($help, $fmsqin, $fmsaout);\nGetOptions(\"help\" \ - => \\$help,\n \"in=s\" => \\$fmsqin,\n \ - \"out=s\" => \\$fmsaout,\n \"data=s\"\ - => \\$ref_dir);\n\n\n\nif ($help) {\n usage();\ -\n exit(0);\n}\nif (! defined($fmsqin)) {\n \ -usage('missing input filename');\n exit(1);\n}\\ -nif (! defined($fmsaout)) {\n usage('missing ou\ -tput filename');\n exit(1);\n\n}\nif (scalar(@A\ -RGV)) {\n usage('Unknown remaining args');\n \ - exit(1);\n}\n\n$FMODEL = \"$ref_dir/mix80.mod\";\\ -nif (! -e \"$FMODEL\") {\n die(\"couldn't find \ -sfold grammar model file. Expected $FMODEL\\n\");\\ -n}\n\n\nmy %hseq=read_fasta_seq ($fmsqin);\nmy $id\ -;\n\nforeach $id (keys(%hseq))\n {\n push(@seq\ -_array, $hseq{$id});\n }\n\nif ( scalar(@seq_arra\ -y) != 2 ) {\n die(\"Need *exactly* two sequence\ -s as input (pairwise alignment!).\")\n}\n\n\n\nmy \ -($sec, $min, $hour, $mday, $mon, $year, $wday, $yd\ -ay, $isdst) = localtime(time);\nmy $datei = sprint\ -f(\"%4d-%02d-%02d\", $year+1900, $mon+1, $mday);\n\ -my $templ = basename($0) . \".\" . $datei . \".pid\ --\" . $$ . \".XXXXXX\";\nmy $wd = tempdir ( $templ\ -, DIR => $TMPDIR);\n\ncopy($fmsqin, \"$wd/\" . bas\ -ename($fmsqin) . \".org\"); # for reproduction\nco\ -py($FMODEL, \"$wd\");\nmy $fmodel = basename($FMOD\ -EL);\nmy $orgwd = getcwd;\nchdir $wd;\n\n\n\nmy @s\ -epseqfiles;\nforeach $id (keys(%hseq)) {\n my (\ -$seq, $orgseq, $fname, $sout);\n $seq=$hseq{$id\ -}{'seq'};\n \n $fname = basename($fmsqin) . \ -\"_$id.fa\";\n # replace funnies in file/id nam\ -e (e.g. \"/\" \" \" etc)\n $fname =~ s,[/ ],_,g\ -;\n open (PF, \">$fname\");\n print (PF \">$\ -id\\n$seq\\n\");\n close (PF);\n\n push(@sep\ -seqfiles, $fname);\n}\n\nmy ($f1, $f2, $fout);\n$f\ -1 = $sepseqfiles[0];\n$f2 = $sepseqfiles[1];\n$fou\ -t = $wd . basename($fmsqin) . \".out.fa\";\nif (sf\ -old_wrapper($f1, $f2, $fmodel, \"$fout\") != 0) {\\ -n printf STDERR \"ERROR: See logs in $wd\\n\";\\ -n exit(1);\n} else {\n chdir $orgwd;\n co\ -py($fout, $fmsaout);\n rmtree($wd);\n exit(0)\ -;\n}\n","\nuse Env qw(HOST);\nuse Env qw(HOME);\nu\ -se Env qw(USER);\n\n\n$tmp=clean_cr ($ARGV[0]);\no\ -pen (F, $tmp);\n\nwhile ( )\n {\n my $l=$_;\ -\n if ( $l=~/^# STOCKHOLM/){$stockholm=1;}\n \ - elsif ( $stockholm && $l=~/^#/)\n {\n $l=~/^\ -#(\\S+)\\s+(\\S+)\\s+(\\S*)/g;\n $l=\"_stockholmha\ -sch_$1\\_stockholmspace_$2 $3\\n\";\n }\n \ -$file.=$l;\n }\nclose (F);\nunlink($tmp);\n$file1\ -=$file;\n\n$file=~s/\\#/_hash_symbol_/g;\n$file=~s\ -/\\@/_arobase_symbol_/g;\n\n\n$file=~s/\\n[\\.:*\\\ -s]+\\n/\\n\\n/g;\n\n$file=~s/\\n[ \\t\\r\\f]+(\\b)\ -/\\n\\1/g;\n\n\n$file=~s/(\\n\\S+)(\\s+)(\\S)/\\1_\ -blank_\\3/g;\n\n$file=~s/[ ]//g;\n$file=~s/_blank_\ -/ /g;\n\n\n\n$file =~s/\\n\\s*\\n/#/g;\n\n$file.=\\ -"#\";\n$file =~s/\\n/@/g;\n\n\n\n\n@blocks=split /\ -\\#/, $file;\nshift (@blocks);\n@s=split /\\@/, $b\ -locks[0];\n$nseq=$#s+1;\n\n\n\n$file=join '@', @bl\ -ocks;\n@lines=split /\\@/,$file;\n\n$c=0;\n\nforea\ -ch $l (@lines)\n {\n if (!($l=~/\\S/)){next;}\\ -n elsif ($stockholm && ($l=~/^\\/\\// || $l=~/S\ -TOCKHOLM/)){next;}#get read of STOCHOLM Terminator\ -\n \n $l=~/(\\S+)\\s+(\\S*)/g;\n $n=$1; $s\ -=$2;\n \n $seq[$c].=$s;\n $name[$c]=$n;\n\ - $c++;\n \n if ( $c==$nseq){$c=0;}\n \\ -n } \n\nif ( $c!=0)\n {\n print STDERR \"ERR\ -OR: $ARGV[0] is NOT an MSA in Clustalw format: mak\ -e sure there is no blank line within a block [ERRO\ -R]\\n\";\n exit (EXIT_FAILURE);\n }\n\nfor ($\ -a=0; $a< $nseq; $a++)\n {\n $name[$a]=cleanstr\ -ing ($name[$a]);\n $seq[$a]=cleanstring ($seq[$\ -a]);\n $seq[$a]=breakstring($seq[$a], 60);\n \ - \n $line=\">$name[$a]\\n$seq[$a]\\n\";\n \n\ - print \"$line\";\n }\nexit (EXIT_SUCCESS);\n\\ -nsub cleanstring\n {\n my $s=@_[0];\n $s=~s\ -/_hash_symbol_/\\#/g;\n $s=~s/_arobase_symbol_/\ -\\@/g;\n $s=~s/[ \\t]//g;\n return $s;\n }\\ -nsub breakstring\n {\n my $s=@_[0];\n my $s\ -ize=@_[1];\n my @list;\n my $n,$ns, $symbol;\ -\n \n @list=split //,$s;\n $n=0;$ns=\"\";\ -\n foreach $symbol (@list)\n {\n if ( $n==\ -$size)\n {\n $ns.=\"\\n\";\n $n=0;\n }\ -\n $ns.=$symbol;\n $n++;\n }\n return $ns;\ -\n }\n\nsub clean_cr\n {\n my $f=@_[0];\n \ - my $file;\n \n $tmp=\"f$.$$\";\n \n \ -\n open (IN, $f);\n open (OUT, \">$tmp\");\n\ - \n while ( )\n {\n $file=$_;\n $fi\ -le=~s/\\r\\n/\\n/g;\n $file=~s/\\n\\r/\\n/g;\n $fi\ -le=~s/\\r\\r/\\n/g;\n $file=~s/\\r/\\n/g;\n print \ -OUT \"$file\";\n }\n \n close (IN);\n \ - close (OUT);\n return $tmp;\n }\n","use Env \ -qw(HOST);\nuse Env qw(HOME);\nuse Env qw(USER);\n\\ -n\n$query_start=-1;\n$query_end=-1;\n\nwhile (<>)\\ -n {\n if ( /\\/\\//){$in_aln=1;}\n elsif ( \ -$in_aln && /(\\S+)\\s+(.*)/)\n {\n\n\n $name=\ -$1;\n \n\n $seq=$2;\n $seq=~s/\\s//g;\n $se\ -q=~s/\\~/\\-/g;\n $seq=~s/\\./\\-/g;\n if ( $list{\ -$n}{'name'} && $list{$n}{'name'} ne $name)\n {\n\ - print \"$list{$n}{'name'} Vs $name\";\n \\ -n exit (EXIT_FAILURE);\n }\n else\n {\n \ - $list{$n}{'name'}= $name;\n }\n\n $list{$n}{'s\ -eq'}=$list{$n}{'seq'}.$seq;\n \n $nseq=++$n;\n \n \ - }\n else\n {$n=0;}\n }\n\n\nfor ($a=\ -0; $a<$nseq; $a++)\n {\n print \">$list{$a}{'n\ -ame'}\\n$list{$a}{'seq'}\\n\";\n }\n \n","\n\ -use Env qw(HOST);\nuse Env qw(HOME);\nuse Env qw(U\ -SER);\n\n \ - \nuse strict; \ - \nuse warnings;\nuse diagno\ -stics;\n\nmy $in_hit_list, my $in_aln=0, my(%name_\ -list)=(),my (%list)=(),my $n_seq=0; my $test=0;\nm\ -y($j)=0, my $n=0, my $nom, my $lg_query, my %vu=()\ -;\n\nopen (F, \">tmp\");\n\n$/=\"\\n\";\nwhile (<>\ -)\n{\n print F $_;\n if($_ =~ /Query=\\s*(.+\ -?)\\s/i) { $nom=$1;}\n\n if ( /Sequences produc\ -ing significant alignments/){$in_hit_list=1;}\n \ - \n if ($_=~ /^pdb\\|/i) { $_=~ s/pdb\\|//g; }\\ -n if ($_=~ /^(1_\\d+)\\s+\\d+/) { $_=~ s/$1/QUE\ -RY/;}\n \n if ( /^(\\S+).+?\\s+[\\d.]+\\s+\ -([\\de.-]+)\\s+$/ && $in_hit_list) \n {\n my($i\ -d)=$1; # \n $id=~ s/\\|/_/g; #\n if ($id =~ /.+_$/\ -) { chop($id) }; #\n $name_list{$n_seq++}=$id;\n $\ -name_list{$n_seq-1}=~ s/.*\\|//g; \n }\n \\ -n if (/query/i) {$in_aln=1;}\n if ( /^(\\S+)\ -\\s+(\\d+)\\s+([a-zA-Z-]+)\\s+(\\d+)/ || /^(\\S+)(\ -\\s+)(\\-+)(\\s+)/ && ($in_aln == 1))\n {\n my \ -$name=$1;\n my $start=$2;\n my $seq=$3;\n my $end=\ -$4;\n \n if ($name =~ /QUERY/i) { $lg_query=lengt\ -h($seq); }\n\n unless ($test > $n) #m\n {\n my\ -(@seqq)= split('',$seq);\n my($gap_missing)= s\ -calar(@seqq);\n \n while ($gap_missing != \ -$lg_query) { unshift (@seqq,\"-\"); $gap_missing=\ - scalar(@seqq); }\n $seq=join('',@seqq); #m\n\ - }\n \n if ($name =~ /QUERY/i)\n {\n $n=0; %vu\ -=(); $j=0;\n $list{$n}{'real_name'}=\"$nom\";\\ -n } \n else\n {\n unless (exists $vu{$name}) {\ - ++$j;} \n $list{$n}{'real_name'}=$name_list{$\ -j-1};\n }\n \n $list{$n}{'name'}=$name;\n\n $seq=\ -~tr/a-z/A-Z/;\n $list{$n}{'seq'}=$list{$n}{'seq'};\ -\n $list{$n}{'seq'}.=$seq;\n\n $n++;\n $vu{$name}+\ -+;\n $test++;\n } \n \n}\n\nmy @numero=();\n\\ -nfor (my $a=0; $a<$n; $a++) #m\n{\n my $long=le\ -ngth($list{0}{'seq'}); \n my $long1= length($l\ -ist{$a}{'seq'});\n \n while ($long1 ne $long)\\ -n {\n $list{$a}{'seq'}.=\"-\";\n $long1= length\ - ($list{$a}{'seq'});\n } \n \n push (@numero\ -,\"$list{$a}{'name'} $list{$a}{'real_name'}\\n\");\ -\n}\n\nmy %dejavu=();\n\n\nfor (my $i=0; $i<=$#num\ -ero; $i++)\n{\n my $s=\">$list{$i}{'real_name'}\ -\\n$list{$i}{'seq'}\\n\";\n my $k=0;\n \n \ - if (exists $dejavu{$numero[$i]}) {next;}\n els\ -e\n { \n for ($j=0; $j<$n ; $j++)\n {\n if \ -(\"$numero[$i]\" eq \"$numero[$j]\" && $j != $i )\\ -n {\n ++$k;\n $s .=\">$list{$j}{'real_name'}\ -\\n$list{$j}{'seq'}\\n\";\n }\n } \n }\n \ - \n if ($k>0) \n {\n my $cons;\n open (SOR,\\ -">tempo_aln2cons\"); print SOR $s; close SOR ;\n \ -open (COM,\"t_coffee -other_pg seq_reformat -in te\ -mpo_aln2cons -action +aln2cons +upper |\") ; \n \ - while ()\n { \n if (/^>/) { $cons =\">\ -$list{$i}{'real_name'}\\n\"; next;}\n $_=~ s/\\ -\n//g;\n $cons .=$_;\n }\n close COM; unlink (\ -\"tempo_aln2cons\");\n print $cons,\"\\n\"; print \ -F $cons,\"\\n\";\n } \n else { print $s; p\ -rint F $s; }\n \n $dejavu{$numero[$i]}++;\n}\ - #m\n\nexit;\n\n\n\n\n\n\n\n\n\n\n\n","use Env;\n\\ -n\n$tmp_dir=\"\";\n$init_dir=\"\";\n$program=\"tc_\ -generic_method.pl\";\n\n$blast=@ARGV[0];\n\n$name=\ -\"query\";$seq=\"\";\n%p=blast_xml2profile($name,$\ -seq,100, 0, 0, $blast);\n&output_profile (%p);\n\n\ -\nsub output_profile\n {\n my (%profile)=(@_);\ -\n my ($a);\n for ($a=0; $a<$profile{n}; $a+\ -+)\n {\n \n print \">$profile{$a}{name} $prof\ -ile{$a}{comment}\\n$profile{$a}{seq}\\n\";\n \ -}\n return;\n }\nsub file_contains \n {\n \ -my ($file, $tag, $max)=(@_);\n my ($n);\n $n\ -=0;\n \n if ( !-e $file && ($file =~/$tag/))\ - {return 1;}\n elsif ( !-e $file){return 0;}\n \ - else \n {\n open (FC, \"$file\");\n while \ -( )\n {\n if ( ($_=~/$tag/))\n {\n\ - close (FC);\n return 1;\n }\n elsif (\ -$max && $n>$max)\n {\n close (FC);\n retur\ -n 0;\n }\n $n++;\n }\n }\n clo\ -se (FC);\n return 0;\n }\n \n \nsub file\ -2string\n {\n my $f=@_[0];\n my $string, $l\ -;\n open (F,\"$f\");\n while ()\n {\\ -n\n $l=$_;\n #chomp ($l);\n $string.=$l;\n }\\ -n close (F);\n $string=~s/\\r\\n//g;\n $s\ -tring=~s/\\n//g;\n return $string;\n }\n\n\n\n\ -sub tag2value \n {\n \n my $tag=(@_[0]);\n \ - my $word=(@_[1]);\n my $return;\n \n $\ -tag=~/$word=\"([^\"]+)\"/;\n $return=$1;\n r\ -eturn $return;\n }\n \nsub hit_tag2pdbid\n \ -{\n my $tag=(@_[0]);\n my $pdbid;\n \n\ - $tag=~/id=\"(\\S+)\"/;\n $pdbid=$1;\n $p\ -dbid=~s/_//;\n return $pdbid;\n }\nsub id2pdbi\ -d \n {\n my $id=@_[0];\n \n if ($id =~/pdb\ -/)\n {\n $id=~/pdb(.*)/;\n $id=$1;\n }\n\ - $id=~s/[|¦_]//g;\n return $id;\n }\nsub se\ -t_blast_type \n {\n my $file =@_[0];\n if (\ -&file_contains ($file,\"EBIApplicationResult\",100\ -)){$BLAST_TYPE=\"EBI\";}\n elsif (&file_contain\ -s ($file,\"NCBI_BlastOutput\",100)) {$BLAST_TYPE=\\ -"NCBI\";}\n else\n {\n $BLAST_TYPE=\"\";\n\ - }\n return $BLAST_TYPE;\n }\nsub blast_x\ -ml2profile \n {\n my ($name,$seq,$maxid, $mini\ -d, $mincov, $file)=(@_);\n my (%p, $a, $string,\ - $n);\n \n\n\n if ($BLAST_TYPE eq \"EBI\" ||\ - &file_contains ($file,\"EBIApplicationResult\",10\ -0)){%p=ebi_blast_xml2profile(@_);}\n elsif ($BL\ -AST_TYPE eq \"NCBI\" || &file_contains ($file,\"NC\ -BI_BlastOutput\",100)){%p=ncbi_blast_xml2profile(@\ -_);}\n else \n {\n print \"************ ER\ -ROR: Blast Returned an unknown XML Format ********\ -**************\";\n die;\n }\n for ($a=0; \ -$a<$p{n}; $a++)\n {\n my $name=$p{$a}{name};\\ -n $p{$name}{seq}=$p{$a}{seq};\n }\n return\ - %p;\n }\nsub ncbi_blast_xml2profile \n {\n m\ -y ($name,$seq,$maxid, $minid, $mincov, $string)=(@\ -_);\n my ($L,$l, $a,$b,$c,$d,$nhits,@identifyer\ -L);\n \n \n $seq=~s/[^a-zA-Z]//g;\n $L\ -=length ($seq);\n \n %hit=&xml2tag_list ($st\ -ring, \"Hit\");\n \n \n for ($nhits=0,$a=\ -0; $a<$hit{n}; $a++)\n {\n my ($ldb,$id, $ide\ -ntity, $expectation, $start, $end, $coverage, $r);\ -\n my (%ID,%DE,%HSP);\n \n $ldb=\"\";\n\n %ID=&xml\ -2tag_list ($hit{$a}{body}, \"Hit_id\");\n $identif\ -yer=$ID{0}{body};\n \n %DE=&xml2tag_list ($hit{$a}\ -{body}, \"Hit_def\");\n $definition=$DE{0}{body};\\ -n \n %HSP=&xml2tag_list ($hit{$a}{body}, \"Hsp\");\ -\n for ($b=0; $b<$HSP{n}; $b++)\n {\n my (%S\ -TART,%END,%E,%I,%Q,%M);\n\n \n %START=&xml2ta\ -g_list ($HSP{$b}{body}, \"Hsp_query-from\");\n \ - %HSTART=&xml2tag_list ($HSP{$b}{body}, \"Hsp_hit-\ -from\");\n \n %LEN= &xml2tag_list ($HSP{$\ -b}{body}, \"Hsp_align-len\");\n %END= &xml2ta\ -g_list ($HSP{$b}{body}, \"Hsp_query-to\");\n %\ -HEND= &xml2tag_list ($HSP{$b}{body}, \"Hsp_hit-to\ -\");\n %E=&xml2tag_list ($HSP{$b}{body}, \\ -"Hsp_evalue\");\n %I=&xml2tag_list ($HSP{$\ -b}{body}, \"Hsp_identity\");\n %Q=&xml2tag_lis\ -t ($HSP{$b}{body}, \"Hsp_qseq\");\n %M=&xm\ -l2tag_list ($HSP{$b}{body}, \"Hsp_hseq\");\n \ - \n for ($e=0; $e<$Q{n}; $e++)\n\n {\n\ - $qs=$Q{$e}{body};\n $ms=$M{$e}{body};\n if ($s\ -eq eq\"\"){$seq=$qs;$L=length($seq);}\n \n $expe\ -ctation=$E{$e}{body};\n $identity=($LEN{$e}{body}\ -==0)?0:$I{$e}{body}/$LEN{$e}{body}*100;\n $start=\ -$START{$e}{body};\n $end=$END{$e}{body};\n $Hsta\ -rt=$HSTART{$e}{body};\n $Hend=$HEND{$e}{body};\n \ -\n $coverage=(($end-$start)*100)/$L;\n\n \n if (\ -$identity>$maxid || $identity<$minid || $coverage<\ -$mincov){next;}\n @lr1=(split (//,$qs));\n @lr2=\ -(split (//,$ms));\n $l=$#lr1+1;\n for ($c=0;$c<$\ -L;$c++){$p[$nhits][$c]=\"-\";}\n for ($d=0,$c=0; \ -$c<$l; $c++)\n {\n $r=$lr1[$c];\n if \ -( $r=~/[A-Za-z]/)\n {\n \n $p[$nhits][$\ -d + $start-1]=$lr2[$c];\n $d++;\n }\n \ -}\n $Qseq[$nhits]=$qs;\n $Hseq[$nhits]=$ms;\n $\ -QstartL[$nhits]=$start;\n $HstartL[$nhits]=$Hstar\ -t;\n $identityL[$nhits]=$identity;\n $endL[$nhit\ -s]=$end;\n $definitionL[$nhits]=$definition;\n $\ -identifyerL[$nhits]=$identifyer;\n $comment[$nhit\ -s]=\"$ldb|$identifyer [Eval=$expectation][id=$iden\ -tity%][start=$Hstart end=$Hend]\";\n $nhits++;\n \ - }\n }\n }\n \n $profile{n}=0;\n\ - $profile{$profile{n}}{name}=$name;\n $profi\ -le{$profile{n}}{seq}=$seq;\n $profile {n}++;\n \ - \n for ($a=0; $a<$nhits; $a++)\n {\n $n\ -=$a+1;\n \n $profile{$n}{name}=\"$name\\_$a\";\n $\ -profile{$n}{seq}=\"\";\n $profile{$n}{Qseq}=$Qseq[\ -$a];\n $profile{$n}{Hseq}=$Hseq[$a];\n $profile{$n\ -}{Qstart}=$QstartL[$a];\n $profile{$n}{Hstart}=$Hs\ -tartL[$a];\n $profile{$n}{identity}=$identityL[$a]\ -;\n $profile{$n}{definition}=$definitionL[$a];\n $\ -profile{$n}{identifyer}=$identifyerL[$a];\n $profi\ -le{$n}{comment}=$comment[$a];\n for ($b=0; $b<$L; \ -$b++)\n {\n if ($p[$a][$b])\n {\n $pr\ -ofile{$n}{seq}.=$p[$a][$b];\n }\n else\n\ - {\n $profile{$n}{seq}.=\"-\";\n }\n \ - }\n }\n \n $profile{n}=$nhits+1;\n \ - return %profile;\n }\nsub ebi_blast_xml2profile \ -\n {\n my ($name,$seq,$maxid, $minid, $mincov,\ - $string)=(@_);\n my ($L,$l, $a,$b,$c,$d,$nhits\ -,@identifyerL,$identifyer);\n \n\n \n $se\ -q=~s/[^a-zA-Z]//g;\n $L=length ($seq);\n %hi\ -t=&xml2tag_list ($string, \"hit\");\n \n for\ - ($nhits=0,$a=0; $a<$hit{n}; $a++)\n {\n my (\ -$ldb,$id, $identity, $expectation, $start, $end, $\ -coverage, $r);\n my (%Q,%M,%E,%I);\n \n $ldb=&tag2\ -value ($hit{$a}{open}, \"database\");\n $identifye\ -r=&tag2value ($hit{$a}{open}, \"id\");\n\n $descri\ -ption=&tag2value ($hit{$a}{open}, \"description\")\ -;\n \n %Q=&xml2tag_list ($hit{$a}{body}, \"querySe\ -q\");\n %M=&xml2tag_list ($hit{$a}{body}, \"matchS\ -eq\");\n %E=&xml2tag_list ($hit{$a}{body}, \"expec\ -tation\");\n %I=&xml2tag_list ($hit{$a}{body}, \"i\ -dentity\");\n \n\n for ($b=0; $b<$Q{n}; $b++)\n \ -{\n \n \n $qs=$Q{$b}{body};\n $ms=\ -$M{$b}{body};\n if ($seq eq\"\"){$seq=$qs;$L=l\ -ength($seq);}\n\n $expectation=$E{$b}{body};\n\ - $identity=$I{$b}{body};\n \n \n \ - $start=&tag2value ($Q{$b}{open}, \"start\");\n\ - $end=&tag2value ($Q{$b}{open}, \"end\");\n \ - $startM=&tag2value ($M{$b}{open}, \"start\");\n \ - $endM=&tag2value ($M{$b}{open}, \"end\");\n \ - $coverage=(($end-$start)*100)/$L;\n \n # \ -print \"$id: ID: $identity COV: $coverage [$start \ -$end]\\n\";\n \n \n if ($identity>$max\ -id || $identity<$minid || $coverage<$mincov){next;\ -}\n # print \"KEEP\\n\";\n\n \n @lr1=(\ -split (//,$qs));\n @lr2=(split (//,$ms));\n \ - $l=$#lr1+1;\n for ($c=0;$c<$L;$c++){$p[$nhit\ -s][$c]=\"-\";}\n for ($d=0,$c=0; $c<$l; $c++)\\ -n {\n $r=$lr1[$c];\n if ( $r=~/[A-Za-z]/)\\ -n {\n \n $p[$nhits][$d + $start-1]=$l\ -r2[$c];\n $d++;\n }\n }\n \n \\ -n $identifyerL[$nhits]=$identifyer;\n $com\ -ment[$nhits]=\"$ldb|$identifyer [Eval=$expectation\ -][id=$identity%][start=$startM end=$endM]\";\n \ - $nhits++;\n }\n }\n \n $profile{n}=0\ -;\n $profile{$profile{n}}{name}=$name;\n $pr\ -ofile{$profile{n}}{seq}=$seq;\n $profile {n}++;\ -\n \n for ($a=0; $a<$nhits; $a++)\n {\n\ - $n=$a+1;\n $profile{$n}{name}=\"$name\\_$a\";\n $\ -profile{$n}{seq}=\"\";\n $profile{$n}{identifyer}=\ -$identifyerL[$a];\n \n $profile{$n}{comment}=$comm\ -ent[$a];\n for ($b=0; $b<$L; $b++)\n {\n if \ -($p[$a][$b])\n {\n $profile{$n}{seq}.=$p[$a\ -][$b];\n }\n else\n {\n $profile{\ -$n}{seq}.=\"-\";\n }\n }\n }\n $pr\ -ofile{n}=$nhits+1;\n \n return %profile;\n \ -}\n\nsub blast_xml2hit_list\n {\n my $string=(\ -@_[0]);\n return &xml2tag_list ($string, \"hit\\ -");\n }\nsub xml2tag_list \n {\n my ($string\ -_in,$tag)=@_;\n my $tag_in, $tag_out;\n my %\ -tag;\n \n if (-e $string_in)\n {\n $str\ -ing=&file2string ($string_in);\n }\n else\\ -n {\n $string=$string_in;\n }\n $tag_\ -in1=\"<$tag \";\n $tag_in2=\"<$tag>\";\n $ta\ -g_out=\"/$tag>\";\n $string=~s/>/>##1/g;\n $\ -string=~s//g;\n @l=($string=~/(\\<[^>]+\ -\\>)/g);\n $tag{n}=0;\n $in=0;$n=-1;\n \n \\ -n\n foreach $t (@l)\n {\n\n $t=~s/<#//;\n \ -$t=~s/#>//;\n \n if ( $t=~/$tag_in1/ || $t=~/$tag_\ -in2/)\n {\n \n $in=1;\n $tag{$tag{n}}{o\ -pen}=$t;\n $n++;\n \n }\n elsif ($t=~/$t\ -ag_out/)\n {\n \n\n $tag{$tag{n}}{close}\ -=$t;\n $tag{n}++;\n $in=0;\n }\n elsif (\ -$in)\n {\n \n $tag{$tag{n}}{body}.=$t;\n \ - }\n }\n \n return %tag;\n }\n\n\n\n\n"\ -,"use Env qw(HOST);\nuse Env qw(HOME);\nuse Env qw\ -(USER);\nwhile (<>)\n {\n if ( /^>(\\S+)/)\n \ - {\n if ($list{$1})\n {\n print \">$1_$li\ -st{$1}\\n\";\n $list{$1}++;\n }\n else\n {\ -\n print $_;\n $list{$1}=1;\n }\n }\ -\n else\n {\n print $_;\n }\n }\n \ - \n","\n\n\nuse Env qw(HOST);\nuse Env qw(HOME);\ -\nuse Env qw(USER);\n\n\nopen (F,$ARGV[0]);\nwhile\ - ( <>)\n {\n @x=/([^:,;\\)\\(\\s]+):[^:,;\\)\\\ -(]*/g;\n @list=(@list,@x);\n }\n$n=$#list+1;\n\ -foreach $n(@list){print \">$n\\nsequence\\n\";}\n\\ -n\nclose (F);\n","\nopen (F, $ARGV[0]);\n\nwhile (\ - )\n {\n @l=($_=~/(\\S+)/g);\n \n $na\ -me=shift @l;\n \n print STDOUT \"\\n>$name\\\ -n\";\n foreach $e (@l){$e=($e eq \"0\")?\"O\":\\ -"I\";print \"$e\";}\n }\nclose (F);\n\n \\ -n \n","use Env qw(HOST);\nuse Env qw(HOME);\nus\ -e Env qw(USER);\n\n$tmp=\"$ARGV[0].$$\";\nopen (IN\ -, $ARGV[0]);\nopen (OUT, \">$tmp\");\n\nwhile ( )\n {\n $file=$_;\n $file=~s/\\r\\n/\\n/g\ -;\n $file=~s/\\n\\r/\\n/g;\n $file=~s/\\r\\r\ -/\\n/g;\n $file=~s/\\r/\\n/g;\n print OUT \"\ -$file\";\n }\nclose (IN);\nclose (OUT);\n\nopen (\ -OUT, \">$ARGV[0]\");\nopen (IN, \"$tmp\");\n\nwhil\ -e ( )\n{\n print OUT \"$_\";\n}\nclose (IN);\\ -nclose (OUT);\nunlink ($tmp);\n\n"}; -/*********************************COPYRIGHT NOTICE**********************************/ -/*© Centro de Regulacio Genomica */ -/*and */ -/*Cedric Notredame */ -/*Tue Oct 27 10:12:26 WEST 2009. */ -/*All rights reserved.*/ -/*This file is part of T-COFFEE.*/ -/**/ -/* T-COFFEE is free software; you can redistribute it and/or modify*/ -/* it under the terms of the GNU General Public License as published by*/ -/* the Free Software Foundation; either version 2 of the License, or*/ -/* (at your option) any later version.*/ -/**/ -/* T-COFFEE is distributed in the hope that it will be useful,*/ -/* but WITHOUT ANY WARRANTY; without even the implied warranty of*/ -/* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the*/ -/* GNU General Public License for more details.*/ -/**/ -/* You should have received a copy of the GNU General Public License*/ -/* along with Foobar; if not, write to the Free Software*/ -/* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA*/ -/*............................................... |*/ -/* If you need some more information*/ -/* cedric.notredame@europe.com*/ -/*............................................... |*/ -/**/ -/**/ -/* */ -/*********************************COPYRIGHT NOTICE**********************************/ diff --git a/binaries/src/tcoffee/t_coffee_source/random.c b/binaries/src/tcoffee/t_coffee_source/random.c deleted file mode 100644 index 84b6c8e..0000000 --- a/binaries/src/tcoffee/t_coffee_source/random.c +++ /dev/null @@ -1,94 +0,0 @@ -/* -* -* Rand.c -* -* - linear and additive congruential random number generators -* (see R. Sedgewick, Algorithms, Chapter 35) -* -* Implementation: R. Fuchs, EMBL Data Library, 1991 -* -*/ - - - -#define m1 10000 -#define m 100000000 - -/* linear congruential method -* -* linrand() returns an unsigned long random number in the range 0 to r-1 -*/ - - - -static unsigned long mult(unsigned long p, unsigned long q) -{ - unsigned long p1,p0,q1,q0; - - p1 = p/m1; p0 = p % m1; - q1 = q/m1; q0 = q % m1; - return (unsigned long)((((p0*q1 + p1*q0) % m1) * m1 + p0*q0) % m); -} - - -/* additive congruential method -* -* addrand() returns an unsigned long random number in the range 0 to r-1 -* The random number generator is initialized by addrandinit() -*/ - -static unsigned long j; -static unsigned long a[55]; - -unsigned long addrand(unsigned long r) -{ -int x,y; -/* fprintf(stdout,"\n j = %d",j); */ - j = (j + 1) % 55; -/* fprintf(stdout,"\n j = %d",j); */ - x = (j+23)%55; - y = (j+54)%55; - a[j] = (a[x] + a[y]) % m; -/* a[j] = (a[(j+23)%55] + a[(j+54)%55]) % m; */ -/* fprintf(stdout,"\n a[j] = %d",a[j]); */ - return( ((a[j] / m1) * r) / m1 ); -} - -void addrandinit(unsigned long s) -{ - a[0] = s; - j = 0; - do { - ++j; - a[j] = (mult(31,a[j-1]) + 1) % m; - } while (j<54); -} -/*********************************COPYRIGHT NOTICE**********************************/ -/*© Centro de Regulacio Genomica */ -/*and */ -/*Cedric Notredame */ -/*Tue Oct 27 10:12:26 WEST 2009. */ -/*All rights reserved.*/ -/*This file is part of T-COFFEE.*/ -/**/ -/* T-COFFEE is free software; you can redistribute it and/or modify*/ -/* it under the terms of the GNU General Public License as published by*/ -/* the Free Software Foundation; either version 2 of the License, or*/ -/* (at your option) any later version.*/ -/**/ -/* T-COFFEE is distributed in the hope that it will be useful,*/ -/* but WITHOUT ANY WARRANTY; without even the implied warranty of*/ -/* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the*/ -/* GNU General Public License for more details.*/ -/**/ -/* You should have received a copy of the GNU General Public License*/ -/* along with Foobar; if not, write to the Free Software*/ -/* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA*/ -/*............................................... |*/ -/* If you need some more information*/ -/* cedric.notredame@europe.com*/ -/*............................................... |*/ -/**/ -/**/ -/* */ -/*********************************COPYRIGHT NOTICE**********************************/ diff --git a/binaries/src/tcoffee/t_coffee_source/reformat.c b/binaries/src/tcoffee/t_coffee_source/reformat.c deleted file mode 100644 index 959a287..0000000 --- a/binaries/src/tcoffee/t_coffee_source/reformat.c +++ /dev/null @@ -1,11819 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include -#include "io_lib_header.h" -#include "util_lib_header.h" -#include "dp_lib_header.h" -#include "define_header.h" -#include "dev1_lib_header.h" //JM_STRAT - -#define ACTION(x) ((n_actions>=(x+1))?action_list[x]:NULL) -#define ACTION2(x,y) ((n_actions>=(x+1))?action_list[x]:y) -#define ATOI_ACTION(x) ((ACTION(x)!=NULL)?(atoi(ACTION(x))):0) - -/**************************************************************************************************/ -/***************************** SEQ_REFORMAT ******************************************/ -/**************************************************************************************************/ -int output_transitions(char *outfile, Alignment *A); -static int output_age_matrix ( char *outfile, int val); -int SeqGCGCheckSum(char *seq, int len); -static Sequence *seq2year ( Sequence *S, int modulo); -static Sequence* output_n_pavie_age_channel (Sequence *S, char *name, int n); -static Sequence* output_pavie_age_channel (Sequence *S, char *name, int modulo); - -static int output_seq2struc(char *outfile, Alignment *A); -void output_conservation_statistics ( char *file, Alignment *A); -/**************************************************************************************************/ -/***************************** SEQ_REFORMAT ******************************************/ -/**************************************************************************************************/ -int seq_reformat ( int argc, char **in_argv) - { - - Sequence_data_struc *D1=NULL; - Sequence_data_struc *D2=NULL; - Sequence_data_struc *D_ST=NULL; - Action_data_struc *RAD; - - - - int a, b; - - char *in_format; - char *in2_format; - char *out_format; - char *in_file; - char *in2_file; - char *out_file; - char *out2_file; - char *struc_in_format; - char *struc_out_format; - char *struc_in_file; - char *struc_out_file; - char**action_list; - char **action; - char *rename_file; - char *cache; - char ***rename_list=NULL; - int code=CODE; - char **argv; - - int n_actions=0; - int print_format=0; - /*INITIALIZATIONS*/ - - RAD=vcalloc ( 1, sizeof ( Action_data_struc)); - RAD->keep_case=1; - declare_name (cache);sprintf ( cache, "use"); - declare_name(in_file); - declare_name(in2_file); - declare_name(out_file); - declare_name(out2_file); - declare_name(struc_in_format); - declare_name(struc_out_format); - declare_name(RAD->coor_file); - - declare_name(struc_in_file); - declare_name(struc_out_file); - declare_name(in_format); - declare_name(in2_format); - declare_name(out_format); - declare_name(rename_file); - - - argv=break_list ( in_argv, &argc, "=;, \n"); - - action_list=declare_char ( 100, 100); - -/*END INITIALIZATION*/ - - addrandinit ( (unsigned long) 500); - - if ( argc==1 || strm6 ( argv[1], "h", "-h", "help", "-help", "-man", "?")) - { - - fprintf ( stdout, "\n%s (%s,%s,%s [%s])\n",PROGRAM, VERSION,AUTHOR, DATE, URL); - fprintf ( stdout, "\n*********** MINIMUM SYNTAX *****************"); - fprintf ( stdout, "\nseq_reformat -in -output "); - fprintf ( stdout, "\nSome File formats are automatically recognised"); - fprintf ( stdout, "\nSee Format section"); - fprintf ( stdout, "\n"); - fprintf ( stdout, "\n*********** MAIN FLAGS ******************"); - fprintf ( stdout, "\n-in name........Name of the file read"); - - - fprintf ( stdout, "\n-input format......Name of the format read, see Input Format Section"); - fprintf ( stdout, "\n...................Automatic detection, except for seqs of numbers"); - fprintf ( stdout, "\n...................-input number_aln | number_fasta"); - fprintf ( stdout, "\n-in2 fname......Second alignment"); - fprintf ( stdout, "\n-input2 format.....See -input"); - fprintf ( stdout, "\n-exon_boundaries obj file"); - fprintf ( stdout, "\n-out fname......Output file (defualt is STDOUT"); - fprintf ( stdout, "\n-output format.....Output Format, default is fasta_aln"); - fprintf ( stdout, "\n-struc_in name...File containing a coded aln"); - fprintf ( stdout, "\n-struc_in_f format.See -input and output format section"); - fprintf ( stdout, "\n-struc_out fname..Name of the output structure"); - fprintf ( stdout, "\n-struc_out_f symbol"); - fprintf ( stdout, "\n-keep_case=on|off..keep case, On by default"); - fprintf ( stdout, "\n-action +ac1 +ac2..See the action section"); - fprintf ( stdout, "\n-rename .....Rename the sequences following indications"); - fprintf ( stdout, "\n...................File Format: One couple /line"); - fprintf ( stdout, "\n...................Rename order into "); - fprintf ( stdout, "\n...................code file: -output code_name"); - fprintf ( stdout, "\n-code Rename file to "); - fprintf ( stdout, "\n-decode Rename file to "); - fprintf ( stdout, "\n-no_warning........Suppresses all warnings"); - fprintf ( stdout, "\n-cache.............use,ignore,update,local, DirectoryName"); - - - fprintf ( stdout, "\n"); - - fprintf ( stdout, "\n*********** REFORMAT ACTIONS *****************"); - fprintf ( stdout, "\n +Xaction.............Specifies which file undergoes the action"); - fprintf ( stdout, "\n +Xaction.............X=1: -in"); - fprintf ( stdout, "\n +Xaction.............X=2: -in2"); - fprintf ( stdout, "\n +Xaction.............X=3: -struc_in"); - fprintf ( stdout, "\n +name2unique_name....replace duplicated name with name_#"); - fprintf ( stdout, "\n +swap_header........,swapp comments: replace comments/name in 1 by in 2"); - fprintf ( stdout, "\n +swap_lib_header.F...Replace the sequences in the tc_lib (-in) with those in F"); - fprintf ( stdout, "\n .....................F is a legal FASTA file"); - - - fprintf ( stdout, "\n +translate[0-2]......Translate on Frame 0, 1, 2 "); - fprintf ( stdout, "\n +translate[3]........longuest ORF on direct strand"); - fprintf ( stdout, "\n +translate[4]........longuest ORF on direct+complementary strand"); - - - fprintf ( stdout, "\n +add_scale...addscale below aln"); - - fprintf ( stdout, "\n +rm_gap n ...........Removes col with n%% gap [n=100]"); - fprintf ( stdout, "\n +rmgap_col SEQ1:SEQ2.Removes column with a gap in SEQ [#] "); - - fprintf ( stdout, "\n +backtranslate.......Random Backtranslation"); - fprintf ( stdout, "\n +complement..........Produces the reverse complement"); - - fprintf ( stdout, "\n +reorder.............Reorders sequences of according to "); - fprintf ( stdout, "\n .........random......Random_order"); - fprintf ( stdout, "\n .........tree........Tree Order (in2)"); - fprintf ( stdout, "\n +reorder_columns.....Reorders sequences of according to "); - fprintf ( stdout, "\n .........random......Random_order"); - fprintf ( stdout, "\n .........tree..mode..Tree Order (comuted with mode: sarmat, idmat, blosum62mt..."); - fprintf ( stdout, "\n +aln2random_aln SCR..Randomize the aln, S: swap sequences names"); - fprintf ( stdout, "\n .....................Swap residues within colums"); - fprintf ( stdout, "\n .....................Swap residues across the aln"); - fprintf ( stdout, "\n +aln2sample......N......"); - fprintf ( stdout, "\n +aln2bootstrap...N......"); - - - fprintf ( stdout, "\n +chain...............Identifies all the intermediate sequences from <-in>"); - fprintf ( stdout, "\n .....................needed to join every sequence pair in <-in2>"); - - fprintf ( stdout, "\n +aln2cons mat_name..Ouputs a consensus sequence"); - fprintf ( stdout, "\n .....................The consensus is determined using mat"); - fprintf ( stdout, "\n .....................By Default, mat=blosum62mt, name=Cons"); - fprintf ( stdout, "\n +aln2resindex........Prints the sequence index of each residue in -in for each -in2 sequence"); - fprintf ( stdout, "\n +collapse_aln | file name"); - fprintf ( stdout, "\n .....................Replaces a group of sequences with its consensus"); - fprintf ( stdout, "\n .....................The replacement sequence is named "); - fprintf ( stdout, "\n .....................List of sequences can be provided via a file"); - fprintf ( stdout, "\n .....................File:>new_name seq1 seq2 seq3...."); - fprintf ( stdout, "\n +original_seqnos.....Keep original seqnos [SWITCH]"); - fprintf ( stdout, "\n +seqnos..............Print Seqnos [SWITCH]"); - fprintf ( stdout, "\n +code_dna_aln........Undocumented") ; - fprintf ( stdout, "\n +grep..[NAME|SEQ|COMMENT]..[KEEP|REMOVE]..[string]......"); - fprintf ( stdout, "\n .....................Keeps or Removes Sequences matching string"); - fprintf ( stdout, "\n +extract_block | |"); - fprintf ( stdout, "\n .....................Extract column pos OR [start to end["); - fprintf ( stdout, "\n ..................... Format"); - fprintf ( stdout, "\n .......................seq start end | seq pos"); - fprintf ( stdout, "\n .......................# for comments"); - fprintf ( stdout, "\n .......................! seq offset_value (0 by default)"); - fprintf ( stdout, "\n .....................Can extract as many positions as needed"); - fprintf ( stdout, "\n .....................seq=cons: measure positions on the full aln"); - fprintf ( stdout, "\n +cat_aln.............Concatenates the alignments input via -in and -in2"); - fprintf ( stdout, "\n +cat_aln.............-if no -in2, -in is expected to be a list of alignments to concatenate"); - - fprintf ( stdout, "\n +msalist2cat_pwaln.min..max"); - fprintf ( stdout, "\n .....................extract all pw projections and conctaenates those\n"); - fprintf ( stdout, "\n .....................where id>=min and id<=max\n"); - fprintf ( stdout, "\n .....................min and max can be omitted (min=0, max=100)\n"); - - fprintf ( stdout, "\n +seq2blast ..gather all possible homologues from NR (EBI BLAST)"); - fprintf ( stdout, "\n +seq2msa ....makes a standard progressive alignment using matrix"); - fprintf ( stdout, "\n +realign_block "); - fprintf ( stdout, "\n .....................Realign column c1 to c2 (non inc.) with pg)"); - fprintf ( stdout, "\n .....................pg reads fasta and outputs fasta"); - fprintf ( stdout, "\n .....................pg -infile= -outfile="); - fprintf ( stdout, "\n +extract_seq seq_name (start end seq_name start end...) | filename"); - fprintf ( stdout, "\n .....................seq_name='*': every seq"); - fprintf ( stdout, "\n .....................start='*' : real start"); - fprintf ( stdout, "\n .....................end='*' : real end"); - fprintf ( stdout, "\n .....................filename: fasta format"); - fprintf ( stdout, "\n +extract_seq_list name1 name2"); - fprintf ( stdout, "\n .....................Extracts entire sequences"); - fprintf ( stdout, "\n +remove_seq sn1 sn2..Removes sequences sn1, sn2..."); - fprintf ( stdout, "\n +remove_seq empty....Removes empty sequences (gap only)"); - fprintf ( stdout, "\n +remove_seq unique...Remove all multiple occurences except the first"); - fprintf ( stdout, "\n +thread_profile_on_msa "); - fprintf ( stdout, "\n .....................Threads a list of profiles on corresponding seq"); - fprintf ( stdout, "\n .....................File: >seqname _R_ [nlines]"); - - fprintf ( stdout, "\n +thread_dna_on_prot_aln"); - fprintf ( stdout, "\n .....................-in DNA.seq and -in2 AA.aln"); - fprintf ( stdout, "\n +thread_struc_on_aln"); - fprintf ( stdout, "\n .....................-in structure and -in2 aln"); - fprintf ( stdout, "\n +use_cons............Use the consensus for n[SWITCH]"); - fprintf ( stdout, "\n +upper.n|[n1-n2].....n omitted sets everything to upper case"); - fprintf ( stdout, "\n .....................To use n: provide a number_aln via:"); - fprintf ( stdout, "\n .....................-struc_in -struc_in_f number_aln"); - fprintf ( stdout, "\n .....................if use_cons is set n, is read on the cons"); - fprintf ( stdout, "\n .....................n: will upper every residue with a value of n in struc_in"); - fprintf ( stdout, "\n .....................[n1-n2]: upper residues between n1 and n2"); - fprintf ( stdout, "\n +lower n|[n1-n2]....See +upper"); - fprintf ( stdout, "\n +switchcase n|[n1-n2]See +upper"); - fprintf ( stdout, "\n +color_residue | file"); - fprintf ( stdout, "\n .....................File: seq_name pos color"); - fprintf ( stdout, "\n .....................color: 0-9"); - fprintf ( stdout, "\n +edit_residue | file"); - fprintf ( stdout, "\n .....................File: seq_name pos color"); - fprintf ( stdout, "\n .....................edit: upper|lower|symbol"); - - - - fprintf ( stdout, "\n +keep n|[n1-n2]....Only keep residues that have a score between n1 and n2"); - - fprintf ( stdout, "\n +invert..............Inverts the sequences: CAT => TAC"); - fprintf ( stdout, "\n +rotate name Rotate an MSA, names each sequence name_col#"); - fprintf ( stdout, "\n +convert n|[n1-n2] s1 s2 ...."); - fprintf ( stdout, "\n +merge_annotation.... "); - - fprintf ( stdout, "\n .....................Converts residues with your alignment"); - fprintf ( stdout, "\n .....................similar to upper"); - fprintf ( stdout, "\n .....................s1: ABCDe turns every ABCD into e"); - fprintf ( stdout, "\n .....................s1: #e turns any residue into e"); - fprintf ( stdout, "\n aln2short_aln L C S..Turns sequences into shorter sequences"); - fprintf ( stdout, "\n .....................L: list of residues to keep"); - fprintf ( stdout, "\n .....................S: Size of Streches replaced by symbol C"); - - - fprintf ( stdout, "\n +random n l..........Generates N random sequences of len l"); - fprintf ( stdout, "\n .....................You must provide a file with -in"); - fprintf ( stdout, "\n +count n|[n1-n2] s1 s2...."); - fprintf ( stdout, "\n .....................Counts residues with your alignment"); - fprintf ( stdout, "\n .....................similar to convert"); - fprintf ( stdout, "\n +print_format........prints the format name"); - fprintf ( stdout, "\n +keep_name...........Keep the original sequence name on extraction"); - - fprintf ( stdout, "\n +remove_aa pos Ml Ncycle Random_len"); - fprintf ( stdout, "\n .....................Randomly modifies an alignment"); - fprintf ( stdout, "\n .....................pos=0: chosen randomly"); - fprintf ( stdout, "\n .....................MaxLen of the deletions, Ncycle: number of cycles"); - fprintf ( stdout, "\n .....................Random_len: 0 sets the len to maxlen, 1 to a random value"); - fprintf ( stdout, "\n +remove_nuc.x........Remove Position 1, 2 or 3 of every codon"); - fprintf ( stdout, "\n +evaluate matrix..gop..gep"); - fprintf ( stdout, "\n .....................Make a similarity evaluation with matrix"); - fprintf ( stdout, "\n .....................use -output=score_ascii, or score_html."); - fprintf ( stdout, "\n .....................You can filter on the values"); - fprintf ( stdout, "\n +evaluate matrix..gop..gep"); - fprintf ( stdout, "\n .....................Make an SP evaluation with matrix"); - fprintf ( stdout, "\n .....................Uses Natural Gap penalties"); - fprintf ( stdout, "\n .....................gop and gep must be negative"); - fprintf ( stdout, "\n .....................use -output=color_ascii, color_html to get a color display"); - - fprintf ( stdout, "\n.....+evaluate_lat........Make a lateral evaluation with matrix"); - fprintf ( stdout, "\n +msa_weight proc.....Computes weights using the procedure"); - fprintf ( stdout, "\nRNA analysis Post Processing___________________________________________________"); - fprintf ( stdout, "\n +aln2alifold.........Turns the MSA into a consensus structure"); - fprintf ( stdout, "\n +add_alifold.........adds an alifold consensus structure"); - - fprintf ( stdout, "\n +alifold2analyze.mode..mode=stat_cache_list_aln_color_html_ps_usegap"); - fprintf ( stdout, "\n .......................stat: compile Number of compensated mutations"); - fprintf ( stdout, "\n .......................cache: ascii-code compensated mutations on aln"); - fprintf ( stdout, "\n .......................html: color-code compensated mutations on aln"); - fprintf ( stdout, "\n .......................aln: mark compensated mutations on stockholm aln"); - fprintf ( stdout, "\n .......................usegap: do not ignore positions with gaps"); - - fprintf ( stdout, "\n +cmp_RNAfold.........compares the sec struc of in1 and in2 (computes them with alifold if missing)"); - - fprintf ( stdout, "\nMSA Post Processing___________________________________________________"); - fprintf ( stdout, "\n +force_aln filename|seq1 res1 seq2 res2"); - fprintf ( stdout, "\n .....................Forces residue 1 of seq1 to be aligned with res2 of seq 2"); - fprintf ( stdout, "\n .....................In a file, there must be one pair of interaction/line"); - fprintf ( stdout, "\n +sim_filter[_aln_Ix_iy_Cz_cw "); - fprintf ( stdout, "\n ....................._, aln is assumed"); - fprintf ( stdout, "\n ....................._I max identity to seq"); - fprintf ( stdout, "\n ....................._i min identity to seq"); - fprintf ( stdout, "\n ....................._C max cov on seq"); - fprintf ( stdout, "\n ....................._c min cov on seq"); - fprintf ( stdout, "\n +trim[_aln_%%%%50_n111_N50_T_Fn_fS_pS_max_sim] [string2]"); - fprintf ( stdout, "\n ....................._, aln is assumed"); - fprintf ( stdout, "\n ....................._%%%%"); - fprintf ( stdout, "\n ....................._max Or _min "); - fprintf ( stdout, "\n ....................._cov Or _sim Filter according to the coverage [Def: _sim]"); - fprintf ( stdout, "\n ....................._n "); - fprintf ( stdout, "\n ....................._N"); - fprintf ( stdout, "\n ....................._T Reorder the sequences according to a tree BEFORE triming"); - fprintf ( stdout, "\n ....................._Fn Keep only sequences that have AT LEAST ONE residue aligned"); - fprintf ( stdout, "\n ......................in the n first and n last columns. "); - fprintf ( stdout, "\n ....................._O Remove outlayers that have less than min average sim with other sequences"); - - fprintf ( stdout, "\n .....................Keeping Sequences: Sequences provided via -in2 will be kept"); - - fprintf ( stdout, "\n .....................Keeping Sequences: Sequences whose name contains in field fS will be kept"); - fprintf ( stdout, "\n ....................._f designates a field"); - fprintf ( stdout, "\n ..................... is a Perl regular expression"); - fprintf ( stdout, "\n +aln2unalign Mode Penalty Threshold"); - fprintf ( stdout, "\n .....................Identifies all the streches less conserved than than the average"); - fprintf ( stdout, "\n .....................Mode: lower|number|unalign Act on all the resiues withs score with tree <-in2>)"); - fprintf ( stdout, "\n ......................+tree_scan help to get P1 information"); - fprintf ( stdout, "\n ......................+aln2tree help to get P2 information"); - - fprintf ( stdout, "\n .....................-in and -in2 can contain different taxons"); - fprintf ( stdout, "\n +treelist2groups.N....count all topologies within a list of trees"); - fprintf ( stdout, "\n .....................-in is in fasta format with each name being a newick file"); - fprintf ( stdout, "\n .....................-in2 can be a list of sequences used to trim the trees"); - fprintf ( stdout, "\n ......................N can be used to unresolve the trees with Depth N"); - fprintf ( stdout, "\n +treelist2lti.N.C.....Reports the average stability of each sequence neighborhood"); - fprintf ( stdout, "\n ......................Species can be selected via -in2 [Fasta file with Taxon names]"); - fprintf ( stdout, "\n ......................OR the sequences observed in C% of the files are kept [Def: C=100]"); - - - fprintf ( stdout, "\n +treelist2seq.C.......Reports the species observed in C% of the trees"); - fprintf ( stdout, "\n +treelist2splits......List and counts all the splits in a list of trees"); - fprintf ( stdout, "\n ......................splits can be restricted to a list of sequences provided via -in2"); - fprintf ( stdout, "\n +treelist2dmat.......outputs a distance matrix for a list of trees"); - - fprintf ( stdout, "\n +tree_compute n s....Computes a tree using the MSA provided with -in"); - fprintf ( stdout, "\n ....................n:0-9, controls the way the MSA is filtered"); - fprintf ( stdout, "\n ....................s:pam250mt|blosum62mt|categories|enthropy"); - fprintf ( stdout, "\n ....................s:controls the column evaluation in MSA"); - fprintf ( stdout, "\n +change_distances.f.f:float, sets all the distances to f in the tree"); - fprintf ( stdout, "\n +change_bootstrap n..:n=0 removes all the bootstrap values"); - fprintf ( stdout, "\n .....................:n!=0 adds a the value n to every node"); - fprintf ( stdout, "\n +tree2dpatree........Replaces tree distances with the minimum %%ID in"); - fprintf ( stdout, "\n .....................the depending subgroup. The ID is measured on an"); - fprintf ( stdout, "\n .....................-in=TREE -in2=ALN"); - fprintf ( stdout, "\n +unroot..............Removes the root in the input tree"); - fprintf ( stdout, "\n +tree2group.N.I.P....Reports all the tree subgroup with at most Nseq"); - fprintf ( stdout, "\n .....................and at min I%% identity. Output format can be read by"); - fprintf ( stdout, "\n .....................collapse_tree. New groups are named P_1, P_2..."); - fprintf ( stdout, "\n +collapse_tree.F.....Collapses trees. F is either a file or a list"); - fprintf ( stdout, "\n ..................... ..."); - fprintf ( stdout, "\n +aln2tree............Computes a tree"); - fprintf ( stdout, "\n ..ktupN|aln|sarmat ktupN: match size N to estimate distances"); - fprintf ( stdout, "\n .....................aln: Measures distances on aln"); - fprintf ( stdout, "\n .....................sarmat: expects in to be a SAR matrix of O and I"); - fprintf ( stdout, "\n ..nj | cw............Runs Neighbor Joining OR Cw to compute Tree"); - fprintf ( stdout, "\n ..dpa................Turns the tree into a daptree (+tree2dpatree)"); - fprintf ( stdout, "\n +node_sort.....Sort leafs of tree n1, by node distance"); - - - fprintf ( stdout, "\nMatrix Analysis___________________________________________________"); - fprintf ( stdout, "\n +aln2mat_diaa........computes a dinucleotide matrix on a list of aln"); - fprintf ( stdout, "\n +aln2mat.............computes a log odd matrix"); - - fprintf ( stdout, "\n +seq2lat_mat.........computes a transition matrix on seq provided via -in"); - - fprintf ( stdout, "\nStructure Analysis___________________________________________________"); - fprintf ( stdout, "\n +struc2contacts.A.B D.Displays in capitals all the residues of A"); - fprintf ( stdout, "\n ......................Less than D Angs from a residue of B"); - fprintf ( stdout, "\n ......................A and B are pdb file, D is a distance in Angs"); - fprintf ( stdout, "\n +seq2contacts.A.D.....Identifies all the residues in contact with ligands"); - fprintf ( stdout, "\n ......................Ligands are in the FASTA header of struc in"); - fprintf ( stdout, "\n ......................>Name _S_ [Target Struc] [Ligand1] [Chain] ..."); - fprintf ( stdout, "\n ......................Output: number_fasta: 0=no contact, 1=ligand 1..."); - fprintf ( stdout, "\n ......................9: residues in contact with more than 1 ligand"); - fprintf ( stdout, "\n ......................Use -output=color_html/ascii to display result"); - fprintf ( stdout, "\n +struc2nb...D.........Display a list of all the residues D appart"); - fprintf ( stdout, "\n +rm_template...V......Removes _[S|G|R]_[template] to sequence names"); - fprintf ( stdout, "\n ......................V: omitted | sequences <=> Output sequences"); - fprintf ( stdout, "\n ......................V: template <=> Output templates"); - - fprintf ( stdout, "\n +add_template.F.......Add _[S|G|R]_[template] to sequence names"); - fprintf ( stdout, "\n ......................F can either be a fasta file or an executable"); - fprintf ( stdout, "\n ......................F: File: >name _S_ template"); - fprintf ( stdout, "\n ......................F: executable: pg -infile= -outfile="); - fprintf ( stdout, "\nMatrix Comparison___________________________________________________"); - fprintf ( stdout, "\n +mat2cmp...............Returns the correlation coefficient between two matrices"); - fprintf ( stdout, "\n .......................-in mat1 -input matrix, -in2 mat2 -input2 matrix"); - fprintf ( stdout, "\n*********** INPUT FORMATS: Alignments *****************"); - fprintf ( stdout, "\n AUTOMATIC RECOGNITION"); - fprintf ( stdout, "\n perl_xxx:............. runs xxx onto the input file"); - fprintf ( stdout, "\n xxxx > outfile..xxx reads any formats, outputs fasta"); - fprintf ( stdout, "\n amps_aln saga_aln "); - fprintf ( stdout, "\n clustal_aln fasta_aln msf_aln "); - fprintf ( stdout, "\n dali_aln gotoh_aln pima_aln"); - fprintf ( stdout, "\n dialign_aln matrix conc_aln"); - fprintf ( stdout, "\n NON AUTOMATIC RECOGNITION (use the -input file to specify the format"); - fprintf ( stdout, "\n number_aln newick_tree"); - fprintf ( stdout, "\n"); - fprintf ( stdout, "\n*********** INPUT FORMATS: Sequences *****************"); - fprintf ( stdout, "\n fasta_seq dali_seq pir_seq"); - fprintf ( stdout, "\n barton_list_tc amps_sd_scores EST_fasta"); - fprintf ( stdout, "\n gor_seq gor_struc number_fasta[*]"); - fprintf ( stdout, "\n swissprot tc_lib pdb_struc"); - fprintf ( stdout, "\n"); - fprintf ( stdout, "\n*********** INPUT FORMATS: Structures *****************"); - fprintf ( stdout, "\n rna_number"); - fprintf ( stdout, "\n alifold"); - fprintf ( stdout, "\n*********** OUTPUT FORMATS: Alignments ******************"); - fprintf ( stdout, "\n compressed_aln saga_aln clustal_aln"); - fprintf ( stdout, "\n phylip_aln msf_aln fasta_aln "); - fprintf ( stdout, "\n pir_aln "); - fprintf ( stdout, "\n color_html,color_ps......colored using the struc_in file "); - fprintf ( stdout, "\n color_protogene..........colors codons"); - fprintf ( stdout, "\n color_exoset.............mixes conservation (gray) and introns (RGB)"); - fprintf ( stdout, "\n color_pdf pw_lib_saga_aln tdna_aln"); - fprintf ( stdout, "\n thread_dna_on_prot_aln"); - fprintf ( stdout, "\n"); - fprintf ( stdout, "\n*********** OUTPUT FORMATS: sequence ******************"); - fprintf ( stdout, "\n fasta_seq fasta_seq1 gotoh_seq"); - fprintf ( stdout, "\n gor_seq cache_id"); - fprintf ( stdout, "\n"); - fprintf ( stdout, "\n*********** OUTPUT FORMATS: weights ******************"); - fprintf ( stdout, "\n constraints saga_pw_sd_weights nseq\n"); - fprintf ( stdout, "\n"); - fprintf ( stdout, "\n*********** OUTPUT Formats: special ****************"); - fprintf ( stdout, "\n len name statistics<_hnrglNL>"); - fprintf ( stdout, "\n sim............outputs a similarity matrix based on an id comparison of -in"); - fprintf ( stdout, "\n sim_sarmat.....in is sar matrix"); - fprintf ( stdout, "\n sim_idscore....makes dp alignment of the sequences using Blosum62mt"); - fprintf ( stdout, "\n sim_idscoreDNA.makes dp alignment of the sequences using idmat"); - fprintf ( stdout, "\n sim............if -in2 is set: in1 vs in2, idscore"); - - fprintf ( stdout, "\n code_name......Outputs a compact list of names for code/decode"); - - - - fprintf ( stdout, "\n"); - - - fprintf ( stdout, "\n"); - return EXIT_SUCCESS; - } - - argv=standard_initialisation (argv, &argc); - - - for ( a=1; a< argc; a++) - { - if (a==1 && argv[1][0]!='-') - { - sprintf( in_file, "%s", argv[a]); - } - else if ( strcmp ( argv[a], "-in_f")==0 ||strm(argv[a],"-input") ) - { - if ( strcmp ( argv[a], "-in_f")==0) fprintf ( stdout,"\nWARNING: %s deprecated, use -input instead", argv[a]); - - sprintf ( in_format, "%s", argv[a+1]); - a++; - } - - else if ( strcmp ( argv[a], "-cache")==0 ) - { - sprintf (cache, "%s", argv[a+1]); - - a++; - } - - - else if ( strcmp ( argv[a], "-exon_boundaries")==0 ) - { - - set_string_variable ("exon_boundaries", argv[a+1]); - a++; - } - else if ( strcmp ( argv[a], "-overaln_threshold")==0 ) - { - - set_int_variable ("overaln_threshold", atoi(argv[a+1])); - a++; - } - else if ( strcmp ( argv[a], "-overaln_target")==0 ) - { - - set_int_variable ("overaln_target", atoi(argv[a+1])); - a++; - } - else if ( strcmp ( argv[a], "-overaln_P1")==0 ) - { - - set_int_variable ("overaln_P1", atoi(argv[a+1])); - a++; - } - else if ( strcmp ( argv[a], "-overaln_P2")==0 ) - { - - set_int_variable ("overaln_P2", atoi(argv[a+1])); - a++; - } - else if ( strcmp ( argv[a], "-overaln_P3")==0 ) - { - - set_int_variable ("overaln_P3", atoi(argv[a+1])); - a++; - } - else if ( strcmp ( argv[a], "-overaln_P4")==0 ) - { - - set_int_variable ("overaln_P4", atoi(argv[a+1])); - a++; - } - - else if ( strcmp ( argv[a], "-in2_f")==0||strm(argv[a],"-input2") ) - { - if ( strcmp ( argv[a], "-in_f")==0) fprintf ( stdout,"\nWARNING: %s deprecated, use -input2 instead", argv[a]); - - sprintf ( in2_format, "%s", argv[a+1]); - a++; - } - else if ( strcmp ( argv[a], "-seqnos")==0) - { - sprintf (action_list[n_actions++], "seqnos"); - } - - else if ( strcmp( argv[a], "-action")==0) - { - while ((a+1)keep_case=1; - else RAD->keep_case=(strm3(argv[a], "on","ON","On"))?1:0; - - } - - else if ( strcmp ( argv[a], "-conv")==0) - { - if ( strncmp ( argv[a+1],"set",3)==0)RAD->symbol_list=make_symbols (argv[++a],&(RAD->n_symbol)); - else - { - RAD->symbol_list=declare_char (STRING, STRING); - while(!NEXT_ARG_IS_FLAG) - { - sprintf ( RAD->symbol_list[RAD->n_symbol], "%s", argv[++a]); - RAD->n_symbol++; - } - } - } - else if ( strcmp ( argv[a], "-struc_in_f")==0 ||strcmp ( argv[a], "-input3")==0 ) - { - sprintf ( struc_in_format, "%s", argv[a+1]); - a++; - } - else if ( strcmp ( argv[a], "-out_f")==0 ||strm(argv[a],"-output") ) - { - if ( strcmp ( argv[a], "-out_f")==0) fprintf (stdout, "\nWARNING: %s deprecated, use -output instead", argv[a]); - sprintf ( out_format, "%s", argv[a+1]); - a++; - } - else if ( strm ( argv[a], "-struc_out_f") || strm ( argv[a], "-output_struc") ) - { - sprintf ( struc_out_format, "%s", argv[a+1]); - a++; - } - else if ( strcmp (argv[a],"-in")==0) - { - sprintf( in_file, "%s", argv[a+1]); - a++; - } - else if ( strcmp (argv[a],"-rename")==0) - { - sprintf( rename_file, "%s", argv[a+1]); - a++; - } - else if ( strcmp (argv[a],"-code")==0) - { - code=CODE; - sprintf( rename_file, "%s", argv[a+1]); - a++; - } - else if ( strcmp (argv[a],"-decode")==0) - { - code=DECODE; - sprintf( rename_file, "%s", argv[a+1]); - a++; - } - else if ( strcmp (argv[a],"-in2")==0) - { - sprintf( in2_file, "%s", argv[a+1]); - a++; - } - else if ( strcmp (argv[a],"-coor")==0) - { - sprintf( RAD->coor_file, "%s", argv[a+1]); - a++; - } - else if (strcmp (argv[a],"-out")==0) - { - sprintf (out_file, "%s", argv[a+1]); - a++; - } - else if (strcmp (argv[a],"-out2")==0) - { - sprintf (out2_file, "%s", argv[a+1]); - a++; - } - else if ( strcmp (argv[a],"-struc_in")==0 || strcmp (argv[a],"-in3")==0 ) - { - sprintf( struc_in_file, "%s", argv[a+1]); - a++; - } - else if (strcmp (argv[a],"-struc_out")==0) - { - sprintf (struc_out_file, "%s", argv[a+1]); - a++; - } - else if ( strcmp ( argv[a], "-rm_gap")==0) - { - RAD->rm_gap=1; - } - else if ( strcmp ( argv[a], "-print_format")==0) - { - print_format=1; - } - else if ( strcmp ( argv[a], "-no_warning")==0) - { - set_warning_mode (NO); - } - - else - { - fprintf ( stdout, "\nUNKNOWN OPTION: %s", argv[a]); - myexit(EXIT_FAILURE); - } - } -/****************************************************************/ -/* */ -/* Data Preparation */ -/* */ -/* */ -/****************************************************************/ - - prepare_cache (cache); -/****************************************************************/ -/* */ -/* INPUT SEQ/ALN */ -/* */ -/* */ -/****************************************************************/ - - - if ( strm (out_format, "hasch")) - { - fprintf ( stdout, "%d\n", (int)hash_file(in_file)); - return EXIT_SUCCESS; - } - - if ( rename_file[0]) - { - rename_list=read_rename_file ( rename_file,code); - } - - - if ((D1=read_data_structure (in_format, in_file,RAD))!=NULL) - { - in_format=(in_format && in_format[0])?in_format:identify_seq_format(in_file); - - if (print_format)fprintf ( stdout, "\nFILE:%s FORMAT:%s\n", in_file, in_format); - } - else if ( in_file[0]) - { - fprintf ( stdout, "\nFORMAT of file %s Not Supported[FATAL:%s]\n", in_file, PROGRAM); - myexit(EXIT_FAILURE); - } - - if ((D2=read_data_structure (in2_format, in2_file,RAD))!=NULL){if (print_format)fprintf ( stderr, "\nFILE:%s FORMAT:%s\n", in2_file, (in2_format&&in2_format[0])?in2_format:identify_seq_format(in2_file));} - - else if (!D2 && in2_file[0]) - { - fprintf ( stderr, "\nFORMAT of file %s Not Supported [FATAL:%s]\n", in2_file, PROGRAM); - myexit(EXIT_FAILURE); - } - -/*STRUCTURE INPUT*/ - - - if ((D_ST=read_data_structure (struc_in_format, struc_in_file,RAD))) - { - - if ( D_ST->CL) - { - Constraint_list *CL; - int *entry; - - CL=D_ST->CL; - - entry=vcalloc ( LIST_N_FIELDS, sizeof (int)); - - for (a=0; ane; a++) - { - entry=extract_entry (entry, a, CL); - if ( D_ST->S)(D_ST->S)->seq[entry[SEQ1]][entry[R1]-1]=entry[WE]; - } - thread_seq_struc2aln (D_ST->A, D_ST->S); - } - else if ( name_is_in_list ("cons", ((D_ST)->A)->name, ((D_ST)->A)->nseq, 100)); - else - { - D_ST->A=copy_aln ( D1->A, D_ST->A); - - thread_seq_struc2aln (D_ST->A, D_ST->S); - } - } - else if ((strcmp (struc_in_format, "rna_number")==0) && in_file[0]) - { - D_ST->RNA_ST=read_rna_struc_number((D1->A),struc_in_file); - } - else if ( struc_in_format[0] && struc_in_file[0]) - { - - fprintf ( stderr, "\nSTRUC %s UNKNOWN[FATAL]", struc_in_format); - myexit(EXIT_FAILURE); - } - else - { - D_ST=vcalloc ( 1, sizeof (Sequence_data_struc)); - } - - action=declare_char(100, 100); - for ( a=0; a< n_actions;) - { - if (action_list[a][0]!='+') - { - fprintf ( stderr, "\nWARNING: Action %s Unknown. Actions start with a +", action_list[a]); - myexit (EXIT_FAILURE); - } - else - { - b=0; - sprintf ( action[b++], "%s", action_list[a++]+1); - while ( aA= rename_seq_in_aln(D1->A, rename_list); - if (D2)D2->A=rename_seq_in_aln (D2->A, rename_list); - if (D_ST)D_ST->A=rename_seq_in_aln (D_ST->A,rename_list); - - if (D1)D1->T =rename_seq_in_tree (D1->T, rename_list); - if (D2)D2->T =rename_seq_in_tree (D2->T, rename_list); - if (D_ST)D_ST->T=rename_seq_in_tree (D_ST->T,rename_list); - } - - - if ( !out_format[0] && ! struc_out_format[0])sprintf ( out_format, "%s", (in_format && in_format[0])?in_format:"fasta_aln"); - main_output ( D1, D2, D_ST, out_format, out_file); - main_output ( D1, D2, D_ST, struc_out_format, struc_out_file); - return EXIT_SUCCESS; - } - - - - -/**************************************************************************************************/ -/***************************** FORMAT GUESSING ******************************************/ -/**************************************************************************************************/ -Sequence_data_struc *read_data_structure ( char *in_format, char *in_file, Action_data_struc *RAD) - - { - Sequence_data_struc *D; - char **seq_name=NULL, **sequences=NULL; - int nseq=0, a; - - - D=vcalloc ( 1, sizeof (Sequence_data_struc)); - - - if (!in_file[0])return NULL; - if (!in_format[0]) - { - in_format=identify_seq_format(in_file); - } - if (!in_format[0])return NULL; - - - - D->A=declare_Alignment(NULL); - if ( RAD->keep_case)(D->A)->residue_case=KEEP_CASE; - - D->rm_gap=RAD->rm_gap; - sprintf ( D->format, "%s", in_format); - sprintf ( D->file, "%s", in_file); - - - - - if ( strm2(in_format,"saga_aln","clustal_aln")) - { - read_aln (in_file, D->A); - D->S=aln2seq(D->A); - - } - - else if ( strm (in_format, "treefile_list")) - { - int z; - D->S=get_tree_file_list(in_file); - D->A=seq2aln(D->S, D->A,NO_PAD); - } - else if ( strm (in_format, "file_list") || strm (in_format, "list")) - { - D->S=get_file_list(in_file); - D->A=seq2aln(D->S, D->A,KEEP_GAP); - } - else if ( strm (in_format, "fasta_tree")) - { - int z; - D->S=get_fasta_tree (in_file, NULL); - D->A=seq2aln(D->S, D->A,NO_PAD); - - } - else if ( strm (in_format, "tree_list") || strm (in_format, "treelist")) - { - char **line; - FILE *seq,*dnd; - int n=0; - char *seq_file; - FILE *fp; - Sequence *T; - seq_file=vtmpnam(NULL); - seq=vfopen (seq_file, "w"); - line=file2lines (in_file); - fp=vfopen (seq_file, "w"); - for ( n=1; nTree_%d\n%s\n", n,line[n]); - } - vfclose (fp); - - free_char (line, -1); - return read_data_structure ( "fasta_tree",seq_file,RAD); - } - - else if (strm (in_format, "matrix")) - { - D->M=read_matrice (in_file); - } - else if (strm4 (in_format, "newick_tree", "newick", "nh", "new_hampshire")) - { - D->T=main_read_tree (in_file); - D->S=tree2seq(D->T, NULL); - D->A=seq2aln (D->S,D->A, 0); - } - else if (strm (in_format, "blast_aln")) - { - if (read_blast_aln (in_file, D->A)) - { - D->S=aln2seq(D->A); - } - else - { - return NULL; - } - } - else if ( strm( in_format,"number_aln")) - { - read_number_aln (in_file, D->A); - D->S=aln2seq(D->A); - } - else if ( strm( in_format,"stockholm_aln")) - { - read_stockholm_aln (in_file, D->A); - D->S=aln2seq(D->A); - } - else if ( strm( in_format,"gotoh_aln")) - { - read_gotoh_aln (in_file, D->A); - D->S=aln2seq(D->A); - } - - else if ( strm ( in_format, "msf_aln")) - { - read_msf_aln (in_file, D->A); - D->S=aln2seq(D->A); - } - else if ( strm ( in_format, "amps_aln")) - { - read_amps_aln (in_file, D->A); - D->S=aln2seq(D->A); - } - else if ( strm (in_format, "excel_seq")) - { - D->S=perl_reformat2fasta ("excel2fasta.pl",in_file); - (D->S)->contains_gap=0; - D->A=seq2aln(D->S, D->A,RAD->rm_gap); - } - else if ( strm (in_format, "pavie_seq")) - { - D->S=perl_reformat2fasta ("pavie2fasta.pl",in_file); - (D->S)->contains_gap=0; - D->A=seq2aln(D->S, D->A,RAD->rm_gap); - } - else if ( strncmp (in_format, "perl_",5 )==0) - { - D->S=perl_reformat2fasta (in_format+5,in_file); - (D->S)->contains_gap=0; - D->A=seq2aln(D->S, D->A,RAD->rm_gap); - } - else if ( strm (in_format, "number_fasta")) - { - D->S=get_fasta_sequence_num (in_file, NULL); - (D->S)->contains_gap=0; - D->A=seq2aln(D->S, D->A,RAD->rm_gap); - } - else if ( strm (in_format, "raw_fasta")) - { - D->S=get_fasta_sequence_raw (in_file, NULL); - (D->S)->contains_gap=0; - D->A=seq2aln(D->S, D->A,RAD->rm_gap); - } - - else if ( strm2 (in_format, "fasta_aln", "fasta_seq")) - { - - D->S=get_fasta_sequence (in_file, NULL); - if ( strcmp (in_format, "fasta_aln")==0)(D->S)->contains_gap=0; - D->A=seq2aln(D->S, D->A,RAD->rm_gap); - } - else if ( strm (in_format, "fasta_tree")) - { - - D->S=get_fasta_tree (in_file, NULL); - D->A=seq2aln(D->S, D->A, NO_PAD); - } - - else if ( strm (in_format, "pdb") || strm (in_format, "pdb_struc")) - { - D->S=get_pdb_sequence (in_file); - if ( D->S==NULL) - { - add_warning (stderr, "FAILED TO find PDB File %s", in_file); - myexit (EXIT_FAILURE); - } - D->A=seq2aln(D->S, D->A,RAD->rm_gap); - } - else if ( strm2(in_format, "pir_seq", "pir_aln")) - { - D->S=get_pir_sequence ( in_file,NULL ); - seq2aln(D->S, D->A, RAD->rm_gap); - } - else if ( strm(in_format, "gor_seq") ) - { - D->S=get_gor_sequence ( in_file,NULL ); - seq2aln(D->S, D->A, RAD->rm_gap); - } - else if ( strm2 ( in_format, "dali_aln", "dali_seq")) - { - D->S=get_sequence_dali ( in_file); - seq2aln(D->S, D->A, RAD->rm_gap); - } - else if ( strm (in_format, "barton_list_tc")) - { - get_barton_list_tc_seq ( in_file); - } - else if ( strm (in_format, "amps_sd_scores")) - { - D->W=get_amps_sd_scores ( in_file); - } - - else if ( strm ( in_format, "pima_aln")) - { - D->S=get_pima_sequence ( in_file); - seq2aln (D->S, D->A, RAD->rm_gap); - } - else if ( strm( in_format, "gor_struc")) - { - D->S=get_struc_gor ( in_file); - seq2aln(D->S, D->A, RAD->rm_gap); - } - else if ( strm( in_format, "dialign_aln")) - { - D->S=get_dialign_sequence ( in_file); - seq2aln (D->S, D->A, RAD->rm_gap); - } - else if ( strm( in_format, "tc_lib") || strm( in_format, "mocca_lib") || strm( in_format, "lib")) - { - read_seq_in_list (in_file,&nseq,&sequences,&seq_name); - D->S=fill_sequence_struc ( nseq, sequences, seq_name); - D->CL=declare_constraint_list ( D->S,NULL, NULL, 0,NULL, NULL); - D->CL=read_constraint_list_file(D->CL,in_file); - seq2aln (D->S, D->A, RAD->rm_gap); - free_char (sequences,-1); - free_char (seq_name, -1); - } - else if ( strm( in_format,"swissprot_seq")) - { - D->S=get_swissprot_sequence ( in_file,NULL); - seq2aln (D->S, D->A, RAD->rm_gap); - } - else if (strm (in_format, "alifold")) - { - D->S=read_alifold ( in_file); - seq2aln (D->S, D->A,0); - } - else - { - return NULL; - } - - if ( D->A) - { - for ( a=0; a<(D->A)->nseq; a++)sprintf ( (D->A)->file[a], "%s", in_file); - } - if ( D->S) - { - for ( a=0; a<(D->A)->nseq; a++)sprintf ( (D->S)->file[a], "%s", in_file); - } - - return D; - } -Sequence *read_sequences (char *name) -{ - return main_read_seq (name); -} -Alignment * alifold2aln (char *file) -{ - Sequence *S; - S=read_alifold(file); - sprintf ( S->seq[0],"%s", S->seq[1]); - return seq2aln (S, NULL, 0); -} -Sequence * read_alifold (char *file) -{ - Sequence *S; - char **list; - int l; - S=declare_sequence (1,count_n_char_in_file (file),2); - list=file2lines (file); - - S->seq[0]=list[1]; - S->seq[1]=list[2]; - substitute (S->seq[0], "\n", "\0"); - substitute (S->seq[0], " ", "\0"); - substitute (S->seq[0], "_", STOCKHOLM_STRING); - l=strlen (S->seq[0]); - substitute (S->seq[1], "\n", "\0"); - substitute (S->seq[1], " ", "\0"); - substitute (S->seq[1], ".", STOCKHOLM_STRING); - S->seq[1][l]='\0'; - sprintf (S->name[0], "cons", file); - sprintf (S->name[1], "#=GC SS_cons", file); - return S; -} - - - - - - -Sequence * main_read_seq ( char *name) - { - char *format=NULL; - Sequence *S=NULL; - Alignment *A=NULL; - int a; - - - format=identify_seq_format (name); - - - if ( getenv4debug ("DEBUG_REFORMAT"))fprintf ( stderr, "\n\nFormat %s\n", format); - - - if (format &&strm(format, "fasta_seq")) - { - S= get_fasta_sequence ( name, NULL); - } - else if (format &&strm(format, "pir_seq")) S= get_pir_sequence ( name, NULL); - else if (format &&strm(format,"swissprot_seq"))S= get_swissprot_sequence (name, NULL); - else if (format && strstr (format, "aln")) - { - A=main_read_aln ( name, NULL); - S=aln2seq(A); - ungap_seq(S); - free_aln(A); - } - else if ( format && strstr (format, "tc_lib")) - { - int nseq,b; - char **sequences=NULL, **seq_name=NULL; - - read_seq_in_list (name,&nseq,&sequences,&seq_name); - S=fill_sequence_struc ( nseq, sequences, seq_name); - for ( b=0; b< S->nseq; b++)sprintf ( S->file[b], "%s",name); - free_char (seq_name, -1);free_char (sequences, -1); - } - else - { - /*Use The ClustalW routine*/ - S=cw_read_sequences (name); - } - - for ( a=0; anseq; a++)sprintf ( S->file[a], "%s", name); - vfree(format); - ungap_seq(S); - S=clean_sequence ( S); - return S; - } - -Alignment * main_read_aln ( char *name, Alignment *A) - { - int a; - - static char *format; - Sequence *S=NULL; - Sequence *IN_SEQ; - - - if ( !name)return NULL; - else if (!check_file_exists(name)) - { - if ( !check_file_exists (name+1))return NULL; - else if ( name[0]=='A') name++; - else if ( name[0]=='S') name++;/*Line Added for the -convert flag of T-Coffee*/ - } - - - if (!A)A=declare_aln(NULL); - format=identify_seq_format (name); - - IN_SEQ=A->S; - - if ((format && strm(format, "saga_aln" )) ||strm(format, "clustal_aln")||strm(format, "t_coffee_aln" ) ) - { - - read_aln ( name, A); - - } - else if (format && strm (format, "conc_aln"))A=input_conc_aln (name,NULL); - else if (format &&strm(format, "msf_aln" ))read_msf_aln ( name, A); - else if (format &&strm(format, "blast_aln"))read_blast_aln (name, A); - else if (format &&(strm(format, "fasta_aln"))) - { - - - S=get_fasta_sequence ( name, NULL); - - S->contains_gap=0; - seq2aln (S, A, 0); - } - else if (format &&strm(format, "pir_aln")) - { - S=get_pir_sequence ( name, NULL); - S->contains_gap=0; - seq2aln (S, A, 0); - } - else if (format && strm(format, "fasta_seq") && A) - { - S=get_fasta_sequence ( name, NULL); - - for ( a=1; anseq; a++)if ( strlen (S->seq[a-1])!=strlen (S->seq[a])){free_sequence (S, S->nseq); free_aln (A); return NULL;} - S->contains_gap=0; - seq2aln (S, A, 0); - } - - else if (format && strm(format, "pir_seq") && A) - { - S=get_pir_sequence ( name, NULL); - - for ( a=1; anseq; a++)if ( strlen (S->seq[a-1])!=strlen (S->seq[a])){free_sequence (S, S->nseq); free_aln (A); return NULL;} - S->contains_gap=0; - seq2aln (S, A, 0); - } - else - { - free_aln(A); - return NULL; - } - - - if ( check_list_for_dup( A->name, A->nseq)) - { - fprintf ( stderr, "\nWARNING (main_read_aln): %s is duplicated in File %s ", check_list_for_dup( A->name, A->nseq), A->file[0]); - A=aln2unique_name_aln(A); - } - - if (IN_SEQ)A->S=IN_SEQ; - else if (!A->S){A->S=aln2seq(A);} - - A->S=ungap_seq(A->S); - A=fix_aln_seq(A, A->S); - compress_aln (A); - for ( a=0; a< A->nseq; a++) sprintf ( A->file[a], "%s", name); - - A=clean_aln (A); - return A; - } - - -char * identify_aln_format ( char *file) - { - /*This function identify known sequence and alignmnent formats*/ - return identify_seq_format (file); - } -char * identify_seq_format ( char *file) - { - char *format=NULL; - /*This function identify known sequence and alignmnent formats*/ - - if ( format==NULL)format=vcalloc ( 100, sizeof (char)); - else format[0]='\0'; - - - - if ( !check_file_exists(file)) - { - fprintf (stderr, "ERROR: %s Does Not Exist [FATAL:%s]\n",file, PROGRAM); - myexit (EXIT_FAILURE); - } - else if ( is_stockholm_aln (file))sprintf (format, "stockholm_aln"); - else if ( is_blast_file (file))sprintf ( format, "blast_aln"); - else if ( is_pdb_file(file))sprintf ( format, "pdb_struc"); - else if ( format_is_msf (file))sprintf ( format, "msf_aln"); - else if ( format_is_fasta_seq(file))sprintf ( format, "fasta_seq"); - else if ( format_is_fasta_aln(file))sprintf ( format, "fasta_aln"); - else if ( format_is_pir_aln (file))sprintf ( format, "pir_aln"); - else if ( format_is_pir_seq (file))sprintf ( format, "pir_seq"); - else if ( format_is_oligo (file))sprintf ( format, "oligo_aln"); - else if ( format_is_swissprot (file))sprintf ( format, "swissprot_seq"); - else if ( format_is_saga (file))sprintf ( format, "clustal_aln"); - else if ( format_is_conc_aln (file))sprintf ( format, "conc_aln"); - else if ( is_lib (file))sprintf ( format, "tc_lib"); - else if ( is_lib_02 (file))sprintf ( format, "tc_lib_02"); - else if ( is_newick(file))sprintf ( format, "newick_tree"); - - else - { - add_warning ( stderr, "\nThe Format of File: %s was not recognized [SERIOUS:%s]",file, PROGRAM); - } - return format; - } -char **identify_list_format ( char **list, int n) - { - int a; - char *name; - char *string; - char mode; - - - - declare_name (name); - for ( a=0; a< n; a++) - { - - sprintf (name, "%s", list[a]); - string=list[a]; - if ((mode=identify_format ( &string))!='?') - { - sprintf ( name, "%s", string); - sprintf ( list[a], "%c%s", mode,name); - } - else - { - fprintf ( stderr, "\nERROR: %s not recognised [FATAL:%s]", name, PROGRAM); - } - - } - - vfree(name); - return list; - } - -char * name2type_name ( char *name) -{ - /*turns into , ...*/ - char *new_name; - char mode; - - new_name=vcalloc ( strlen (name)+2, sizeof (char)); - sprintf ( new_name, "%s", name); - if (is_in_set (name[0], "ALSMXPRW") && !check_file_exists(name)) - { - sprintf ( new_name, "%s", name); - } - else - { - mode=identify_format (&new_name); - sprintf ( new_name, "%c%s", mode,name); - } - return new_name; -} - -char identify_format (char **fname) - { - char mode='?'; - mode=fname[0][0]; - - if ((is_in_set (mode, "ALMSPR") && check_file_exists(fname[0]+1)) ||(mode=='X' && is_matrix ( fname[0]+1)) ||(mode=='M' && is_method(fname[0]+1)) ) - { - - fname[0]++; - } - else if (mode=='W' && !check_file_exists(fname[0])){fname[0]++;} - else - { - - /*WARNING: Order matters => internal methods can be confused with files, must be checked last*/ - if (is_lib(fname[0]))mode='L'; - else if (is_pdb_file(fname[0]))mode='P'; - else if (is_seq(fname[0]))mode='S'; - else if (is_aln(fname[0]))mode='A'; - else if (is_matrix(fname[0]))mode='X'; - else if (is_method(fname[0]))mode='M'; - else mode='?'; - } - return mode; - } - - - -int is_pdb_name ( char *name) - { - char command[1000]; - int result; - char *result_file; - static char **buf_names; - static int *buf_result; - static int nbuf; - FILE *fp; - - - /*Use the look up*/ - if ( !buf_names) - { - buf_names=declare_char (1000, 100); - buf_result=vcalloc (1000, sizeof (int)); - } - if ( (result=name_is_in_list ( name, buf_names,nbuf,100))!=-1)return buf_result[result]; - - - - result_file=vtmpnam (NULL); - - sprintf ( command, "extract_from_pdb -is_pdb_name \'%s\' > %s", name, result_file); - if ( getenv4debug ("DEBUG_EXTRACT_FROM_PDB"))fprintf ( stderr, "\n[DEBUG_EXTRACT_FROM_PDB:is_pdb_name] %s\n", command); - my_system ( command); - - fp=vfopen ( result_file, "r"); - fscanf ( fp, "%d", &result); - vfclose (fp); - vremove ( result_file); - - sprintf ( buf_names[nbuf], "%s", name); - result=buf_result[nbuf++]=(result==1)?1:0; - - return result; - - } - -char* get_pdb_id ( char *file) -{ - /*receives the name of a pdb file*/ - /*reads the structure id in the header*/ - /*returns the pdb_id*/ - char *tmp_name; - char command[10000]; - char cached [1000]; - char fname[1000]; - FILE *fp; - char *id; - char buf[1000]; - - - tmp_name=vtmpnam(NULL); - - sprintf ( cached, "%s/%s", get_cache_dir(),file); - if ( check_file_exists(cached))sprintf ( fname, "%s", cached); - else sprintf ( fname, "%s", file); - - sprintf ( command, "extract_from_pdb -get_pdb_id %s > %s",fname, tmp_name); - - if ( getenv4debug ("DEBUG_EXTRACT_FROM_PDB"))fprintf ( stderr, "\n[DEBUG_EXTRACT_FROM_PDB:get_pdb_id] %s\n", command); - my_system ( command); - - buf[0]='\0'; - fp=vfopen (tmp_name, "r"); - fscanf ( fp, "\n%s\n", buf); - vfclose (fp); - - if ( getenv4debug ("DEBUG_EXTRACT_FROM_PDB"))fprintf ( stderr, "\n[DEBUG_EXTRACT_FROM_PDB:get_pdb_id]DONE\n"); - - id=vcalloc ( strlen (buf)+1, sizeof (char)); - sprintf ( id, "%s", buf); - - - - return id; -} - - -char* get_pdb_struc(char *in_name, int start, int end) - { - char *name1,*name2; - char command[LONG_STRING]; - char *name; - - - - - name=vcalloc ( STRING, sizeof (char)); - sprintf ( name, "%s", in_name); - - if ( (name1=is_pdb_struc(name))==NULL && (name[0]=='P' && ((name1=is_pdb_struc (name+1))==NULL))) - { - fprintf ( stderr, "\nERROR Could not download structure %s [FATAL:%s]\n", name, PROGRAM);crash(""); - } - else if ( (start==0) && (end==0))return name1; - else - { - declare_name(name2); - sprintf ( name2, "%s_%d_%d.pdb", name, start, end); - sprintf ( command, "extract_from_pdb -infile \'%s\' -chain FIRST -coor %d %d > %s%s",check_file_exists(name1),start, end, get_cache_dir(),name2); - if ( getenv4debug ("DEBUG_EXTRACT_FROM_PDB"))fprintf ( stderr, "\n[DEBUG_EXTRACT_FROM_PDB:get_pdb_struc] %s\n", command); - my_system (command); - - if ( is_pdb_file(name2))return name2; - else - { - fprintf ( stderr, "\nERROR Could not extract segment [%d %d] from structure %s [FATAL:%s]\n",start, end, name, PROGRAM);crash(""); - } - exit (EXIT_FAILURE); - } - - return NULL; - } - -char* seq_is_pdb_struc ( Sequence *S, int i) -{ - - if (!S){return NULL;} - else if ( !S->T[i]){return NULL;} - else if ( !((S->T[i])->P)){return NULL;} - else return ((S->T[i])->P)->template_file; -} -char* is_pdb_struc ( char *name) - { - /*Receives a name - checks if this is the name of a local file that contains PDB data - checks if this is the name of a file from a local db - put the file in the cache - checks if this is a file from a remote db (extract_from_pdb - return NULL if everything fails - */ - - static char *file_name1; - static char *file_name2; - static char **buf_names; - static char **buf_result; - static int nbuf, s; - - - char *r=NULL; - char command[1000]; - - - if ( !name || name[0]=='\0')return NULL; - - - /*Use the look up*/ - if ( !buf_names) - { - - buf_names=vcalloc ( 1000, sizeof (char*)); - buf_result=vcalloc ( 1000, sizeof (char*)); - file_name1=vcalloc ( 1000, sizeof (char)); - file_name2=vcalloc ( 1000, sizeof (char)); - } - if ( (s=name_is_in_list ( name, buf_names,nbuf,-1))!=-1)return buf_result[s]; - - - r=NULL; - sprintf ( file_name1, "%s", name); - sprintf ( file_name2, "%s.pdb", name); - - - if (is_pdb_file(file_name1)){r=file_name1;} - else if (is_pdb_file(file_name2)){r=file_name2;} - else if (is_pdb_name (name)) - { - char *tmpname; - tmpname=vtmpnam (NULL); - - - sprintf ( file_name2, "%s.pdb", name); - /*sprintf ( command, "extract_from_pdb -netfile \'%s\' > %s%s 2>/dev/null",name, get_cache_dir(), file_name2);*/ - sprintf ( command, "extract_from_pdb -netfile \'%s\' > %s 2>/dev/null",name,tmpname); - if ( getenv4debug ("DEBUG_EXTRACT_FROM_PDB"))fprintf ( stderr, "\n[DEBUG_EXTRACT_FROM_PDB:is_pdb_struc] %s\n", command); - my_system (command); - - sprintf ( command, "cp %s %s%s", tmpname, get_cache_dir(), file_name2); - my_system (command); - - if ( is_pdb_file(file_name2))r=file_name2; - else r=NULL; - - } - - /*Fill the buffer*/ - buf_names[nbuf]=vcalloc ( strlen (name)+1, sizeof (char)); - sprintf ( buf_names[nbuf], "%s", name); - if ( r) - { - buf_result[nbuf]=vcalloc ( strlen (r)+1, sizeof (char)); - sprintf (buf_result[nbuf], "%s", r); - } - else buf_result[nbuf]=NULL; - nbuf++; - - return r; - } - -char *fix_pdb_file ( char *in) -{ - char *empty; - - empty=vcalloc(1, sizeof(char)); - - if ( !in || !check_file_exists (in))return empty; - else if ( is_pdb_file(in))return in; - else - { - char command[10000]; - char *tmp; - char *tmp2; - tmp=vtmpnam (NULL); - tmp2=vcalloc (strlen (tmp)+1, sizeof (char)); - sprintf (tmp2, "%s", tmp); - sprintf ( command, "extract_from_pdb %s > %s", check_file_exists(in), tmp2); - my_system (command); - if ( is_pdb_file (tmp))return tmp2; - else return empty; - - } -} - -int is_sap_file ( char *name) - { - FILE *fp; - if (!name); - if (!check_file_exists(name))return 0; - - if ((fp=find_token_in_file (name, NULL, "Percent"))!=NULL) - { - if ((fp=find_token_in_file (name,fp, "Percent"))!=NULL) - { - vfclose (fp); - return 1; - } - else - { - return 0; - } - } - else - { - return 0; - } - } - - -int is_blast_file ( char *name) - { - if ( !check_file_exists(name) ) return 0; - else if (token_is_in_file (name, "")) - { - return BLAST_XML; - } - else - { - if (token_is_in_file (name, "Lambda")) - { - return BLAST_TXT; - } - else - { - return 0; - } - } - return 0; - } -int is_simple_pdb_file ( char *name) -{ - FILE *fp; - if ((fp=find_token_in_file (name, NULL, "SIMPLE_PDB_FORMAT"))!=NULL){vfclose (fp);return 1;} - return 0; -} - - -int is_pdb_file ( char *name) - { - FILE *fp; - int ispdb=0; - - if ( name==NULL) return 0; - if (!check_file_exists (name))return 0; - - - - if ((fp=find_token_in_file (name, NULL, "\nHEADER"))!=NULL) - {vfclose (fp); - ispdb++; - } - if ((fp=find_token_in_file (name, NULL, "\nSEQRES"))!=NULL) - {vfclose (fp); - - ispdb++; - } - - if ((fp=find_token_in_file (name, NULL, "\nATOM"))!=NULL) - { - vfclose (fp); - ispdb++; - - } - else - { - ispdb=0; - } - - - - if ( ispdb>=2)return 1; - else return 0; - } -int is_seq ( char *name) - { - char *format; - - if ( !check_file_exists(name))return 0; - - format= identify_seq_format(name); - if(!format || format[0]=='\0'){vfree (format);return 0;} - else if (strstr(format, "seq")){vfree (format);return 1;} - else return 0; - } -int is_aln ( char *name) - { - char *format; - if ( !check_file_exists (name))return 0; - - format= identify_seq_format(name); - if ( !format || format[0]=='\0'){vfree (format);return 0;} - else if (strstr(format, "aln")){vfree (format); return 1;} - else return 0; - } - -int is_matrix (char *name) - { - int **m; - - if ((m=read_matrice (name))!=NULL){free_int (m, -1); return 1;} - return 0; - } -int is_newick (char *name) - { - int c; - FILE *fp; - - - fp=vfopen (name, "r"); - if ( (c=fgetc(fp))!='('){vfclose (fp); return 0;} - - - while ( (c=fgetc(fp))!=EOF) - { - if ( c==';'){vfclose (fp); return 1;} - } - vfclose (fp); - return 0; - } - -int is_clustalw_matrix ( char *name) -{ - - FILE *fp; - - - if ( (fp=find_token_in_file (name, NULL, "CLUSTALW_MATRIX"))!=NULL){vfclose(fp);return 1;} - else return 0; -} -int is_pavie_matrix ( char *name) -{ - - FILE *fp; - - - if ( (fp=find_token_in_file (name, NULL, "PAVIE_MATRIX"))!=NULL){vfclose(fp);return 1;} - else return 0; -} -int is_distance_matrix_file (char *name) -{ - FILE *fp; - if ( (fp=find_token_in_file (name, NULL, "TC_DISTANCE_MATRIX_FORMAT_01"))!=NULL){vfclose(fp);return 1;} - else return 0; -} -int is_similarity_matrix_file (char *name) -{ - FILE *fp; - if ( (fp=find_token_in_file (name, NULL, "TC_SIMILARITY_MATRIX_FORMAT_01"))!=NULL){vfclose(fp);return 1;} - else return 0; -} -int is_blast_matrix ( char *name) -{ - - FILE *fp; - - - if ( (fp=find_token_in_file (name, NULL, "BLAST_MATRIX"))!=NULL){vfclose(fp);return 1;} - else return 0; -} - -int is_single_seq_weight_file ( char *name) -{ - - - return token_is_in_file ( name, "SINGLE_SEQ_WEIGHT_FORMAT_01"); - -} -int is_stockholm_aln (char *file) -{ - FILE *fp; - - if ((fp=find_token_in_file_nlines (file, NULL, "STOCKHOLM",2))) - { - vfclose (fp); - return 1; - } - return 0; -} - -int is_lib ( char *name) -{ - return is_lib_01(name); -} - -int is_lib_02 ( char *name) -{ - - return token_is_in_file ( name, "TC_LIB_FORMAT_02"); - -} - -int is_lib_01 (char *name) - { - - - if ( token_is_in_file ( name, "TC_LIB_FORMAT_01")) return 1; - else if (token_is_in_file ( name, "T-COFFEE_LIB_FORMAT_01"))return 1; - else if (token_is_in_file (name, "SEQ_1_TO_N"))return 1; - else return 0; - } -int is_lib_list ( char *name) -{ - if ( !check_file_exists (name))return 0; - if ( token_is_in_file ( name, "TC_LIB_LIST_FORMAT_01")) return 1; - return 0; -} -int is_method ( char *file) - { - char new_file[200]; - - - sprintf ( new_file, "%s", file); - if ( (token_is_in_file(new_file, "TC_METHOD_FORMAT_01"))){return 1;} - if ( is_in_pre_set_method_list(new_file)) - { - - vremove ( new_file); - return 1; - } - else - { - - return 0; - } - } - -/*******************************************************************************************/ -/* */ -/* */ -/* SEQUENCE FORMAT IDENTIFIERS */ -/* */ -/***************************************************************************************** */ -int type_is_exon_boundaries(char **seq, int n) -{ - int a, l, b; - for (a=0; aseq[0]){free_sequence (S, S->nseq); return 1;} - l=strlen ( S->seq[0]); - for ( a=0; a< S->nseq; a++)if(strlen(S->seq[a])!=l){free_sequence (S, S->nseq);return 1;} - for ( a=0; a< S->nseq; a++) - { - l1=strlen ( S->seq[a]); - ungap (S->seq[a]); - l2=strlen ( S->seq[a]); - if ( l1!=l2) - { - free_sequence (S, S->nseq); - return 0; - } - } - free_sequence (S, S->nseq); - return 1; - } - else - { - return 0; - } - } - -int format_is_fasta ( char *file) - { - Sequence *S; - - if ( !check_file_exists(file))return 0; - - if ( get_first_non_white_char (file)!='>')return 0; - if ( !(S=get_fasta_sequence (file, NULL)))return 0; - free_sequence (S, -1); - if ( format_is_pir(file)) return 0; - return 1; - } - -int format_is_pir_aln ( char *file) - - { - if ( format_is_pir(file) && !format_is_pir_seq(file))return 1; - else return 0; - } - -int format_is_pir_seq ( char *file) - { - int a, l1, l2; - Sequence *S; - - - if ( format_is_pir (file)) - { - S=get_pir_sequence (file, NULL); - for ( a=0; a< S->nseq; a++) - { - l1=strlen ( S->seq[a]); - ungap (S->seq[a]); - l2=strlen ( S->seq[a]); - if ( l1!=l2) - { - free_sequence (S, S->nseq); - return 0; - } - } - return 1; - } - else - { - return 0; - } - } - - -int format_is_pir ( char *file) - { - Sequence *S; - int pir_name=1, star_end=1, a; - - S=get_fasta_sequence (file, NULL); - if (!S)return 0; - else if (!S->seq[0])return 0; - - pir_name=1; star_end=1; - for (a=0; a< S->nseq; a++) - { - int l; - if (!is_pir_name(S->name[a]))pir_name=0; - l=strlen (S->seq[a]); - if (!l || (l && S->seq[a][l-1]!='*')) - star_end=0; - } - free_sequence(S,-1); - if ( pir_name && star_end) return 1; - else return 0; - } -int is_pir_name (char *name) -{ - if ( strstr (name, "P1;"))return 1; - if ( strstr (name, "F1;"))return 1; - if ( strstr (name, "DL;"))return 1; - if ( strstr (name, "DC;"))return 1; - if ( strstr (name, "RL;"))return 1; - if ( strstr (name, "RC;"))return 1; - if ( strstr (name, "XX;"))return 1; - return 0; -} - - -int format_is_conc_aln (char *file) -{ - FILE *fp; - if ( (fp=find_token_in_file (file, NULL, "CONC_MSF_FORMAT_01"))){vfclose (fp); return 1;} - return 0; -} -int format_is_saga ( char *file) - { - FILE *fp; - int **list; - int n_blocks; - int n_seq; - int a, b; - - if ( (fp=find_token_in_file (file, NULL, "SAGA"))){vfclose (fp); return 1;} - else if ((fp=find_token_in_file (file, NULL, "CLUSTAL"))){vfclose (fp); return 1;} - else if ((fp=find_token_in_file (file, NULL, "ClustalW"))){vfclose (fp); return 1;} - else if ((fp=find_token_in_file (file, NULL, "clustalw"))){vfclose (fp); return 1;} - else if ((fp=find_token_in_file (file, NULL, "clustal"))){vfclose (fp); return 1;} - else if ((fp=find_token_in_file (file, NULL, "T-COFFEE_MSA"))){vfclose (fp); return 1;} - else if ((fp=find_token_in_file (file, NULL, "INTERLEAVED_MSA"))){vfclose (fp); return 1;} - - else return 0; - - if (1==1); - else if ((fp=find_token_in_file (file, NULL, "T-COFFEE"))){vfclose (fp); return 1;} - else if ((fp=find_token_in_file (file, NULL, "SAGA_FORMAT"))){vfclose (fp); return 1;} - else if ((fp=find_token_in_file (file, NULL, "GARP"))){vfclose (fp); return 1;} - else if ((fp=find_token_in_file (file, NULL, "INTERLEAVED"))){vfclose (fp); return 1;} - - else - { - list=get_file_block_pattern (file,&n_blocks,100); - if (n_blocks<=2){free_int (list, -1);return 0;} - else - { - n_seq=list[1][0]; - for ( a=1; a< n_blocks-1; a++) - { - if ( list[a][0]!=n_seq){free_int (list, -1);return 0;} - else - { - for ( b=1; b<=list[a][0]; b++) - if ( list[a][b]!=2){free_int (list, -1);return 0;} - } - } - } - return 1; - } - - return 0; - } - - -int format_is_swissprot (char *name) - { - FILE *fp; - - if ( !check_file_exists(name))return 0; - - - - - if ( (fp=find_token_in_file_nlines (name,NULL,"\nID ",10))!=NULL\ - &&(fp=find_token_in_file (name,NULL,"\nSQ "))!=NULL ) - { - - vfclose (fp);return 1; - } - else - { - return 0; - } - } - -/*******************************************************************************************/ -/* */ -/* */ -/* OUTPUT STUFF */ -/* */ -/***************************************************************************************** */ -int output_format_aln ( char *format, Alignment *inA, Alignment *inEA,char *name) - { - Sequence_data_struc *D1=NULL; - Sequence_data_struc *D2=NULL; - Alignment *A=NULL; - Alignment *EA=NULL; - - - A =copy_aln (inA, NULL); - A->CL=inA->CL; - EA=copy_aln (inEA,NULL); - A =expand_aln(A); - EA=expand_number_aln(inA,EA); - - - if (A && A->expanded_order )A=reorder_aln ( A, A->expanded_order,A->nseq); - if (EA && EA->expanded_order)EA=reorder_aln ( EA, EA->expanded_order,EA->nseq); - - - D1=vcalloc ( 1, sizeof (Sequence_data_struc)); - D1->A=A; - if (EA) - { - D2=vcalloc ( 1, sizeof (Sequence_data_struc)); - D2->A=EA; - } - - main_output ( D1, NULL,D2, format, name); - - vfree(D1); - vfree(D2); - free_aln (A); - free_aln (EA); - return 1; - } -int main_output (Sequence_data_struc *D1, Sequence_data_struc *D2, Sequence_data_struc *DST, char *out_format, char *out_file) - - { - FILE *fp; - int value; - Alignment *BUF_A; - int expanded=0; - - if ( !out_format[0])return 0; - if ( D1 && D1->rm_gap)ungap_aln ((D1->A)); - - if ( (strstr (out_format, "expanded_"))) - { - if (!D1) return 1; - out_format+=strlen ("expanded_"); - BUF_A=copy_aln (D1->A, NULL); - (D1->A)=thread_profile_files2aln ((D1->A), NULL, NULL); - expanded=1; - } - - if ( strm (out_format, ""))return 0; - else if ( ( strm (out_format, "aln2lib"))) - { - int a, b, c; - int r1,r2,s1, s2,s; - Constraint_list *CL; - FILE *fp; - Alignment *IN; - int **pos; - - if (!D1)return 1; - IN=D1->A; - CL=(D1->A)->CL; - pos=aln2pos_simple(IN, IN->nseq); - fp=vfopen (out_file, "w"); - fp=save_list_header (fp,CL); - - - for ( b=0; b< IN->nseq-1; b++) - { - for ( c=b+1; c< IN->nseq; c++) - { - s1=IN->order[b][0]; - s2=IN->order[c][0]; - fprintf ( fp, "#%d %d\n", s1+1, s2+1); - for ( a=0; a< IN->len_aln; a++) - { - r1=pos[b][a]; - r2=pos[c][a]; - - if ( s1==s2 && !CL->do_self)continue; - - if ( s1< s2)s=(CL->evaluate_residue_pair)( CL, s1, r1, s2, r2); - else s=(CL->evaluate_residue_pair)( CL, s2, r2, s1, r1); - - s=(s!=UNDEFINED)?s:0; - if ( r1>0 && r2>0) - { - fprintf (fp, "\t%5d %5d %5d \n", r1, r2, s); - } - } - } - } - vfclose (save_list_footer (fp, CL)); - } - else if ( strncmp (out_format, "score",5)==0 || strm (out_format, "html")) - { - Alignment *BUF; - - if (!D1)return 1; - if ( !DST) - { - fprintf ( stderr,"\n[You Need an evaluation File: Change the output format or use +evaluate][FATAL:%s]\n", PROGRAM); - myexit(EXIT_FAILURE); - } - if ( !strm ("html", out_format))while ( out_format[0]!='_' && out_format[0]!='\0' )out_format++; - - D1->S=aln2seq(D1->A); - BUF=copy_aln (DST->A, NULL); - DST->A=aln2number (DST->A); - - if ( strstr ( out_format, "html" ))output_reliability_html ( D1->A, DST->A, out_file); - else if( strm ( out_format, "_ps" ))output_reliability_ps ( D1->A, DST->A, out_file); - else if( strm ( out_format, "_pdf" ))output_reliability_pdf ( D1->A, DST->A, out_file); - else if( strm ( out_format, "_ascii" ))output_reliability_ascii ( D1->A, DST->A, out_file); - else if( strm ( out_format, "_seq" ))output_seq_reliability_ascii ( D1->A, DST->A, out_file); - else - { - DST->A=BUF; - main_output (DST, NULL, NULL, out_format+1, out_file); - } - } - else if (strm (out_format, "sec_html") || strm (out_format, "_E_html")) - { - Alignment *ST, *A; - Sequence *S; - - int a, b,c,i, ns=0; - char *buf; - if (!D1)return 1; - A=D1->A; - - - S=A->S; - ST=copy_aln (A, NULL); - for (a=0; anseq; a++) - { - i=name_is_in_list (ST->name[a],S->name, S->nseq, 100); - if ( i!=-1) - { - buf=seq2E_template_string(S, i); - if ( buf==NULL)continue; - else ns++; - for (c=0,b=0; blen_aln; b++) - { - int r1, s; - r1=ST->seq_al[a][b]; - if ( r1!='-') - { - s=tolower (buf[c]); - if (s=='e')r1='0'; - else if (s=='h')r1='9'; - else if (s=='c')r1='5'; - c++; - } - ST->seq_al[a][b]=r1; - } - } - } - - if (!ns) - { - printf_exit ( EXIT_FAILURE, stderr, "\nYou must provide a TM template file [FATAL:%s]", PROGRAM); - } - output_color_html ( A, ST, out_file); - } - else if (strm (out_format, "tm_html") || strm (out_format, "_T_html")) - { - Alignment *ST, *A; - Sequence *S; - - int a, b,c,i, ns=0; - char *buf; - if (!D1)return 1; - A=D1->A; - - - S=A->S; - ST=copy_aln (A, NULL); - for (a=0; anseq; a++) - { - i=name_is_in_list (ST->name[a],S->name, S->nseq, 100); - if ( i!=-1) - { - buf=seq2T_template_string(S, i); - if ( buf==NULL)continue; - else ns++; - for (c=0,b=0; blen_aln; b++) - { - int r1, s; - r1=ST->seq_al[a][b]; - if ( r1!='-') - { - s=tolower (buf[c]); - if (s=='o')r1='0'; - else if (s=='h')r1='9'; - else if (s=='i')r1='5'; - c++; - } - ST->seq_al[a][b]=r1; - } - } - } - - if (!ns) - { - printf_exit ( EXIT_FAILURE, stderr, "\nYou must provide a TM template file [FATAL:%s]", PROGRAM); - } - output_color_html ( A, ST, out_file); - } - - else if (strm (out_format, "color_exoset")) - { - Alignment *ST, *EX, *A; - Constraint_list *CL; - int a, b, n; - char *buf; - - if ( !DST->A) - { - printf_exit ( EXIT_FAILURE, stderr, "\nYou must provide an obj file via the -struc_in flag [FATAL:%s]", PROGRAM); - } - EX=DST->A; - A=D1->A; - - CL=declare_constraint_list ( DST->S,NULL, NULL, 0,NULL, read_matrice("pam250mt")); - - ST=copy_aln (A, NULL); - buf=vcalloc ( EX->len_aln+1, sizeof (int)); - - for ( a=0; a< A->nseq; a++) - { - int i; - - i=name_is_in_list (A->name[a],EX->name, EX->nseq, -1); - if ( i==-1)continue; - - sprintf ( buf, "%s", EX->seq_al[i]); - ungap (buf); - - for (n=0,b=0; blen_aln; b++) - { - if (!is_gap(A->seq_al[a][b])) - { - if ( buf[n]=='o') - ST->seq_al[a][b]='0'; - else if ( buf[n]=='j') - ST->seq_al[a][b]='1'; - else if ( buf[n]=='b') - ST->seq_al[a][b]='2'; - n++; - } - } - } - vfree (buf); - - output_color_html ( A, ST, out_file); - return EXIT_SUCCESS; - } - - else if (strm (out_format, "color_protogene")) - { - int n, a, b; - DST->A=copy_aln (D1->A, NULL); - for (n=1,a=0; a< (D1->A)->len_aln; a++, n++) - { - for ( b=0; b<(D1->A)->nseq; b++) - { - if (is_gap((D1->A)->seq_al[b][a])); - else if ( n<=3)(DST->A)->seq_al[b][a]=2; - else if ( n>3)(DST->A)->seq_al[b][a]=9; - } - - if ( n==6)n=0; - } - output_color_html ( D1->A, DST->A, out_file); - return EXIT_SUCCESS; - - } - else if ( strncmp (out_format, "color",5)==0) - { - Alignment *BUF; - - if (!D1)return 1; - - if ( !DST) - { - fprintf ( stderr,"\n[You Need an evaluation File: Change the output format or use +evaluate][FATAL:%s]\n", PROGRAM); - myexit(EXIT_FAILURE); - } - while ( out_format[0]!='_' && out_format[0]!='\0' )out_format++; - - BUF=copy_aln (DST->A, NULL); - - - - - if ( strm ( out_format, "_html" ))output_color_html ( D1->A, DST->A, out_file); - else if( strm ( out_format, "_ps" ))output_color_ps ( D1->A, DST->A, out_file); - else if( strm ( out_format, "_pdf" ))output_color_pdf ( D1->A, DST->A, out_file); - else if( strm ( out_format, "_ascii" ))output_color_ascii ( D1->A, DST->A, out_file); - else - { - DST->A=BUF; - return main_output (DST, NULL, NULL, out_format+1, out_file); - } - return EXIT_SUCCESS; - } - else if ( strm4 ( out_format, "tc_aln","t_coffee_aln", "t_coffee", "tcoffee")) - { - if (!D1)return 1; - vfclose (output_aln ( D1->A, vfopen (out_file, "w"))); - } - else if ( strm ( out_format, "analyse_pdb")) - { - if (!D1)return 1; - if ( !DST) - { - fprintf ( stderr,"\n[You Need an evaluation File: Change the output format][FATAL:%s]\n", PROGRAM); - myexit(EXIT_FAILURE); - } - analyse_pdb ( D1->A,DST->A, "stdout"); - (DST->A)=aln2number (DST->A); - output_reliability_ps ( D1->A, DST->A, out_file); - } - else if ( strm4 ( out_format, "lower0", "lower1", "lower2", "lower3") || strm4(out_format, "lower4", "lower5", "lower6", "lower7") || strm4 (out_format,"lower8", "lower9", "align_pdb", "malign_pdb") ) - { - if (!D1)return 1; - if ( !DST) - { - fprintf ( stderr,"\n[You Need an evaluation File: Change the output format][FATAL:%s]\n", PROGRAM); - myexit(EXIT_FAILURE); - } - - - - (DST->A)=aln2number (DST->A); - if ( strm (out_format, "align_pdb"))value=0; - else if ( strm (out_format, "malign_pdb"))value=5; - else value=atoi(out_format+5); - - D1->A=filter_aln_upper_lower (D1->A, DST->A,0, value); - output_clustal_aln ( out_file, D1->A); - } - else if ( strnm (out_format, "repeat", 6)) - { - int size; - int a, b, c; - Alignment *CONC; - - if ( !D1)return 1; - size=atoi (out_format+6); - print_aln (D1->A); - CONC=declare_aln2 ( (D1->A)->nseq, ((D1->A)->len_aln+1)*size+1); - - for ( a=0; a< (D1->A)->nseq; a++)(D1->A)->seq_al[a][(D1->A)->len_aln]='\0'; - for ( c=0,a=0; a< (D1->A)->nseq;c++) - { - - sprintf ( CONC->name[c], "%s", (D1->A)->name[a]); - for ( b=0; bseq_al[c], (D1->A)->seq_al[a]); - strcat (CONC->seq_al[c], "O"); - } - } - CONC->nseq=c;CONC->len_aln=strlen (CONC->seq_al[0]); - output_clustal_aln ( out_file, CONC); - free_aln (CONC); - } - - else if ( strnm (out_format, "upper", 5)) - { - - if (!D1)return 1; - if ( !DST) - { - fprintf ( stderr,"\n[You Need an evaluation File: Change the output format][FATAL:%s]\n", PROGRAM); - myexit(EXIT_FAILURE); - } - - - (DST->A)=aln2number (DST->A); - - value=atoi(out_format+5); - - D1->A=filter_aln_lower_upper (D1->A, DST->A,0, value); - output_clustal_aln ( out_file, D1->A); - } - - else if ( strm4 ( out_format, "filter0", "filter1", "filter2", "filter3")) - { - if (!D1)return 1; - if ( !DST) - { - fprintf ( stderr,"\n[You Need an evaluation File: Change the output format][FATAL:%s]\n", PROGRAM); - myexit(EXIT_FAILURE); - } - - (DST->A)=aln2number (DST->A); - - D1->A=filter_aln (D1->A, DST->A, atoi(out_format+6)); - output_clustal_aln ( out_file, D1->A); - } - - else if ( strm3 ( out_format, "phylip_aln", "phylip", "phy")) - { - if (!D1)return 1; - output_phylip_aln ( out_file, D1->A); - } - else if ( strm ( out_format, "mocca_aln")) - { - if (!D1)return 1; - output_mocca_aln ( out_file, D1->A, DST->A); - } - else if ( strm ( out_format, "saga_pw_sd_weights") ) - { - if (!D1)return 1; - output_pw_weights4saga ((D1->W),(D1->W)->PW_SD, out_file); - } - else if ( strm ( out_format, "saga_aln")) - { - if (!D1)return 1; - output_saga_aln (out_file, D1->A); - } - else if (strm2 ( out_format, "aln","clustal_tc")|| strm (out_format, "msa")) - { - - if (!D1)return 1; - output_clustal_aln (out_file, D1->A); - } - else if (strm5 ( out_format, "strict_clustal","clustal_aln", "clustalw","clustal", "clustalw_aln") || strm (out_format,"number_aln")) - { - if (!D1)return 1; - output_strict_clustal_aln (out_file, D1->A); - } - else if ( strm ( out_format, "conc_aln")) - { - if (!D1)return 1; - output_conc_aln (out_file, D1->A); - } - else if ( strm2 ( out_format, "lalign_aln","lalign")) - { - if (!D1)return 1; - output_lalign (out_file, D1->A); - } - else if ( strm2 ( out_format, "glalign_aln","glalign")) - { - if (!D1)return 1; - output_glalign (out_file, D1->A, DST->A); - } - - else if ( strm2 ( out_format, "fasta_aln","fasta" ) || strm (out_format, "blast_aln")) - { - if (!D1)return 1; - output_fasta_aln( out_file, D1->A); - } - else if ( strm (out_format, "overaln")) - { - Alignment *EB=NULL; - char *s, mode[100]; - OveralnP *F; - int eb=0; - if (!D1) return 1; - F=vcalloc (1, sizeof (OveralnP)); - - string_array_upper ((D1->A)->seq_al, (D1->A)->nseq); - if ( D2 && D2->A) - { - D1->A=mark_exon_boundaries (D1->A, D2->A); - eb=1; - } - else if ( (s=get_string_variable ("exon_boundaries"))) - { - Sequence *S; - Alignment *EB; - EB=seq2aln(S=main_read_seq(s),NULL, 0); - D1->A=mark_exon_boundaries (D1->A, EB); - free_sequence (S, S->nseq); free_aln (EB); - eb=1; - } - - sprintf (F->mode, "%s", ((s=get_string_variable ("overaln_mode")))?s:"lower"); - if (!strm (F->mode, "lower") && !strm (F->mode, "unalign"))printf_exit (EXIT_FAILURE,stderr,"\nERROR: unknown overal_mode in overal output [%s] [FATAL:%s]", mode, PROGRAM); - - if (int_variable_isset ("overaln_threshold"))F->t=get_int_variable ("overaln_threshold"); - if (int_variable_isset ("overaln_target"))F->f=get_int_variable ("overaln_target"); - if (int_variable_isset ("overaln_P1"))F->p1=get_int_variable ("overaln_P1"); - if (int_variable_isset ("overaln_P2"))F->p2=get_int_variable ("overaln_P2"); - if (int_variable_isset ("overaln_P3"))F->p3=get_int_variable ("overaln_P3"); - if (int_variable_isset ("overaln_P4"))F->p4=get_int_variable ("overaln_P4"); - - if (eb)sprintf (F->model, "fsa2"); - else sprintf (F->model, "fsa1"); - D1->A=aln2clean_pw_aln (D1->A, F); - - //if (eb)D1->A=aln2clean_pw_aln (D1->A, mode,t, f,p1,p2,p3, "fsa2"); - //else D1->A=aln2clean_pw_aln (D1->A, mode,t, f,p1,p2,p3, "fsa1"); - - D1->S=aln2seq(D1->A); - output_clustal_aln (out_file, D1->A); - } - else if ( strm ( out_format, "est_prf" )) - { - if (!D1)return 1; - output_est_prf( out_file, D1->A); - } - else if ( strm ( out_format, "clean_est_fasta_seq" )) - { - if (!D1)return 1; - D1->A=clean_est(D1->A); - output_fasta_seq(out_file, D1->A); - - } - - else if ( strm3 ( out_format, "msf_aln", "gcg", "msf")) - { - if (!D1)return 1; - output_msf_aln( out_file, D1->A); - } - else if ( strm ( out_format, "rnalign")) - { - if (!D1)return 1; - output_rnalign (out_file, D1->A, DST->S); - } - else if ( strm ( out_format, "fasta_seq") ||strm ( out_format, "list")||strm ( out_format, "file_list")) - { - int z; - if (!D1)return 1; - output_fasta_seq (out_file,D1->A); - } - else if (strm (out_format, "fasta_tree") ) - { - if (!D1)return 1; - output_fasta_tree (out_file,D1->A); - } - - else if ( strm ( out_format, "gotoh_seq")) - { - if (!D1)return 1; - output_gotoh_seq (out_file,D1->A); - } - else if ( strm (out_format, "fasta_seq1")) - { - if (!D1)return 1; - output_fasta_seq1 (out_file, D1->A); - } - else if ( strm2 (out_format, "pir_aln", "pir")) - { - if (!D1)return 1; - output_pir_aln (out_file, D1->A); - } - else if ( strm (out_format, "pir_seq")) - { - if (!D1)return 1; - output_pir_seq (out_file, D1->A); - } - else if ( strm (out_format, "gor_seq")) - { - if (!D1)return 1; - output_gor_seq (out_file, D1->A); - } - else if ( strm (out_format, "pir_seq1")) - { - if (!D1)return 1; - output_pir_seq1 (out_file, D1->A); - } - else if ( strm (out_format, "pw_lib_saga_aln")) - { - if (!D1)return 1; - output_pw_lib_saga_aln (out_file, D1->A); - } - else if ( strm (out_format, "lib")) - { - if (!D1)return 1; - output_lib (out_file, D1->A); - } - else if ( strm (out_format, "pdb_constraint_list")) - { - if (!D1)return 1; - output_constraints (out_file, "pdb",D1->A); - } - else if ( strm2 (out_format, "constraint_list","tc_lib")) - { - - if (!D1)return 1; - else if (!D1->CL)output_constraints (out_file,"sim", D1->A); - else if (D1->CL) vfclose ( save_constraint_list ( D1->CL, 0, (D1->CL)->ne, out_file, NULL, "ascii",(D1->CL)->S)); - } - else if ( strm2 (out_format, "extended_lib","extended_cosmetic")) - { - if (!D1)return 1; - output_constraints (out_file,out_format, D1->A); - } - else if ( strncmp (out_format, "extended_pair", 13)==0) - { - if (!D1)return 1; - output_constraints (out_file,out_format, D1->A); - } - else if ( strm (out_format, "cache_id")) - { - if (!D1)return 1; - cache_id (D1->A); - output_saga_aln (out_file, D1->A); - } - else if ( strm (out_format, "compress_aln")) - { - if (!D1)return 1; - compress_aln (D1->A); - output_saga_aln (out_file, D1->A); - } - else if (strm (out_format, "n_seq") ||strm (out_format, "nseq") ) - { - if (!D1)return 1; - fp=vfopen ( out_file, "w"); - fprintf ( fp, "%d\n", (D1->A)->nseq); - vfclose (fp); - } - - else if ( strm ( out_format, "thread_dna_on_prot_aln")) - { - if (!D1)return 1; - D1->A=thread_dnaseq_on_prot_aln (D1->S, D2->A); - output_saga_aln ( out_file, D1->A); - } - else if ( strm ( out_format, "tdna_fasta_seq1")) - {if (!D1)return 1; - D1->A=translate_dna_aln (D1->A,0); - output_fasta_seq1 (out_file, D1->A); - } - else if ( strm ( out_format, "tdna_aln")) - {if (!D1)return 1; - D1->A=translate_dna_aln (D1->A,0); - output_saga_aln ( out_file, D1->A); - } - else if ( strm ( out_format, "cdna_fasta_seq1")) - {if (!D1)return 1; - D1->A= gene2prot(D1->A); - output_fasta_seq1 ( out_file, D1->A); - } - else if ( strm ( out_format, "mutate_cdna_aln")) - {if (!D1)return 1; - D1->A= mutate_cdna_aln ( D1->A); - output_clustal_aln ( out_file, D1->A); - } - else if ( strm ( out_format, "tdna_sp_aln")) - { if (!D1)return 1; - if ( !DST) - { - fprintf ( stderr,"\n[You Need an evaluation File: Change the output format][FATAL:%s]\n", PROGRAM); - myexit(EXIT_FAILURE); - } - (DST->A)=aln2number (DST->A); - D1->A=translate_splice_dna_aln (D1->A, DST->A); - output_saga_aln ( out_file, D1->A); - } - else if (out_format && out_format[0] && (strcmp ( out_format,"rna_graph_fasta")==0)) - { - if (!D1)return 1; - sprintf ( (D1->A)->seq_al[0], "%s",(DST->S)->seq[0]); - (D1->A)->nseq=0; - output_fasta_seq (out_file, DST->A); - } - else if (strm ( out_format, "freq_mat")) - { - if (!D1)return 1; - output_freq_mat (out_file, D1->A); - } - else if (strm ( out_format, "maln_pval")) - {if (!D1)return 1; - output_maln_pval ( out_file, D1->A); - } - else if ( strm ( out_format, "model_aln")) - { - if (!D1)return 1; - output_model_aln ( out_file, D1->A); - } - else if (strncmp (out_format, "mult",4)==0) - { - if (!D1)return 1; - output_mult_fasta_seq ( out_file, D1->A, atoi(out_format+4)); - } - else if (strm (out_format, "conservation")) - { - output_conservation_statistics (out_file, D1->A); - } - else if (strm (out_format, "len")) - { - if (!D1)return 1; - output_statistics (out_file, D1->A, "nrl"); - } - else if ( strm (out_format, "name")) - { - if (!D1)return 1; - if ( D1->A)output_statistics (out_file, D1->A, "n"); - if ( D1->T) - { - Sequence *TS; - TS=tree2seq(D1->T, NULL);print_array_char (vfopen(out_file, "w"), TS->name, TS->nseq, "\n"); - } - } - else if ( strm (out_format, "code_name")) - { - char **nl=NULL; - int num, n=0; - Sequence *TS; - FILE *lfp; - if ( D1->A){n=(D1->A)->nseq, nl=(D1->A)->name;} - if ( D1->T){TS=tree2seq(D1->T, NULL);nl=TS->name;n=TS->nseq;} - - lfp=vfopen (out_file, "w"); - for ( num=0; numA); - } - else if ( strstr ( out_format, "pavie_age_channel")) - { - output_n_pavie_age_channel ( D1->S,out_file, atoi((out_format+strlen ("pavie_age_channel")))); - return EXIT_SUCCESS; - } - else if ( strstr ( out_format, "age_matrix")) - { - output_age_matrix (out_file, atoi((out_format+10))); - } - else if ( strm ( out_format, "transitions")) - { - output_transitions (out_file, D1->A); - } - - else if ( strncmp (out_format, "statistics",10)==0) - { - if (!D1)return 1; - - output_statistics (out_file, D1->A,out_format+10); - } - - - - - else if ( strm4 (out_format, "newick_tree","newick","binary","nh")) - { - if (!D1)return 1; - - /*D1->T=unroot_tree(D1->T);*/ - vfclose (print_tree ((D1->T), out_format, vfopen ( out_file, "w"))); - } - else if ( strncmp (out_format, "sarsim", 6)==0) - { - if (!D1)return 1; - compare_sar_sequence (D1->S, (D2 &&D2->S)?D2->S:D1->S, atoi(out_format+6)); - return EXIT_SUCCESS; - } - else if ( strncmp (out_format, "sim",3)==0) - { - if (!D1)return 1; - output_similarities (out_file, D1->A,out_format); - } - - else if ( strncmp (out_format, "cov",3)==0) - { - if (!D1)return 1; - output_similarities (out_file, D1->A,out_format); - } - else if ( strm (out_format, "stockholm_aln")) - { - output_stockholm_aln (out_file,D1->A, (D2)?D2->A:NULL); - } - else if ( strm (out_format, "pair_sim")) - { - if ( !D2) - { - fprintf ( stderr, "\n-output=pair_sim: provide aln1 via -in and aln2 via -in2 [FATAL:%s]\n", PROGRAM); - myexit (EXIT_FAILURE); - } - output_similarities_pw (out_file, D1->A,D2->A,out_format); - } - else if ( strm (out_format, "matrix") || strm (out_format, "blast_matrix")) - { - output_blast_mat (D1->M, out_file); - } - else - { - - fprintf ( stderr, "\n%s is an UNKNOWN OUTPUT FORMAT [FATAL:%s]\n",out_format, PROGRAM); - myexit (EXIT_FAILURE); - - } - - //Remove the expansion - if ( expanded) - { - free_aln (D1->A); - D1->A=BUF_A; - } - return 0; - } -int is_in_format_list ( char *name) - { - if ( strcmp ( name, "saga_aln")==0)return 1; - if ( strcmp ( name, "number_aln")==0)return 1; - if ( strcmp ( name, "clustal_aln")==0)return 1; - if ( strcmp ( name, "fasta_aln")==0)return 1; - if ( strcmp ( name, "number_fasta")==0)return 1; - if ( strcmp ( name, "fasta_seq")==0)return 1; - if ( strcmp ( name, "pdb")==0)return 1; - if ( strcmp ( name, "msf_aln")==0)return 1; - if ( strcmp ( name, "dali_aln")==0)return 1; - if ( strcmp ( name, "dali_seq")==0)return 1; - if ( strcmp ( name, "barton_list_tc")==0)return 1; - if ( strcmp ( name, "est_prf")==0)return 1; - - if ( strcmp ( name, "gotoh_aln")==0)return 1; - if ( strcmp ( name, "amps_aln")==0)return 1; - if ( strcmp ( name, "pir_aln")==0)return 1; - if ( strcmp ( name, "pir_seq")==0)return 1; - if ( strcmp ( name, "est_fasta")==0)return 1; - if ( strcmp ( name, "amps_sd_scores")==0)return 1; - if ( strcmp ( name, "pima_aln")==0)return 1; - if ( strcmp ( name, "dialign_aln")==0)return 1; - if ( strcmp ( name, "gor_seq")==0)return 1; - if ( strcmp ( name, "gor_struc")==0)return 1; - if ( strcmp ( name, "stockholm_aln")==0)return 1; - - return 0; - } -int is_struc_in_format_list ( char *name) - { - if ( strcmp ( name, "rna_number")==0)return 1; - if ( strcmp ( name, "fasta_seq")==0)return 1; - return 0; - } -char *format_name2aln_format_name (char *name) - { - if ( strm (name, "gcg"))sprintf (name, "msf"); - else if ( strm (name, "fasta"))sprintf (name, "fasta_aln"); - return name; - } -int is_out_format_list ( char *name) - { - return main_output (NULL, NULL, NULL, name, NULL); - } - -int is_struc_out_format_list ( char *name) - { - return main_output (NULL, NULL, NULL, name, NULL); - } - -/**************************************************************************************************/ -/*************************************REFORMAT UTIL*************************************************/ -/**************************************************************************************************/ - -/*************************************REFORMAT IN**************************************************/ -/**************************************************************************************************/ -/*******************************************************************************************/ -/* */ -/* */ -/* READ COG FILE */ -/* */ -/***************************************************************************************** */ - -/*******************************************************************************************/ -/* */ -/* */ -/* INPUT WEIGHTS */ -/* */ -/***************************************************************************************** */ - -Weights* get_amps_sd_scores ( char *fname) - { - FILE *fp; - char *buf; - char *buf2; - int nseq; - Weights *W; - int a, b,e; - int c; - float array[20]; - - buf=vcalloc ( 1001, sizeof (char)); - buf2=vcalloc ( 1001, sizeof (char)); - - fp=vfopen ( fname, "r"); - set_fp_id ( fp, "Index"); - buf=fgets ( buf, 1000, fp); - fscanf ( fp, "%s", buf2); - - nseq=0; - while ( isalnum(buf2[0]) && !isalpha(buf2[0])) - { - nseq++; - buf=fgets ( buf, 1000, fp); - fscanf ( fp, "%s", buf2); - } - vfclose ( fp); - - W=declare_weights (nseq); - - fp=vfopen ( fname, "r"); - set_fp_id ( fp, "Index"); - buf=fgets ( buf, 1000, fp); - fscanf ( fp, "%s", buf2); - - a=0; - while ( isalnum(buf2[0]) && !isalpha(buf2[0])) - { - fp=set_fp_after_char (fp, '>'); - fscanf ( fp, "%s",W->seq_name[a]); - buf=fgets ( buf, 1000, fp); - fscanf ( fp, "%s", buf2); - a++; - } - buf=fgets ( buf, 1000, fp); - c=1; - while ( c!=0) - { - for ( e=0; e< 16; e++) - { - c=fscanf ( fp, "%f", &array[e]); - } - fscanf ( fp, "\n"); - if ( c!=0) - { - - a=(int)array[0]-1; - b=(int)array[1]-1; - W->PW_ID[b][a]=W->PW_ID[a][b]=array[9]; - W->PW_SD[b][a]=W->PW_SD[a][b]=array[14]; - } - - } - vfclose ( fp); - sprintf ( W->comments, "SD WEIGHTS GENERATED WITH THE PROGRAM AMPS IN PAIRWISE MODE"); - vfree ( buf); - return W; - } - -Weights *read_seq_weight (char **name, int nseq, char* seq_weight) - { - int a, p; - Weights *W; - float w; - - FILE *fp; - char line[LONG_STRING]; - char sname[MAXNAMES]; - - - /*Read sequence weights: - * comment - name1 weight1 - ..... - - - NOTE: - weights must be between 0 and 1; - - sequences not in S do not get any weight - sequences in S but not in file get a weight of 1 - */ - if ( !is_single_seq_weight_file (seq_weight)) - { - fprintf ( stderr, "\nERROR: File %s is not in Format SINGLE_SEQ_WEIGHT_FORMAT_01 [FATA:%s]", seq_weight,PROGRAM); - myexit (EXIT_FAILURE); - return NULL; - } - else - { - W=declare_weights(nseq); - for ( a=0; a< nseq; a++) - { - sprintf ( W->seq_name[a], "%s", name[a]); - W->SEQ_W[a]=1; - } - sprintf ( W->mode, "%s", seq_weight); - fp=vfopen (seq_weight, "r"); - - - while ( fgets( line,LONG_STRING-1, fp)) - { - if ( line[0]=='*' ||line[0]=='#' || isblanc(line)); - else - { - if (sscanf(line, "%s %f", sname, &w)!=2)continue; - if ( (p=name_is_in_list ( sname, W->seq_name, nseq, MAXNAMES-1))!=-1) - { - W->SEQ_W[p]=w; - } - } - } - vfclose (fp); - return W; - } - } - - -/*******************************************************************************************/ -/* */ -/* */ -/* INPUT MISC */ -/* */ -/***************************************************************************************** */ - -char *** read_rename_file ( char *fname, int code) -{ - int n; - FILE *fp; - char ***convert=NULL; - - convert=declare_arrayN(3, sizeof (char),count_n_line_in_file(fname) +1,2,MAXNAMES+1); - fp=vfopen (fname, "r"); - n=0; - if ( code==CODE) while ( fscanf ( fp, "%s %s\n", convert[n][0], convert[n][1])==2)n++; - else if (code==DECODE)while ( fscanf ( fp, "%s %s\n", convert[n][1], convert[n][0])==2)n++; - vfclose (fp); - return convert; -} - -void get_barton_list_tc_seq ( char *in_file) - { - FILE *fp, *fp_make, *fp_length, *fp_long; - FILE *fp_small[9]; - - static char *buf; - int len_buf=10000; - char name[100]; - - char pwd[100]; - int a,c,nseq; - int k=0; - int *length; - int longest=0; - - c=0; - length=vcalloc ( 1000, sizeof(int)); - if ( buf==NULL)buf=vcalloc ( len_buf, sizeof (char)); - fp=vfopen (in_file, "r"); - fp_long=vfopen ( "barton_seq_list_large", "w"); - fp_make=vfopen ( "make_dir", "w"); - fp_length=vfopen ( "barton_length", "w"); - for ( a=0; a< 9; a++) - { - sprintf ( name, "barton_nseq%d",a); - fp_small[a]=vfopen ( name, "w"); - } - get_pwd (pwd); - - - while ( c!=EOF) - {a=0; - while ( (c=fgetc(fp))!='#'); - while ( (c=fgetc(fp))=='#'); - ungetc ( c, fp); - while ( (c=fgetc(fp))!='#')buf[a++]=c; - buf[a]='\0'; - - sprintf ( name, "%s", buf); - - while ( (c=fgetc(fp))=='#'); - - if ( c!=EOF) - { - a=0; - while ( (c=fgetc(fp))!='#' && c!=EOF) - { - buf[a++]=c; - if (a==len_buf) - { - len_buf+=10000; - buf=vrealloc ( buf, len_buf*sizeof (char)); - } - } - buf[a]='\0'; - if (c!=EOF) - { - - nseq=process_barton_entry ( buf,name); - length[nseq]++; - longest=(longest') - { - a=get_string_line (a,2, buf, buf2); - while ((c=buf[a++])!='*') - if (isalnum (c)|| c=='.' || c=='-') - clen++; - max_len_seq=(clen> max_len_seq)?clen: max_len_seq; - min_len_seq=(clen< min_len_seq)?clen: min_len_seq; - nseq++; - clen=0; - } - if ( buf[a]!='\0')a++; - } - - - LS=declare_sequence ( min_len_seq, max_len_seq, nseq); - LS->nseq=nseq; - - - for (a=0, current=0; current< nseq; current++) - { - a=get_string_line ( a, 1, buf, buf2); - sscanf ( buf2, ">P1;%s", LS->name[current]); - a=get_string_line ( a, 1, buf, buf2); - l=strlen ( buf2); - buf2[l-1]='\0'; - sprintf ( LS->seq_comment[current], buf2); - - p=0; - while ( (c=buf[a++])!='*') - { - if (isalpha (c)) - LS->seq[current][p++]=tolower (c); - else if ( isgraph(c)) - LS->seq[current][p++]=(c); - } - a++; - } - - LA=declare_Alignment(LS); - seq2aln ( LS, LA,rm_gap); - output_fasta_seq (fname,LA); - output_pir_check (com_name,LA->nseq, LA->seq_comment); - free_Alignment ( LA); - free_sequence ( LS, nseq); - - return nseq; - } - - - - -Structure *read_rna_struc_number (Alignment *A,char *fname) - { - FILE *fp; - int a; - char x,y; - float f; - Sequence *SA; - Structure *ST; - int first, last; - - SA=declare_sequence ( A->len_aln, A->len_aln, 1); - SA->len[0]=A->len[0]; - for ( a=0; a< SA->len[0]; a++) - SA->seq[0][a]='.'; - ST=declare_rna_structure_num (SA); - ST->S=SA; - - fp=vfopen ( fname, "r"); - fscanf ( fp, "%c\n%d\n",&x, &(ST)->tot_list); - for ( a=0; a<(ST)->tot_list; a++) - { - fscanf ( fp, "%d %d %d %c %c %f\n", &(ST)->list[a][0],&(ST)->list[a][1],&(ST)->list[a][2], &x, &y, &f); - (ST)->list[a][0]--; - (ST)->list[a][1]--; - (ST)->list[a][2]--; - if ( a==0) - { - (ST)->stem[0][0]=(ST)->list[a][0]; - (ST)->stem[0][1]=a; - } - else if ( (ST)->stem[(ST)->tot_stem][0]==(ST)->list[a][0]); - else if ( (ST)->stem[(ST)->tot_stem][0]!=(ST)->list[a][0]) - { - (ST)->stem[(ST)->tot_stem][2]=a-1; - (ST)->tot_stem++; - (ST)->stem[(ST)->tot_stem][0]=(ST)->list[a][0]; - (ST)->stem[(ST)->tot_stem][1]=a; - } - - SA->seq[0][(ST)->list[a][1]]='-'; - SA->seq[0][(ST)->list[a][2]]='-'; - } - (ST)->stem[(ST)->tot_stem][2]=a-1; - (ST)->tot_stem++; - for ( a=0; a< (ST)->tot_stem; a++) - { - - first=(ST)->stem[a][1]; - last=(ST)->stem[a][2]; - SA->seq[0][(ST)->list[first][1]]='>'; - SA->seq[0][(ST)->list[first][2]]='<'; - SA->seq[0][(ST)->list[last][1]]='>'; - SA->seq[0][(ST)->list[last][2]]='<'; - } - - return ST; - } - -Structure * declare_rna_structure_num (Sequence *SA) - { - Structure *ST; - ST=vcalloc ( 1, sizeof ( Structure)); - ST->list=declare_int ( SA->len[0], 3); - ST->stem=declare_int ( SA->len[0], 3); - return ST; - } -char ** read_lib_list (char *name, int *n) -{ - - char **lines; - char **list; - int a, b, l; - - lines=file2lines (name); - l=atoi (lines[0]); - - list=vcalloc (l, sizeof (char*)); - for ( n[0]=0,a=1; a .... - Groups must NOT be overlaping - list[group_index][0]="number of sequences" - list[group_index][1]="group name" - list[group_index][2...N]="sequence" - */ - - FILE *fp; - char *buf; - char ***list; - int a, c, l; - - - - l=measure_longest_line_in_file (file)+1; - buf=vcalloc (l, sizeof (char)); - list=vcalloc ( count_n_line_in_file (file )+1, sizeof (char**)); - - fp=vfopen (file, "r"); - - a=0; - while ((c=fgetc(fp))!=EOF) - { - buf=fgets (buf,l-1, fp); - if ( c=='>')list[a++]=string2list (buf); - } - vfclose (fp); - vfree (buf); - return list; -} -static Sequence* get_pdb_sequence_from_field (char *fname, char *field); -Sequence* get_pdb_sequence (char *fname) -{ - Sequence *S; - - - if ( (S=get_pdb_sequence_from_field(fname, "SEQRES"))!=NULL); - else if ( (S=get_pdb_sequence_from_field(fname, "ATOM"))!=NULL) - { - add_warning (stderr,"Warning: Read Sequence from ATOM field in %s [%s:WARNING]", fname, PROGRAM); - } - else - { - add_warning ( stderr, "\nWARNING: failed to extract sequence from %s [%s:WARNING]\n", fname, PROGRAM); - S=NULL; - } - return S; -} -static Sequence* get_pdb_sequence_from_field (char *fname, char *field) - { - char *tp_name; - char *command; - char *pdbid; - Sequence *S; - - - command=vcalloc ( LONG_STRING, sizeof (char)); - tp_name=vtmpnam (NULL); - - sprintf ( command, "extract_from_pdb -seq_field %s -chain FIRST -infile \'%s\' -mode fasta > %s", field, check_file_exists(fname), tp_name); - if ( getenv4debug ("DEBUG_EXTRACT_FROM_PDB"))fprintf ( stderr, "\n[DEBUG_EXTRACT_FROM_PDB:get_pdb_seq] %s\n", command); - my_system ( command); - - - S=get_fasta_sequence ( tp_name, NULL); - if (S==NULL)return NULL; - - if ( (pdbid=get_pdb_id (fname))){sprintf ( S->name[0], "%s",pdbid);vfree (pdbid);} - S->nseq=1; - - sprintf ( S->file[0], "%s", fname); - S->max_len=S->min_len=S->len[0]; - if ( S->len[0]==0) - { - free_sequence (S, -1); - S=NULL; - } - - vremove ( tp_name); - vfree ( command); - - return S; - } - -char * get_pdb_file ( char *fname) - { - char *file; - int a, c; - FILE *fp; - - - a=0; - file=vcalloc ( sizeof (char),count_n_char_in_file ( fname)+1); - fp=vfopen ( fname, "r"); - while ( (c=fgetc(fp))!=EOF)file[a++]=c; - file[a]='\0'; - return file; - } - -Sequence* get_struc_gor ( char *fname) - { - int nseq, min_len, max_len; - int a, c; - int len; - char name[STRING]; - - - FILE *fp; - Sequence *S; - - min_len=max_len=-1; - fp=vfopen ( fname, "r"); - nseq=0; - while ( (c=fgetc(fp))!=EOF) - { - if ( c!='!'); - else - { - nseq++; - fscanf ( fp, "%s %d", name, &len); - if (min_len==-1)min_len=max_len=len; - else - { - min_len=(len>min_len)?min_len:len; - max_len=(len>max_len)?len:max_len; - } - } - - } - vfclose (fp); - - S=declare_sequence ( min_len, max_len+1,nseq); - S->nseq=0; - - fp=vfopen (fname,"r"); - while ( (c=fgetc(fp))!=EOF) - { - if ( c!='!'); - else - { - fscanf ( fp, "%s %d\n",S->name[S->nseq], &(S->len[S->nseq])); - - while ( (c=fgetc(fp))!='\n'); - - for ( a=0; alen[S->nseq]; a++) - fscanf ( fp, " %*c %c %*f %*f %*f\n",&(S->seq[S->nseq][a])); - - S->seq[S->nseq][a]='\0'; - while ( (c=fgetc(fp))!='!' && c!=EOF); - ungetc (c, fp); - S->nseq++; - } - - } - vfclose (fp); - return S; - } - -Sequence* get_sequence_dali (char *fname) - { - Sequence *LS; - FILE *fp; - int c; - - char name[100]; - int clen=0; - int current=0; - int p=0; - int max_len_seq=0; - int min_len_seq=999999; - int nseq=0; - - if ((fp=vfopen (fname,"r"))==NULL) - {printf ( "\nCOULDN'T OPEN %s",fname); - myexit(EXIT_FAILURE); - } - c=fgetc(fp); - while (c!=EOF) - { - if (isdigit(c)) - { - ungetc(c, fp); - fscanf (fp, "%s",name); - while (!isdigit(c=fgetc(fp)) && c!=EOF) - if (isalnum (c) || c=='.' || c=='-') - clen++; - max_len_seq=(clen> max_len_seq)?clen: max_len_seq; - min_len_seq=(clen< min_len_seq)?clen: min_len_seq; - nseq++; - clen=0; - } - else - c=fgetc (fp); - } - vfclose (fp); - - LS=declare_sequence ( min_len_seq, max_len_seq+1,nseq); - LS->nseq=nseq; - - fp=vfopen (fname,"r"); - - current=0; - c=fgetc(fp); - while (c!=EOF) - { - if (isdigit(c)) - { - ungetc(c, fp); - fscanf_seq_name (fp, LS->name[current]); - p=0; - while (!isdigit(c=fgetc(fp)) && c!=EOF) - { - if (isalpha (c)) - LS->seq[current][p++]=tolower (c); - else if ( c=='.') - LS->seq[current][p++]='-'; - else if ( c=='-') - LS->seq[current][p++]='-'; - } - LS->seq[current][p]='\0'; - LS->len[current]=strlen ( LS->seq[current]); - current++; - } - else - c=fgetc ( fp); - } - - vfclose (fp); - - - return LS; - } - -Sequence* get_dialign_sequence (char *fname) - { - Sequence *LS; - FILE *fp; - int c; - - char name[10000]; - int clen=0; - int current=0; - int p=0; - int max_len_seq=0; - int min_len_seq=999999; - int nseq=0, l=0; - char *buf; - - buf=vcalloc ( 1000, sizeof (char)); - if ((fp=vfopen (fname,"r"))==NULL) - {printf ( "\nCOULDN'T OPEN %s",fname); - myexit(EXIT_FAILURE); - } - c=fgetc(fp); - while (c!=EOF) - { - if (c=='>') - {fscanf (fp, "%s",name); - - buf=fgets ( buf, 1000, fp); - while ((c=fgetc(fp))!='>' && c!=EOF && c!=' ' && c!='\t') - if (isalnum (c)|| is_gap(c)) - clen++; - max_len_seq=(clen> max_len_seq)?clen: max_len_seq; - min_len_seq=(clen< min_len_seq)?clen: min_len_seq; - nseq++; - clen=0; - } - else - c=fgetc (fp); - } - vfclose (fp); - - LS=declare_sequence ( min_len_seq, max_len_seq, nseq); - LS->nseq=nseq; - - fp=vfopen (fname,"r"); - - current=0; - c=fgetc(fp); - while (c!=EOF) - { - if (c=='>') - { - - fscanf_seq_name (fp, LS->name[current]); - l=strlen ( LS->name[current]); - if ( LS->name[current][l-1]==','||LS->name[current][l-1]==',')LS->name[current][l-1]='\0'; - buf=fgets ( buf, 1000, fp); - p=0; - while ((c=fgetc(fp))!='>' && c!=EOF && c!=EOF && c!=' ' && c!='\t') - if (isalpha (c)) - LS->seq[current][p++]=tolower (c); - else if ( isgraph(c)) - LS->seq[current][p++]=(c); - LS->seq[current][p]='\0'; - LS->len[current]=strlen ( LS->seq[current]); - current++; - } - else - c=fgetc ( fp); - } - - vfclose (fp); - return LS; - } - -Sequence* get_pima_sequence (char *fname) - { - Sequence *LS; - - FILE *fp; - int c; - - char name[10000]; - int clen=0; - int current=0; - int p=0; - int max_len_seq=0; - int min_len_seq=999999; - int nseq=0, l=0, len=0; - char *buf, *buf2; - char prefix[1000]; - - sprintf ( prefix, "%s",fname); - - buf=strstr(prefix, "-"); - buf[0]='\0'; - len=strlen (prefix); - - - - buf=vcalloc ( 1000, sizeof (char)); - if ((fp=vfopen (fname,"r"))==NULL) - {printf ( "\nCOULDN'T OPEN %s",fname); - myexit(EXIT_FAILURE); - } - c=fgetc(fp); - while (c!=EOF) - { - if (c=='>') - { - fscanf_seq_name (fp,name); - if ( strlen(name)>=len && strncmp ( name, prefix, len)==0) - { - c=fgetc(fp); - } - else - { - - buf=fgets ( buf, 1000, fp); - while ((c=fgetc(fp))!='>' && c!=EOF) - if (isalnum (c)|| is_gap(c)) - clen++; - max_len_seq=(clen> max_len_seq)?clen: max_len_seq; - min_len_seq=(clen< min_len_seq)?clen: min_len_seq; - nseq++; - clen=0; - } - } - else - c=fgetc (fp); - } - vfclose (fp); - - LS=declare_sequence ( min_len_seq, max_len_seq, nseq); - LS->nseq=nseq; - - fp=vfopen (fname,"r"); - - current=0; - c=fgetc(fp); - while (c!=EOF) - { - if (c=='>') - { - fscanf_seq_name (fp,LS->name[current]); - if ( strlen(LS->name[current])>=len && strncmp ( LS->name[current], prefix, len)==0) - c=fgetc (fp); - else - { - buf2=strstr (LS->name[current], "."); - if ( buf2!=NULL) buf2[0]='\0'; - - l=strlen ( LS->name[current]); - if ( LS->name[current][l-1]==','||LS->name[current][l-1]==',')LS->name[current][l-1]='\0'; - buf=fgets ( buf, 1000, fp); - p=0; - while ((c=fgetc(fp))!='>' && c!=EOF) - if (isalpha (c)) - LS->seq[current][p++]=tolower (c); - else if ( isgraph(c)) - LS->seq[current][p++]=(c); - LS->seq[current][p]='\0'; - LS->len[current]=strlen ( LS->seq[current]); - current++; - } - } - else - c=fgetc ( fp); - } - - vfclose (fp); - return LS; - } - -Sequence* perl_reformat2fasta (char *perl_command, char *fname) - { - char command[1000]; - char *file; - - file=vtmpnam (NULL); - - check_program_is_installed ( perl_command,"", perl_command,EMAIL,IS_FATAL); - sprintf ( command, "%s %s > %s", perl_command, fname, file); - my_system ( command); - return get_fasta_sequence (file, NULL); - } -Sequence* get_fasta_sequence_num (char *fname, char *comment_out) - { - Sequence *LS; - char *buffer; - FILE *fp; - int a; - - int c; - char *name; - int clen=0; - int current=0; - int p=0; - int max; - int max_len_seq=0; - int min_len_seq=0; - int nseq=0, l=0; - - - - - int *sub; - - buffer=vcalloc (1000, sizeof (char)); - name=vcalloc ( 100, sizeof (char)); - - nseq=count_n_char_x_in_file(fname, '>'); - min_len_seq=max=count_n_char_in_file(fname); - sub=vcalloc (max+1, sizeof (int)); - - fp=vfopen (fname,"r"); - - - c=fgetc(fp); - while (c!=EOF) - { - if (c=='>') - { - fscanf_seq_name (fp,name); - while ((c=fgetc(fp))!='\n' && c!=EOF); - while ((c=fgetc(fp))!='>' && c!=EOF) - if (isalnum (c)|| is_gap(c)) - clen++; - max_len_seq=(clen> max_len_seq)?clen: max_len_seq; - min_len_seq=(clen< min_len_seq)?clen: min_len_seq; - clen=0; - } - else - c=fgetc (fp); - - } - - vfclose (fp); - LS=declare_sequence ( min_len_seq, max_len_seq,nseq); - - LS->nseq=nseq; - - fp=vfopen (fname,"r"); - current=0; - c=fgetc(fp); - while (c!=EOF) - { - if (c=='>') - { - - fscanf_seq_name (fp,LS->name[current]); - l=strlen ( LS->name[current]); - if ( LS->name[current][l-1]==','||LS->name[current][l-1]==';')LS->name[current][l-1]='\0'; - LS->name[current]=translate_name ( LS->name[current]); - a=0; - while ((c=fgetc(fp))!='\n' && c!=EOF && a<(COMMENT_SIZE-1))LS->seq_comment[current][a++]=c; - LS->seq_comment[current][a]='\0'; - - - p=0; - while ((c=fgetc(fp))!='>' && c!=EOF) - { - if (isalnum (c)) - LS->seq[current][p++]=c; - else if (is_gap(c)) - LS->seq[current][p++]=c; - } - - LS->seq[current][p]='\0'; - LS->len[current]=strlen ( LS->seq[current]); - - current++; - - } - else - c=fgetc ( fp); - } - - - vfclose (fp); - - - vfree (sub); - vfree (name); - vfree (buffer); - return LS; - } - -Sequence *get_tree_file_list ( char *fname) -{ - - char ***list; - char *tmp; - int a; - FILE *fp; - - tmp=vtmpnam (NULL); - list=file2list (fname, "\n"); - fp=vfopen (tmp, "w"); - a=0; - while (list[a] && !isspace(list[a][1][0])) - { - char *s; - s=file2string (list[a][1]); - fprintf ( fp, ">%s\n%s\n", list[a][1], (s)?s:""); - a++; - } - vfclose (fp); - free_arrayN((void ***)list, 3); - return get_fasta_tree (tmp, NULL); -} -Sequence *get_file_list ( char *fname) -{ - - char ***list; - char *tmp; - int a; - FILE *fp; - - tmp=vtmpnam (NULL); - list=file2list (fname, "\n"); - fp=vfopen (tmp, "w"); - a=0; - while (list[a] && !isspace(list[a][1][0])) - { - - fprintf ( fp, ">%s\n", list[a][1]); - a++; - } - vfclose (fp); - free_arrayN((void ***)list, 3); - return get_fasta_sequence (tmp, NULL); -} -Sequence*get_fasta_tree (char *fname, char *comment_out) -{ - Sequence *LS; - char *buffer; - FILE *fp; - int a; - - int c; - char *name; - int clen=0; - int current=0; - int p=0; - int max; - int max_len_seq=0; - int min_len_seq=0; - int nseq=0, l=0; - - - - - int *sub; - - buffer=vcalloc (1000, sizeof (char)); - name=vcalloc ( 100, sizeof (char)); - - nseq=count_n_char_x_in_file(fname, '>'); - min_len_seq=max=count_n_char_in_file(fname); - sub=vcalloc (max+1, sizeof (int)); - - fp=vfopen (fname,"r"); - - - c=fgetc(fp); - while (c!=EOF) - { - if (c=='>') - { - fscanf_seq_name (fp,name); - while ((c=fgetc(fp))!='\n' && c!=EOF); - while ((c=fgetc(fp))!='>' && c!=EOF) - if (isgraph(c)) - clen++; - max_len_seq=(clen> max_len_seq)?clen: max_len_seq; - min_len_seq=(clen< min_len_seq)?clen: min_len_seq; - clen=0; - } - else - c=fgetc (fp); - - } - - vfclose (fp); - LS=declare_sequence ( min_len_seq, max_len_seq,nseq); - - LS->nseq=nseq; - - fp=vfopen (fname,"r"); - current=0; - c=fgetc(fp); - while (c!=EOF) - { - if (c=='>') - { - - fscanf_seq_name (fp,LS->name[current]); - l=strlen ( LS->name[current]); - if ( LS->name[current][l-1]==','||LS->name[current][l-1]==';')LS->name[current][l-1]='\0'; - LS->name[current]=translate_name ( LS->name[current]); - a=0; - while ((c=fgetc(fp))!='\n' && c!=EOF && a<(COMMENT_SIZE-1))LS->seq_comment[current][a++]=c; - LS->seq_comment[current][a]='\0'; - - - p=0; - while ((c=fgetc(fp))!='>' && c!=EOF) - { - LS->seq[current][p++]=c; - } - - LS->seq[current][p]='\0'; - LS->len[current]=strlen ( LS->seq[current]); - - current++; - - } - - else - c=fgetc ( fp); - } - - - vfclose (fp); - - - vfree (sub); - vfree (name); - vfree (buffer); - - return LS; -} -Sequence* get_fasta_sequence_raw (char *fname, char *comment_out) - { - Sequence *LS; - char *buffer; - FILE *fp; - int a; - - int c; - char *name; - int clen=0; - int current=0; - int p=0; - int max; - int max_len_seq=0; - int min_len_seq=0; - int nseq=0, l=0; - - - - - int *sub; - - buffer=vcalloc (1000, sizeof (char)); - name=vcalloc ( 100, sizeof (char)); - - nseq=count_n_char_x_in_file(fname, '>'); - min_len_seq=max=count_n_char_in_file(fname); - sub=vcalloc (max+1, sizeof (int)); - - fp=vfopen (fname,"r"); - - - c=fgetc(fp); - while (c!=EOF) - { - if (c=='>') - { - fscanf_seq_name (fp,name); - while ((c=fgetc(fp))!='\n' && c!=EOF); - while ((c=fgetc(fp))!='>' && c!=EOF) - if (isgraph(c)) - clen++; - max_len_seq=(clen> max_len_seq)?clen: max_len_seq; - min_len_seq=(clen< min_len_seq)?clen: min_len_seq; - clen=0; - } - else - c=fgetc (fp); - - } - - vfclose (fp); - LS=declare_sequence ( min_len_seq, max_len_seq,nseq); - - LS->nseq=nseq; - - fp=vfopen (fname,"r"); - current=0; - c=fgetc(fp); - while (c!=EOF) - { - if (c=='>') - { - - fscanf_seq_name (fp,LS->name[current]); - l=strlen ( LS->name[current]); - if ( LS->name[current][l-1]==','||LS->name[current][l-1]==';')LS->name[current][l-1]='\0'; - LS->name[current]=translate_name ( LS->name[current]); - a=0; - while ((c=fgetc(fp))!='\n' && c!=EOF && a<(COMMENT_SIZE-1))LS->seq_comment[current][a++]=c; - LS->seq_comment[current][a]='\0'; - - - p=0; - while ((c=fgetc(fp))!='>' && c!=EOF) - { - //if (c<'A')c+='z'; - if (c!='\n')LS->seq[current][p++]=c; - } - - LS->seq[current][p]='\0'; - LS->len[current]=strlen ( LS->seq[current]); - - current++; - - } - - else - c=fgetc ( fp); - } - - - vfclose (fp); - - - vfree (sub); - vfree (name); - vfree (buffer); - return LS; - } -Sequence* get_fasta_sequence (char *fname, char *comment_out) - { - Sequence *LS; - Sequence *pdb_S; - int a; - - char *pdb_name; - - char *buffer; - FILE *fp; - - int c; - char *name; - int clen=0; - int current=0; - int p=0; - int max; - int max_len_seq=0; - int min_len_seq=0; - int nseq=0, l=0; - char *sub; - int disk=0; - int coor=0; - char *test; - - - buffer=vcalloc (1000, sizeof (char)); - name=vcalloc ( 10000, sizeof (char)); - - nseq=count_n_char_x_in_file(fname, '>'); - if (disk==1 || get_int_variable ("use_disk") || getenv ("SEQ_ON_DISK_4_TCOFFEE")){disk=1;} - if ( nseq==0) - { - vfree (buffer); vfree (name); - return NULL; - } - - min_len_seq=max=count_n_char_in_file(fname); - sub=vcalloc (max+1, sizeof (char)); - - fp=vfopen (fname,"r"); - - nseq=0; - c=fgetc(fp); - while (c!=EOF) - { - if (c=='>') - { - nseq++; - fscanf_seq_name (fp,name); - while ((c=fgetc(fp))!='\n' && c!=EOF); - while ((c=fgetc(fp))!='>' && c!=EOF) - { - if (isalnum (c)|| is_gap(c)) - sub[clen++]=c; - } - - if (strm (sub, "PDB")) - { - pdb_name=get_pdb_struc(name,0, 0); - pdb_S=get_pdb_sequence (pdb_name); - if (pdb_S) - { - clen=strlen( pdb_S->seq[0]); - free_sequence ( pdb_S,1); - } - else - clen=0; - - } - - max_len_seq=(clen> max_len_seq)?clen: max_len_seq; - min_len_seq=(clen< min_len_seq)?clen: min_len_seq; - clen=0; - } - else - c=fgetc (fp); - - } - - vfclose (fp); - - - if ( disk==0) - LS=declare_sequence ( min_len_seq, max_len_seq,nseq); - else - { - LS=declare_sequence (0,0,nseq); - for (a=0; aseq[a]=NULL; - } - LS->nseq=nseq; - - fp=vfopen (fname,"r"); - current=0; - c=fgetc(fp);coor++; - - while (c!=EOF) - { - if (c=='>') - { - coor+=fscanf_seq_name (fp, LS->name[current]); - - - l=strlen ( LS->name[current]); - if ( LS->name[current][l-1]==','||LS->name[current][l-1]==';')LS->name[current][l-1]='\0'; - LS->name[current]=translate_name ( LS->name[current]); - a=0; - while ((c=fgetc(fp))!='\n' && c!=EOF && a<(COMMENT_SIZE-1)){LS->seq_comment[current][a++]=c;coor++;} - coor++; - - LS->seq_comment[current][a]='\0'; - - p=0; - while ((c=fgetc(fp))!='>' && c!=EOF) - { - coor++; - - if (!isspace(c)) - { - if (p==0)LS->dc[current][0]=coor; - - if (disk==0)LS->seq[current][p++]=c; - else p++; - } - - LS->dc[current][1]=coor; - } - coor++; - - if ( disk==0)LS->seq[current][p]='\0'; - - if (LS->seq[current] && strm (LS->seq[current], "PDB")) - { - - pdb_name=get_pdb_struc(LS->name[current],0, 0); - pdb_S=get_pdb_sequence (pdb_name); - if (pdb_S) - { - sprintf ( LS->seq[current], "%s", pdb_S->seq[0]); - clen=strlen( pdb_S->seq[0]); - free_sequence ( pdb_S, 1); - } - else - { - add_warning (stderr, "WARNING: Could not fetch PDB file: %s", pdb_name); - } - } - - - LS->len[current]=p; - current++; - } - - else - { - c=fgetc ( fp); - coor++; - } - } - - vfclose (fp); - vfree (sub); - vfree (name); - vfree (buffer); - //LS=clean_sequence (LS); - - return LS; - } - -Sequence* get_sub_fasta_sequence (char *fname, char *comment_out) - { - Sequence *LS; - - FILE *fp; - - int c; - char name[100]; - int clen=0; - int current=0; - int p=0; - int max; - int max_len_seq=0; - int min_len_seq=0; - int nseq=0, l=0; - char *buf; - - - - int *sub; - - nseq=count_n_char_x_in_file(fname, '>'); - min_len_seq=max=count_n_char_in_file(fname); - sub=vcalloc (max+1, sizeof (int)); - buf=vcalloc ( max+1, sizeof (char)); - fp=vfopen (fname,"r"); - - - c=fgetc(fp); - while (c!=EOF) - { - if (c=='>') - { - fscanf_seq_name (fp,name); - while ((c=fgetc(fp))!='\n' && c!=EOF); - buf=fgets ( buf,max, fp); - while ((c=fgetc(fp))!='>' && c!=EOF) - if (isalnum (c)|| is_gap(c)) - clen++; - max_len_seq=(clen> max_len_seq)?clen: max_len_seq; - min_len_seq=(clen< min_len_seq)?clen: min_len_seq; - clen=0; - } - else - c=fgetc (fp); - - } - - vfclose (fp); - LS=declare_sequence ( min_len_seq, max_len_seq,nseq); - LS->nseq=nseq; - - fp=vfopen (fname,"r"); - current=0; - c=fgetc(fp); - while (c!=EOF) - { - if (c=='>') - { - - fscanf_seq_name (fp,LS->name[current]); - l=strlen ( LS->name[current]); - if ( LS->name[current][l-1]==','||LS->name[current][l-1]==';')LS->name[current][l-1]='\0'; - LS->name[current]=translate_name ( LS->name[current]); - while ((c=fgetc(fp))!='\n' && c!=EOF); - - p=0; - while ((c=fgetc(fp))!='>' && c!=EOF) - { - if (isalpha (c)) - LS->seq[current][p++]=tolower (c); - else if (is_gap(c)) - LS->seq[current][p++]=(c); - } - - LS->seq[current][p]='\0'; - LS->len[current]=strlen ( LS->seq[current]); - - current++; - - } - - else - c=fgetc ( fp); - } - - - vfclose (fp); - - - vfree (sub); - return LS; - } -Sequence* get_pir_sequence (char *fname, char *comment_out) - { - Sequence *LS; - - FILE *fp; - int c; - - char name[100]; - int clen=0; - int current=0; - int p=0; - int max_len_seq=0; - int min_len_seq=999999; - int nseq=0, l=0; - char *buf; - - buf=vcalloc ( 1000, sizeof (char)); - if ((fp=vfopen (fname,"r"))==NULL) - {printf ( "\nCOULDN'T OPEN %s",fname); - myexit(EXIT_FAILURE); - } - c=fgetc(fp); - while (c!=EOF) - { - if (c=='>') - { - if ( (c=fgetc(fp))=='P')while ( (c=fgetc(fp))!=';'); - else ungetc ( c, fp); - fscanf_seq_name (fp,name); - - buf=fgets ( buf, 1000, fp); - while ((c=fgetc(fp))!='>' && c!=EOF) - if (isalnum (c)|| is_gap(c)) - clen++; - max_len_seq=(clen> max_len_seq)?clen: max_len_seq; - min_len_seq=(clen< min_len_seq)?clen: min_len_seq; - nseq++; - clen=0; - } - else - c=fgetc (fp); - } - vfclose (fp); - - - - LS=declare_sequence ( min_len_seq, max_len_seq,nseq); - LS->nseq=nseq; - - fp=vfopen (fname,"r"); - - current=0; - c=fgetc(fp); - while (c!=EOF) - { - if (c=='>') - { - if ( (c=fgetc(fp))=='P')while ( (c=fgetc(fp))!=';'); - else ungetc ( c, fp); - - fscanf_seq_name (fp,LS->name[current]); - - l=strlen ( LS->name[current]); - if ( LS->name[current][l-1]==','||LS->name[current][l-1]==',')LS->name[current][l-1]='\0'; - LS->name[current]=translate_name ( LS->name[current]); - buf=fgets ( buf, 1000, fp); - - LS->seq_comment[current]=fgets ( LS->seq_comment[current],COMMENT_SIZE-1, fp); - LS->seq_comment[current][strlen(LS->seq_comment[current])-1]='\0'; - p=0; - while ((c=fgetc(fp))!='>' && c!=EOF) - if (isalpha (c)) - LS->seq[current][p++]=tolower (c); - else if ( !isspace(c) && c!='*') - LS->seq[current][p++]=(c); - LS->seq[current][p]='\0'; - LS->len[current]=strlen ( LS->seq[current]); - current++; - } - else - c=fgetc ( fp); - } - - vfclose (fp); - if (comment_out!=NULL) output_pir_check ( comment_out,LS->nseq, LS->seq_comment); - return LS; - } - -Sequence* get_gor_sequence (char *fname, char *comment_out) - { - Sequence *LS; - - FILE *fp; - int c; - - char name[100]; - int clen=0; - int current=0; - int p=0; - int max_len_seq=0; - int min_len_seq=99999; - int nseq=0; - char *buf; - - buf=vcalloc ( 1000, sizeof (char)); - if ((fp=vfopen (fname,"r"))==NULL) - {printf ( "\nCOULDN'T OPEN %s",fname); - myexit(EXIT_FAILURE); - } - c=fgetc(fp); - while (c!=EOF) - { - if (c=='!') - { - fscanf_seq_name (fp,name); - - buf=fgets ( buf, 1000, fp); - while ((c=fgetc(fp))!='!' && c!=EOF) - if (isalnum (c)|| is_gap(c)) - clen++; - max_len_seq=(clen> max_len_seq)?clen: max_len_seq; - min_len_seq=(clen< min_len_seq)?clen: min_len_seq; - nseq++; - clen=0; - } - else - c=fgetc (fp); - } - vfclose (fp); - - LS=declare_sequence ( min_len_seq, max_len_seq,nseq); - LS->nseq=nseq; - - fp=vfopen (fname,"r"); - - current=0; - c=fgetc(fp); - while (c!=EOF) - { - if (c=='!') - { - - - fscanf_seq_name (fp,LS->name[current]); - LS->name[current]=translate_name ( LS->name[current]); - buf=fgets ( buf, 1000, fp); - - p=0; - while ((c=fgetc(fp))!='!' && c!=EOF) - if (isalnum (c)|| is_gap(c)) - LS->seq[current][p++]=tolower (c); - - LS->seq[current][p]='\0'; - LS->len[current]=strlen ( LS->seq[current]); - current++; - } - else - c=fgetc ( fp); - } - - vfclose (fp); - - return LS; - } -Sequence* get_swissprot_sequence (char *fname, char *comment_out) - { - Sequence *LS; - FILE *fp; - int c; - char *buf; - int nseq=0; - int len, max_len_seq=0, min_len_seq=0; - - if ( !check_file_exists(fname)) - {printf ( "\nCOULDN'T OPEN %s",fname); - myexit(EXIT_FAILURE); - } - - buf=vcalloc (LONG_STRING+1, sizeof (char)); - fp=NULL; - while ( (fp=find_token_in_file(fname,fp,"\nSQ"))) - { - nseq++; - fgets (buf, LONG_STRING, fp); - len=0; - while ((c=fgetc(fp))!='/')if(isalpha(c))len++; - if ( max_len_seq==0)max_len_seq=min_len_seq=len; - else - { - max_len_seq=MAX(len, max_len_seq); - min_len_seq=MIN(len, min_len_seq); - } - } - - LS=declare_sequence ( min_len_seq, max_len_seq,nseq); - LS->nseq=0; - - fp=NULL; - while ( (fp=find_token_in_file(fname,fp,"\nID"))) - { - fscanf_seq_name (fp, LS->name[LS->nseq]); - fp=find_token_in_file(fname,fp,"\nSQ"); - fgets (buf, LONG_STRING, fp); - while ((c=fgetc(fp))!='/')if (isalpha(c))LS->seq[LS->nseq][LS->len[LS->nseq]++]=c; - LS->seq[LS->nseq][LS->len[LS->nseq]]='\0'; - LS->nseq++; - } - - - return LS; - } -int fscanf_seq_name ( FILE *fp, char *sname) -{ - static char *name; - int r; - if ( !sname) return 0; - - if ( !name)name=vcalloc ( 10000, sizeof (char)); - fscanf (fp, "%s", name); - r=strlen (name); - if ( strlen (name)>MAXNAMES) - add_warning (stderr, "\nWARNING: Seq Name Too long: [%s]. Truncated to %d", name, MAXNAMES); - name[MAXNAMES]='\0'; - sprintf ( sname, "%s", name); - return r; -} - -/*******************************************************************************************/ -/* */ -/* */ -/* INPUT ALN */ -/* */ -/***************************************************************************************** */ -void undump_msa ( Alignment *A, char *tmp) -{ - FILE *fp; - int m; - char *buf; - int index; - - if ( !A || !tmp || !check_file_exists (tmp))return; - m=measure_longest_line_in_file (tmp ); - A=realloc_aln2 ( A,A->max_n_seq,m+1); - - buf=vcalloc (m+1, sizeof (char)); - fp=vfopen (tmp, "r"); - while (fscanf (fp, "%d %s\n", &index, buf)==2) - { - sprintf ( A->seq_al[index], "%s", buf); - } - vfclose (fp); - vfree (buf); -} -void dump_msa ( char *file,Alignment *A, int nseq, int *lseq) -{ - FILE *fp; - int a; - fp=vfopen (file, "w"); - for (a=0; aseq_al[lseq[a]]); - vfclose (fp); -} - -void read_aln (char *file_name, Alignment *A) -{ - char *tmp_name; - Sequence *S; - - - tmp_name=vtmpnam (NULL); - if (printf_system ( "clustalw_aln2fasta_aln.pl %s > %s",file_name, tmp_name)!=EXIT_SUCCESS) - { - printf_exit ( EXIT_FAILURE, stderr, "Could Not Read File %s [FATAL:%s]\n", file_name, PROGRAM); - } - else - { - S=get_fasta_sequence ( tmp_name,NULL); - A=seq2aln (S, A, 0); - } - return; -} -void read_stockholm_aln (char *file_name, Alignment *A) -{ - char *tmp_name; - Sequence *S; - - - tmp_name=vtmpnam (NULL); - if (printf_system ( "clustalw_aln2fasta_aln.pl %s > %s",file_name, tmp_name)!=EXIT_SUCCESS) - { - printf_exit ( EXIT_FAILURE, stderr, "Could Not Read File %s [FATAL:%s]\n", file_name, PROGRAM); - } - else - { - int a; - S=get_fasta_sequence ( tmp_name,NULL); - for (a=0; anseq; a++) - { - if (strstr (S->name[a], "_stockholm")) - { - substitute ( S->name[a], "_stockholmspace_", " "); - substitute ( S->name[a], "_stockholmhasch_", "#"); - } - } - A=seq2aln (S, A, 0); - } - return; -} -Alignment* read_blast_aln ( char *file_name, Alignment *A) -{ - char *tmp_name; - Sequence *S; - int type; - int a; - - if ( !(type=is_blast_file (file_name))) - { - myexit (EXIT_FAILURE); - } - tmp_name=vtmpnam ( NULL); - if (type==BLAST_TXT) - { - printf_system("cat %s | blast_aln2fasta_aln.pl | fasta_aln2fasta_aln_unique_name.pl >%s", file_name, tmp_name); - } - else if (type==BLAST_XML) - { - - printf_system("blast_xml2fasta_aln.pl %s >%s", file_name, tmp_name); - } - - main_read_aln (tmp_name, A); - return A; -} - - -void read_number_aln ( char *file_name, Alignment *A) - { - FILE *fp, *fp2; - int * ptr_aln; - int a,b,d; - int c; - char *buf=NULL; - - int tot=0; - int flag=0; - char *fname; - int n_comment=0; - - int nseq=0; - int max_len=0; - - - fp=vfopen ( file_name, "r"); - - fname=vtmpnam(NULL); - fp2=vfopen ( fname, "w"); - while ( (c=fgetc(fp))!=EOF) - { - fprintf ( fp2, "%c", c); - } - vfclose (fp); - vfclose (fp2); - - - /*1 Count The number of sequences*/ - fp=vfopen ( fname, "r"); - buf=vfgets ( buf,fp); - if ( !isblanc (buf)); - while ( isblanc (buf)) - { - buf=vfgets ( buf, fp); - } - while (!isblanc (buf)) - { - buf=vfgets ( buf,fp); - } - while ( !isalnum ((c=fgetc(fp)))) - { - ungetc(c,fp); - buf=vfgets ( buf,fp); - } - - if ( c!='\n')ungetc(c,fp); - - while ( isalnum ((c=fgetc(fp)))) - { - ungetc(c,fp); - a=0; - while ( isgraph ((c=fgetc(fp)))); - nseq++; - buf=vfgets ( buf, fp); - } - vfclose (fp); - - /*DONE*/ - /*2 get_max_len*/ - max_len=count_n_char_in_file(fname)/nseq; - A=realloc_alignment2( A, nseq+1, max_len+1); - - /*DONE*/ - - - fp=vfopen ( fname, "r"); - buf=vfgets ( buf, fp); - if ( !isblanc (buf))sprintf (A->aln_comment[n_comment++], "%s", buf); - while ( isblanc (buf)) - { - buf=vfgets ( buf,fp); - } - while (!isblanc (buf)) - { - buf=vfgets ( buf, fp); - sprintf ( A->aln_comment[n_comment++], "%s", buf); - - } - while ( !isalnum ((c=fgetc(fp)))) - { - ungetc(c,fp); - buf=vfgets ( buf, fp); - - } - - if ( c!='\n')ungetc(c,fp); - - while ( isalnum ((c=fgetc(fp)))) - { - ungetc(c,fp); - - fscanf_seq_name (fp, A->name[A->nseq]); - - if ( name_is_in_list (A->name[A->nseq], A->name, A->nseq, 100)!=-1) - { - fprintf ( stderr, "\nWARNING (read_number_aln): Sequence %s Duplicated in File %s ", A->name[A->nseq], A->file[A->nseq]); - if (!getenv("ALLOW_DUPLICATE")) - { - fprintf ( stderr, " [FATAL:%s]\n", PROGRAM); - myexit (EXIT_FAILURE); - } - } - A->nseq++; - buf=vfgets ( buf,fp); - } - - vfclose (fp); - - - - if ((fp=vfopen ( fname, "r"))==NULL) - printf ( "\nCOULDN'T READ %s", fname); - - ptr_aln=vcalloc ( A->nseq, sizeof(int)); - while ( flag==0) - { - while ( (c=fgetc(fp))!='\n'); - if ( (c=fgetc(fp))=='\n') - flag=1; - } - while ( !isalnum(c=fgetc(fp))); - ungetc ( c, fp); - while ( c!=EOF) - { - tot=0; - while(tot< A->nseq && c!=EOF) - { - b=0; - while ( !isgraph (c=fgetc(fp)) && c!=EOF); - if ( c!=EOF)ungetc(c, fp); - while ( isgraph((buf[b++]=fgetc(fp)))); - buf[b-1]='\0'; - for ( a=-1,d=0; d< A->nseq; d++) - if ( strcmp (A->name[d], buf)==0) - {a=d; - tot++; - } - - if ( a==-1) while ( (c=fgetc(fp))!='\n' && c!=EOF); - else - { - while ( (c=fgetc(fp))!='\n') - { - if ( isgraph(c) || is_gap(c)) - {if ( isalpha(c)) - c=(A->residue_case==2)?c:tolower(c); - - if (!isspace(c))A->seq_al[a][ptr_aln[a]++]=c; - } - } - } - } - while ( !isalnum(c=getc(fp)) && c!=EOF); - if ( c!=EOF) - ungetc (c, fp); - } - - vfclose (fp); - - - for ( a=0; a< A->nseq; a++) - {A->seq_al[a][ptr_aln[a]]='\0'; - A->order[a][0]=a; - A->order[a][1]=0; - } - - A->len_aln= strlen(A->seq_al[0]); - - vfree (buf); - vfree(ptr_aln); - vremove (fname); - - } -void read_amps_aln ( char *in_file, Alignment *A) - { - FILE *fp; - int a, b, c, cont=1; - A->nseq=get_amps_seq_name ( A->name, in_file); - - fp=vfopen ( in_file, "r"); - fp=set_fp_id(fp, "1*"); - while ( (c=fgetc(fp))!='\n'); - b=0; - while ( cont==1) - { - c=fgetc ( fp); - c=fgetc(fp); - if ( c=='*') - { - cont=0; - for ( a=0; anseq; a++) - A->seq_al[a][b]='\0'; - A->len_aln=b; - } - - else - { - ungetc (c, fp); - for ( a=0; a< A->nseq; a++) - { - c=fgetc(fp); - if ( c==' ')A->seq_al[a][b]='-'; - else - { - A->seq_al[a][b]=c; - A->len[a]++; - } - } - while ((c=fgetc(fp))!='\n'); - b++; - } - } - } - - - - - - -int get_amps_seq_name ( char **name, char* fname) - { - FILE *fp; - int nseq=0; - - fp=vfopen ( fname, "r"); - fp=set_fp_id ( fp, "Index"); - while ( (fgetc(fp))!='\n'); - while ( isspace(fgetc(fp))) - {fscanf (fp, "%*d >%s", name[nseq++]); - while ( (fgetc(fp))!='\n'); - } - vfclose ( fp); - return nseq; - } -Alignment * read_gotoh_aln ( char *fname, Alignment *A) - { - FILE *fp; - int * ptr_aln; - int a,b,d,e; - - - char *buf; - char buf2[VERY_LONG_STRING+1]; - char buf3[VERY_LONG_STRING+1]; - char buf4[VERY_LONG_STRING+1]; - - int tot=0; - - int l; - int nseq, max_len; - - - if ( !check_file_exists (fname))return NULL; - fp=vfopen ( fname, "r"); - -/*1 GET THE NUMBER OF SEQUENCES*/ - nseq=0; - buf=vcalloc ( VERY_LONG_STRING+1, sizeof (char)); - while ( isblanc (buf=fgets ( buf, VERY_LONG_STRING, fp))); - while (!isblanc (buf=fgets ( buf, VERY_LONG_STRING, fp))); - while ( isblanc (buf=fgets ( buf, VERY_LONG_STRING, fp))); - while ( !isblanc ( buf) && buf!=NULL) - { - a=-1; - d=sscanf ( buf, "%d %s %s %s", &a, buf2, A->name[A->nseq],buf3); - if ( a!=-1) - { - if ( name_is_in_list (A->name[A->nseq], A->name, A->nseq, 100)!=-1) - { - fprintf ( stderr, "\nWARNING (get_amps_seq_name): Sequence %s Duplicated in File %s ", A->name[A->nseq], A->file[A->nseq]); - if (!getenv("ALLOW_DUPLICATE")) - { - fprintf ( stderr, " [FATAL:%s]\n", PROGRAM); - myexit (EXIT_FAILURE); - } - } - nseq++; - fgets(buf, VERY_LONG_STRING, fp); - } - else ( buf=NULL); - } - vfclose (fp); -/*2 Get the MAX Len and Reallocate*/ - max_len=count_n_char_in_file(fname)/nseq; - A=realloc_aln2( A, nseq+1, max_len+1); -/*3 Get The Sequences Names*/ - A->nseq=0; - fp=vfopen ( fname, "r"); - while ( isblanc (buf=fgets ( buf, VERY_LONG_STRING, fp))); - while (!isblanc (buf=fgets ( buf, VERY_LONG_STRING, fp))); - while ( isblanc (buf=fgets ( buf, VERY_LONG_STRING, fp))); - while ( !isblanc ( buf) && buf!=NULL) - { - a=-1; - d=sscanf ( buf, "%d %s %s %s", &a, buf2, A->name[A->nseq],buf3); - if ( a!=-1) - { - if ( d==4)sprintf (A->name[A->nseq],"%s", buf3); - A->nseq++; - fgets(buf, VERY_LONG_STRING, fp); - } - else ( buf=NULL); - } - vfclose (fp); - -/*READ THE ALN*/ - fp=vfopen ( fname, "r"); - - buf=vcalloc ( VERY_LONG_STRING+1, sizeof (char));; - ptr_aln=vcalloc ( A->nseq, sizeof(int)); - - while ( isblanc (buf=fgets ( buf, VERY_LONG_STRING, fp))); - while (!isblanc (buf=fgets ( buf, VERY_LONG_STRING, fp))); - - - while ( isblanc (buf=fgets ( buf, VERY_LONG_STRING, fp))); - - while (buf!=NULL) - { - tot=0; - while(tot< A->nseq) - { - - e=sscanf (buf, "%d %s %s %s", &e, buf2, buf3, buf4); - if ( e==4)sprintf( buf3, "%s", buf4); - - - for ( d=0; d< A->nseq; d++) - { - - if ( strcmp (A->name[d], buf3)==0) - {a=d; - tot++; - } - } - l=strlen (buf2); - if ( buf2[l-1]=='|')l--; - buf2[l]='\0'; - - for (b=0; bseq_al[a][ptr_aln[a]++]=(A->residue_case==2)?buf2[b]:tolower (buf2[b]); - } - buf=fgets(buf, VERY_LONG_STRING, fp); - } - if ( buf!=NULL) - { - buf=fgets(buf, VERY_LONG_STRING, fp); - while ( isblanc (buf) && buf!=NULL) - { - buf=fgets ( buf, VERY_LONG_STRING, fp); - } - } - - } - - vfclose (fp); - - - for ( a=0; a< A->nseq; a++) - {A->seq_al[a][ptr_aln[a]]='\0'; - } - - A->len_aln= strlen(A->seq_al[0]); - - - - for ( a=0; a< A->nseq; a++) - { - for ( b=0; b< A->len_aln; b++) - A->len[a]+=1-is_gap(A->seq_al[a][b]); - } - for ( a=0, b=0; a< A->len_aln; a++) - { - if ( !is_gap(A->seq_al[0][a]) &&!is_gap(A->seq_al[1][a]))b++; - } - return A; - } - - - - - -void read_msf_aln ( char *fname, Alignment *A) - { - char command[1000]; - char *tmp_name; - Sequence *S; - - tmp_name=vtmpnam(NULL); - sprintf ( command, "msf_aln2fasta_aln.pl %s > %s", fname, tmp_name); - - if ( my_system (command)!=EXIT_SUCCESS) - { - fprintf ( stderr, "\nERROR: file %s does not have a legal msf format [FATAL:%s]", fname,PROGRAM); - myexit (EXIT_FAILURE); - } - - S=get_fasta_sequence ( tmp_name,NULL); - A=seq2aln (S, A, 0); - vremove (tmp_name); - return; - } - -/**************************************************************************************************/ -/*************************************REFORMAT OUT*************************************************/ -/**************************************************************************************************/ -/*******************************************************************************************/ -/* */ -/* */ -/* OUTPUT MATRICES */ -/* */ -/***************************************************************************************** */ - - - -int output_freq_mat ( char *outfile, Alignment *A) - { /* - function documentation: start - - int output_freq_mat ( char *outfile, Aligmnent *A) - - This function counts the number of residues in each column of an alignment (Prot) - It outputs these values in the following format - - A | 0 0 0 1 0 - B | 1 0 0 0 1 - - | 0 1 1 0 0 - - This format can be piped into: - The routine used for computing the p-value gmat-inf-gc-v2c - - function documentation: end - */ - - int a, b; - int **freq_mat; - FILE *fp; - - - freq_mat=aln2count_mat (A); - - fp=vfopen ( outfile, "w"); - for ( b=0; b< 26; b++) - { - fprintf (fp, "%c |", 'A'+b); - for ( a=0; a< A->len_aln; a++)fprintf (fp,"%d ", freq_mat[b][a]); - fprintf (fp, "\n"); - } - fprintf (fp, "- |"); - for ( a=0; a< A->len_aln; a++)fprintf (fp,"%d ", freq_mat[26][a]); - - free_int (freq_mat, -1); - vfclose ( fp); - return 1; - } -/*******************************************************************************************/ -/* */ -/* */ -/* OUTPUT P-Values */ -/* */ -/***************************************************************************************** */ -float output_maln_pval ( char *outfile, Alignment *A) - { - /* - function documentation: start - float output_maln_pval ( char *outfile, Aligmnent *A) - - This function outputs the p-value of a multiple alignmnet as described - in Hertz, Stormo, Bioinformatics, 15-7/8, 563/577 - ftp beagle.colorado.edu /pub/cosensus - Locally - packages/consensus/gmat-inf-gc-v2c - - - The routine used for computing the p-value is the program gmat-inf-gc-v2c - function documentation: end - */ - - - char *mat; - char *result; - FILE *fp; - float value; - char command[LONG_STRING]; - char string[STRING]; - mat=vtmpnam (NULL); - result=vtmpnam (NULL); - - output_freq_mat (mat,A); - sprintf ( command, "more %s | gmat-inf-gc-v2c -A abcdefghijklmnopqrstuvwxyz> %s",mat, result); - my_system ( command); - - if ( !check_file_exists(result))return 0; - fp=find_token_in_file ( result, NULL, "ln(p-value):"); - - fscanf ( fp, "%s",string); - value=atof ( string); - vfclose ( fp); - - vremove ( mat); - vremove ( result); - - fp=vfopen ( outfile, "w"); - fprintf ( fp, "%.6f\n", value); - vfclose ( fp); - - return value; - } - - -/*******************************************************************************************/ -/* */ -/* */ -/* OUTPUT WEIGHTS */ -/* */ -/***************************************************************************************** */ -int output_seq_weights ( Weights *W, char *wfile) - { - FILE*fp; - int a; - - if ( W==NULL)return 0; - - fp=vfopen (wfile, "w"); - if ( fp==NULL)return 0; - - - for ( a=0; a< W->nseq; a++) - { - - fprintf ( fp, "%s %.2f\n", W->seq_name[a],W->SEQ_W[a]); - } - vfclose ( fp); - return 1; - } -void output_pw_weights4saga ( Weights *W, float **w_list, char *wfile) - { - FILE*fp; - int a, b; - fp=vfopen (wfile, "w"); - - fprintf ( fp, "%s\n$\n", W->comments); - for ( a=0; a< W->nseq-1; a++) - { - for (b=a+1; b< W->nseq; b++) - { - fprintf ( fp, "%s %s %f\n", W->seq_name[a], W->seq_name[b],w_list[a][b]); - } - } - fprintf ( fp, "$\n"); - vfclose ( fp); - } - -FILE * display_weights (Weights *W, FILE *fp) -{ - int a; - int max_len; - - if ( W==NULL) - { - fprintf ( fp, "\n\nUN-WEIGHTED MODE: EVERY SEQUENCE WEIGHTS 1\n"); - return fp; - } - fprintf ( fp, "\n\nWEIGHTED MODE:%s\n\n", (W)->mode); - for ( a=0, max_len=0; a< W->nseq; a++)max_len=MAX(max_len, strlen (W->seq_name[a])); - for ( a=0; a< (W->nseq); a++) - { - fprintf ( fp, "\t%*s %.2f\n", max_len,(W)->seq_name[a],W->SEQ_W[a]); - } - fprintf ( fp, "\n"); - return fp; -} - -/*******************************************************************************************/ -/* */ -/* */ -/* OUTPUT SEQ */ -/* */ -/***************************************************************************************** */ -int ** input_similarities (char *file, Alignment *A, char *mode) -{ - int a, b, i, n; - int **sim; - float score; - char name[1000]; - FILE *fp=NULL; - char *buf1=NULL, *buf2=NULL; - int new_aln=0; - - - - if ( !check_file_exists (file) || !is_distance_matrix_file (file) ||!is_similarity_matrix_file (file) ) - { - return NULL; - } - - if ( A) - { - fp=vfopen (file, "r"); - while ((buf2=vfgets (buf1,fp))!=NULL ) - { - if (strstr (buf2, "SEQ_INDEX")) - { - buf1=buf2; - sscanf (buf1, "# SEQ_INDEX %s %d",name, &i); - if ( !strm (A->name[i], name)) - { - return NULL; - } - } - } - vfclose (fp); - } - else - { - - A=similarities_file2aln(file); - new_aln=1; - } - - sim=declare_int ( A->nseq, A->nseq); - for ( a=0; anseq; a++)sim[a][a]=100; - - - fp=find_token_in_file (file, NULL, "PW_SEQ_DISTANCES"); - fp=find_token_in_file (file, fp, "BOT"); - while ((buf2=vfgets (buf1,fp))!=NULL ) - { - if ( !(strstr (buf2, "BOT\t") || strstr (buf2, "TOP\t")))continue; - buf1=buf2; - n=sscanf (buf1, "%*s %d %d %f", &a, &b, &score); - if ( n!=3) - { - free_int (sim, -1); - return NULL; - } - else sim[a][b]=sim[b][a]=(int)score; - } - vfclose (fp); - vfree (buf1); - if (new_aln)free_aln(A); - return sim; -} - -Alignment * similarities_file2aln ( char *file) -{ - int nseq=0, i; - FILE *fp; - char name[1000]; - Alignment *A; - - - fp=vfopen (file, "r"); - while ((fp=find_token_in_file (file,fp, "SEQ_INDEX")))nseq++; - A=declare_aln2 (nseq+1, 10); - - while ((fp=find_token_in_file (file,fp, "SEQ_INDEX"))) - { - fscanf (fp, "%s %d", name,&i); - sprintf ( A->name[i], "%s", name); - } - A->nseq=nseq; - - return A; -} - -void output_similarities (char *file, Alignment *A, char *mode) -{ - float s; - float *tot; - float bigtot=0; - int n, max; - FILE *fp; - int a, b; - char *p; - int **M=NULL; - for (max=0, a=0; a< A->nseq; a++)max=MAX(max,(strlen (A->name[a]))); - - - tot=vcalloc ( A->nseq, sizeof (float)); - fp=vfopen (file, "w"); - fprintf (fp, "# TC_SIMILARITY_MATRIX_FORMAT_01\n"); - for ( a=0; anseq; a++) - fprintf ( fp, "# SEQ_INDEX %s %d\n",A->name[a],a); - fprintf ( fp, "# PW_SEQ_DISTANCES \n"); - for (n=0,a=0;a< A->nseq-1; a++) - { - for ( b=a+1; bnseq; b++, n++) - { - if (strstr (mode, "_sarmat2")) - { - s=get_sar_sim (A->seq_al[a], A->seq_al[b]); - } - else if (strstr (mode, "_sar")) - { - s=get_sar_sim (A->seq_al[a], A->seq_al[b]); - } - else if ( (p=strstr (mode, "_memory_"))) - { - int **sim; - sscanf ( p, "_memory_%ld", (long int*)&sim); - s=sim[a][b]; - } - else if ( strstr (mode, "_idscore") || strstr ( mode, "_covscore")) - { - static Sequence *S; - if (a==0 && b==1) - { - free_sequence (S, -1); - if ( strstr (mode, "idscoreDNA")) - M=read_matrice ("idmat"); - else - M=read_matrice("blosum62mt"); - - S=aln2seq(A); - } - if ( strstr (mode, "_idscore"))s=idscore_pairseq(S->seq[a], S->seq[b], -10,-1, M, "sim"); - else s=idscore_pairseq(S->seq[a], S->seq[b], -10,-1, M, "cov"); - } - else if ( strstr (mode, "cov")) - { - s=get_seq_sim ( A->seq_al[a], A->seq_al[b],GAP_LIST, "cov"); - } - else - { - s=get_seq_fsim2 (A->seq_al[a], A->seq_al[b],GAP_LIST, mode); - } - fprintf (fp, "BOT\t %4d %4d\t %5.2f %*s\t %*s\t %5.2f\n", a,b,s,max,A->name[a], max, A->name[b], s); - fprintf (fp, "TOP\t %4d %4d\t %5.2f %*s\t %*s\t %5.2f\n", b,a,s,max,A->name[b], max, A->name[a], s); - tot[a]+=s; - tot[b]+=s; - bigtot+=s; - } - } - for ( a=0; a< A->nseq; a++) - { - fprintf (fp, "AVG\t %d\t %*s\t %*s\t %5.2f\n", a,max,A->name[a], max, "*", tot[a]/(A->nseq-1)); - - } - vfree (tot);free_int (M, -1); - fprintf (fp, "TOT\t %*s\t %*s\t %5.2f\n", max,"TOT", max, "*", bigtot/n); - vfclose (fp); -} - -void output_similarities_pw (char *file, Alignment *A, Alignment *B,char *mode) -{ - float s; - float *tot; - float bigtot=0; - int n, max; - FILE *fp; - int a, b; - - int **M=NULL; - Sequence *SA, *SB; - - if ( strstr (mode, "idscoreDNA")) - M=read_matrice ("idmat"); - else - M=read_matrice("blosum62mt"); - - SA=aln2seq(A); - SB=aln2seq(B); - - for (max=0, a=0; a< A->nseq; a++)max=MAX(max,(strlen (A->name[a]))); - for (a=0; a< B->nseq; a++)max=MAX(max,(strlen (B->name[a]))); - - - tot=vcalloc ( A->nseq, sizeof (float)); - fp=vfopen (file, "w"); - fprintf (fp, "# TC_SIMILARITY_MATRIX_FORMAT_01\n"); - for ( a=0; anseq; a++) - fprintf ( fp, "# SEQ_INDEX %s %d\n",A->name[a],a); - fprintf ( fp, "# PW_SEQ_DISTANCES \n"); - for (n=0,a=0;a< A->nseq; a++) - { - for ( b=0; bnseq; b++, n++) - { - s=idscore_pairseq(SA->seq[a], SB->seq[b], -10,-1, M, "sim"); - fprintf (fp, "BOT\t %4d %4d\t %5.2f %*s\t %*s\t %5.2f\n", a,b,s,max,A->name[a], max, B->name[b], s); - fprintf (fp, "TOP\t %4d %4d\t %5.2f %*s\t %*s\t %5.2f\n", b,a,s,max,B->name[b], max, A->name[a], s); - tot[a]+=s; - tot[b]+=s; - bigtot+=s; - } - } - - for ( a=0; a< A->nseq; a++) - { - fprintf (fp, "AVG\t %d\t %*s\t %*s\t %5.2f\n", a,max,A->name[a], max, "*", tot[a]/(A->nseq-1)); - } - vfree (tot);free_int (M, -1); - fprintf (fp, "TOT\t %*s\t %*s\t %5.2f\n", max,"TOT", max, "*", bigtot/n); - vfclose (fp); -} -void output_conservation_statistics ( char *file, Alignment *A) -{ - int a, b, c,c1, c2; - double **tot; - char aa[1000]; - int naa; - - sprintf (aa, "%s", BLAST_AA_ALPHABET); - naa=strlen (aa); - - tot=declare_double (256, 256); - - - for ( a=0; anseq; a+=2) - { - b=a+1; - for ( c=0; clen_aln; c++) - { - c1=tolower (A->seq_al[a][c]); - c2=tolower (A->seq_al[b][c]); - if ( !is_gap(c1) && !is_gap(c2)) - { - tot[c1][c2]++; - tot[c2][c1]++; - tot[c1][0]++; - tot[c2][0]++; - tot[0][0]++; - } - } - } - - fprintf ( stdout, "# BLAST_MATRIX FORMAT\n#ALPHABET=%s\n",aa); - for (a=0; anseq; a++)maxname=MAX(strlen(A->name[a]), maxname); - maxname++; - - - fp=vfopen (file, "w"); - - if (mode[0]=='h') - { - b=0; - while ((c=mode[b++])!='\0') - { - if ( c=='n') fprintf (fp, "%-*s ",maxname,"name"); - if ( c=='l') fprintf (fp, "%-*s ",5,"nres"); - if ( c=='g') fprintf (fp, "%-*s ",5,"ngap"); - if ( c=='t') fprintf (fp, "%-*s ",5,"len"); - } - if (is_in_set ( c, "nlgt")) fprintf (fp, "\n"); - mode++; - } - b=0; - while ((c=mode[b++])!='\0') - { - if ( c=='n')break; - if ( c=='N'){d=1;fprintf (fp, "NSEQ %d ", A->nseq);} - if ( c=='L'){d=1;fprintf (fp, "LEN %d ", A->len_aln);} - } - if ( d) fprintf (fp, "\n"); - - for (a=0; anseq; a++) - { - b=0; - d=0; - while ((c=mode[b++])!='\0') - { - if (is_in_set ( c, "nlgt"))d=1; - - if (c=='n'){d=1;fprintf ( fp, "%-*s ", maxname,A->name[a]);} - if (c=='l') - { - for (n=0,d=0; dlen_aln; d++)n+=!is_gap(A->seq_al[a][d]); - fprintf ( fp, "%-5d ",n); - } - if (c=='g') - { - for (n=0,d=0; dlen_aln; d++)n+=((is_gap(A->seq_al[a][d]) && !is_gap(A->seq_al[a][d+1]))||(is_gap(A->seq_al[a][d])&& A->seq_al[a][d+1]=='\0')) ; - fprintf ( fp, "%-5d ",n); - } - if (c=='t') - { - fprintf ( fp, "%-5d ",strlen (A->seq_al[a])); - } - if (c=='N' && d) - { - fprintf ( fp, "%-5d ",A->nseq); - } - if (c=='L'&& d) - { - fprintf ( fp, "%-5d ",A->len_aln); - } - } - if (d)fprintf ( fp, "\n"); - } - vfclose (fp); - } - -int output_age_matrix ( char *outfile, int val) -{ - int **mat; - int a, b; - char alp[]="abcdefghij-"; - int naa; - - mat=declare_int ( 256, 256); - naa=strlen (alp); - for ( a=0; anseq; a++) - { - ungap (A->seq_al[a]); - lower_string (A->seq_al[a]); - s=A->seq_al[a]; - l=strlen (s); - if ( s[0]=='\0') continue; - symbols[(int)s[0]]++; - for ( b=1; b< l; b++) - { - symbols[(int)s[b]]++; - table[(int)s[b-1]][(int)s[b]]++; - tot++; - } - } - for (naa=0, a=0; a< 256; a++) - { - if (symbols[a])alp[naa++]=a; - } - - - for ( a=0; a< 256; a++) - for (b=0; b<256; b++) - { - if (symbols[a]&& symbols[b] && table[a][b] && tot>0) - { - freq=(table[a][b])/tot; - expected=(symbols[a]*symbols[b])/(tot*tot); - log_odd=log (freq/expected); - mat[a-'A'][b-'A']=log_odd*10; - fmat[a-'A'][b-'A']=log_odd; - } - else if ( symbols[a]&& symbols[b]) - { - mat[a-'A'][b-'A']=-999; - fmat[a-'A'][b-'A']=-999; - } - } - output_mat ( mat,outfile, alp, 'A'); - - fp=vfopen (outfile, "a"); - for ( a=0; a<256; a++) - if ( symbols[a]) - { - fprintf (fp, "# %c tot: %6d freq: %7.5f\n", a, (int)symbols[a],(float)symbols[a]/tot); - } - - for ( a=0; a< 256; a++) - for (b=0; b<256; b++) - { - if (symbols[a]&& symbols[b]) - { - freq=(table[a][b])/tot; - fprintf (fp, "# %c%c tot: %6d freq: %7.5f log_odd: %9.3f\n", a, b, (int)table[a][b],(float)freq,fmat[a-'A'][b-'A']); - } - } - vfclose (fp); - vfree(alp); - free_arrayN ((void **)mat, 2); - free_arrayN ((void **)fmat, 2); - - return 1; -} - - - -void output_est_prf (char *fname, Alignment *A) - { - int a; - FILE *fp; - - if ( !A->P) - { - fprintf ( stderr, "\nFormat output_est_prf Impossible: No profile\n"); - myexit(EXIT_FAILURE); - } - - - fp=vfopen ( fname, "w"); - fprintf ( fp, "Consensus Sequence\nReconstructed with %s (%s,%s)\n",PROGRAM,AUTHOR,DATE); - fprintf ( fp, "%4c %4c %4c %4c %15s Consensus\n", 'A','G','C','T', "Internal Gaps"); - - for ( a=0; a< A->len_aln; a++) - { - fprintf (fp, "%4d %4d %4d %4d %15d %c\n", (A->P)->count[0][a],(A->P)->count[1][a],(A->P)->count[2][a], (A->P)->count[3][a], (A->P)->count[4][a],A->seq_al[0][a]); - } - return; - } - - -void output_gotoh_seq (char *fname, Alignment*A ) - { - int a; - FILE *fp; - - fp=vfopen ( fname, "w"); - fprintf ( fp, "%d %d\n",A->nseq, A->max_len); - for ( a=0; a< A->nseq; a++) - { - ungap ( A->seq_al[a]); - fprintf ( fp, ">%s\n", A->name[a]); - fp=output_string_wrap ( 50,A->seq_al[a] , fp); - fprintf ( fp, "//\n"); - } - - vfclose (fp); - } - -void output_mult_fasta_seq (char *fname, Alignment*A, int n ) - { - int a; - FILE *fp; - - fp=vfopen (fname, "w"); - ungap(A->seq_al[0]); - for (a=0; a%s_%d\n%s\n", A->name[0],a+1, A->seq_al[0]); - } - vfclose (fp); - } - -char * output_fasta_seqX (char *name, char *mode, Sequence *S, Alignment *A, int i) -{ - FILE *fp; - - if (!name)name=vtmpnam (NULL); - fp=vfopen (name, mode); - if ( (S && S->nseq<=i) || (A && S->nseq<=i) || (!A && !S)) - { - fprintf ( stderr, "\nERROR in function reformat:output_fasta_seqX[FATAL:%s]", PROGRAM); - myexit (EXIT_FAILURE); - } - - else if ( S) - fprintf ( fp, ">%s %s\n%s\n", S->name[i], S->seq_comment[i], S->seq[i]); - else if ( A) - { - ungap (A->seq_al[i]); - fprintf ( fp, ">%s %s\n%s\n", A->name[i], A->seq_comment[i], A->seq_al[i]); - } - vfclose (fp); - return name; -} - -void output_fasta_seq1 (char *fname, Alignment*A ) - { - char seq_name[VERY_LONG_STRING]; - int a; - FILE *fp; - char *extension; - - for ( a=0; a< A->nseq; a++) - { - if ( strncmp( fname, "name",4)==0) - { - if ( (fname+4)[0]!='\0')extension=fname+5; - else - extension=NULL; - - sprintf ( seq_name,"%s.%s", A->name[a],(extension==NULL)?"seq":extension); - } - else - sprintf ( seq_name,"%s.seq",A->name[a]); - - ungap ( A->seq_al[a]); - fp=vfopen (seq_name, "w"); - fprintf (fp, ">%s %s\n", A->name[a], A->seq_comment[a]); - fp=output_string_wrap ( 50, A->seq_al[a],fp); - fprintf ( fp, "\n"); - vfclose (fp); - } - } -void output_pir_check (char *fname,int nseq, char **comment ) - { - int a; - FILE *fp; - - if ( fname==NULL)return; - fp=vfopen ( fname, "w"); - - for ( a=0; a< nseq; a++)fprintf (fp, "%s\n", comment[a]); - vfclose (fp); - } -void output_fasta_seq (char *fname, Alignment*A) -{ - main_output_fasta_seq (fname, A, HEADER); -} -void output_fasta_tree (char *fname, Alignment*A) - { - int a; - FILE *fp; - if ( !A || !A->nseq) return; - - fp=vfopen ( fname, "w"); - - for ( a=0; a< A->nseq; a++) - { - fprintf ( fp, ">%s %s\n%s\n", A->name[a], A->seq_comment[a], A->seq_al[a]); - } - vfclose (fp); - } -void main_output_fasta_seq (char *fname, Alignment*A,int header ) - { - int a; - FILE *fp; - - fp=vfopen ( fname, "w"); - - for ( a=0; a< A->nseq; a++) - { - ungap(A->seq_al[a]); - fprintf ( fp, ">%s", A->name[a]); - if (header==HEADER && A->seq_comment[a][0] && !isblanc(A->seq_comment[a]))fprintf (fp," %s\n",A->seq_comment[a]); - else fprintf ( fp, "\n"); - fp=output_string_wrap ( 50, A->seq_al[a],fp); - fprintf ( fp, "\n"); - } - vfclose (fp); - } -void output_gor_seq (char *fname, Alignment*A ) - { - int a; - FILE *fp; - - fp=vfopen ( fname, "w"); - - for ( a=0; a< A->nseq; a++) - { - ungap(A->seq_al[a]); - fprintf ( fp, "!%s %d \n", A->name[a], (int)strlen(A->seq_al[a])); - upper_string ( A->seq_al[a]); - fp=output_string_wrap ( 50, A->seq_al[a],fp); - fprintf ( fp, "@\n"); - } - vfclose (fp); - } -void output_pir_seq (char *fname, Alignment*A ) - { - int a; - for ( a=0; a< A->nseq; a++)ungap(A->seq_al[a]); - output_pir_aln (fname, A); - } -void output_pir_seq1 (char *fname, Alignment*A ) - { - char seq_name[VERY_LONG_STRING]; - int a; - FILE *fp; - char type[20]; - - - for ( a=0; a< A->nseq; a++) - { - if ( strm ( get_string_type (A->seq_al[a]),"DNA") || strm ( get_string_type (A->seq_al[a]),"RNA"))sprintf(type, "DL"); - else if ( strm ( get_string_type (A->seq_al[a]),"PROTEIN"))sprintf(type, "P1"); - sprintf ( seq_name,"%s;%s_%s.seq",type, fname,A->name[a]); - ungap ( A->seq_al[a]); - fp=vfopen (seq_name, "w"); - fprintf (fp, ">%s\n\n", A->name[a]); - fp=output_string_wrap ( 50, A->seq_al[a],fp); - fprintf ( fp, "\n*\n"); - vfclose (fp); - } - } -/*******************************************************************************************/ -/* */ -/* */ -/* OUTPUT ALN */ -/* */ -/***************************************************************************************** */ -void output_mocca_aln (char *outfile, Alignment *A, Alignment *S) - { - FILE *fp; - int **score; - char **new_name_order; - int a, maxl; - - score=declare_int (S->nseq, 2); - new_name_order=declare_char ( S->nseq,MAXNAMES+1); - for ( a=0; anseq; a++) - { - score[a][0]=a; - score[a][1]=S->score_seq[a]; - } - sort_int_inv (score+1,2,1,0,S->nseq-2); - for ( a=0; anseq; a++) - { - sprintf ( new_name_order[a], "%s", A->name[score[a][0]]); - } - A=reorder_aln (A, new_name_order, A->nseq); - - fp=vfopen (outfile, "w"); - fprintf ( fp, "MOCCA,(%s,%s, C. Notredame)\nSCORE %d\nNSEQ %d\nLEN %d\n",VERSION,DATE, A->score_aln, A->nseq, A->len_aln); - - maxl=return_maxlen ( new_name_order, A->nseq); - - - for (a=0; a< A->nseq; a++) - { - fprintf (fp, "%-*s: %3d\n", maxl, A->name[a], score[a][1]); - } - - fprintf ( fp, "\n"); - - fp=output_Alignment_without_header ( A, fp); - vfclose (fp); - free_int (score, -1); - free_char (new_name_order, -1); - return ; - } - -void print_sub_aln ( Alignment *B, int *ns, int **ls) -{ - Alignment *X; - int a, b; - - - X=copy_aln (B, NULL); - X->nseq=0; - X->len_aln=strlen ( B->seq_al[ls[0][0]]); - - - for (a=0; a< 2; a++) - for ( b=0; bnseq++) - { - sprintf ( X->seq_al[X->nseq], "%s", B->seq_al[ls[a][b]]); - sprintf ( X->name[X->nseq], "%s", B->name[ls[a][b]]); - } - X->name[X->nseq][0]='\0'; - - print_aln (X); - free_aln (X); -} -void print_aln ( Alignment *B) - { - - while(B) - { - output_Alignment_without_header ( B, stderr); - B=B->A; - } - } - - -FILE * output_aln ( Alignment *B, FILE *fp){return output_Alignment(B, fp);} -FILE * output_Alignment ( Alignment *B, FILE *fp) - { - fprintf ( fp, "%s, %s (%s) [%s] [MODE: %s]\n%s\nCPU %d sec\nSCORE %d\nNSEQ %d\nLEN %d\n",PROGRAM,VERSION,DATE,retrieve_mode(),URL,AUTHOR, (B->cpu+get_time())/1000, B->score_aln, B->nseq, B->len_aln); - return output_Alignment_without_header ( B, fp); - } - -FILE * output_Alignment_without_header ( Alignment *B, FILE *fp) - { - int a,b, c; - int max_len=0; - int line; - int *n_residues; - char s; - - - if (fp==NULL)return fp; - for ( a=0; a< B->nseq; a++) - {if ( strlen (B->name[a])>max_len) - max_len= strlen ( (B->name[a])); - } - max_len=MAX(max_len+2, 16); - line=get_msa_line_length (0, 0); - n_residues=vcalloc ( B->nseq+1, sizeof (int)); - for ( a=0; anseq; a++)n_residues[a]=(B->output_res_num==2)?B->order[a][1]:0; - - - - - fprintf ( fp, "\n"); - for (a=0; alen_aln; a+=line) - {for (b=0; b<=B->nseq; b++) - { - fprintf (fp,"%-*s",max_len,B->name[b]); - if (B->output_res_num)fprintf (fp, " %4d ", n_residues[b]+1); - for (c=a;clen_aln;c++) - { - if (b==B->nseq){n_residues[b]++;s=analyse_aln_column ( B, c);} - else - {n_residues[b]+=!is_gap(B->seq_al[b][c]); - s=GET_CASE(B->residue_case, B->seq_al[b][c]); - } - - fprintf (fp,"%c",s ); - } - if (B->output_res_num)fprintf (fp, " %4d", n_residues[b]); - fprintf (fp,"\n"); - } - - fprintf (fp,"\n"); - } - - fprintf (fp,"\n\n"); - vfree (n_residues); - - return fp; - } -FILE * output_aln_score ( Alignment *B, FILE *fp){return output_Alignment_score(B, fp);} -FILE * output_Alignment_score ( Alignment *B, FILE *fp) - { - int a, b, c; - static int max_len=0; - static int line; - int ch; - - if (fp==NULL)return fp; - if ( max_len==0) - { - for ( a=0; a< B->nseq; a++) - {if ( strlen (B->name[a])>max_len) - max_len= strlen ( (B->name[a])); - } - max_len+=4; - - } - line=get_msa_line_length(0, 0); - sprintf (B->name[B->nseq], "CONS"); - fprintf ( fp, "T_COFFEE ALIGNMENT\nCPU TIME:%d sec.\n", (B->cpu+get_time())/1000); - fprintf ( fp, "SCORE=%d\n", B->score_aln); - for ( a=0;anseq; a++)fprintf ( fp, "%s: %d\n", B->name[a], B->score_seq[a]); - fprintf ( fp, "\n"); - for (a=0; alen_aln; a+=line) - {for (b=0; bnseq; b++) - { - fprintf (fp,"%-*s",max_len,B->name[b]); - for (c=a;clen_aln;c++) - { - ch=B->seq_al[b][c]; - if (ch==NO_COLOR_RESIDUE)fprintf (fp,"-"); - else if ( ch==NO_COLOR_GAP)fprintf (fp,"*"); - else if ( ch<10 && ch>=0)fprintf (fp,"%d",ch); - else if ( ch>10)fprintf (fp,"#"); - else if ( ch<0)fprintf (fp,"."); - else fprintf (fp,"9"); - } - fprintf (fp,"\n"); - } - fprintf (fp,"\n"); - fprintf (fp,"%-*s",max_len,B->name[b]); - for (c=a;clen_aln;c++) - { - ch=B->seq_al[b][c]; - if (ch==NO_COLOR_RESIDUE)fprintf (fp,"-"); - else if ( ch==NO_COLOR_GAP)fprintf ( fp, "*"); - else if ( ch<10 && ch>=0)fprintf (fp,"%d",ch); - else if ( ch>10)fprintf (fp,"#"); - else if ( ch<0)fprintf (fp,"."); - else fprintf (fp,"9"); - } - fprintf (fp,"\n\n\n"); - } - fprintf (fp,"\n\n"); - return fp; - } -FILE * output_aln_with_res_number ( Alignment *B, FILE *fp){return output_Alignment_with_res_number(B, fp);} -FILE * output_Alignment_with_res_number ( Alignment *B, FILE *fp) - { - int a, b, c; - static int max_len=0; - static int line; - int**order; - - if (fp==NULL)return fp; - if ( max_len==0) - { - for ( a=0; a< B->nseq; a++) - {if ( strlen (B->name[a])>max_len) - max_len= strlen ( (B->name[a])); - } - max_len+=4; - line=60; - } - order=copy_int ( B->order,declare_int ( B->nseq, 2), B->nseq, 2); - - fprintf ( fp, "T_COFFEE ALIGNMENT\nCPU TIME:%d sec.\n", (B->cpu+get_time())/1000); - fprintf ( fp, "\n"); - for (a=0; alen_aln; a+=line) - {for (b=0; bnseq; b++) - { - fprintf (fp,"%-*s %3d %4d ",max_len,B->name[b], order[b][0], order[b][1] ); - for (c=a;clen_aln;c++) - { - order[b][1]+=1-is_gap(B->seq_al[b][c]); - fprintf (fp,"%c",toupper(B->seq_al[b][c]) ); - } - fprintf (fp," %4d\n", order[b][1] ); - } - fprintf (fp,"\n"); - } - fprintf (fp,"\n\n"); - - free_int (order, -1); - return fp; - } - -void output_constraints ( char *fname, char *mode,Alignment *A) - { - FILE *fp; - Constraint_list *CL; - char *buf; - char **name_list; - - if ( !A->CL || strm ( mode, "pdb")) - { - if (!A->S) - { - A->S=aln2seq(A); - } - - CL=declare_constraint_list ( A->S, NULL, NULL, 0, NULL, NULL); - CL=aln2constraint_list (A,CL, mode); - compact_list (CL, 0, CL->ne, "default"); - fp=save_constraint_list ( CL, 0, CL->ne,fname, NULL, "lib",A->S); - vfclose (fp); - free_constraint_list (CL); - return; - } - else if ( strncmp ( mode, "extended_pair", 13)==0) - { - buf=duplicate_string (mode+14); - - name_list=vcalloc(2, sizeof(char*)); - name_list[0]=strtok (buf,"_"); - name_list[1]=strtok (NULL,"_"); - mode[13]='\0'; - - - CL=A->CL; - compact_list (CL, 0, CL->ne, "default"); - fp=save_sub_list_header (vfopen(fname, "w"),2, name_list,CL); - fp=save_extended_constraint_list_pair (CL, "pair",name_list[0],name_list[1],fp); - fp=save_list_footer (fp, CL); - vfree (buf); - } - else if ( strm2 (mode, "extended_lib","extended_cosmetic")) - { - CL=A->CL; - compact_list (CL, 0, CL->ne, "default"); - fp=save_extended_constraint_list ( CL,mode+9, vfopen(fname, "w")); - } - else - { - CL=(Constraint_list *)A->CL; - compact_list (CL, 0, CL->ne, "default"); - fp=save_constraint_list ( CL, 0, CL->ne,fname, NULL, "lib",A->S); - } - vfclose ( fp); - - if ( (Constraint_list *)A->CL !=CL)free_constraint_list (CL); - - return; - - } -void output_model_aln (char *fname, Alignment*A ) - { - FILE *fp; - int a; - Dp_Model *M; - Dp_Result *R; - char *string; - - if ( A->Dp_result==NULL) - { - fprintf ( stderr, "\nWARNING Could Not Output Model %s [%s]", fname, PROGRAM); - } - R=A->Dp_result; - M=R->Dp_model; - - fp=vfopen ( fname, "w"); - for (a=0; anstate; a++) - { - if (M->model_comments[a][0])fprintf ( fp, "#STATE %c: %s\n", 'a'+a, M->model_comments[a]); - } - string=vcalloc ( R->len+1, sizeof (char)); - for (a=0; alen; a++)string[a]=R->traceback[a]+'a'; - fprintf ( fp, ">%s\n",fname); - fp=output_string_wrap ( 50,string, fp); - vfree(string); - fprintf ( fp, "\n"); - - vfclose (fp); - return; - } -char * output_fasta_sub_aln (char *fname, Alignment*A, int ns, int *ls ) -{ - int a,s; - FILE *fp; - if (fname==NULL)fname=vtmpnam (NULL); - fp=vfopen (fname, "w"); - for (a=0; a%s %s\n%s\n", A->name[s],A->seq_comment[s],A->seq_al[s]); - } - vfclose (fp); - return fname; -} -char * output_fasta_sub_aln2 (char *fname, Alignment*A, int *ns, int **ls ) -{ - int a,g,s; - FILE *fp; - if (fname==NULL)fname=vtmpnam (NULL); - fp=vfopen (fname, "w"); - for ( g=0; g<2; g++) - for (a=0; a%s %s\n%s\n", A->name[s],A->seq_comment[s],A->seq_al[s]); - } - vfclose (fp); - return fname; -} - -int output_suchard_aln (char *out_file, Alignment *A) -{ - int a, b, c, d; - FILE *fp; - - A=back_translate_dna_aln (A); - - for ( c=0,a=0; alen_aln; a++, c++) - { - if (c==3)c=0; - for (b=0; bnseq; b++) - { - if (c==2) - { - A->seq_al[b][a]='-'; - } - } - } - A=ungap_aln_n (A, 1); - fp=vfopen (out_file, "w"); - for ( a=0; a< A->nseq; a++) - { - for (b=0; b< A->len_aln; b++) - { - c=tolower(A->seq_al[a][b]); - if ( c=='a')d=1; - else if ( c=='g')d=2; - else if ( c=='c')d=3; - else if ( c=='t')d=4; - else if ( c=='u')d=5; - else d=6; - - fprintf ( fp, "%d", d); - } - fprintf ( fp, "\n"); - } - vfclose (fp); - exit (EXIT_SUCCESS); -} - -void output_fasta_aln (char *fname, Alignment*A ) - { - FILE *fp; - int a; - int line=0; - - line=get_msa_line_length (line, A->len_aln+1); - fp=vfopen ( fname, "w"); - - for ( a=0; a< A->nseq; a++) - { - fprintf ( fp, ">%s", A->name[a]); - - if ( A->seq_comment[a][0] && !isblanc (A->seq_comment[a]))fprintf ( fp, " %s", A->seq_comment[a]); - fprintf ( fp, "\n"); - fp=output_string_wrap ( line,A->seq_al[a] , fp); - fprintf ( fp, "\n"); - } - vfclose (fp); - } - -void output_pir_aln (char *fname, Alignment*A ) - { - int a; - FILE *fp; - char type[20]; - - - - - - fp=vfopen ( fname, "w"); - for ( a=0; a< A->nseq; a++) - { - if ( strm ( get_string_type (A->seq_al[a]),"DNA") || strm ( get_string_type (A->seq_al[a]),"RNA"))sprintf(type, "DL"); - else if ( strm ( get_string_type (A->seq_al[a]),"PROTEIN"))sprintf(type, "P1"); - fprintf ( fp, ">%s;%s\n%s\n",type, A->name[a], A->seq_comment[a]); - fp=output_string_wrap ( 50,A->seq_al[a] , fp); - fprintf ( fp, "\n*\n"); - } - - vfclose (fp); - } - -int landscape_msa; -int set_landscape_msa (int len) -{ - if ( len==0)landscape_msa=-1; - else - { - landscape_msa=len; - } - return landscape_msa; -} -int get_msa_line_length (int line, int aln_len) -{ - if (landscape_msa==-1) return aln_len; - else if ( landscape_msa)return landscape_msa; - else if (line) return line; - else - { - return (getenv ("ALN_LINE_LENGTH"))?atoi(getenv("ALN_LINE_LENGTH")):ALN_LINE_LENGTH; - } -} - -void output_msf_aln (char *fname,Alignment *B) - { - int a, b, c; - char *seq; - int *all_checks; - int i,j; - long grand_checksum; - FILE *fp; - int max_len; - int line=0; - int block=10; - int c_block; - char aa; - - - line=get_msa_line_length (line, B->len_aln+1); - - - for ( max_len=0,a=0; a< B->nseq; a++)max_len= MAX(strlen ( B->name[a]),max_len); - - - max_len+=5; - - fp=vfopen (fname, "w"); - - seq =vcalloc(B->len_aln, sizeof(char)); - all_checks =vcalloc(B->nseq, sizeof(int)); - for ( i=0; i< B->nseq; i++) - { - for ( j=0; jlen_aln; j++) - { - if ( is_gap(B->seq_al[i][j]))seq[j]='.'; - else seq[j]=B->seq_al[i][j]=toupper(B->seq_al[i][j]); - - } - all_checks[i] = SeqGCGCheckSum(seq, (int)B->len_aln); - } - grand_checksum = 0; - for(i=0; inseq; i++) grand_checksum += all_checks[i]; - grand_checksum = grand_checksum % 10000; - fprintf(fp,"PileUp\n\n"); - B=get_aln_type(B); - fprintf(fp,"\n\n MSF:%5d Type: ",B->len_aln); - if(strm ( (B->S)->type, "DNA") || strm ( (B->S)->type, "RNA")) - fprintf(fp,"N"); - else - fprintf(fp,"P"); - fprintf(fp," Check:%6ld .. \n\n", (long)grand_checksum); - for (i=0; i< B->nseq; i++) - { - fprintf ( fp, " Name: %s oo Len:%5d Check:%6ld Weight: %.3f\n", B->name[i], B->len_aln,(long)all_checks[i],(B->S)->W?((B->S)->W)->SEQ_W[i]:1.00); - } - fprintf(fp,"\n//\n\n"); - - for (a=0; alen_aln; a+=line) - { - fprintf ( fp,"\n\n"); - for (b=0; bnseq; b++) - { - fprintf (fp,"%-*s ",max_len,B->name[b]); - for (c_block=0,c=a;clen_aln;c++) - { - if ( c_block==block) - { - fprintf (fp, " "); - c_block=0; - } - c_block++; - aa=(is_gap(B->seq_al[b][c]))?'.': toupper(B->seq_al[b][c]); - fprintf (fp,"%c",aa ); - } - if ( c_block==block) - { - fprintf (fp, " "); - c_block=0; - } - fprintf (fp,"\n"); - - } - } - fprintf ( fp,"\n"); - vfclose ( fp); - - - vfree(seq); - vfree(all_checks); - - - return; -} -int SeqGCGCheckSum(char *seq, int len) -{ - int i; - long check; - - for( i=0, check=0; i< len; i++,seq++) - check += ((i % 57)+1) * toupper(*seq); - - return(check % 10000); -} -void old_output_msf_aln (char *fname,Alignment *B) - { - FILE *fp; - static int *put_seq; - int a, b, c; - int line=0; - char aa; - char *buf; - int max_len; - int seq_max_len; - - line=get_msa_line_length (line, B->len_aln+1); - - - for ( max_len=0,a=0; a< B->nseq; a++)max_len= MAX(strlen ( B->name[a]),max_len); - for ( seq_max_len=0,a=0; a< B->nseq; a++)seq_max_len= MAX(strlen ( B->seq_al[a]),max_len); - - - buf=vcalloc(seq_max_len+1, sizeof (int)); - - if ( put_seq==NULL) - put_seq= vcalloc ( B->nseq, sizeof (int)); - put_seq[0]=1; - - - for ( b=1; b< B->nseq; b++) - { - sprintf ( buf, "%s", B->seq_al[b]); - ungap(buf); - put_seq[b]=( strlen (buf)>0)?1:0; - } - - fp=vfopen ( fname, "w"); - fprintf ( fp, "MSF: %d Type P Check: 5083 ..\n", B->len_aln); - for ( a=0; a< B->nseq; a++) - { - if ( put_seq[a]==1) - fprintf ( fp,"Name: %s\n",B->name[a]); - } - fprintf ( fp, "//\n"); - for (a=0; alen_aln; a+=line) - {for (b=0; bnseq; b++) - { - if ( put_seq[b]==1) - { - fprintf (fp,"%-*s ",max_len,B->name[b]); - for (c=a;clen_aln;c++) - { - - - - aa=(B->seq_al[b][c]=='-')?'.': toupper(B->seq_al[b][c]); - fprintf (fp,"%c",aa ); - } - fprintf (fp,"\n"); - } - } - fprintf (fp,"\n"); - } - fprintf ( fp,"\n\n"); - vfclose ( fp); - - vfree (buf); - vfree(put_seq); - } - -void output_saga_aln ( char *name, Alignment *B) - { - int a, b, c; - FILE *fp; - - - - int max_len; - int line=0; - - line=get_msa_line_length (line, B->len_aln+1); - - - - for ( max_len=0,a=0; a< B->nseq; a++)max_len= (strlen ( B->name[a])>max_len)?(strlen ( B->name[a])):max_len; - - - - - fp= vfopen ( name, "w"); - - fprintf (fp, "\nSAGA FORMAT\nalignement %s nseq=%d len=%d\n", name, B->nseq, B->len_aln); - - fprintf (fp, "\n\n"); - for (a=0; alen_aln; a+=line) - {for (b=0; bnseq; b++) - {fprintf (fp,"%-*s ",max_len,B->name[b]); - for (c=a;clen_aln;c++) - { - fprintf (fp,"%c",(B->seq_al[b][c]) ); - } - fprintf (fp,"\n"); - } - fprintf (fp,"\n"); - } - fprintf (fp,"\n\n"); - vfclose ( fp); - } -void output_compact_aln ( char *name, Alignment *B) - { - int a, b, c; - FILE *fp; - int do_print=0; - - - int max_len; - int line=0; - - line=get_msa_line_length (line, B->len_aln+1); - - - for ( max_len=0,a=0; a< B->nseq; a++)max_len= (strlen ( B->name[a])>max_len)?(strlen ( B->name[a])):max_len; - - - - - fp= vfopen ( name, "w"); - - fprintf (fp, "\nSAGA FORMAT\nalignement %s nseq=%d len=%d", name, B->nseq, B->len_aln); - fprintf (fp, "\n\n"); - for (a=0; alen_aln; a+=line) - {for (b=0; bnseq; b++) - { - - for ( do_print=0, c=a;clen_aln;c++) - do_print+=1-is_gap(B->seq_al[b][c]); - if ( do_print>0) - { - fprintf (fp,"%-*s ",max_len,B->name[b]); - - - - for (c=a;clen_aln;c++) - { - if ( is_gap(B->seq_al[b][c])&& B->seq_al[b][c]!='-' )fprintf (fp,"%c", '-'); - else fprintf (fp,"%c",(B->seq_al[b][c]) ); - } - fprintf (fp,"\n"); - } - } - fprintf (fp,"\n"); - } - fprintf (fp,"\n\n"); - vfclose ( fp); - } - -void output_clustal_aln ( char *name, Alignment *B) -{ - return output_generic_clustal_aln (name, B, "tc_clustal"); -} -void output_strict_clustal_aln ( char *name, Alignment *B) -{ - return output_generic_clustal_aln (name, B, "strict_clustal"); -} - -void output_generic_clustal_aln ( char *name, Alignment *B, char *mode) - { - int a, b, c; - FILE *fp; - int max_len=0; - int line=0; - int *n_residues; - - if ( getenv ("SEP_4_TCOFFEE")) - { - while ( linelen_aln && B->seq_al[0][line]!='o' && B->seq_al[0][line]!='O')line++; - if ( B->seq_al[0][line]=='O' || B->seq_al[0][line]=='o')line++; - } - else - { - while ( linelen_aln)line++; - } - - if ( line==B->len_aln)line=get_msa_line_length (0, B->len_aln+1); - - n_residues=vcalloc ( B->nseq+1, sizeof (int)); - for ( a=0; a< B->nseq; a++) - {if ( strlen (B->name[a])>max_len) - max_len= strlen ( (B->name[a])); - n_residues[a]=B->order[a][1]; - } - max_len=MAX(max_len+2, 16); - - - fp= vfopen ( name, "w"); - - if ( strm (mode, "strict_clustal")) - fprintf ( fp, "CLUSTAL W (1.83) multiple sequence alignment"); - else - fprintf (fp, "CLUSTAL FORMAT for %s %s [%s] [MODE: %s ], CPU=%.2f sec, SCORE=%d, Nseq=%d, Len=%d ", PROGRAM, VERSION,URL, retrieve_mode (),(float)(B->cpu+get_time())/1000, B->score_aln, B->nseq, B->len_aln); - fprintf (fp, "\n\n"); - - - if ( B->len_aln==0) - { - for (b=0; b<=B->nseq; b++) - fprintf (fp,"%-*s -\n",max_len, B->name[b]); - } - - else - { - for (a=0; alen_aln; a+=line) - {for (b=0; b<=B->nseq; b++) - { - if (b!=B->nseq) - { - fprintf (fp,"%-*s",max_len, B->name[b]); - for (c=a;clen_aln;c++) - { - if ( is_gap(B->seq_al[b][c]))fprintf (fp,"%c", '-'); - else - { - n_residues[b]++; - fprintf (fp, "%c", GET_CASE(B->residue_case, B->seq_al[b][c])); - - } - - } - if (B->output_res_num)fprintf (fp, " %d", n_residues[b]); - fprintf (fp,"\n"); - } - else if ( b==B->nseq) - { - fprintf (fp,"%-*s",max_len," "); - for (c=a;clen_aln;c++) - { - fprintf ( fp, "%c", analyse_aln_column (B, c)); - } - fprintf (fp,"\n"); - } - } - fprintf (fp,"\n"); - } - } - fprintf (fp,"\n\n"); - vfree (n_residues); - vfclose ( fp); - } -FILE * output_generic_interleaved_aln (FILE *fp, Alignment *B, int line, char gap, char *mode) - { - int a, b, c; - int max_len=0; - int *n_residues; - - - n_residues=vcalloc ( B->nseq+1, sizeof (int)); - for ( a=0; a< B->nseq; a++) - {if ( strlen (B->name[a])>max_len) - max_len= strlen ( (B->name[a])); - n_residues[a]=B->order[a][1]; - } - max_len=MAX(max_len+2, 16); - - - - - if ( B->len_aln==0) - { - for (b=0; b<=B->nseq; b++) - fprintf (fp,"%-*s -\n",max_len, B->name[b]); - } - - else - { - for (a=0; alen_aln; a+=line) - {for (b=0; b<=B->nseq; b++) - { - if (b!=B->nseq) - { - fprintf (fp,"%-*s",max_len, B->name[b]); - for (c=a;clen_aln;c++) - { - if ( is_gap(B->seq_al[b][c]))fprintf (fp,"%c", gap); - else - { - n_residues[b]++; - fprintf (fp, "%c", GET_CASE(B->residue_case, B->seq_al[b][c])); - - } - - } - if (B->output_res_num)fprintf (fp, " %d", n_residues[b]); - fprintf (fp,"\n"); - } - } - fprintf (fp,"\n"); - } - } - vfree (n_residues); - return fp; - } -void output_phylip_aln ( char *name, Alignment *B) - { - int a, b, c, d; - FILE *fp; - - int *print_name; - static int line=0; - line=get_msa_line_length(0, 0); - - print_name=vcalloc ( B->nseq, sizeof (int)); - fp= vfopen ( name, "w"); - - fprintf (fp, "%3d %d\n", B->nseq, B->len_aln); - for (a=0; alen_aln; a+=line) - {for (b=0; bnseq; b++) - {if ( print_name[b]==0) - { - - fprintf (fp,"%-10.10s ",B->name[b]); - print_name[b]=1; - } - else - { - fprintf (fp, "%10.10s ", " "); - } - - - for (d=0,c=a;clen_aln;c++, d++) - { - if ( d==10) - { - fprintf ( fp, " "); - d=0; - } - if ( is_gap(B->seq_al[b][c])&& B->seq_al[b][c]!='-' )fprintf (fp,"%c", '-'); - else fprintf (fp,"%c",(B->seq_al[b][c]) ); - } - fprintf (fp,"\n"); - } - fprintf (fp,"\n"); - } - fprintf (fp,"\n\n"); - vfclose ( fp); - } - -void output_rnalign (char *out_file, Alignment *A, Sequence *STRUC) - { - int a, b; - FILE *fp; - char bank_file[100]; - char pep_file[100]; - char *buf; - - sprintf ( bank_file, "%s.mss", out_file); - sprintf ( pep_file, "%s.one_rna", out_file); - - - buf=vcalloc ( strlen ( A->seq_al[0]+1), sizeof (char)); - - for ( b=0,a=0; a< strlen(A->seq_al[0]); a++) - { - if ( is_gap(A->seq_al[0][a])) - buf[a]='.'; - else - buf[a]=STRUC->seq[0][b++]; - } - buf[a]='\0'; - - fp=vfopen ( bank_file, "w"); - - fprintf ( fp, "ST\n"); - fp=output_string_wrap ( 50, buf, fp); - fprintf ( fp, "\n\n"); - - for ( a=0; anseq-1; a++) - { - fprintf ( fp, "AS %s\n ", A->name[a]); - fp=output_string_wrap ( 50, A->seq_al[a], fp); - fprintf ( fp, "\n\n"); - } - vfclose ( fp); - fp=vfopen ( pep_file, "w"); - fprintf ( fp, ">%s\n", A->name[A->nseq-1]); - fp=output_string_wrap ( 50, A->seq_al[A->nseq-1], fp); - fprintf ( fp, "\n"); - vfclose (fp); - } - -void output_lib (char *pw_lib_saga_aln_name, Alignment *A ) - { - Alignment *B; - char fname[VERY_LONG_STRING]; - int a,b; - - B=declare_Alignment (NULL); - - B->nseq=2; - - for ( a=0; a< A->nseq-1; a++) - { - for ( b=a+1; bnseq; b++) - { - sprintf ( B->seq_al[0], "%s", A->seq_al[a]); - sprintf ( B->name[0], "%s", A->name[a]); - sprintf(B->name[1], "%s", A->name[b]); - sprintf ( B->seq_al[1], "%s",A->seq_al[b]); - B->nseq=2; - sprintf ( fname, "%s_%s_%s.lib",pw_lib_saga_aln_name, A->name[a], A->name[b]); - - B->len_aln=strlen ( B->seq_al[0]); - ungap_aln (B); - output_clustal_aln (fname,B); - } - } - } -void output_pw_lib_saga_aln (char *pw_lib_saga_aln_name, Alignment *A ) - { - Alignment *B; - char fname[VERY_LONG_STRING]; - int a,b; - - B=declare_Alignment (NULL); - - B->nseq=2; - - for ( a=0; a< A->nseq-1; a++) - { - for ( b=a+1; bnseq; b++) - { - sprintf ( B->seq_al[0], "%s", A->seq_al[a]); - sprintf ( B->name[0], "%s", A->name[a]); - sprintf(B->name[1], "%s", A->name[b]); - sprintf ( B->seq_al[1], "%s",A->seq_al[b]); - B->nseq=2; - sprintf ( fname, "%s_%s_%s.pw_lib_saga_aln",pw_lib_saga_aln_name, A->name[a], A->name[b]); - - B->len_aln=strlen ( B->seq_al[0]); - ungap_aln (B); - output_clustal_aln (fname,B); - } - } - } -void output_lalign_header( char *name, Alignment *A) - { - FILE *fp; - - fp=vfopen ( name, "w"); - fprintf ( fp, " Lalign mode: best local alignments between two sequences\n"); - fprintf ( fp, " %s(%s) [%s]\n\n", VERSION, DATE, URL); - fprintf ( fp, " Comparison of:\n(A) %s\t%s\t-%d aa\n", (A->S)->file[A->order[0][0]],(A->S)->name[A->order[0][0]], (A->S)->len[A->order[0][0]]); - fprintf ( fp, "(B) %s\t%s\t-%d aa\n", (A->S)->file[A->order[1][0]],(A->S)->name[A->order[1][0]], (A->S)->len[A->order[1][0]]); - - - vfclose ( fp); - return; - } -void output_stockholm_aln (char *file, Alignment *A, Alignment *ST) -{ - FILE *fp; - int a,b,l; - - for (a=0; anseq; a++) - for (b=0; blen_aln; b++) - if (A->seq_al[a][b]==STOCKHOLM_CHAR)A->seq_al[a][b]='.'; - - fp=vfopen (file, "w"); - fprintf ( fp, "# STOCKHOLM 1.0\n\n"); - output_generic_interleaved_aln (fp,A, 50, '.', NULL); - fprintf ( fp, "//\n"); - vfclose (fp); -} - -void output_glalign ( char *name, Alignment *B, Alignment *S) -{ - int a, b, g, s; - int naln=0; - FILE *fp; - int **nr; - B=B->A; - if ( B==NULL){return;} - - fp=vfopen (name, "w"); - fprintf (fp, "Format: GLALIGN_01 [Generated with %s ]\n", PROGRAM); - fprintf (fp, "#Each Line corresponds to a column\n"); - fprintf (fp, "#First column coresponds to first genome\n"); - fprintf (fp, "#Last Column gives the column reliability on a 0-9 scale\n"); - fprintf (fp, "#[-1] Indicates that the reliability was not evaluated\n"); - - fprintf (fp, "Genome List\n"); - for ( a=0; a< B->nseq; a++) - fprintf (fp, "\tGenome %s\n", B->name[a]); - fprintf (fp, "Alignment List\n"); - while (B) - { - fprintf (fp, "Alignment %d Len %d Score %d\n", ++naln, B->len_aln, S->score_aln); - nr=duplicate_int (B->order, -1, -1); - for ( a=0; a< B->len_aln; a++) - { - fprintf ( fp, "\t"); - for ( b=0; b< B->nseq; b++) - { - g=is_gap (B->seq_al[b][a]); - nr[b][1]+=1-g; - - if (g)fprintf (fp, "---- "); - else fprintf ( fp, "%4d ",nr[b][1]); - } - s=((S)?S->seq_al[S->nseq][a]:-1); - if (s==NO_COLOR_RESIDUE)s=-1; - fprintf ( fp,"[ %d ]",s); - fprintf ( fp, "\n"); - - } - free_int (nr, -1); - B=B->A; - S=S->A; - } - vfclose ( fp); -} -Alignment *input_conc_aln ( char *name, Alignment *IN) -{ - FILE *fp; - char *string, *p, *file; - Alignment *F=NULL,*A=NULL, *B=NULL; - - file=vtmpnam (NULL); - - string=file2string(name); - string=substitute ( string, "@", "!Protected!"); - string=substitute ( string, TC_REC_SEPARATOR, "@"); - strtok (string,"@"); - - - while ( (p=strtok (NULL,"@"))!=NULL) - { - char *buf; - buf=vcalloc ( strlen (p)+1, sizeof (char)); - sprintf (buf,"%s", p); - buf=substitute (buf,"!protected!", "@"); - - fp=vfopen (file, "w"); - fprintf ( fp, "%s",buf); - vfclose (fp); - vfree (buf); - - if ( is_aln (file)) - { - B=main_read_aln (file,NULL); - - if ( !A) - { - if (IN){copy_aln (B, IN);F=A=IN;} - else F=A=B; - } - else - { - A->A=B; - A=A->A; - } - } - } - - vfree (string); - return F; -} - -void output_conc_aln ( char *name, Alignment *B) -{ - FILE *fp; - int a; - - fp=vfopen (name, "w"); - fprintf (fp, "# CONC_MSF_FORMAT_01\n"); - while (B) - { - fprintf (fp, "%s\n", TC_REC_SEPARATOR); - for ( a=0; a< B->nseq; a++) - { - fprintf ( fp, ">%s\n%s\n", B->name[a], B->seq_al[a]); - } - B=B->A; - - } - vfclose (fp); -} - -void output_lalign ( char *name, Alignment *B) -{ - static int output_header; - - B=B->A; - if ( B==NULL){output_header=0;return;} - else if ( output_header==0) - { - output_lalign_header(name, B); - output_header=1; - } - while (B) - { - output_lalign_aln ( name, B); - B=B->A; - } -} -void output_lalign_aln ( char *name, Alignment *B) - { - int a, b, c,d=0, s=0; - char col; - - float tot=0; - float id=0; - - FILE *fp; - int max_len=0; - int line; - int *n_residues; - int res; - - - n_residues=vcalloc ( B->nseq+1, sizeof (int)); - for ( a=0; a< B->nseq; a++) - {if ( strlen (B->name[a])>max_len) - max_len= strlen ( (B->name[a])); - n_residues[a]=B->order[a][1]; - } - max_len=MAX(max_len+2, 16); - line=60; - - - - fp= vfopen ( name, "a"); - - for (a=0; a< B->len_aln; a++) - { - if ( !is_gap(B->seq_al[0][a]) && !is_gap(B->seq_al[1][a])) - { - tot++; - id+=(B->seq_al[0][a]==B->seq_al[1][a]); - } - } - - id=(id*100)/tot; - fprintf (fp, " %.1f%% identity in %d aa overlap; score: %d\n\n", id,(int)tot, B->score_aln); - - - for (a=0; alen_aln; a+=line) - {for (b=0; b<5; b++) - { - if ( b==0 || b==4) - { - if ( b==0)s=0; - if ( b==4)s=1; - fprintf (fp,"%-*s",max_len," "); - for (d=0,c=a;clen_aln;c++) - { - res=!is_gap ( B->seq_al[s][c]); - n_residues[s]+=res; - if ( (n_residues[s]%10)==0 && res && (c-a+4)name[s]); - for (c=a;clen_aln;c++) - { - if ( is_gap(B->seq_al[s][c]))fprintf (fp,"%c", '-'); - else - { - fprintf (fp, "%c", GET_CASE(B->residue_case, B->seq_al[s][c])); - } - } - fprintf (fp,"\n"); - } - else if ( b==2) - { - fprintf (fp,"%-*s",max_len," "); - for (c=a;clen_aln;c++) - { - col=analyse_aln_column (B, c); - if ( col=='*')col=':'; - else if ( col==':')col='.'; - else if ( col=='.')col=' '; - fprintf ( fp, "%c", col); - } - fprintf (fp,"\n"); - } - } - fprintf (fp,"\n"); - } - - fprintf (fp,"\n\n----------\n\n"); - vfree (n_residues); - vfclose ( fp); - } - - -/****************************************************************************************************/ -/*************************************UTIL *********************************************************/ -/**************************************************************************************************/ - - -/****************************************************************************************************/ -/*************************** *************************************/ -/*************************** PROCESSING *************************************/ -/*************************** *************************************/ -/*******************************************************************************************/ -/* */ -/* */ -/* THREADING */ -/***************************************************************************************** */ - -char *thread_aa_seq_on_dna_seq( char *s) - { - int l, b, c; - char *array; - - - l=strlen ( s); - array=vcalloc ( l*3 +1, sizeof (char)); - for ( b=0, c=0; b< l; b++, c+=3) - { - array[c]=s[b]; - array[c+1]='o'; - array[c+2]='o'; - } - array[c]='\0'; - return array; - } - -Alignment *thread_dnaseq_on_prot_aln (Sequence *S, Alignment *A) - { - Alignment *B=NULL; - int a, b, c, n, la, ls, ln, m; - - B=copy_aln ( A, B); - B=realloc_aln2 ( B, B->nseq, B->len_aln*3 +1); - - for ( n=0,a=0; a< A->nseq; a++) - { - for ( m=0,b=0; b< S->nseq; b++) - { - if (strm (A->name[a], S->name[b]) ) - { - m=1; - n++; - ungap ( S->seq[b]); - B->seq_al[a][0]='\0'; - for (la=0, ls=0, ln=0; la< A->len_aln; la++) - { - for (c=0; c< 3; c++) - B->seq_al[a][ls++]=(is_gap(A->seq_al[a][la]))?'-':S->seq[b][ln++]; - } - B->seq_al[a][ls]='\0'; - } - } - if ( m==0) - { - for (la=0, ls=0, ln=0; la< A->len_aln; la++) - { - - B->seq_al[a][ls++]=A->seq_al[a][la]; - B->seq_al[a][ls++]='-'; - B->seq_al[a][ls++]='-'; - } - } - } - - B->len_aln=strlen ( B->seq_al[0]); - return B; - } -void thread_seq_struc2aln ( Alignment *A, Sequence *ST) - { - int a, b, c,d; - int len, cons; - - for ( a=0; a< A->nseq; a++) - for ( b=0; b< ST->nseq; b++) - { - if ( strcmp ( A->name[a], ST->name[b])==0) - { - ungap (ST->seq[b]); - len=strlen(A->seq_al[a]); - for ( c=0, d=0; cseq_al[a][c]))A->seq_al[a][c]=ST->seq[b][d++]; - } - } - } - cons=name_is_in_list ("Cons", ST->name, ST->nseq, 100); - if ( cons!=-1 && A->len_aln==strlen ( ST->seq[cons])) - { - sprintf (A->name[A->nseq], "Cons"); - sprintf (A->seq_al[A->nseq],"%s", ST->seq[cons]); - A->nseq++; - } - } -void cache_id ( Alignment *A) - { - int a, b,n; - char r1, r2, r3; - - for ( a=0; a< A->len_aln; a++) - { - for ( b=0, n=0; b< A->nseq; b++)if ( !is_gap(A->seq_al[b][a]))n++; - for ( b=0; b< A->nseq; b++) - if ( !is_gap(A->seq_al[b][a]) && n==A->nseq)A->seq_al[b][a]='h'; - else if( !is_gap(A->seq_al[b][a]))A->seq_al[b][a]='x'; - } - for ( a=0; a< A->nseq; a++) - { - for ( b=1; b< A->len_aln-1; b++) - { - r1=A->seq_al[a][b-1]; - r2=A->seq_al[a][b]; - r3=A->seq_al[a][b+1]; - if (r2=='h') - { - if ( (r1=='h' || r1=='b') && (r3=='h' || r3=='b'))A->seq_al[a][b]='h'; - else A->seq_al[a][b]='b'; - } - } - for ( b=1; b< A->len_aln-1; b++)if ( A->seq_al[a][b]=='b')A->seq_al[a][b]='x'; - } - - } - - -/*******************************************************************************************/ -/* */ -/* */ -/* PROCESING OF EST */ -/* */ -/***************************************************************************************** */ -int process_est_sequence ( Sequence *S, int *cluster_list) - { - char **inverted_seq; - int T=20; - int a, b; - int V1, V2; - int **sens; - int **a_sens; - int **best; - int *solution; - char buf [VERY_LONG_STRING]; - int n_clusters=0; - int n; - - sens=declare_int ( S->nseq,S->nseq); - a_sens=declare_int ( S->nseq,S->nseq); - best=declare_int ( S->nseq,S->nseq); - - - inverted_seq=vcalloc ( S->nseq, sizeof (char*)); - for ( a=0; anseq; a++) - inverted_seq[a]=invert_seq ( S->seq[a]); - - for ( a=0; a< S->nseq-1; a++) - { - - for ( b=a+1; bnseq; b++) - { - - V1=sens[a][b]=sens[b][a]=get_best_match ( S->seq[a], S->seq[b]); - V2=a_sens[a][b]=a_sens[b][a]=get_best_match ( S->seq[a],inverted_seq[b]); - best[a][b]=best[b][a]=(V1>V2)?V1:V2; - } - } - solution=SHC ( S->nseq, a_sens, sens); - - - for ( a=0; anseq; a++)cluster_list[a]=-1; - for ( a=0; anseq; a++) - { - n=search_for_cluster (a, n_clusters, cluster_list, T, S->nseq, best); - if ( n>0)n_clusters++; - } - fprintf ( stderr, "\nTHERE %s %d Independant Cluster(s) in your sequences",(n_clusters>1)?"are":"is",(n_clusters)); - for (a=0; anseq; b++) - { - if ( cluster_list[b]==a)fprintf ( stderr, "%s ", S->name[b]); - } - } - - for ( a=0; anseq; a++) - { - if ( solution[a]==-1) - { - S->seq[a]=inverted_seq[a]; - sprintf ( buf, "i_%s", S->name[a]); - sprintf ( S->name[a], "%s", buf); - } - } - return n_clusters; - } - -int search_for_cluster ( int seq, int cluster_number, int *cluster_list, int T, int nseq, int **S) - { - int n=0,a; - - if (cluster_list[seq]==-1) - { - cluster_list[seq]=cluster_number; - n++; - } - for ( a=0; aT) - { - n++; - cluster_list[a]=cluster_number; - n+=search_for_cluster ( a, cluster_number, cluster_list, T, nseq, S); - } - } - return n; - } - -int * SHC ( int nseq, int **NST, int **ST) - { - int a; - int mut; - int score, new_score; - int N_IT=VERY_LONG_STRING; - int *sol; - int count; - - sol=vcalloc ( nseq, sizeof (int)); - for ( a=0; a49)?1:-1; - - score=evaluate_sol (sol, nseq, ST, NST); - fprintf ( stderr, "\nI_Score=%d\n", score); - N_IT=N_IT*nseq; - - for ( count=0,a=0; a< N_IT && scorescore) - { - score=new_score; - } - else if ( (addrand ((unsigned long)VERY_LONG_STRING))>score) - { - score=new_score; - } - else - sol[mut]=sol[mut]*-1; - if ( count==VERY_LONG_STRING) - { - count=0; - fprintf ( stderr, "\nScore=%d", score); - } - } - fprintf ( stderr, "\nScore=%d\n", score); - return sol; - } - -int mutate_sol (int *sol, int nseq) - { - int n; - n=addrand ((unsigned long)nseq); - sol[n]=sol[n]*-1; - return n; - } -int evaluate_sol ( int *sol, int nseq, int **ST, int **NST) - { - static int max_score; - int a, b, score=0; - - if ( max_score==0) - { - for ( a=0; aNST[a][b])?ST[a][b]:NST[a][b]; - } - } - - for ( a=0; al2)?l1:l2; - m=declare_int (ml, ml); - } - else if ( (mll2)?l1:l2; - m=declare_int (ml, ml); - } - - for ( a=0; abest)?mdiag[a][0]:best; - - return best; - } - -int** extract_m_diag_streches ( int ** m, int l1, int l2,char *seq1, char *seq2, int *n_mdiag) - { - - int b, x, y, s1, s2; - static int **mdiag; - int in; - static int max_diag=VERY_LONG_STRING; - - /* - diag[0]=len; - diag[1]=x_start; - diag[2]=y_start; - diag[3]=x_end; - diag[4]=y_end; - */ - - if ( mdiag==NULL) - mdiag=declare_int ( max_diag, 5); - - for ( s1=l1-1, s2=0;s20) - { - if (in==1) - mdiag[n_mdiag[0]][0]++; - else - { - mdiag[n_mdiag[0]][0]=1; - mdiag[n_mdiag[0]][1]=x; - mdiag[n_mdiag[0]][2]=y; - in=1; - } - } - else - if (in==1) - { - in=0; - mdiag[n_mdiag[0]][3]=x-1; - mdiag[n_mdiag[0]][4]=y-1; - if ( !is_strech ( "ta", seq1, seq2,mdiag[n_mdiag[0]][0], mdiag[n_mdiag[0]][1],mdiag[n_mdiag[0]][2]))n_mdiag[0]++; - } - if (n_mdiag[0]==(max_diag-1)) - {mdiag=vrealloc (mdiag, (max_diag+VERY_LONG_STRING)*sizeof (int*)); - for ( b=max_diag; bT)return 1; - } - return 0; - } - - -/************************************************************************************/ -/* */ -/* STRUC */ -/* */ -/* */ -/************************************************************************************/ - -char * oneletaa2threeletaa(char aa); -float aa2property (char aa, char *mode); - -int output_seq2struc(char *outfile, Alignment *A) -{ - FILE *fp1, *fp2; - int a,c, l; - float v, h, x, y, z, dx, dy, dz; - char *s; - char *tmpfile1, *tmpfile2; - char command[1000]; - - tmpfile1=vtmpnam(NULL); - tmpfile2=vtmpnam(NULL); - - ungap (A->seq_al[0]); - s=A->seq_al[0];l=strlen (s); - fp1=vfopen (tmpfile1, "w"); - - x=y=z=0; - for ( a=0; a< l; a++) - { - h=aa2property ( s[a], "doolittle" ); - v=aa2property (s[a], "volume"); - /*14.398907: peptide bond length*/ - dx=(float)sqrt ((double)(14.398907/(((h*h)/(v*v))+1))); - dy=dx*(h/v); - dz=0; - - - x+=dx; - y+=dy; - z+=dz; - fprintf (fp1, "ATOM%7d CA %s A%4d%12.3f%8.3f%8.3f 1.00 5.30\n",a+1, oneletaa2threeletaa(s[a]),a+1, x, y, z); - } - vfclose (fp1); - sprintf ( command, "extract_from_pdb -infile %s -force > %s", tmpfile1, tmpfile2); - my_system (command); - fp1=vfopen (tmpfile2, "r"); - fp2=vfopen (outfile, "w"); - - while ( (c=fgetc(fp1))!=EOF)fprintf (fp2, "%c", c); - vfclose (fp1); - vfclose (fp2); - - return 0; -} - -char * oneletaa2threeletaa(char aa) - { - aa=tolower (aa); - if ( aa=='a')return "ALA"; - else if ( aa=='r') return "ARG"; - else if ( aa=='n') return "ASN"; - else if ( aa=='d') return "ASP"; - else if ( aa=='c') return "CYS"; - else if ( aa=='q') return "GLN"; - else if ( aa=='e') return "GLU"; - else if ( aa=='g') return "GLY"; - else if ( aa=='h') return "HIS"; - else if ( aa=='i') return "ILE"; - else if ( aa=='l') return "LEU"; - else if ( aa=='k') return "LYS"; - else if ( aa=='m') return "MET"; - else if ( aa=='f') return "PHE"; - else if ( aa=='p') return "PRO"; - else if ( aa=='s') return "SER"; - else if ( aa=='t') return "THR"; - else if ( aa=='w') return "TRP"; - else if ( aa=='y') return "TYR"; - else if ( aa=='v') return "VAL"; - else - { - fprintf ( stderr, "\nERROR: %c is not an amino acid [FATAL::aa2hydropathy::%s]", aa, PROGRAM); - myexit (EXIT_FAILURE); - return NULL; - } - return NULL; - } - -float aa2property (char aa, char *mode) - { - if ( mode==NULL || strm (mode, "doolittle")) - { - aa=tolower (aa); - if ( aa=='i')return 4.5; - else if ( aa=='v') return 4.2; - else if ( aa=='l') return 3.8; - else if ( aa=='f') return 2.8; - else if ( aa=='c') return 2.5; - else if ( aa=='m') return 1.9; - else if ( aa=='a') return 1.8; - else if ( aa=='g') return -0.4; - else if ( aa=='t') return -0.7; - else if ( aa=='w') return -0.9; - else if ( aa=='s') return -0.8; - else if ( aa=='y') return -1.3; - else if ( aa=='p') return -1.6; - else if ( aa=='h') return -3.2; - else if ( aa=='e') return -3.5; - else if ( aa=='q') return -3.5; - else if ( aa=='d') return -3.5; - else if ( aa=='n') return -3.5; - else if ( aa=='k') return -3.9; - else if ( aa=='r') return -4.5; - else - { - fprintf ( stderr, "\nERROR: %c is not an amino acid [FATAL::aa2hydropathy::%s]", aa, PROGRAM); - myexit (EXIT_FAILURE); - } - } - else if (strm (mode, "volume")) - { - aa=tolower (aa); - if ( aa=='a')return 0.915; - else if ( aa=='r') return 2.02; - else if ( aa=='n') return 1.35; - else if ( aa=='d') return 1.24; - else if ( aa=='c') return 1.18; - else if ( aa=='q') return 1.61; - else if ( aa=='e') return 1.55; - else if ( aa=='g') return 0.66; - else if ( aa=='h') return 1.67; - else if ( aa=='i') return 1.69; - else if ( aa=='l') return 1.68; - else if ( aa=='k') return 1.71; - else if ( aa=='m') return 1.70; - else if ( aa=='f') return 2.03; - else if ( aa=='p') return 1.29; - else if ( aa=='s') return 0.99; - else if ( aa=='t') return 1.22; - else if ( aa=='w') return 2.37; - else if ( aa=='y') return 2.03; - else if ( aa=='v') return 1.41; - else - { - fprintf ( stderr, "\nERROR: %c is not an amino acid [FATAL::aa2hydropathy::%s]", aa, PROGRAM); - myexit (EXIT_FAILURE); - } - } - - else - { - fprintf ( stderr, "\nERROR: %s is an unknown mode [FATAL::aa2hydropathy::%s]", mode , PROGRAM); - myexit (EXIT_FAILURE); - } - return 0; - } - - - - - -/************************************************************************************/ -/* */ -/* DNA */ -/* */ -/* */ -/************************************************************************************/ - -Alignment *code_dna_aln (Alignment *A) - { - int a, b,l,r; - - for ( a=0; a< A->nseq; a++) - { - for (l=0, b=0; b< A->len_aln; b++) - { - r=A->seq_al[a][b]; - if ( r=='-')l++; - else if ( r=='~')continue; - else if ( r=='.')l++; - else if ( !islower(r))A->seq_al[a][b]='4'; - else - { - A->seq_al[a][b]=(l+3)%3+'0'; - l++; - } - } - } - return A; - } - - -Alignment *back_translate_dna_aln (Alignment *A) - { - /*Given a set of aligned sequences - starts from left to right - 1 aa->3 nuc - ambiguities are randomly resolved. - returns the corresponding amino acid alignment - */ - int a; - char *seq ; - - ungap_aln(A); - A=realloc_aln (A, 10000); - seq=vcalloc ( 10000, sizeof (char)); - - - for ( a=0; a< A->nseq; a++) - { - seq=back_translate_dna_seq (A->seq_al[a], seq, RANDOM); - sprintf ( A->seq_al[a], "%s", seq); - } - A->len_aln=A->len_aln*3; - compress_aln (A); - vfree (seq); - return A; - } -char * back_translate_dna_seq ( char *in_seq,char *out_seq, int mode) - { - int a,len; - - len=strlen(in_seq); - - if (out_seq==NULL)out_seq=vcalloc ( len*3+1, sizeof (char)); - - out_seq[0]='\0'; - for (a=0; atype, "DNA") && !strm (S->type, "RNA")) printf_exit (EXIT_FAILURE, stderr, "Sequences should be *RNA* type [FATAL:%s]\n", PROGRAM); - for ( a=0; anseq; a++) - { - for (b=0; bseq[a]); b++) - { - if ( S->seq[a][b]=='u') S->seq[a][b]='t'; - if ( S->seq[a][b]=='U') S->seq[a][b]='T'; - } - HERE ("%s", S->seq[a]); - } - return S; -} -Sequence *dna_seq2rna_seq (Sequence *S) -{ - int a, b; - - if ( !strm(S->type, "DNA") && !strm (S->type, "RNA")) printf_exit (EXIT_FAILURE, stderr, "Sequences should be *DNA* type (type=%s) [FATAL:%s]\n", PROGRAM, S->type); - for ( a=0; anseq; a++) - for (b=0; blen[a]; b++) - { - if ( S->seq[a][b]=='t') S->seq[a][b]='u'; - if ( S->seq[a][b]=='T') S->seq[a][b]='U'; - } - return S; -} - - - -int get_longest_frame (char *seq, int mode); -Alignment *translate_dna_aln (Alignment *A, int frame) - { - /*Given a set of aligned sequences - starts from left to right - 3 nuc->1 aa - 2nuc+1gap, 1nuc+2gap->3 gaps - 1 stop-> 3gaps - returns the corresponding amino acid alignment - */ - - - int a, b,r; - - - if (frame==3 || frame ==4) - { - - for (a=0; a< A->nseq; a++) - { - char *d, *buf, f; - d=A->seq_al[a]; - f=get_longest_frame (d,frame); - buf=vcalloc ( strlen (d)+1, sizeof (char)); - if ( f<3) - { - sprintf (buf, "%s", d+f); - sprintf (d, "%s", buf); - sprintf (A->seq_comment[a], " frame: %d", f); - } - else if ( f>=3) - { - f-=3; - sprintf ( buf, "%s", d); - buf=complement_string (buf); - sprintf (d, "%s",buf+f); - sprintf (A->seq_comment[a], " frame: %d Reverse Complement", f); - } - vfree (buf); - } - } - else - { - - for ( a=0; a< A->nseq; a++) - for (b=0; b< frame; b++) - A->seq_al[a][b]='-'; - ungap_aln(A); - } - - for ( b=0; b< A->nseq; b++) - for ( a=0; a< A->len_aln;) - { - - r=translate_dna_codon (A->seq_al[b]+a, 'z'); - if (is_gap(r)) - { - A->seq_al[b][a++]='-'; - A->seq_al[b][a++]='-'; - A->seq_al[b][a++]='-'; - } - else if ( r=='x') - { - A->seq_al[b][a++]='o'; - A->seq_al[b][a++]='-'; - A->seq_al[b][a++]='-'; - } - else if ( r=='z') - { - A->seq_al[b][a++]='x'; - A->seq_al[b][a++]='-'; - A->seq_al[b][a++]='-'; - } - else - { - A->seq_al[b][a++]=r; - A->seq_al[b][a++]='-'; - A->seq_al[b][a++]='-'; - } - } - compress_aln (A); - - return A; - } - -int get_longest_frame (char *in_seq, int mode) -{ - char *prot, *seq; - int a; - int max_l=0, l; - int best_frame=0; - int nf; - - seq=vcalloc (strlen (in_seq)+1, sizeof (char)); - prot=vcalloc (strlen (in_seq)+1, sizeof (char)); - sprintf ( seq, "%s", in_seq); - - if ( mode == 3)nf=3; - else if ( mode == 4) nf=6; - - for (a=0; a=3)?a-3:a; - prot=translate_dna_seq ( seq,f,'\0', prot); - l=strlen (prot); - if (l>=max_l){max_l=l;best_frame=a;} - } - vfree (seq); - vfree (prot); - return best_frame; -} - -Alignment *clean_gdna_aln (Alignment *A) - { - int a, b, c, r1, r2,s, p, n, tn; - int *col; - static int **mat; - Alignment *T=NULL; - int **score; - char *buffer; - - - /*Viterbi Parameters*/ - int AL=0; /*Allowed Transition*/ - int F=-1000000; /*Forbiden Transition*/ - int SPLICE_PENALTY=100; - int ORF1=0, ORF2=1, ORF3=2, NC=3; - - int state, pstate, best_e, best_pstate_p,best_state_p, best_pstate_v, best_state_v, v; - int nstate=4; - int **transitions; - int e; - int **v_tab_p; - int **v_tab; - int * is_dna; - - best_state_p=best_state_v=best_pstate_p=best_pstate_v=best_e=0; - buffer=vcalloc ( 100000, sizeof (char)); - is_dna=vcalloc ( A->nseq, sizeof (int)); - score=declare_int ( A->nseq+1, A->len_aln); - - - if ( !mat)mat=read_matrice("pam250mt"); - T=copy_aln (A, T); - col=vcalloc ( A->nseq, sizeof (int)); - - for (a=0; a<= A->len_aln; a++) - for ( b=0; b< A->nseq; b++){A->seq_al[b][a]=tolower(A->seq_al[b][a]); A->seq_al[b][a]=(A->seq_al[b][a]=='t')?'u':A->seq_al[b][a];} - - for ( a=0; a< A->nseq; a++) - { - sprintf ( buffer, "%s", A->seq_al[a]); - ungap (buffer); - is_dna[a]=strm ( get_string_type (buffer), "DNA"); - } - - - for (a=0; a< A->len_aln-2; a++) - { - for (b=0; b< A->nseq; b++) - { - if (is_dna[b])col[b]=translate_dna_codon (A->seq_al[b]+a, 'x'); - else col[b]=tolower ( A->seq_al[b][a]); - } - - for (n=0,tn=0,b=0; b< A->nseq; b++) - for ( c=b; c< A->nseq; c++ ) - { - r1=col[b]; - r2=col[c]; - - if (r1=='x' || r2=='x'){score[A->nseq][a]=F;break;} - else if (r1=='-' && r2=='-'); - else if (r1=='-' || r2=='-'); - else - { - - if ( is_dna[b] && is_dna[c])score[A->nseq][a]+= mat[r1-'A'][r2-'A']; - else score[A->nseq][a]+=mat[r1-'A'][r2-'A']* (A->nseq*A->nseq); - } - n+=( !is_gap(r1) && !is_gap(r2)); - score[A->nseq][a]=(((tn!=0)?score[A->nseq][a]/tn:0)); - } - - } - - /*initialisation*/ - - transitions=declare_int ( nstate, nstate); - v_tab=declare_int ( A->len_aln+2, nstate ); - v_tab_p=declare_int ( A->len_aln+2, nstate ); - - for (a=0; anseq; s++) - { - for ( p=0; p<=A->len_aln; p++){for (state=0; state< nstate; state++)v_tab_p[p][state]=-1; } - for (p=1+2; p<= A->len_aln; p++) - { - - for (state=0; state< nstate; state++) - { - - if ( state==NC){e=-best_e;} - else - { - e=score[A->nseq][(p-1)-state]; - if ( state==0)best_e=e; - else best_e=MAX(e, best_e); - } - - for ( pstate=0; pstatebest_pstate_v) ) - { - best_pstate_v=v; - best_pstate_p=pstate; - } - } - - v_tab[p][state]=best_pstate_v; - v_tab_p[p][state]=best_pstate_p; - if (state==0 ||best_pstate_v>best_state_v ) - { - best_state_p=state; - best_state_v=best_pstate_v; - } - } - - } - - - - for (p=0; p< A->len_aln; p++)T->seq_al[s][p]='.'; - for (p=A->len_aln; p>0; p--) - { - - if ( best_state_p==0)T->seq_al[s][p-1]=translate_dna_codon (A->seq_al[s]+(p-1), 'x'); - else if ( best_state_p==1 || best_state_p==2)T->seq_al[s][p-1]='-'; - - - - best_state_p=v_tab_p[p][best_state_p]; - - } - } - - - - vfree (col); - return T; - } - -Alignment *clean_cdna_aln (Alignment *A) - { - /*Given an alignmnet of nucleotides - Returns the same alignmnent whith non coding nucleotides replaced with dots - - at each position, the emission probability is the sum of pair of the substitution of amino-acids - */ - - int a, b, c,s, p; - static int **mat; - int *emission; - float em1, em2; - char *buffer; - Alignment *B=NULL; - - - - - /*Viterbi Parameters*/ - int AL=0; /*Allowed Transition*/ - int F=-1000000; /*Forbiden Transition*/ - int PENALTY=30; - int NC, C1,C2, C3, START, END; - int nstate=0; - int state=0,best_state=0, score=0, best_score=0; - int p_state; - int e=0; - int **score_tab; - int **state_tab; - - int **transitions; - int n; - int r1, r2, r3; - - NC=nstate++; - C1=nstate++; - C2=nstate++; - C3=nstate++; - START=nstate++; - END=nstate++; - - - B=copy_aln (A, B); - buffer=vcalloc ( 100000, sizeof (char)); - emission=vcalloc (A->len_aln, sizeof (int)); - - if ( !mat) - { - mat=read_matrice("pam250mt"); - } - - /*Computation of the emission proba for the coding state*/ - - - for (a=0; a< A->len_aln; a++) - { - - /*First component: % occupancy of the column*/ - em1=0; - for ( b=0; b< A->nseq; b++) em1+=!is_gap(translate_dna_codon (A->seq_al[b]+a, '-')); - em1=em1/(float)A->nseq; - - /*Second Component: % similarity within column*/ - em2=0; - for (n=0,b=0; b< A->nseq-1; b++) - { - r1=translate_dna_codon (A->seq_al[b]+a, '-'); - - for (c=b+1; cnseq; c++) - { - r2=translate_dna_codon (A->seq_al[c]+a, '-'); - if (is_gap(r2) || is_gap(r1)); - else - { - n++; - em2+=((mat[r1-'A'][r2-'A'])>1)?1:0; - } - } - } - em2=em2/(float)((n==0)?1:n); - - - emission[a]=(em1*100); - - } - - - - /*initialisation*/ - - transitions=declare_int ( nstate, nstate); - score_tab=declare_int ( A->len_aln+2, nstate ); - state_tab=declare_int ( A->len_aln+2, nstate ); - - for (a=0; anseq; s++) - { - for ( p=0; p<=A->len_aln; p++){for (state=0; state< nstate; state++){score_tab[p][state]=F;state_tab[p][state]=-1;} } - score_tab[0][START]=0; - - for (p=1; p<= A->len_aln; p++) - { - for (state=0; state< nstate; state++) - { - if ( state==START || state==END)continue; - else if ( state==NC) e=-10; - else if ( state==C1) - { - e=emission[p-1]; - } - else if ( state ==C2) - { - if ( p-2<0)e=F; - else e=emission[p-2]; - } - else if ( state==C3) - { - if ( p-3<0)e=F; - else e=emission[p-3]; - } - - for (p_state=0; p_statebest_score){ best_score=score;best_state=p_state;} - - } - - score_tab[p][state]=best_score; - state_tab[p][state]=best_state; - - } - } - - best_score=best_state=UNDEFINED; - for (state=0; statebest_score) - { - best_score=score_tab[p-1][state]+e; - best_state=state; - } - - } - - for (p=A->len_aln; p>0;) - { - B->seq_al[s][p-1]=best_state+'0'; - best_state=state_tab[p][best_state]; - p--; - } - } - - for ( a=0; a< A->nseq; a++) - for ( b=0; b< A->len_aln;) - { - s=B->seq_al[a][b]; - if ( s==C1+'0') - { - r1=A->seq_al[a][b]; - r2=A->seq_al[a][b+1]; - r3=A->seq_al[a][b+2]; - - - if ( is_gap(r1) ||is_gap(r2) || is_gap(r3)) - { - A->seq_al[a][b]=(is_gap(r1))?'~':'.'; - A->seq_al[a][b+1]=(is_gap(r2))?'~':'.'; - A->seq_al[a][b+2]=(is_gap(r3))?'~':'.'; - } - b+=3; - } - else if ( s==NC+'0') - { - A->seq_al[a][b]=(is_gap(A->seq_al[a][b]))?'~':'.'; - b++; - } - else - { - fprintf (stderr, "\nPROBLEM: [%d %d]->%d", a, b, s-'0'); - } - } - - - free_aln (B); - free_int (transitions, -1); - free_int (score_tab, -1); - free_int (state_tab, -1); - vfree (emission); - vfree (buffer); - - return A; - } - - - - -Alignment *translate_splice_dna_aln (Alignment *A, Alignment *ST) - { - int a, b, c, r1, r2,s, p, n, tn; - int *col; - static int **mat; - Alignment *T=NULL; - int **score; - - /*Viterbi Parameters*/ - int AL=0; /*Allowed Transition*/ - int F=-1000000; /*Forbiden Transition*/ - int ORF1=0, ORF2=1, ORF3=2,SPL1=3, SPL2=4, SPL3=5, SPL4=6, NC=7; - int SPLICE_PENALTY; - int frame1, frame2, frame3, best_frame; - int nstate=8; - char r; - - - - int state=0, pstate=0, best_pstate_p=0,best_state_p=0, best_pstate_v=0, best_state_v=0, v=0; - - int **transitions; - int e=0; - int **v_tab_p; - int **v_tab; - - score=declare_int ( A->nseq+1, A->len_aln); - - - if ( !mat)mat=read_matrice("pam250mt"); - T=copy_aln (A, T); - col=vcalloc ( A->nseq, sizeof (int)); - - for (a=0; a<= A->len_aln; a++) - for ( b=0; b< A->nseq; b++){A->seq_al[b][a]=tolower(A->seq_al[b][a]); A->seq_al[b][a]=(A->seq_al[b][a]=='t')?'u':A->seq_al[b][a];} - - - - - for (a=0; a< A->len_aln-2; a++) - { - for (b=0; b< A->nseq; b++) - { - col[b]=translate_dna_codon (A->seq_al[b]+a, 'x'); - } - - for (n=0,tn=0,b=0; b< A->nseq-1; b++) - for ( c=b+1; c< A->nseq; c++, tn++ ) - { - r1=col[b]; - r2=col[c]; - - if (r1=='x' || r2=='x')score[A->nseq][a]=F; - else if (r1=='-' && r2=='-'); - else if (r1=='-' || r2=='-'); - else - { - score[A->nseq][a]+= mat[r1-'A'][r2-'A']; - - } - n+=( !is_gap(r1) && !is_gap(r2)); - } - score[A->nseq][a]=(((tn!=0)?score[A->nseq][a]/tn:0)); - - } - - /*initialisation*/ - - transitions=declare_int ( nstate, nstate); - v_tab=declare_int ( A->len_aln+2, nstate*nstate); - v_tab_p=declare_int ( A->len_aln+2, nstate*nstate); - - for (a=0; anseq; s++) - { - for ( p=0; p<=A->len_aln; p++){for (state=0; state< nstate; state++)v_tab_p[p][state]=-1; } - for (p=1+2; p<= A->len_aln; p++) - { - frame1=score[A->nseq][(p-1)]; - frame2=score[A->nseq][(p-1)-1]; - frame3=score[A->nseq][(p-1)-2]; - best_frame=best_int (3, 1, &a, frame1, frame2, frame3); - for (state=0; state< nstate; state++) - { - r=tolower (A->seq_al[s][p-1]); - r=(r=='u')?'t':r; - - if (state==ORF1)e=frame1; - else if (state==ORF2)e=frame2; - else if (state==ORF3)e=frame3; - else if (state==SPL1)e=(r=='g')?best_frame:F; - else if (state==SPL2)e=(r=='t')?best_frame:F; - else if (state==SPL3)e=(r=='a')?best_frame:F; - else if (state==SPL4)e=(r=='g')?best_frame:F; - else if (state==NC)e=-best_frame; - for ( pstate=0; pstatebest_pstate_v) ){best_pstate_v=v;best_pstate_p=pstate;} - } - - v_tab[p][state]=best_pstate_v; - v_tab_p[p][state]=best_pstate_p; - if (state==0 ||best_pstate_v>best_state_v ){best_state_p=state; best_state_v=best_pstate_v;} - } - } - - - - for (p=0; p< A->len_aln; p++)T->seq_al[s][p]='.'; - for (p=A->len_aln; p>0; p--) - { - if ( best_state_p==0)T->seq_al[s][p-1]=toupper(translate_dna_codon (A->seq_al[s]+(p-1), 'x')); - else if ( best_state_p>=SPL1 && best_state_p<=SPL4)T->seq_al[s][p-1]='-'; - best_state_p=v_tab_p[p][best_state_p]; - } - } - - - - vfree (col); - return T; - } - -Alignment * mutate_cdna_aln ( Alignment *A) -{ - int a, b, c, n; - int n1, n2, r1, r2; - int **pos, ps; - int neutral_substitution=50; - int random_substitution=0; - int random_deletion=0; - int amino_acid_deletion=0; - int amino_acid_substitution=0; - char nuc_list[]="agct"; - char *new_codon; - - neutral_substitution=atoi(get_env_variable ("NEUTRAL_SUBSTITUTION",IS_FATAL)); - random_substitution =atoi(get_env_variable ("RANDOM_SUBSTITUTION", IS_FATAL)); - random_deletion =atoi(get_env_variable ("RANDOM_DELETION", IS_FATAL)); - amino_acid_deletion =atoi(get_env_variable ("AMINO_ACID_DELETION", IS_FATAL)); - amino_acid_substitution =atoi(get_env_variable ("AMINO_ACID_SUBSTITUTION", IS_FATAL)); - - - if (A->S)free_sequence ( A->S, (A->S)->nseq); - A->S=aln2seq(A); - - addrandinit(time (NULL)); - - - pos=aln2pos_simple ( A, A->nseq); - - /* 1 Apply neutral substitutions */ - - if ( neutral_substitution) - { - for ( c=0; c< neutral_substitution; c++) - { - for ( a=0; a< A->nseq; a++) - { - - for ( b=0; b< A->len_aln; b++) - { - - if (pos[a][b]<=0)continue; - ps=MAX(0,pos[a][b]-(pos[a][b]-1)%3-1); - - - n1=(A->S)->seq[a][pos[a][b]-1]; - r1=translate_dna_codon ( (A->S)->seq[a]+ps, 'o'); - - n2=nuc_list[(int)addrand((unsigned long) 4)]; - (A->S)->seq[a][pos[a][b]-1]=n2; - r2=translate_dna_codon ( (A->S)->seq[a]+ps, 'o'); - - - if ( r1==r2 && r1!='o')A->seq_al[a][b]=n2; - - else (A->S)->seq[a][pos[a][b]-1]=n1; - } - } - } - } - - /* 2 Apply substitutions */ - if ( random_substitution) - { - for ( a=0; a< A->nseq; a++) - { - for ( b=0; b< A->len_aln; b++) - { - if (pos[a][b]<=0)continue; - if (addrand ((unsigned long) 100)>random_substitution)continue; - - n1=nuc_list[(int)addrand((unsigned long)4)]; - (A->S)->seq[a][pos[a][b]-1]=n1; - A->seq_al[a][b]=n1; - } - } - } - - /* 3 Apply amino acid substitutions */ - if ( amino_acid_substitution) - { - for ( a=0; a< A->nseq; a++) - { - for ( b=0; b< A->len_aln; b+=3) - { - if (pos[a][b]<=0)continue; - if (addrand ((unsigned long) 100)>amino_acid_substitution)continue; - ps=MAX(0,pos[a][b]-(pos[a][b]-1)%3-1); - - r1=translate_dna_codon ( (A->S)->seq[a]+ps, 'o'); - new_codon=mutate_amino_acid(r1, "clustalw_col"); - - for ( c=ps; cS)->seq[a][c]=new_codon[c-ps]; - } - for ( b=0; b< A->len_aln; b++) - { - if (pos[a][b]<=0)continue; - else A->seq_al[a][b]=(A->S)->seq[a][pos[a][b]-1]; - } - } - } - /* 3 Apply amino acid deletions */ - if ( amino_acid_deletion) - { - for ( a=0; a< A->nseq; a++) - { - for ( b=0; b< A->len_aln; b+=3) - { - if (pos[a][b]<=0)continue; - if (addrand ((unsigned long) 1000)>amino_acid_deletion)continue; - ps=MAX(0,pos[a][b]-(pos[a][b]-1)%3-1); - n=addrand ((unsigned long) 4)+1; - - for ( c=ps; clen_aln; c++)(A->S)->seq[a][c]='-'; - } - for ( b=0; b< A->len_aln; b++) - { - if (pos[a][b]<=0)continue; - else A->seq_al[a][b]=(A->S)->seq[a][pos[a][b]-1]; - } - } - } - /* 4 Apply amino acid insertions */ - -/*FRAMESHIFT MUTATIONS*/ - /* 5 Apply nucleotide deletions*/ - if ( random_deletion) - { - for ( a=0; a< A->nseq; a++) - { - for ( b=0; b< A->len_aln; b++) - { - if (pos[a][b]<=0)continue; - if (addrand ((unsigned long) 1000)>random_deletion)continue; - - n1='-'; - (A->S)->seq[a][pos[a][b]-1]=n1; - A->seq_al[a][b]=n1; - } - } - } - /* 6 Apply nucleotide deletions*/ - free_int (pos, -1); - return A; - -} - -Alignment* clean_est ( Alignment *A) - { - /*Rules are as follow: - Internal Gap > 30% Requences ----> - - Best Residue < 50% Residues ----> 'N' - */ - int a, b,c; - int best; - int tot; - - for ( a=0; a< A->len_aln; a++) - { - - for (tot=0, b=0; b<4; b++)tot+=(A->P)->count[b][a]; - best=best_int (5,1, &c, (A->P)->count[0][a],(A->P)->count[1][a],(A->P)->count[2][a],(A->P)->count[3][a],(A->P)->count[4][a]); - - if ( tot==0) - { - fprintf ( stderr, "\nWARNING: POSITION WITH NO INFORMATION [clean_est:%s]", PROGRAM); - A->seq_al[0][a]='-'; - } - else if (((A->P)->count[4][a]*100)/tot >30)A->seq_al[0][a]='-'; - else if ( (best*100)/tot<50)A->seq_al[0][a]='n'; - - } - return A; - } - - - -char **make_symbols ( char *name, int *n) - { - char **symbol; - - symbol=declare_char ( STRING, STRING); - - if ( strcmp (name, "3d_ali")==0) - { - sprintf ( symbol[0], "gih"); - sprintf ( symbol[1], "eb"); - sprintf ( symbol[2], "x"); - sprintf ( symbol[3], "#l"); - n[0]=4; - } - - else if ( strcmp (name, "all")==0) - { - int a, i; - for ( i=0,a=0; a<26; a++) - { - sprintf ( symbol[i++], "%c%c", 'a'+a, 'a'+a); - sprintf ( symbol[i++], "%c%c", 'A'+a, 'A'+a); - } - sprintf ( symbol[i++], "--"); - n[0]=i; - } - - else if ( strcmp (name, "set1")==0) - { - sprintf ( symbol[0], "ilvmfywhktcagH"); - sprintf ( symbol[1], "reqdnsP"); - sprintf ( symbol[2], "--"); - sprintf ( symbol[3], "#l"); - n[0]=4; - } - else if ( strcmp (name, "set2")==0) - { - n[0]=0; - sprintf ( symbol[n[0]++], "gsacT"); - sprintf ( symbol[n[0]++], "ndtvpS"); - sprintf ( symbol[n[0]++], "ilkreqL"); - sprintf ( symbol[n[0]++], "--"); - sprintf ( symbol[n[0]++],"#l"); - } - else if ( strcmp ( name, "any")==0) - { - sprintf ( symbol[0], "*x"); - n[0]=1; - } - - - - - return symbol; - } - -char * translate_dna_seq_on3frame ( char *dna_seq, char stop, char *prot) - { - int a, l; - char *buf; - - l=strlen (dna_seq); - if ( prot==NULL)prot=vcalloc ( l+2, sizeof (char)); - - buf=vcalloc (l+4, sizeof (char)); - sprintf (buf, "%s", dna_seq); - lower_string ( buf); - for ( a=0; a< l; a++)buf[a]=(buf[a]=='t')?'u':buf[a]; - - for (a=0; a< l; a++) - prot[a]=translate_dna_codon (buf+a, stop); - vfree (buf); - prot[a]='\0'; - - return prot; - } -char * translate_dna_seq ( char *dna_seq, int frame, char stop, char *prot) - { - int a, b, l; - char *buf; - - l=strlen (dna_seq); - if ( prot==NULL)prot=vcalloc ( l/3 +2, sizeof (char)); - - buf=vcalloc (l+4, sizeof (char)); - sprintf (buf, "%s", dna_seq); - lower_string ( buf); - for ( a=0; a< l; a++)buf[a]=(buf[a]=='t')?'u':buf[a]; - - for ( b=0,a=0+frame; a< l; a+=3,b++) - prot[b]=translate_dna_codon (buf+a, stop); - vfree (buf); - prot[b]='\0'; - - return prot; - } -char * back_translate_dna_codon ( char aa, int deterministic) - { - static char *r; - int choice; - - vsrand(0); - if ( r==NULL)r=vcalloc (4, sizeof (char)); - - if (!is_gap(aa))aa=tolower(aa); - - if (is_gap(aa))sprintf (r, "---"); - else if ( aa>=0 && aa<=9) - { - sprintf (r, "%d%d%d", aa, aa,aa); - } - else if ( aa>='0' && aa<='9') - { - sprintf (r, "%c%c%c", aa, aa,aa); - } - else if ( aa=='a') - { - choice=(deterministic)?0:rand()%4; - if ( choice==0)sprintf (r, "gca"); - else if ( choice==1)sprintf (r, "gcg"); - else if ( choice==2)sprintf (r, "gcc"); - else if ( choice==3)sprintf (r, "gct"); - } - else if ( aa=='c') - { - choice=(deterministic)?0:rand()%2; - if ( choice==0)sprintf (r, "tgc"); - else if ( choice==1)sprintf (r, "tgt"); - } - else if ( aa=='d') - { - choice=(deterministic)?0:rand()%2; - if ( choice==0)sprintf (r, "gac"); - else if ( choice==1)sprintf (r, "gat"); - } - - else if ( aa=='e') - { - choice=(deterministic)?0:rand()%2; - if ( choice==0)sprintf (r, "gaa"); - else sprintf (r, "gag"); - } - else if ( aa=='f') - { - choice=(deterministic)?0:rand()%2; - if ( choice==0)sprintf (r, "ttc"); - else sprintf (r, "ttt"); - } - else if ( aa=='g') - { - choice=(deterministic)?0:rand()%4; - if ( choice==0) sprintf (r, "gga"); - else if ( choice==1) sprintf (r, "ggg"); - else if ( choice==2) sprintf (r, "ggc"); - else if ( choice==3) sprintf (r, "ggt"); - } - else if ( aa=='h') - { - choice =rand()%2; - if ( choice==0)sprintf (r, "cac"); - else sprintf (r, "cat"); - } - else if ( aa=='i') - { - choice=(deterministic)?0:rand()%3; - if ( choice==0) sprintf (r, "ata"); - else if ( choice==1) sprintf (r, "atc"); - else if ( choice==2) sprintf (r, "att"); - } - else if ( aa=='k') - { - choice=(deterministic)?0:rand()%2; - if ( choice==0) sprintf (r, "aaa"); - else if ( choice==1) sprintf (r, "aag"); - - } - else if ( aa=='l') - { - choice=(deterministic)?0:rand()%6; - if ( choice==0) sprintf (r, "cta"); - else if ( choice==1) sprintf (r, "ctg"); - else if ( choice==2) sprintf (r, "ctc"); - else if ( choice==3) sprintf (r, "ctt"); - else if ( choice==4) sprintf (r, "tta"); - else if ( choice==5) sprintf (r, "ttg"); - } - else if ( aa=='m')sprintf ( r, "atg"); - else if ( aa=='n') - { - choice=(deterministic)?0:rand()%2; - if ( choice==0) sprintf (r, "aac"); - else if ( choice==1) sprintf (r, "aat"); - } - else if ( aa=='p') - { - choice=(deterministic)?0:rand()%4; - if ( choice==0) sprintf (r, "cca"); - else if ( choice==1) sprintf (r, "ccg"); - else if ( choice==2) sprintf (r, "ccc"); - else if ( choice==3) sprintf (r, "cct"); - } - else if ( aa=='q') - { - choice=(deterministic)?0:rand()%2; - if ( choice==0) sprintf (r, "caa"); - else if ( choice==1) sprintf (r, "cag"); - } - else if ( aa=='r') - { - choice=(deterministic)?0:rand()%6; - if ( choice==0) sprintf (r, "cga"); - else if ( choice==1) sprintf (r, "cgg"); - else if ( choice==2) sprintf (r, "cgc"); - else if ( choice==3) sprintf (r, "cgt"); - else if ( choice==4) sprintf (r, "aga"); - else if ( choice==5) sprintf (r, "agg"); - - } - else if ( aa=='s') - { - choice=(deterministic)?0:rand()%6; - if ( choice==0) sprintf (r, "tca"); - else if ( choice==1) sprintf (r, "tcg"); - else if ( choice==2) sprintf (r, "tcc"); - else if ( choice==3) sprintf (r, "tct"); - else if ( choice==4) sprintf (r, "agt"); - else if ( choice==5) sprintf (r, "agc"); - - } - else if ( aa=='t') - { - choice=(deterministic)?0:rand()%4; - if ( choice==0) sprintf (r, "aca"); - else if ( choice==1) sprintf (r, "acg"); - else if ( choice==2) sprintf (r, "acc"); - else if ( choice==3) sprintf (r, "act"); - } - else if ( aa=='v') - { - choice=(deterministic)?0:rand()%4; - if ( choice==0) sprintf (r, "gta"); - else if ( choice==1) sprintf (r, "gtg"); - else if ( choice==2) sprintf (r, "gtc"); - else if ( choice==3) sprintf (r, "gtt"); - } - else if ( aa=='w') - { - sprintf (r, "tgg"); - } - else if ( aa=='y') - { - choice=(deterministic)?0:rand()%2; - if ( choice==0) sprintf (r, "tac"); - else if ( choice==1) sprintf (r, "tat"); - } - else - { - sprintf (r, "nnn"); - } - return r; - - } -int translate_dna_codon ( char *sequence, char stop) - { - char seq[4]; - int a,b; - - - if ( (b=strlen (sequence))<3) - { - for ( a=0; atype, "DNA") || strm(S->type, "RNA"))sprintf (alp, "AGCT"); - else if ( strm(S->type, "PROTEIN"))sprintf (alp, "ACDEFGHIKLMNPQRSTVWY"); - - alp_size=strlen(alp); - - B=copy_aln (A,NULL); - B=realloc_aln(B, B->len_aln*2+1); - - for ( a=0, b=0; a< A->len_aln; a++, b+=2) - { - for ( c=0; c< A->nseq; c++) - { - B->seq_al[c][b]=tolower(A->seq_al[c][a]); - B->seq_al[c][b+1]='~'; - } - } - - for ( c=0; c< A->nseq; c++)B->seq_al[c][b]='\0'; - B->len_aln=A->len_aln*2; - - - - tot=n_mut=0; - for (a=0; a< B->len_aln; a+=2) - for ( b=0; bnseq; b++) - { - if ( is_gap(B->seq_al[b][a]))continue; - mut=((rand()%RAND_MAX)>ratio)?0:1; - tot++; - n_mut+=mut; - - if (mut) - { - type=rand()%2; - if (type==0)/*deletion*/ - { - B->seq_al[b][a]='.'; - } - else if ( type==1) - { - B->seq_al[b][a+1]=alp[rand()%alp_size]; - } - else if (type==2) - { - B->seq_al[b][a]=alp[rand()%alp_size]; - } - - } - } - ungap_aln (B); - - - free_sequence (S, S->nseq); - free_aln (A); - return B; - -} - -char* mutate_amino_acid ( char aa, char *mode) - - { - int a, b, c, d; - char nucleotide[]="agct"; - char amino_acid[]="acdefghiklmnpqrstvwy"; - static char **triplet; - static char **cw_col; - int ng_cw_col; - static int **amino_acid_list; - static int *lu; - char a1, a2; - char *mat; - - aa=tolower(aa); - declare_name(mat); - if ( !mode)sprintf (mat, "clustalw_col"); - else sprintf (mat, "%s", mode); - if (!triplet) - { - triplet=declare_char ( 64, 4); - for (d=0, a=0; a< 4;a++) - for ( b=0; b< 4; b++) - for ( c=0; c< 4; c++, d++) - { - triplet[d][0]=nucleotide[a]; - triplet[d][1]=nucleotide[b]; - triplet[d][2]=nucleotide[c]; - } - } - if ( !cw_col)cw_col=make_group_aa ( &ng_cw_col,mat); - if ( !amino_acid_list) - { - amino_acid_list=declare_int ( 20, 65); - for ( a=0; a< 20; a++) - for ( b=0; b< 64; b++) - { - a1=translate_dna_codon ( triplet[b], 'x'); - a2=amino_acid[a]; - for ( d=0; d< ng_cw_col; d++) - if ( is_in_set ( a1, cw_col[d]) && is_in_set ( a2, cw_col[d])) - { - amino_acid_list[a][++amino_acid_list[a][0]]=b; - } - } - lu=vcalloc ( 26, sizeof (int)); - for ( a=0; a<20; a++) - { - lu[amino_acid[a]-'a']=a; - } - /* - for ( a=0; a< 20; a++) - { - fprintf ( stderr, "\n%c", amino_acid[a]); - for ( b=1; b<=amino_acid_list[a][0]; b++) - fprintf ( stderr, "\n\t%s %c", triplet[amino_acid_list[a][b]], translate_dna_codon (triplet[amino_acid_list[a][b]], 'x')); - } - */ - } - - return triplet [addrand((unsigned long)amino_acid_list[lu[aa-'a']][0])+1]; - } - -/**************************************************************************************************/ -/******************************** ********************************************/ -/******************************** PROCESSING ********************************************/ -/******************************** ********************************************/ - - - -void modify_data (Sequence_data_struc *D1in, Sequence_data_struc *D2in, Sequence_data_struc *DSTin, char **action_list,int n_actions, Action_data_struc *RAD) - { - Sequence *COOR=NULL, *NS=NULL,*BUFS=NULL, *OUT_S=NULL; - Constraint_list *CL; - char *s; - int value,upper_value, lower_value, start, end, a, b,c; - int *count_table=NULL; - char *action; - Sequence_data_struc *D1; - Sequence_data_struc *D2; - Sequence_data_struc *DST; - int s1, s2, r1, r2; - static int clean_flag; - Alignment *BUF; - - /*Switches*/ - - action=action_list[0]; - - if (action[0]=='2') - { - - D1=D2in; - D2=D1in; - DST=DSTin; - action++; - } - else if ( action[0]=='1') - { - D1=D1in; - D2=D2in; - DST=DSTin; - action++; - } - else if ( action[0]=='3') - { - D1=DSTin; - D2=D1in; - DST=DSTin; - action++; - } - else - { - D1=D1in; - D2=D2in; - DST=DSTin; - } - if (!D1->A)D1->A=copy_aln (D1in->A, NULL); - - if ( strm(action, "seqnos")) - { - (D1->A)->output_res_num=1; - } - else if ( strm (action,"aln2bootstrap")) - { - (D1->A)=aln2bootstrap (D1->A, ATOI_ACTION (1)); - D1->S=aln2seq (D1->A); - } - else if ( strm (action,"aln2sample")) - { - (D1->A)=aln2sample (D1->A, ATOI_ACTION (1)); - D1->S=aln2seq (D1->A); - } - else if ( strm (action,"aln2random_aln")) - { - (D1->A)=aln2random_aln (D1->A, ACTION (1)); - D1->S=aln2seq (D1->A); - } - else if ( strm (action, "or_scan")) - { - HERE ("OR SCAN"); - D1->A=or_scan(D1->A, D2->A, ACTION(1)); - D1->S=aln2seq (D1->A); - } - else if ( strm (action, "or_sar")) - { - D1->A=or_sar(D1->A, D2->A, ACTION(1), PRINT); - D1->S=aln2seq (D1->A); - } - else if ( strm ( action, "sar2subsar")) - { - /*in->sequences - in2->sar data - */ - Alignment *subA, *subS; - - if ( n_actions==1) - { - fprintf ( stderr, "\nin=aln, in2=sar sar2subsar [filter value compound1 compound2...] | [jack1] | [file]\n"); - myexit (EXIT_FAILURE); - } - - sarset2subsarset ( D1->A, D2->A, &subA, &subS, main_read_aln (action_list[2], NULL)); - D1->A=subA;D2->A=subS; - } - else if ( strm (action, "display_sar")) - { - D1->A=display_sar (D1->A, D2->A, action_list[1]); - } - else if ( strm ( action, "sar2simpred")) - { - /*in->sequences - in2->sar data - */ - sar2simpred ( D1->A, D2->A, action_list[1], action_list[2], atoi(action_list[3]), atoi (action_list[4])); - } - else if ( strm ( action, "sar2simpred2")) - { - /*in->sequences - in2->sar data - */ - if ( n_actions!=5) - { - fprintf ( stderr, "\nERROR: +sar2simpred2 seqnamesfile posfile compound limit"); - myexit (EXIT_FAILURE); - } - sar2simpred2 ( D1->A, D2->A, action_list[1], action_list[2], action_list[3], atoi (action_list[4])); - } - else if ( strm ( action, "sar_analyze")) - { - /*in->sequences - in2->sar data - */ - sar_analyze ( D1->A, D2->A,action_list[1]); - } - else if ( strm ( action, "simple_sar_predict")) - { - //displays each column with ist score; - simple_sar_predict (D1->A, D2->A,ACTION(1)); - exit (EXIT_SUCCESS); - } - else if ( strm ( action, "display_sar_analyze")) - { - //displays each column with ist score; - display_simple_sar_analyze_col (D1->A, D2->A,ACTION(1)); - exit (EXIT_SUCCESS); - } - else if ( strm ( action, "display_sar_analyze_pc")) - { - //displays each column with ist score; - display_simple_sar_analyze_pair_col (D1->A, D2->A,ACTION(1)); - exit (EXIT_SUCCESS); - } - else if ( strm ( action, "weight2sar")) - { - /*in->sequences - in2->sar data - */ - if ( n_actions!=3) - { - fprintf ( stderr, "\nERROR: +weight2sar "); - myexit (EXIT_FAILURE); - } - D1->A=weight2sar ( D1->A,D2->A, action_list[1], atoi(action_list[2])); - - } - else if ( strm ( action, "sar_weight")) - { - /*in->sequences - in2->sar data - */ - if ( n_actions!=3) - { - fprintf ( stderr, "\nERROR: +sar_weight "); - myexit (EXIT_FAILURE); - } - D1->A=aln2weighted_sar_score ( D1->A,D2->A, action_list[1], action_list[2]); - D1->S=aln2seq ( D1->A); - } - - else if ( strm (action, "name2unique_name")) - { - char *tmp1, *tmp2; - char command[1000]; - tmp1=vtmpnam (NULL); tmp2=vtmpnam (NULL); - - output_fasta_aln (tmp1,D1->A); - free_aln (D1->A);free_sequence (D1->S, -1); - sprintf ( command, "fasta_aln2fasta_aln_unique_name.pl %s >%s", tmp1, tmp2); - my_system ( command); - D1->S=get_fasta_sequence ( tmp2, NULL); - D1->A=seq2aln (D1->S,NULL, 1); - } - else if ( strm (action, "rm_tag") || strm (action, "rm_template")) - { - - char **temp_name=NULL,**temp_list=NULL, temp_nseq=0; - int z; - - if ( D1 && D1->A){temp_name=(D1->A)->name;temp_nseq=(D1->A)->nseq;} - else if ( D1 && D1->S){temp_name=(D1->S)->name;temp_nseq=(D1->S)->nseq;} - temp_list=rm_name_tag (temp_name,temp_nseq, NULL); - if ( n_actions>1 && strm (action_list[1], "template")) - { - - for ( z=0; zS=seq2template_seq (D1->S, action_list[1], NULL); - D1->A=seq2aln(D1->S, NULL, 1); - } - else if ( strm ( action, "seq2year")) - { - D1->S=seq2year (D1->S, (n_actions>1)?atoi(action_list[1]):1); - D1->A=seq2aln(D1->S, NULL, 1); - } - else if ( strm (action, "swap_lib_header")) - { - Sequence *S; - S=main_read_seq (action_list[1]); - (D1->CL)->S=S; - - } - else if ( strm (action, "weight_lib")) - { - int l; - int w; - w=atoi (action_list[1]); - if ( D1->CL) - { - for (l=0; l<(D1->CL)->ne; l++) - (D1->CL)->L[l*CL->entry_len+WE]=w; - } - } - else if ( strm (action, "struc2nb")) - { - int c; - for ( c=0; c< (D1->S)->nseq; c++) - { - struclist2nb ((D1->S)->name[c],(D1->S)->seq[c], (D1->S)->seq_comment[c], atof(action_list[1]),ACTION(2),ACTION(3) ); - } - myexit (EXIT_SUCCESS); - } - - - - else if ( strm(action, "seq2contacts")) - { - int z; - D1->S=swap_header (D1->S, D2->S); - for ( z=0; z< (D1->S)->nseq; z++)sprintf ( (D1->A)->name[z], "%s", (D1->S)->name[z]); - DST->S=seq2contacts (D1->S, atof (action_list[1])); - DST->A=copy_aln (D1->A, NULL); - thread_seq_struc2aln ( DST->A,DST->S); - for (z=0; z< (D1->S)->nseq; z++) - (DST->A)->S=D1->S; - - } - else if ( strm(action, "struc2contacts")) - { - char *seq; - if ( atof (action_list[3])>0) - { - seq=map_contacts (action_list[1], action_list[2], atof (action_list[3])); - fprintf ( stderr, "\n>%s %s\n%s",action_list[1], action_list[2],seq); - } - else - print_contacts (action_list[1], action_list[2], atof (action_list[3])); - - myexit (EXIT_SUCCESS); - } - else if ( strm(action, "treelist_prune")|| strm(action, "prune_treelist")) - { - Sequence *TS; - if (D2 && D2->S)TS=D2->S; - else TS=treelist2sub_seq((D1->S),ATOI_ACTION(1)); - treelist2prune_treelist ( D1->S,TS, NULL); - D1->A=seq2aln (D1->S, NULL, NO_PAD); - } - else if ( strm (action, "tree2unresolved_nodes")) - { - int ns; - int *l; - ns=tree2nseq (D1->T); - l=vcalloc (ns, sizeof (int)); - tree2nnode_unresolved (D1->T, l); - for ( a=0; aT=main_prune_tree ( D1->T, D2->S); - } - else if ( strm ( action, "tree2seq")) - { - D1->S=tree2seq(D1->T, NULL); - D1->A=seq2aln (D1->S, D1->A, 1); - (D1->A)->len_aln=1; - for ( a=0; a< (D1->A)->nseq; a++)sprintf ( (D1->A)->seq_al[a], "sequence"); - } - else if ( strm (action, "seq2dpatree")) - { - D1->T= seq2dpa_tree(D1->S,"ktup"); - } - else if ( strm (action, "tree2dpatree")) - { - D1->T= tree2dpa_tree(D1->T,(D2 && D2->A)?D2->A:D1->A, (n_actions==1)?"idmat":action_list[1]); - } - else if ( strm (action, "tree2group")) - { - vfclose (tree2group (D1->T, (tree2seq(D1->T,NULL)), atoi(action_list[1]), atoi(action_list[2]),(n_actions==4)?action_list[3]:NULL, stdout)); - myexit (EXIT_SUCCESS); - } - else if ( strm(action, "unroot")) - { - D1->T=unroot_tree(D1->T); - } - - - else if ( strm(action, "treelist2group")|| strm(action, "treelist2groups") ) - { - Sequence *TS; - - if (D2 && D2->S)TS=D2->S; - else TS=treelist2seq((D1->S)); - treelist2groups (D1->S, TS, ACTION(1), stdout); - myexit (EXIT_SUCCESS); - - // treelist2groups (D1->S,(D2)?D2->S:NULL, ACTION(1), stdout ); - //exit (EXIT_SUCCESS); - } - else if ( strm(action, "splits2tree")) - { - - D1->T=split2tree ((D2)?D2->T:NULL,D1->S, ACTION(1)); - - } - else if ( strm(action, "count_splits")) - { - - count_splits ((D2)?D2->T:NULL,D1->S, ACTION(1)); - exit (EXIT_SUCCESS); - } - else if ( strm(action, "count_groups")) - { - count_tree_groups (D1->S, ACTION(1)); - } - else if ( strm (action, "tree2dist")) - { - int ta, tb, ***td; - Sequence *TS; - - TS=(D2)?D2->S:NULL; - td=tree2dist (D1->T,TS, NULL); - if (!TS)TS=tree2seq(D1->T, NULL); - for (ta=0; tanseq; ta++) - { - fprintf ( stdout, "%-15s ",TS->name[ta]); - for ( tb=0; tbnseq; tb++) - { - int n=0; - if ( ACTION(1) && strm (ACTION(1), "length"))n=1; - - fprintf (stdout, " %4d", td [n][ta][tb]); - } - fprintf ( stdout, "\n"); - } - exit (EXIT_SUCCESS); - } - else if ( strm (action, "treelist2lti")) - { - Sequence *TS; - if (D2 && D2->S)TS=D2->S; - else TS=treelist2sub_seq((D1->S),ATOI_ACTION(2)); - treelist2lti (D1->S,TS, (int)ATOI_ACTION(1), stdout ); - exit (0); - } - else if ( strm (action,"treelist2frame")) - { - Sequence *TS; - if (D2 && D2->S)TS=D2->S; - else TS=treelist2sub_seq((D1->S),ATOI_ACTION(1)); - treelist2frame (D1->S, TS); - myexit (EXIT_SUCCESS); - } - - else if ( strm (action, "treelist2seq")) - { - D1->S=treelist2sub_seq (D1->S,ATOI_ACTION(1)); - D1->A=seq2aln(D1->S, NULL, 1); - } - else if ( strm (action, "treelist2leafgroup")) - { - treelist2leafgroup (D1->S, (D2)?D2->S:NULL, ACTION(1)); - exit (0); - } - else if ( strm(action, "treelist2splits")) - { - if (D1->T)D1->S=add_file2file_list ((D1->T)->file, NULL); - treelist2splits (D1->S, (D2)?D2->S:NULL); - } - - else if ( strm(action, "treelist2dmat")) - { - treelist2dmat (D1->S); - } - else if ( strm(action, "tree_cmp") || strm (action, "tree_compare")) - { - D1->T=main_compare_trees ( D1->T, D2->T, stdout); - } - else if ( strm (action, "tree_scan")) - { - D1->T=tree_scan (D1->A, D2->T, ACTION(1), ACTION(2)); - } - else if ( strm (action, "split_cmp")) - { - main_compare_splits (D1->T, D2->T, ACTION(1), stdout); - } - - else if ( strm(action, "node_sort")) - { - node_sort ( action_list[1], D1->T); - exit (EXIT_SUCCESS); - } - - else if ( strm ( action, "avg_bootstrap")) - { - display_avg_bootstrap (D1->T); - myexit (EXIT_SUCCESS); - } - - else if ( strm (action, "tree_cog_cmp")) - { - main_compare_cog_tree (D1->T,action_list[1]); - exit (EXIT_SUCCESS); - } - else if ( strm (action, "tree_aln_cmp")) - { - main_compare_aln_tree (D1->T, D2->A, stdout); - exit (EXIT_SUCCESS); - } - else if ( strm(action, "change_bootstrap")) - { - D1->T=reset_boot_tree ( D1->T, (n_actions>=2)?atoi(action_list[1]):0); - } - else if ( strm(action, "change_distances")) - { - D1->T=reset_dist_tree ( D1->T, (n_actions>=2)?atof(action_list[1]):0.00); - } - - else if ( strm(action, "aln2tree")) - { - D1->T=tree_compute (D1->A, n_actions-1, action_list+1); - } - else if ( strm(action, "similarities2tree")) - { - D1->T=similarities_file2tree (ACTION(1)); - } - - else if ( strm(action, "original_seqnos")) - { - (D1->A)->output_res_num=2; - } - - else if ( strm (action, "aln2pred")) - { - aln2pred (D1->A, D2->A, ACTION (1)); - exit (EXIT_SUCCESS); - } - else if ( strm(action, "evaluate")) - { - Alignment *A; - - - DST->A=copy_aln (D1->A, NULL); - DST->S=aln2seq(DST->A); - if (n_actions>1 && strm ( action_list[1], "categories")) - { - CL=declare_constraint_list ( DST->S,NULL, NULL, 0,NULL, read_matrice("pam250mt")); - DST->A= main_coffee_evaluate_output(DST->A, CL, "categories"); - } - else if (n_actions>1 && strm ( action_list[1], "sar")) - { - CL=declare_constraint_list ( DST->S,NULL, NULL, 0,NULL, read_matrice("pam250mt")); - DST->A= main_coffee_evaluate_output(DST->A, CL, "sar"); - } - else if (n_actions>1 && strstr ( action_list[1], "boxshade")) - { - char color_mode[1000]; - sprintf (color_mode,"boxshade_%d", atoi(ACTION2(2,"30"))); - CL=declare_constraint_list ( DST->S,NULL, NULL, 0,NULL, read_matrice("pam250mt")); - DST->A= main_coffee_evaluate_output(DST->A, CL, color_mode); - } - - else - { - CL=declare_constraint_list ( DST->S,NULL, NULL, 0,NULL, read_matrice((n_actions==1)?"pam250mt":action_list[1])); - DST->A= main_coffee_evaluate_output(DST->A, CL, "matrix"); - } - - DST->S=aln2seq ( DST->A); - - A=D1->A; - - sprintf ( A->name[A->nseq], "cons"); - sprintf ( A->seq_al[A->nseq], "%s", aln2cons_seq_mat (A, "idmat")); - - } - else if ( strm (action, "sp_evaluate")) - { - fprintf ( stdout, "SP Score: %.2f", sum_pair ((DST && DST->A)?DST->A:D1->A,ACTION(1),atoi(ACTION2(2,"0")),atoi(ACTION2(3,"0")))); - exit (EXIT_SUCCESS); - } - else if ( strm (action, "lat_evaluate")) - { - float score; - score=lat_sum_pair ( D1->A, action_list[1]); - fprintf ( stdout, "\nLAT_SCORE: %.2f", score); - exit (EXIT_SUCCESS); - - } - else if ( strm (action, "add_scale")) - { - D1->A=aln2scale (D1->A, ACTION(1)); - } - else if ( strm (action, "RNAfold_cmp")) - { - D1->A=compare_RNA_fold (D1->A, D2->A); - } - else if ( strm (action, "aln2alifold")) - { - D1->A=aln2alifold (D1->A); - D1->S=aln2seq ( D1->A); - } - - - else if ( strm (action, "add_alifold")) - { - D1->A=add_alifold2aln (D1->A, (D2)?D2->A:NULL); - - } - else if ( strm (action, "alifold2analyze")) - { - D1->A=alifold2analyze (D1->A, (D2)?D2->A:NULL, ACTION(1)); - D1->S=aln2seq(D1->A); - } - else if ( strm (action, "aln2conservation")) - { - D1->A=aln2conservation ( D1->A, ATOI_ACTION (1), ACTION (2)); - exit (EXIT_FAILURE); - } - else if ( strm (action, "aln2cons")) - { - char *cons_seq; - char *cons_name; - cons_name=vcalloc (100, sizeof (char)); - sprintf(cons_name, "%s", (n_actions<=2)?"Cons":action_list[2]); - cons_seq=aln2cons_seq_mat (D1->A, (n_actions==1)?"blosum62mt":action_list[1]); - free_aln (D1->A);free_sequence(D1->S, -1); - D1->S=fill_sequence_struc (1, &cons_seq, &cons_name); - /*keep the gaps*/ - (D1->S)->len[0]=strlen (cons_seq); sprintf ( (D1->S)->seq[0], "%s", cons_seq); - D1->A=seq2aln (D1->S, NULL, KEEP_GAP); - vfree (cons_name);vfree (cons_seq); - } - else if ( strm (action, "seq2filter")) - { - D1->S=seq2filter ( D1->S, atoi(action_list[1]), atoi(action_list[2])); - - } - else if ( strm (action, "aln2resindex")) - { - //-in: aln, file: ref_seq ref_res target_seq - //-in2 target sequences - aln2resindex (D1->A, (D2)?D2->A:NULL, stdout); - exit (EXIT_SUCCESS); - } - else if (strm(action, "keep_name")) - { - RAD->keep_name=1-RAD->keep_name; - } - else if (strm(action, "use_consensus") ||strm(action, "use_cons") ) - { - RAD->use_consensus=1-RAD->use_consensus; - } - else if ( strm(action, "ungap")) - { - seq2aln (D1->S, D1->A, 1); - } - else if ( strm2(action, "rmgap", "rm_gap")) - { - - ungap_aln_n (D1->A, (n_actions==1)?100:atoi(action_list[1])); - free_sequence ( D1->S, (D1->S)->nseq); - D1->S=aln2seq ( D1->A); - (D1->A)->S=D1->S; - } - else if ( strm(action, "rmgap_col")) - { - D1->A=remove_gap_column ( D1->A,action_list[1]); - } - else if ( strm(action,"random")) - { - - D1->A= make_random_aln(NULL,(n_actions==1)?1:atoi(action_list[1]),(n_actions==2)?100:atoi(action_list[2]),"acdefghiklmnpqrstvwy"); - - D1->S=aln2seq ( D1->A); - } - - else if ( strm(action, "landscape")) - { - - set_landscape_msa ((n_actions==1)?0:atoi(action_list[1])); - } - else if ( strm(action, "clean_maln")) - { - if ( !DST) - { - fprintf ( stderr,"\n[You Need an evaluation File: Change the output format][FATAL:%s]\n", PROGRAM); - myexit(EXIT_FAILURE); - } - (DST->A)=aln2number (DST->A); - D1->A=clean_maln(D1->A, DST->A,(n_actions==1)?1:atoi(action_list[1]),(n_actions==1)?1:atoi(action_list[2])); - } - else if ( strm (action, "extract")) - { - - COOR=get_pir_sequence (RAD->coor_file, NULL); - D1->S=extract_sub_seq ( COOR, D1->S); - free_aln (D1->A); - D1->A=declare_Alignment(D1->S); - seq2aln (D1->S, D1->A, RAD->rm_gap); - free_sequence (COOR, COOR->nseq); - } - else if ( strm (action, "reorder_column")) - { - - - - Alignment *RO1, *RO2; - Sequence *OUT_S; - int s; - - RO1=rotate_aln (D1->A,NULL); - if (ACTION(1) && strm (ACTION(1), "tree")) - { - D1->T=tree_compute (RO1,n_actions-2, action_list+2); - OUT_S=tree2seq(D1->T, NULL); - RO1=reorder_aln(RO1, OUT_S->name, OUT_S->nseq); - } - else if ( ACTION(1) && strm (ACTION(1), "random")) - { - RO1=reorder_aln ( RO1, NULL, RO1->nseq); - } - - RO2=rotate_aln (RO1, NULL); - for (s=0; s< RO2->nseq; s++) - sprintf ( RO2->name[s], "%s", (D1->A)->name[s]); - free_aln (RO1); - free_aln (D1->A); - D1->A=RO2; - D1->S=aln2seq(D1->A); - } - - else if ( strm (action, "reorder")) - { - - if ( n_actions==2 && strm (action_list[1], "random")) - { - D1->A=reorder_aln ( D1->A, NULL, (D1->A)->nseq); - } - else if (n_actions==2 && strm (action_list[1], "scramble")) - { - D1->A=aln2scramble_seq(D1->A); - } - - else if ( n_actions==2 && strm (action_list[1], "tree")) - { - - OUT_S=tree2seq (D2->T, NULL); - D1->A=reorder_aln(D1->A, OUT_S->name, OUT_S->nseq); - free_sequence (D1->S,(D1->S)->nseq); - D1->S=aln2seq (D1->A); - } - else - { - (D2->A)->S=aln2seq (D2->A); - (D1->A)->S=aln2seq (D1->A); - OUT_S=trim_aln_seq_name(D2->A, D1->A); - D1->A=reorder_aln(D1->A, OUT_S->name, OUT_S->nseq); - free_sequence (D1->S,(D1->S)->nseq); - D1->S=aln2seq (D1->A); - } - } - else if ( strm (action, "cat_aln")) - { - /*D1->A=aln_cat ( D1->A, D2 ->A);*/ - - if (D2 && D2->A && !ACTION(1)) - D1->A=concatenate_aln (D1->A, D2->A, ACTION(1)); - else if (ACTION(1) && is_aln(ACTION(1))) - { - Alignment *B; - int n=1; - - while (ACTION(n)) - { - - B=main_read_aln (ACTION(n), NULL); - D1->A=concatenate_aln (D1->A, B, NULL); - n++; - } - D1->S=aln2seq(D1->A); - } - - else - { - Alignment *A, *B; - - A=main_read_aln ((D1->A)->name[0], NULL); - - for ( a=1; a<(D1->A)->nseq; a++) - { - B=main_read_aln ((D1->A)->name[a], NULL); - A=concatenate_aln (A, B, ACTION(1)); - - } - D1->A=A; - D1->S=aln2seq(D1->A); - } - } - - else if ( strm ( action, "msalist2cat_pwaln")) - { - int a, b, c; - int sim, min, max; - - if (n_actions!=3) - { - min=0; - max=100; - } - else - { - min=atoi(action_list[1]); - max=atoi(action_list[2]); - } - - fprintf ( stdout, ">A\n"); - for (a=0;a<(D1->S)->nseq; a++) - { - Alignment *A; - HERE ("process %s", (D1->S)->name[a]); - A=main_read_aln((D1->S)->name[a],NULL); - for (b=0; bnseq-1; b++) - { - for ( c=b+1; cnseq; c++) - { - sim=get_seq_sim (A->seq_al[b], A->seq_al[c], "-", ""); - if (sim>=min && sim<=max)fprintf (stdout, "xxx%s", A->seq_al[b]); - } - } - free_aln (A); - } - fprintf ( stdout, "\n>B\n"); - for (a=0;a<(D1->S)->nseq; a++) - { - Alignment *A; - HERE ("process %s", (D1->S)->name[a]); - A=main_read_aln((D1->S)->name[a],NULL); - for (b=0; bnseq-1; b++) - { - for ( c=b+1; cnseq; c++) - { - sim=get_seq_sim (A->seq_al[b], A->seq_al[c], "-", ""); - if (sim>=min && sim<=max)fprintf (stdout, "xxx%s", A->seq_al[c]); - } - } - free_aln (A); - } - - fprintf ( stdout, "\n"); - exit (EXIT_SUCCESS); - } - - else if ( strm (action, "collapse_tree")) - { - D1->T=tree2collapsed_tree (D1->T, n_actions-1, action_list+1); - } - else if ( strm (action, "collapse_aln")) - { - D1->A=aln2collapsed_aln (D1->A, n_actions-1, action_list+1); - } - else if ( strm (action, "extract_aln")) - { - D1->A=aln2sub_aln_file (D1->A, n_actions-1, action_list+1); - myexit (EXIT_SUCCESS); - } - - - - else if ( strm (action, "remove_aa")) - { - int pos,len, n; - pos=atoi(action_list[1]); - len=atoi(action_list[2]); - n=atoi (action_list[3]); - if ( atoi (action_list[4])==1)len=-len; - if (pos && n>1) - { - fprintf ( stderr, "\nWARNING: rm_aa, position (pos) and iteration number (n) simulatneously defined. Iteration number reset to 1 [%s]\n", PROGRAM); - n=1; - } - for ( a=0; a< n; a++) - D1->A=probabilistic_rm_aa (D1->A, pos, len); - } - else if ( strm (action, "remove_nuc")) - { - int pos; - pos=atoi(action_list[1]); - - if ( pos>3 || pos<1) - printf_exit (EXIT_FAILURE, stderr, "Remove_nuc: indicate a number between 1 and 3\n"); - - pos--; - for ( c=0,a=0; a<(D1->A)->len_aln; a++, c++) - { - if (c==3)c=0; - for (b=0; b<(D1->A)->nseq; b++) - { - if (c==pos) - { - (D1->A)->seq_al[b][a]='-'; - } - } - } - - D1->S=aln2seq (D1->A); - } - - else if (strm ( action, "conserved_positions")) - { - Alignment *A; - int a, b, c; - int *cache=NULL; - - - A=D1->A; - for ( a=0; a< A->nseq && !cache; a++) - { - if ( strm (action_list[1], A->name[a])) - { - cache=vcalloc ( A->len_aln+1, sizeof (int)); - for ( c=0,b=0; blen_aln; b++) - { - if ( is_gap (A->seq_al[a][b]))cache[b]=-1; - else cache[b]=++c; - } - } - } - - for ( a=0; a< A->len_aln; a++) - { - r1=A->seq_al[0][a]; - if ( is_gap(r1))continue; - for ( c=0,b=0; bnseq; b++) - { - r2=A->seq_al[b][a]; - c+=(r1==r2)?1:0; - } - if ( (c*100)/A->nseq>=atoi(action_list[2])) - fprintf ( stdout, "COL: %d Res: %c %s %d\n", a+1, r1, action_list[1], cache[a]+atoi(action_list[3])); - } - exit (EXIT_FAILURE); - } - else if (strm ( action, "extract_block") ) - { - - BUF=copy_aln (D1->A, NULL); - if ( check_file_exists(action_list[1])) - BUF=extract_aln3(BUF,action_list[1]); - else - BUF=extract_aln2(BUF,atoi(action_list[2]),atoi(action_list[3]),action_list[1]); - D1->A=copy_aln (BUF,D1->A); - - } - else if ( strm ( action, "extract_pos_list")) - { - D1->A=alnpos_list2block (D1->A, n_actions-1, action_list+1); - } - else if ( strm ( action, "seq2msa")) - { - D1->A=simple_progressive_aln ( D1->S, NULL, NULL, action_list[1]); - } - else if ( strm ( action, "realign_block") ) - { - D1->A=realign_block ( D1->A, atoi (action_list[1]), atoi (action_list[2]), (n_actions==4)?action_list[3]:NULL); - } - else if ( strm (action, "extract_seq")) - { - int is_file; - if ( check_file_exists (action_list[1])) - { - is_file=1; - BUFS=main_read_seq (action_list[1]); - action_list=BUFS->name; - n_actions=BUFS->nseq; - } - else - { - is_file=0; - action_list++; - n_actions--; - } - - for ( a=0; a< n_actions;) - { - s=action_list[a]; - - if ( n_actions==1 || is_file==1) - { - start=1; - end=0; - a+=1; - } - else - { - - start=(strm2 (s,"#","*"))?1:(atoi(action_list[a+1])); - end= (strm2 (action_list[a+2],"#","*"))?0:(atoi(action_list[a+2])); - a+=3; - } - - if ( strm2 (s, "#", "*")) - { - OUT_S=extract_one_seq((D1->A)->name[0],start, end, D1->A, RAD->keep_name); - for (b=1; b< (D1->A)->nseq; b++) - { - NS=extract_one_seq((D1->A)->name[b],start, end, D1->A, RAD->keep_name); - if (count_n_res_in_array(NS->seq[0], -1)) - OUT_S=add_sequence ( NS,OUT_S, 0); - } - } - else - { - if ( a==1)OUT_S=extract_one_seq(s,start, end, D1->A, RAD->keep_name); - else - { - NS=extract_one_seq(s,start, end, D1->A, RAD->keep_name); - OUT_S=add_sequence ( NS,OUT_S, 0); - } - } - } - D1->S=OUT_S; - free_aln (D1->A); - D1->A=declare_Alignment(D1->S); - seq2aln (D1->S, D1->A, RAD->rm_gap); - } - - else if ( strm (action, "extract_seq_list")) - { - if ( check_file_exists (action_list[1])) - { - - BUFS=main_read_seq (action_list[1]); - action_list=BUFS->name; - n_actions=BUFS->nseq; - } - else - { - action_list++; - n_actions--; - } - - for ( a=0; a< n_actions;a++) - { - NS=extract_one_seq(action_list[a],1,0, D1->A, KEEP_NAME); - OUT_S=add_sequence ( NS,OUT_S, 0); - } - - D1->S=OUT_S; - free_aln (D1->A); - D1->A=declare_Alignment(D1->S); - seq2aln (D1->S, D1->A, RAD->rm_gap); - } - else if ( strm (action, "remove_seq") || strm (action, "rm_seq")) - { - char *buf; - char **list; - int n; - int l; - - list=declare_char ((D1->S)->nseq, 200); - - buf=vcalloc ((D1->S)->max_len+1, sizeof (char)); - for ( n=0,a=0; a< (D1->A)->nseq; a++) - { - - sprintf (buf, "%s", (D1->S)->seq[a]); - ungap (buf); - l=strlen(buf); - - for (c=1, b=1; b< n_actions; b++) - { - if ( strm (action_list[b], (D1->S)->name[a])){(D1->S)->seq[a]=NULL;break;} - else if ( strm (action_list[b], "empty") && l==0) - { - fprintf ( stderr, "WARNING: Sequence %s does not contain any residue: automatically removed from the set [WARNING:%s]\n",(D1->S)->name[a], PROGRAM); - (D1->S)->seq[a]=NULL;break; - } - else if ( strm (action_list[b], "unique")) - { - if ( name_is_in_list ((D1->S)->name[a], list,n, 100)!=-1) - { - (D1->S)->seq[a]=NULL;break; - } - else - { - sprintf ( list[n++], "%s", (D1->S)->name[a]); - } - } - } - } - D1->S=duplicate_sequence (D1->S); - free_aln (D1->A); - free_char ( list, -1); - D1->A=declare_Alignment(D1->S); - seq2aln (D1->S, D1->A, RAD->rm_gap); - } - - else if ( strm (action, "aln2overaln")|| strm (action,"overaln_param")) - { - //mode (lower|number|uanlign) Penalty (0-100) Thresold (0-9) - int p1,p2,p3,f, t; - char *s; - int eb=0; - char clean_mode[100]; - OveralnP *F; - - F=vcalloc (1, sizeof (OveralnP)); - if ( D2 && D2->A) - { - D1->A=mark_exon_boundaries (D1->A, D2->A); - eb=1; - } - else if ( get_string_variable ("exon_boundaries")) - { - Sequence *S; - Alignment *EB; - EB=seq2aln(S=main_read_seq(s),NULL, 0); - D1->A=mark_exon_boundaries (D1->A, EB); - free_sequence (S, S->nseq); free_aln (EB); - eb=1; - } - - - if (ACTION(1)==NULL)sprintf (F->mode, "lower"); - else if (strstr (ACTION(1), "h")) - { - fprintf ( stdout, "aln2unalign lower|number|unalign F P1 P2 P3 T\n"); - exit (EXIT_SUCCESS); - } - else sprintf (F->mode, "%s", ACTION(1)); - - F->t=ATOI_ACTION(2); - F->f=ATOI_ACTION(3); - F->p1=ATOI_ACTION(4); - F->p2=ATOI_ACTION(5); - F->p3=ATOI_ACTION(6); - F->p3=ATOI_ACTION(7); - - if (int_variable_isset ("overaln_target"))f=get_int_variable ("overaln_target"); - if (int_variable_isset ("overaln_threshold"))t=get_int_variable ("overaln_threshold"); - if (eb)sprintf (F->model, "fsa2"); - else sprintf (F->model, "fsa1"); - - D1->A=aln2clean_pw_aln (D1->A, F); - - } - else if ( strm (action,"aln2unalign")) - { - Alignment *SA; - Sequence *SS; - - SS=aln2seq(SA); - SA=copy_aln (D1->A, NULL); - thread_seq_struc2aln (SA, SS); - D1->A=unalign_aln (D1->A,SA, ATOI_ACTION(1)); - D1->S=aln2seq ( D1->A); - } - else if ( strm (action, "clean_cdna")) - { - Alignment *A; - A=D1->A; - for (a=0; a< A->nseq; a++) - { - char *d, *buf, f; - - d=A->seq_al[a]; - f=get_longest_frame (d, 3); - buf=vcalloc ( strlen (d)+1, sizeof (char)); - sprintf (buf, "%s", d+f); - sprintf (d, "%s", buf); - vfree (buf); - } - } - else if ( strm (action, "clean_cdna2")) - { - D1->A=clean_cdna_aln ( D1->A); - free_sequence ( D1->S, (D1->S)->nseq); - D1->S=aln2seq ( D1->A); - } - else if ( strm (action, "aln2short_aln")) - { - D1->A=aln2short_aln (D1->A, action_list[1], action_list[2], atoi(action_list[3])); - free_sequence ( D1->S, (D1->S)->nseq); - D1->S=aln2seq ( D1->A); - } - else if ( strm ( action, "complement")) - { - D1->A=complement_aln (D1->A); - free_sequence ( D1->S, (D1->S)->nseq); - D1->S=aln2seq ( D1->A); - } - else if ( strm ( action, "translate")) - { - D1->A=translate_dna_aln( D1->A,(n_actions==1)?0:atoi(action_list[1])); - free_sequence ( D1->S, (D1->S)->nseq); - D1->S=aln2seq ( D1->A); - } - else if (strm2 ( action, "back_translate","backtranslate")) - { - D1->A=back_translate_dna_aln( D1->A); - free_sequence ( D1->S, (D1->S)->nseq); - D1->S=aln2seq ( D1->A); - } - else if (strm ( action, "rotate")) - { - D1->A=rotate_aln( D1->A, action_list[1]); - free_sequence ( D1->S, (D1->S)->nseq); - D1->S=aln2seq ( D1->A); - } - else if (strm ( action, "invert")) - { - D1->A=invert_aln( D1->A); - free_sequence ( D1->S, (D1->S)->nseq); - D1->S=aln2seq ( D1->A); - } - else if (strm ( action, "code_dna_aln")) - { - D1->A=code_dna_aln( D1->A); - free_sequence ( D1->S, (D1->S)->nseq); - D1->S=aln2seq ( D1->A); - } - - else if ( strm ( action, "mutate")) - { - D1->A=mutate_aln( D1->A,(n_actions==1)?"0":action_list[1]); - free_sequence ( D1->S, (D1->S)->nseq); - D1->S=aln2seq (D1->A); - } - else if ( strm ( action, "thread_profile_on_msa")) - { - (D1->A)->S=NULL; - D1->A=thread_profile_files2aln (D1->A, action_list[1], NULL); - D1->S=aln2seq(D1->A); - } - else if ( strm ( action, "thread_dna_on_prot_aln")) - { - D1->A=thread_dnaseq_on_prot_aln (D1->S, D2->A); - free_sequence (D1->S,(D1->S)->nseq); - D1->S=aln2seq (D1->A); - } - else if ( strm ( action, "thread_struc_on_aln")) - { - thread_seq_struc2aln ( D2->A, D1->S); - D1->A=copy_aln(D2->A, D1->A); - free_sequence ( D1->S, (D1->S)->nseq); - D1->S=aln2seq (D1->A); - } - else if ( strm (action, "sim_filter")) - { - D1->A=sim_filter (D1->A, action_list[1], ACTION (2)); - free_sequence (D1->S,(D1->S)->nseq); - D1->S=aln2seq (D1->A); - } - else if ( strm (action, "seq2blast")) - { - D1->A=seq2blast (D1->S); - free_sequence (D1->S,(D1->S)->nseq); - D1->S=aln2seq (D1->A); - } - - else if ( strm (action, "trim")) - { - D1->A=simple_trimseq (D1->A,(D2)?D2->A:NULL, action_list[1], ACTION (2)); - - free_sequence (D1->S,(D1->S)->nseq); - D1->S=aln2seq (D1->A); - } - - else if (strm ( action, "trimTC")) - { - value=(n_actions==1)?10:atoi(action_list[1]); - - D1->A=tc_trimseq(D1->A,D1->S,action_list[1]); - free_sequence (D1->S,(D1->S)->nseq); - D1->S=aln2seq (D1->A); - } - else if (strm ( action, "trimTC2")) - { - char *group_file; - Alignment *B=NULL; - char trim_mode[100]; - if ( n_actions==1 || !(strm (action_list[1], "NSEQ") ||strm (action_list[1], "MINID")) ) - { - fprintf ( stderr, "\nTrimTC2 ()\n"); - myexit (EXIT_FAILURE); - } - sprintf (trim_mode, "%s", action_list[1]);action_list+=2; n_actions-=2; - - if ( strm ( trim_mode, "NSEQ")) - { - group_file=tree2Ngroup( (D1)?D1->A:NULL, (D2)?D2->T:NULL, atoi (action_list[0]), vtmpnam(NULL), (n_actions==1)?"idmat":action_list[1]); - } - else - { - group_file=tree2Ngroup( (D1)?D1->A:NULL, (D2)?D2->T:NULL, -1*atoi (action_list[0]), vtmpnam(NULL), (n_actions==1)?"idmat":action_list[1]); - } - - B=copy_aln (D1->A, B); - B=aln2sub_aln_file (B,1,&group_file); - B=aln2sub_seq (B, 1, &group_file); - D1->A=extract_sub_aln2 (D1->A, B->nseq, B->name); - } - else if ( strm (action, "chain")) - { - D1->A=seq2seq_chain (D1->A,D2->A, ACTION(2)); - } - - - else if (strm ( action, "master_trim")) - { - value=(n_actions==1)?10:atoi(action_list[1]); - - D1->A=master_trimseq(D1->A,D1->S,action_list[1]); - free_sequence (D1->S,(D1->S)->nseq); - D1->S=aln2seq (D1->A); - } - else if ( strm (action, "force_aln")) - { - char ***rlist=NULL; - int count=0; - - if ( n_actions==2) - { - if (!is_lib_02(action_list[1])) - { - fprintf ( stderr, "\nERROR: force_aln requires files in TC_LIB_FORMAT_02 [FATAL:%s]", PROGRAM); - myexit (EXIT_FAILURE); - } - else - rlist=file2list (action_list[1], " "); - } - else - { - rlist=declare_arrayN(3, sizeof (char),3,7, 10); - - strcat (rlist[1][1],action_list[1]);strcat (rlist[1][3],action_list[2]); - strcat (rlist[1][4],action_list[3]);strcat (rlist[1][6],action_list[4]); - sprintf ( rlist[2][0], "-1"); - } - count=1; - while (rlist[count] && atoi(rlist[count][0])!=-1) - { - char st1[100], st2[100], st3[100], st4[100]; - - sprintf ( st1, "%s", rlist[count][1]);sprintf ( st2, "%s", rlist[count][3]); - sprintf ( st3, "%s", rlist[count][4]);sprintf ( st4, "%s", rlist[count][6]); - fprintf ( stderr, "\nFORCE: %s %s %s %s", st1, st2, st3, st4); - - if (is_number (st1))s1=atoi (st1)-1; - else s1=name_is_in_list (st1,(D1->A)->name, (D1->A)->nseq, 100); - if ( s1<0 || s1>= (D1->A)->nseq)crash ("wrong sequence index"); - r1=atoi (st2)-1; - - if (is_number (st3))s2=atoi (st3)-1; - else s2=name_is_in_list (st3,(D1->A)->name, (D1->A)->nseq, 100); - if ( s2<0 || s2>= (D1->A)->nseq)crash ("wrong sequence index"); - r2=atoi (st4)-1; - - (D1->A)=add_constraint2aln ((D1->A), s1, r1, s2, r2); - count++; - } - fprintf ( stderr, "\n"); - free_arrayN((void*)rlist,3); - } - - else if (strm ( action, "grep")) - { - D1->A=grep_seq (D1->A, ACTION(1),ACTION(2), ACTION(3)); - if (D1->A==NULL) myexit (EXIT_SUCCESS); - else D1->S=aln2seq (D1->A); - } - - else if (strm (action, "find")) - { - int r, l; - char *search_string; - - search_string=vcalloc ( 30, sizeof (char)); - if ( strm (action_list[1], "lower"))sprintf ( search_string, "abcdefghijklmnopqrstuvwxyz"); - else if ( strm ( action_list[1], "upper"))sprintf ( search_string, "ABCDEFGHIJKLMNOPQRSTUVWXYZ"); - else - { - vfree (search_string);search_string=vcalloc ( strlen (action_list[1])+1, sizeof (char)); - sprintf (search_string, "%s", action_list[1]); - } - - for (a=0; a<(D1->A)->nseq; a++) - for ( l=0,b=0; b< (D1->A)->len_aln; b++) - { - r=(D1->A)->seq_al[a][b]; - l+=!is_gap(r); - if ( r!='\0' && strrchr (search_string, r)) - { - /*fprintf ( stdout, "%-15s res %c alnpos %4d seqpos %4d\n", (D1->A)->name[a], r, b+1, l);*/ - fprintf ( stdout, "%s %d %d\n", (D1->A)->name[a], l, l+1); - } - } - myexit (EXIT_SUCCESS); - } - else if ( strm (action, "merge_annotation")) - { - D1->A=merge_annotation (D1->A, DST?DST->A:NULL, ACTION(1)); - D1->S=aln2seq (D1->A); - } - else if ( strm (action, "color_residue")) - { - int i; - Alignment *A; - A=D1->A; - - DST->A=copy_aln (D1->A, NULL); - DST->S=aln2seq (DST->A); - for (a=0; a< (DST->S)->nseq; a++)ungap ((DST->S)->seq[a]); - - if (n_actions>2) - { - for (a=1; aA)->name, (D1->A)->nseq, 100); - if (i!=-1) - { - (DST->S)->seq[i][atoi(action_list[a+1])-1]='0'+atoi(action_list[a+2])-1; - } - else fprintf (stderr, "\nWARNING: Could not find Sequence %s", action_list[a]); - } - } - else - { - char name[1000]; - int pos, val; - FILE *fp; - - fp=vfopen (action_list[1], "r"); - while (fscanf (fp, "%s %d %d\n", name, &pos, &val)==3) - { - - i=name_is_in_list(name, (D1->A)->name, (D1->A)->nseq, 100); - if (i!=-1)(DST->S)->seq[i][pos-1]='0'+val; - else fprintf (stderr, "\nWARNING: Could not find Sequence %s", action_list[a]); - } - vfclose (fp); - } - DST->A=seq2aln (DST->S, NULL, 1); - } - else if ( strm (action, "edit_residue")) - { - FILE *fp; - int i, pos; - int **p; - char mod[100], name[100]; - Alignment *A; - - A=D1->A; - - p=aln2inv_pos (A); - if (n_actions>2) - { - for (a=1; aA)->name, (D1->A)->nseq, 100); - if (i!=-1) - { - pos=atoi(action_list[a+1]); - - pos=p[i][pos]-1; - sprintf (mod, "%s", action_list[a+2]); - if ( strm (mod, "upper"))(D1->A)->seq_al[i][pos]=toupper((D1->A)->seq_al[i][pos]); - else if ( strm (mod, "lower"))(D1->A)->seq_al[i][pos]=tolower((D1->A)->seq_al[i][pos]); - else (D1->A)->seq_al[i][pos]=mod[0]; - } - else fprintf (stderr, "\nWARNING: Could not find Sequence %s", action_list[a]); - - } - } - else - { - fp=vfopen (action_list[1], "r"); - while (fscanf (fp, "%s %d %s\n", name, &pos, mod)==3) - { - - i=name_is_in_list(name, (D1->A)->name, (D1->A)->nseq, 100); - if (i!=-1) - { - pos=p[i][pos]-1; - if ( strm (mod, "upper"))(D1->A)->seq_al[i][pos]=toupper(A->seq_al[i][pos]); - else if ( strm (mod, "lower"))A->seq_al[i][pos]=tolower(A->seq_al[i][pos]); - else A->seq_al[i][pos]=mod[0]; - } - else fprintf(stderr, "\nWARNING: Could not find Sequence %s", action_list[a]); - } - vfclose (fp); - } - D1->S=aln2seq (D1->A); - } - else if ( strm (action, "clean_flag")) - { - clean_flag=1-clean_flag; - } - else if ( strm (action, "aln2case")) - { - D1->A=aln2case_aln (D1->A, ACTION(1), ACTION(2)); - D1->S=aln2seq(D1->A); - } - - else if ( strm5 (action, "convert","upper","lower", "keep", "switchcase")) - { - b=1; - - if ( n_actions>1 && is_number (action_list[b])) - { - lower_value=upper_value=atoi(action_list[b++]); - } - else if ( n_actions>1 && strm (action_list[b], "gap")) - { - DST=vcalloc (1,sizeof(Sequence_data_struc)); - DST->A=aln2gap_cache (D1->A,0); - lower_value=0; - upper_value=0; - b++; - } - else if (n_actions>1 && action_list[b] && action_list[b][0]=='[') - - { - lower_value=atoi(strtok (action_list[b]+1, "-[]")); - upper_value=atoi(strtok (NULL, "-[]")); - - b++; - } - else - { - lower_value=upper_value=-1; - } - - if ( n_actions >b ||strm (action, "keep") ) - { - if ( !RAD->symbol_list)RAD->symbol_list=declare_char (STRING, STRING); - RAD->n_symbol=0; - if ( strm (action, "keep") )sprintf ( RAD->symbol_list[RAD->n_symbol++], "#-"); - else - { - for (a=b; a< n_actions; a++) - { - sprintf ( RAD->symbol_list[RAD->n_symbol], "%s", action_list[a]); - RAD->n_symbol++; - } - } - } - - for ( value=0; value<=9; value++) - { - if ( lower_value==-1)value=-1; - - if ( (value>=lower_value && value<=upper_value)|| value==-1) - { - if (strm(action,"convert")) D1->A=filter_aln_convert (D1->A, DST?DST->A:NULL,RAD->use_consensus,value,RAD->n_symbol, RAD->symbol_list); - else if (strm(action,"upper"))D1->A=filter_aln_lower_upper (D1->A, DST?DST->A:NULL,RAD->use_consensus,value); - else if (strm(action,"lower"))D1->A=filter_aln_upper_lower (D1->A, DST?DST->A:NULL,RAD->use_consensus,value); - else if (strm(action,"switchcase"))D1->A=filter_aln_switchcase (D1->A, DST?DST->A:NULL,RAD->use_consensus,value); - } - else - { - if (strm(action,"keep")) D1->A=filter_aln_convert (D1->A, DST?DST->A:NULL,RAD->use_consensus,value,RAD->n_symbol, RAD->symbol_list); - } - if (value==-1)break; - - } - - /*free_sequence (D1->S,(D1->S)->nseq);*/ - if (!D1->S)D1->S=aln2seq (D1->A); - } - else if ( strm ( action, "count_pairs")) - { - int a, b,c,v, **matrix; - Alignment *A; - matrix=declare_int (300,300); - A=D1->A; - for ( a=0; a< A->nseq-1; a++) - for (b=0; b< A->nseq; b++) - for (c=0; clen_aln; c++) - matrix[(int)A->seq_al[a][c]][(int)A->seq_al[b][c]]++; - for ( a=0; a<255; a++) - for ( b=a; b<256; b++) - { - v=matrix[a][b]+matrix[b][a]; - if (v)fprintf ( stdout, "\n%c %c %d", a, b, v); - } - exit (EXIT_SUCCESS); - } - else if ( strm (action, "count_misc")) - { - count_misc (D1->A, (!D2)?NULL:D2->A); - } - else if ( strm (action, "count")) - { - b=1; - if ( n_actions>1 && is_number (action_list[b])) - { - lower_value=upper_value=atoi(action_list[b++]); - } - else if (n_actions>1 && action_list[b] && action_list[b] && action_list[b][0]=='[') - - { - lower_value=atoi(strtok (action_list[b]+1, "-[]")); - upper_value=atoi(strtok (NULL, "-[]")); - - b++; - } - else - { - lower_value=upper_value=-1; - } - if ( n_actions >b) - { - if ( !RAD->symbol_list)RAD->symbol_list=declare_char (STRING, STRING); - RAD->n_symbol=0; - for (a=b; a< n_actions; a++) - { - sprintf ( RAD->symbol_list[RAD->n_symbol], "%s", action_list[a]); - RAD->n_symbol++; - } - } - for ( value=lower_value; value<=upper_value; value++) - { - count_table=count_in_aln (D1->A, DST?DST->A:NULL,value,RAD->n_symbol, RAD->symbol_list, count_table); - } - for ( a=0; an_symbol; a++) - { - fprintf ( stdout, "%s %d\n", RAD->symbol_list[a], count_table[a]); - } - free_sequence (D1->S,(D1->S)->nseq); - D1->S=aln2seq (D1->A); - vfree(count_table); - exit(EXIT_SUCCESS); - } - else if ( strm (action, "msa_weight")) - { - int random_value; - char command [LONG_STRING]; - char aln_name[FILENAMELEN]; - char tree_name[FILENAMELEN]; - char dist_matrix_name[FILENAMELEN]; - char weight_name[FILENAMELEN]; - char method_4_msa_weights[1000]; - - if ( n_actions==1) - { - fprintf ( stderr, "\nError: msa_weight requires a weight_method"); - } - - sprintf ( method_4_msa_weights, "%s", (get_env_variable ("METHOD_4_MSA_WEIGHTS",NO_REPORT))?get_env_variable ("METHOD_4_MSA_WEIGHTS",NO_REPORT):METHOD_4_MSA_WEIGHTS); - - /*1 Computation of the tree and the distance matrix*/ - random_value=addrand ((unsigned long) 100000)+1; - sprintf (aln_name, "%d.aln", random_value); - sprintf (tree_name, "%d.ph", random_value); - sprintf (dist_matrix_name, "%d.dst", random_value); - sprintf (weight_name, "%d.weight", random_value); - output_fasta_aln (aln_name, D1->A); - - sprintf ( command, "clustalw -infile=%s -tree -outputtree=dist %s", aln_name, TO_NULL_DEVICE); - my_system ( command); - sprintf ( command, "%s -method %s -aln %s -tree %s -dmatrix %s -weightfile %s %s",method_4_msa_weights, action_list[1],aln_name, tree_name, dist_matrix_name,weight_name, TO_NULL_DEVICE); - my_system ( command); - - (D1->A)->S=aln2seq (D1->A); - ((D1->A)->S)->W=read_seq_weight ( (D1->A)->name, (D1->A)->nseq,weight_name); - vremove (weight_name); - vremove (aln_name); - vremove (tree_name); - vremove (dist_matrix_name); - } - else if ( strm (action, "pavie_seq2random_seq")) - { - D1->S=pavie_seq2random_seq (D1->S, action_list[1]); - D1->A=seq2aln (D1->S,NULL,1); - } - else if ( strm ( action, "pavie_seq2noisy_seq")) - { - /* ()*/ - - D1->S=pavie_seq2noisy_seq (D1->S, atoi(action_list[1]),ACTION(2)); - D1->A=seq2aln (D1->S,NULL,1); - } - else if ( strm (action, "pavie_seq2pavie_mat")) - { - - pavie_seq2trained_pavie_mat ( D1->S, (n_actions==2)?action_list[1]:NULL); - myexit (EXIT_SUCCESS); - } - else if ( strm (action, "pavie_seq2pavie_aln")) - { - - pavie_seq2pavie_aln ( D1->S, action_list[1], ACTION(2)); - myexit (EXIT_SUCCESS); - } - else if ( strm (action, "pavie_seq2pavie_dm")) - { - if (strstr (ACTION2(2,""), "_MSA_")) - D1->S=aln2seq_main(D1->A, KEEP_GAP); - - - pavie_seq2pavie_aln ( D1->S, action_list[1],(n_actions==3)?action_list[2]:"_MATDIST_"); - myexit (EXIT_SUCCESS); - } - else if ( strm (action, "pavie_seq2pavie_msa")) - { - D1->A=pavie_seq2pavie_msa ( D1->S, action_list[1], (n_actions==3)?action_list[2]:NULL); - } - else if ( strm (action, "pavie_seq2pavie_tree")) - { - D1->T=pavie_seq2pavie_tree ( D1->S, action_list[1], (n_actions==3)?action_list[2]:NULL); - } - else if ( strm (action, "pavie_seq2pavie_sort")) - { - D1->A=pavie_seq2pavie_sort ( D1->S, action_list[1], (n_actions==3)?action_list[2]:NULL); - } - - else if ( strm (action, "aln2mat_diaa")) - { - aln2mat_diaa (D1->S); - } - else if ( strm (action, "aln2mat")) - { - aln2mat(D1->S); - } - - else if ( strm (action, "seq2latmat")) - { - seq2latmat ( D1->S, "stdout"); - myexit (EXIT_SUCCESS); - } - else if ( strm (action , "rm_target_pdb")) - { - int i, j; - char *buf; - - for (i=0; i< (D1->A)->nseq; i++) - { - j=1;buf=(D1->A)->name[i]; - while (buf[j]!='_' && buf[j-1]!='_' && buf[j]!='\0')j++; - buf[j]='\0'; - } - } - else if ( strm ( action, "mat2cmp")) - { - double *r; - r=mat2cmp (D1->M, D2->M); - fprintf ( stdout, "\nMATRIX COMPARISON: R=%.3f R2=%.3f On %d pairs of values\n", (float)r[0], (float)r[1], (int)r[2]); - myexit (EXIT_SUCCESS); - } -//Special modes - else if ( strm ( action, "overaln_list")) - { - float *re, tre=0,sn, tsn=0, sp, tsp=0; - int p1,p2,p3, t, f; - FILE *fp; - char fname [100]; - Alignment **LA; - Alignment **LB; - - HERE ("F P1 P2 P3 T"); - - t=ATOI_ACTION(1); - f=ATOI_ACTION(2); - p1=ATOI_ACTION(3); - p2=ATOI_ACTION(4); - p3=ATOI_ACTION(5); - - - - LA=vcalloc ((D1->A)->nseq, sizeof (Alignment*)); - LB=vcalloc ((D2->A)->nseq, sizeof (Alignment*)); - for (a=0; a<(D1->A)->nseq; a++) - { - LA[a]=main_read_aln ((D1->A)->name[a], NULL); - LB[a]=main_read_aln ((D2->A)->name[a], NULL); - } - - for ( a=0; a<(D1->A)->nseq; a++) - { - Alignment *A, *B; - A=LA[a]; - B=LB[a]; - re=analyze_overaln (A, B, "_case_l_",t,f,p1,p2,p3); - fprintf (stdout, "\n%d: sn: %.2f sp: %.2f re: %.2f F: %d P: %d P2: %d T: %d",a, re[0],re[1],re[2],f, p1,p2,t); - tsn+=re[0]; - tsp+=re[1]; - tre+=re[2]; - vfree(re); - } - fprintf (stdout, "\nTOT: sn: %.2f sp: %.2f re: %.2f F: %d P: %d P2: %d T: %d", tsn/(D1->A)->nseq,tsp/(D1->A)->nseq, tre/(D1->A)->nseq,f,p1,p2,t); - - exit (0); - } - else if ( strm ( action, "overaln_list_scan")) - { - float *re, tre=0, tsn=0, tsp; - int p1,p2, p3, t, f; - FILE *fp; - char fname [100]; - Alignment **LA; - Alignment **LB; - - if ( ACTION(1))sprintf ( fname, "%s", ACTION(1)); - else sprintf ( fname, "scan_results.txt"); - - fprintf ( stdout, "SCAN Results will be ouput in %s\n", fname); - - - LA=vcalloc ((D1->A)->nseq, sizeof (Alignment*)); - LB=vcalloc ((D2->A)->nseq, sizeof (Alignment*)); - for (a=0; a<(D1->A)->nseq; a++) - { - LA[a]=main_read_aln ((D1->A)->name[a], NULL); - LB[a]=main_read_aln ((D2->A)->name[a], NULL); - } - for (f=32; f<=40; f++) - { - for (p1=90; p1<=100; p1+=5) - { - for ( t=1; t<=3; t++) - { - for (p2=0; p2<=40; p2+=5) - { - for (p3=0;p3<=0;p3+=5) - { - tre=tsn=tsp=0; - for ( a=0; a<(D1->A)->nseq; a++) - { - Alignment *A, *B; - A=LA[a]; - B=LB[a]; - re=analyze_overaln (A, B, "_case_l_",t,f,p1,p2,p3); - - tsn+=re[0]; - tsp+=re[1]; - tre+=re[2]; - vfree (re); - } - fp=vfopen (fname, "a"); - fprintf (fp, "\nTOT: sn: %.2f sp: %.2f re: %.2f P: %d P2: %d P3: %d T: %d F: %d", tsn/(D1->A)->nseq,tsp/(D1->A)->nseq, tre/(D1->A)->nseq, p1,p2, p3,t,f); - fprintf (stderr, "\nTOT: sn: %.2f sp: %.2f re: %.2f P: %d P2: %d P3: %d T: %d F: %d", tsn/(D1->A)->nseq,tsp/(D1->A)->nseq, tre/(D1->A)->nseq, p1,p2, p3,t,f); - vfclose (fp); - } - } - } - } - } - exit (0); - } - else if ( strm ( action, "overaln"))//Evaluate the capacity to predict over-aligned regions - { - OveralnP *F; - F=vcalloc (1, sizeof (OveralnP)); - //al1: ref - //al2: alignment - //ATOI(1): P (0-100) - //ATOI(2): T (0-9) - - float *r; - DST=vcalloc (1,sizeof(Sequence_data_struc)); - DST->A=aln2gap_cache (D1->A,0); - lower_value=0; - upper_value=0; - D1->A=filter_aln_upper_lower (D1->A, DST->A, 0, 0); - - sprintf (F->mode, "%s", ((s=get_string_variable ("overaln_mode")))?s:"lower"); - if (!strm (F->mode, "lower") && !strm (F->mode, "unalign"))printf_exit (EXIT_FAILURE,stderr,"\nERROR: unknown overal_mode in overal output [%s] [FATAL:%s]", F->mode, PROGRAM); - - if (int_variable_isset ("overaln_threshold"))F->t=get_int_variable ("overaln_threshold"); - if (int_variable_isset ("overaln_target"))F->f=get_int_variable ("overaln_target"); - if (int_variable_isset ("overaln_P1"))F->f=get_int_variable ("overaln_P1"); - if (int_variable_isset ("overaln_P1"))F->f=get_int_variable ("overaln_P2"); - if (int_variable_isset ("overaln_P1"))F->f=get_int_variable ("overaln_P3"); - if (int_variable_isset ("overaln_P1"))F->f=get_int_variable ("overaln_P4");//F P1 P2 P3 T; - - D2->A=aln2clean_pw_aln (D2->A, F); - r=aln2pred (D1->A, D2->A,"case_l_"); - fprintf ( stdout, "sn %.2f sp %.2f re %.2f\n", r[0], r[1], r[2]); - exit (0); - } - - -//JM_START - else if ( strm ( action, "aln2hitMat")) - { - aln2hitMat(D1->A, ACTION(1)); - myexit (EXIT_SUCCESS); - } -//JM_END - - else - { - fprintf ( stderr, "\nWARNING: ACTION %s UNKNOWN and IGNORED\n", action); - } - } - - -void aln2mat_diaa (Sequence *S) -{ - int a, aa1, aa2, aa3, aa4; - int s1, s2, p; - Alignment *A; - - int ****m; - int **c; - int naa=0; - int count=0; - double Delta=0.00001; - int *alp; - int tot,u; - double observed, expected, f_diaa1, f_diaa2, v; - - - alp=vcalloc (256, sizeof (int)); - for (a=0; a<26; a++)alp[a+'a']=1; - alp['b']=0; - alp['j']=0; - alp['o']=0; - alp['u']=0; - alp['x']=0; - alp['z']=0; - - m=declare_arrayN (4,sizeof (int),26,26,26,26); - c=declare_arrayN (2,sizeof (int),26,26); - - for ( a=0; a< S->nseq; a++) - { - fprintf ( stderr, "%s\n", S->name[a]); - A=main_read_aln (S->name[a],NULL); - for (s1=0; s1nseq; s1++)lower_string (A->seq_al[s1]); - - for ( s1=0; s1nseq-1; s1++) - for (s2=s1+1; s2nseq; s2++) - { - for (p=0; plen_aln-1; p++) - { - - u =alp[aa1=A->seq_al[s1][p]]; - u+=alp[aa2=A->seq_al[s1][p+1]]; - u+=alp[aa3=A->seq_al[s2][p]]; - u+=alp[aa4=A->seq_al[s2][p+1]]; - - if ( u==4) - { - aa1-='a';aa2-='a';aa3-='a'; aa4-='a'; - - c[aa1][aa2]++; - c[aa3][aa4]++; - m[aa1][aa2][aa3][aa4]++; - count+=2; - } - } - } - free_aln (A); - } - fprintf ( stdout, "# DIAA_MATRIX_FORMAT_01\n"); - naa=26; - for (aa1=0; aa10)fprintf ( stdout, "TEST C=%d expected=%.4f observed=%.4f v=%.4f [%d %d %d][%d] tot=%d\n", count, (float)expected, (float)observed, (float) v, c[aa1][aa2], c[aa3][aa4], count, m[aa1][aa2][aa3][aa4], tot); - fprintf ( stdout, "%c%c %c%c %d %d\n", aa1+'a', aa2+'a', aa3+'a', aa4+'a', (int)v, m[aa1][aa2][aa3][aa4]+ m[aa3][aa4][aa1][aa2]); - } - } - exit (EXIT_SUCCESS); -} -void aln2mat (Sequence *S) -{ - int a, aa1, aa3; - int s1, s2, p; - Alignment *A; - - int **m; - int *c; - int naa=0; - int count=0; - double Delta=0.00001; - int *alp; - int tot,u; - double observed, expected, f_diaa1, f_diaa2, v; - - - alp=vcalloc (256, sizeof (int)); - for (a=0; a<26; a++)alp[a+'a']=1; - alp['b']=0; - alp['j']=0; - alp['o']=0; - alp['u']=0; - alp['x']=0; - alp['z']=0; - - m=declare_arrayN (2,sizeof (int),26,26); - c=declare_arrayN (1,sizeof (int),26); - - for ( a=0; a< S->nseq; a++) - { - fprintf ( stderr, "%s\n", S->name[a]); - A=main_read_aln (S->name[a],NULL); - for (s1=0; s1nseq; s1++)lower_string (A->seq_al[s1]); - - for ( s1=0; s1nseq-1; s1++) - for (s2=s1+1; s2nseq; s2++) - { - for (p=0; plen_aln-1; p++) - { - - u =alp[aa1=A->seq_al[s1][p]]; - u+=alp[aa3=A->seq_al[s2][p]]; - - if ( u==2) - { - aa1-='a';aa3-='a'; - - c[aa1]++; - c[aa3]++; - m[aa1][aa3]++; - count+=2; - } - } - } - free_aln (A); - } - fprintf ( stdout, "# MONOAA_MATRIX_FORMAT_01\n"); - naa=26; - for (aa1=0; aa10)fprintf ( stdout, "TEST C=%d expected=%.4f observed=%.4f v=%.4f [%d %d %d][%d] tot=%d\n", count, (float)expected, (float)observed, (float) v, c[aa1][aa2], c[aa3][aa4], count, m[aa1][aa2][aa3][aa4], tot); - fprintf ( stdout, "%c %c %d %d\n", aa1+'a', aa3+'a', (int)v, m[aa1][aa3]+ m[aa3][aa1]); - } - } - exit (EXIT_SUCCESS); -} - - -int **seq2latmat ( Sequence *S, char *fname) -{ - int a, b, r0, r1; - char *aa; - int naa; - int *count, tot; - int **mat; - double observed, expected; - FILE *fp; - - fp=vfopen (fname, "w"); - - count=vcalloc ( 256, sizeof (int)); - mat=declare_int (256, 256); - - naa=strlen ( BLAST_AA_ALPHABET); - aa=vcalloc ( naa+2, sizeof (char)); - sprintf ( aa, "%s", BLAST_AA_ALPHABET); - lower_string (aa); - - for ( tot=0,a=0; a< S->nseq; a++) - { - ungap (S->seq[a]); - for ( b=1; blen[a]; b++) - { - r0=tolower(S->seq[a][b-1]); - r1=tolower(S->seq[a][b]); - - mat[r0][r1]++; - //count[r0]++; - count[r1]++; - tot++; - } - } - for ( a=0; a< naa; a++) - for (b=0; b< naa; b++) - { - if ( aa[a]=='*' || aa[b]=='*'); - else - { - expected=((double)count[(int)aa[a]]/(double)tot)* ((double)count[(int)aa[b]]/(double)tot)*(double)tot; - observed=((double)mat[(int)aa[a]][(int)aa[b]]); - - /* - fprintf ( stderr, "\n%c=%d %c=%d Tot=%d Obs=%d Exp=%d\n", aa[a],count[aa[a]], aa[b],count[aa[b]],tot, mat[aa[a]][aa[b]],(int)expected); - fprintf ( stderr, "\n%d", mat[aa[a]][aa[b]]); - fprintf ( stderr, "\n%d", mat[aa[a]][aa[b]]); - */ - mat[(int)aa[a]][(int)aa[b]]=(expected==0 || observed==0)?0:((int)10*log((observed/expected))); - } - } - - fprintf (fp,"# BLAST_MATRIX FORMAT\n#ALPHABET=%s\n#TRANSITION MATRIX TRAINED ON %d Sequence\n#", BLAST_AA_ALPHABET, S->nseq); - for (a=0; a< naa; a++)fprintf ( fp, "%3c ", toupper(aa[a])); - fprintf (fp,"\n"); - for (a=0; a< naa; a++) - { - - fprintf (fp, "%c", toupper(aa[a])); - for ( b=0; b< naa; b++) - { - fprintf (fp, "%3d ", mat[(int)aa[a]][(int)aa[b]]); - } - fprintf ( fp, "\n"); - } - vfclose (fp); - vfree (count); - vfree (aa); - - return mat; -} - -double* mat2cmp ( int **mat1, int **mat2) -{ - int a, b, n, x, y; - double **list, *r; - if ( !mat1 || !mat2) - { - fprintf ( stderr, "\nERROR: mat2cmp needs two matrices [FATAL:%s]", PROGRAM); - myexit (EXIT_FAILURE); - } - - for (n=0, a=0; a< 256; a++) - for ( b=0; b<256; b++) - { - x=mat1[a][b]; - y=mat2[a][b]; - if ( x|| y)n++; - } - if ( n==0) return 0; - list=declare_double (n, 2); - - for (n=0, a=0; a<256; a++) - for ( b=0; b<256; b++) - { - x=mat1[a][b]; - y=mat2[a][b]; - if ( x || y) - { - list[n][0]=x; - list[n][1]=y; - n++; - } - } - r=return_r (list, n); - free_double(list, -1); - return r; -} - -int ** read_blast_matrix ( char *mat_name) - { - FILE *fp; - int n_aa,aa1, aa2; - int a, b, c; - int **matrix; - int value; - char sbuf[VERY_LONG_STRING]; - char buf[2]; - char alp[257]; - - matrix=declare_int (256,256); - vfree ( matrix[30]); - matrix[30]=vcalloc(10000, sizeof (int)); - fp=vfopen ( mat_name, "r"); - while ( (c=fgetc(fp))=='#' || isspace(c) ) - { - char *p; - fgets ( sbuf, VERY_LONG_STRING, fp); - if ( (p=strstr (sbuf, "ALPHABET"))) - sscanf (p, "ALPHABET=%s", alp); - } - ungetc(c, fp); - lower_string (alp); - n_aa=strlen (alp); - - for ( a=0; a< n_aa; a++) - { - fscanf ( fp, "%s ", buf); - - aa1=tolower(buf[0]); - - if ( aa1!=alp[a]) - { - fprintf ( stderr, "\nParsing_error when reading blast_matrix %s:\n%c %c",mat_name, aa1,alp[a]); - fprintf ( stderr, "\n%c ", fgetc(fp)); - myexit (EXIT_FAILURE); - } - for ( b=0; bnseq; a++) - { - if (S->seq_comment[a] && (s=strstr(S->seq_comment[a], "_FIRSTYEAR"))) - { - sscanf (s, "_FIRSTYEAR%d_", &first); - } - else first=1; - - for ( y=first,b=0; blen[a]; b++) - { - if ( !is_gap(S->seq[a][b])) - { - S->seq[a][b]='a'+((y/modulo))%10; - y++; - } - } - if ( (s=strstr ( S->name[a], "_agechannel"))) - { - sprintf ( s, "%s", new_channel); - } - else strcat (S->name[a], new_channel); - } - return S; -} - -Sequence* output_n_pavie_age_channel (Sequence *S, char *name, int n) -{ - int x, a; - if (!n)n=2; - - - for ( x=1,a=0; a< n; a++, x*=10) - { - S=output_pavie_age_channel(S, name,x); - } -return S; -} - - - - -Sequence* output_pavie_age_channel (Sequence *S, char *name, int modulo) - { - Alignment *A; - FILE *fp; - static int display; - char mat_list_name[100]; - char seq_list[1000]; - char mat_name[1000]; - char *tmp; - - sprintf ( mat_list_name, "%s_pavie_age_matrix.mat_list", name); - sprintf (seq_list, "%s_age_channel.fasta",name); - - if ( display==0 ) - { - if (check_file_exists(seq_list))vremove (seq_list); - if (check_file_exists(mat_list_name))vremove (mat_list_name); - } - sprintf (mat_name, "%s_age_mat_mod%d.mat",name, modulo); - output_age_matrix ( mat_name, modulo); - - fp=vfopen ( mat_list_name,"a"); - fprintf ( fp, "%s\n", mat_name); - vfclose ( fp); - - S=seq2year (S,modulo); - A=seq2aln (S, NULL, KEEP_GAP); - output_fasta_seq (tmp=vtmpnam (NULL),A); - file_cat ( tmp, seq_list); - - if ( display==0) - { - display_output_filename ( stdout, "AGE_MAT_LIST", "MAT_LIST", mat_list_name, CHECK); - display_output_filename ( stdout, "AGE_SEQ", "FASTA", seq_list, CHECK); - display=1; - } - fprintf ( stderr, "\nModulo:%d years", modulo); - fprintf ( stderr, "\n"); - free_aln (A); - return S; - } -// -// Name MAnipulation -// - -Alignment *clean_aln (Alignment *A) -{ - if ( A) - { - A->seq_comment=clean_string (A->nseq, A->seq_comment); - A->aln_comment=clean_string (A->nseq, A->aln_comment); - A->name=translate_names(A->nseq, A->name); - (A->S)=clean_sequence ((A->S)); - } - return A; -} -Sequence *clean_sequence ( Sequence *S) -{ - if ( !S) return S; - - S->seq_comment=clean_string (S->nseq, S->seq_comment); - S->name=translate_names(S->nseq, S->name); - return S; -} -char ** translate_names (int n, char **name) -{ - int a; - for ( a=0; a<", name[a]))name[a]='_'; - - } - sprintf (buf,"%s",decode_name (name, DECODE)); - if ( strlen (buf)>read_array_size_new ((char *)name)) - { - name=vrealloc (name, sizeof (char)*(strlen (buf)+1)); - } - sprintf (name, "%s", buf); - - return name; - } -char *decode_name (char *name, int mode) -{ - static char ***name_list; - static int n; - static char tag[100]; - int a; - - if (mode==CLEAN) - { - for (a=0; a %s\n", name_list[a][0], name_list[a][1]); - return file; - } - if (mode ==DECODE && name_list==NULL)return name; - if ( name==NULL) return name; - - - - if (!tag[0]) - { - vsrand (0); - sprintf ( tag, "TCTAG_%d",rand ()%100000); - } - - if ( mode==CODE) - { - for (a=0; a< n; a++) - if ( strm (name, name_list[a][0]))return name_list[a][1]; - - - name_list=realloc (name_list, sizeof (char**)*(n+1)); - name_list[n]=vcalloc (2, sizeof (char*)); - name_list[n][0]=vcalloc (strlen (name)+1, sizeof (char)); - name_list[n][1]=vcalloc (100, sizeof (char)); - sprintf ( name_list[n][0], "%s", name); - sprintf ( name_list[n][1], "%s_%d", tag,n+1); - return name_list[n++][1]; - } - else if ( mode ==DECODE) - { - char *p; - int i; - if ( !(p=after_strstr (name, tag)))return name; - else - { - sscanf (p, "_%d", &i); - return name_list[i-1][0]; - } - } - else - { - printf_exit (EXIT_FAILURE, stderr,"Unknown Mode for Decode_name [FATAL:%s]", PROGRAM); - } - return NULL; -} - - -FILE * display_sequences_names (Sequence *S, FILE *fp, int check_pdb_status, int print_templates) - { - int a; - int max_len; - char *r; - - if ( !S) - { - fprintf (fp,"\nERROR: NO SEQUENCE READ [FATAL:%s]\n", PROGRAM); myexit (EXIT_FAILURE); - } - for ( a=0, max_len=0; a< S->nseq; a++)max_len=MAX(max_len, strlen (S->name[a])); - fprintf ( fp, "\nINPUT SEQUENCES: %d SEQUENCES [%s]", S->nseq,(S->type)?S->type:"Unknown type"); - for ( a=0; a< S->nseq; a++) - { - fprintf (fp, "\n Input File %-*s Seq %-*s Length %4d type %s",max_len,S->file[a], max_len,S->name[a],(int)strlen ( S->seq[a]), S->type); - if (check_pdb_status) - { - if ((r=seq_is_pdb_struc (S, a)))fprintf (fp, " Struct Yes PDBID %s", get_pdb_id(r)); - else fprintf (fp, " Struct No"); - /* - if (is_pdb_struc (S->name[a])||is_pdb_struc (S->file[a]) )fprintf (fp, " Struct Yes"); - else fprintf (fp, " Struct No"); - */ - } - else fprintf (fp, " Struct Unchecked"); - if ( print_templates)fp=display_sequence_templates (S, a, fp); - - - } - fprintf ( fp, "\n"); - return fp; - - } -Sequence *add_file2file_list (char *name, Sequence *S) -{ - - if (!S) S=declare_sequence (1,1,10); - else S=realloc_sequence (S,S->nseq+1,0);S->nseq=0; - - sprintf ( S->name[S->nseq++], "%s", name); - return S; - -} -/*********************************COPYRIGHT NOTICE**********************************/ -/*© Centro de Regulacio Genomica */ -/*and */ -/*Cedric Notredame */ -/*Tue Oct 27 10:12:26 WEST 2009. */ -/*All rights reserved.*/ -/*This file is part of T-COFFEE.*/ -/**/ -/* T-COFFEE is free software; you can redistribute it and/or modify*/ -/* it under the terms of the GNU General Public License as published by*/ -/* the Free Software Foundation; either version 2 of the License, or*/ -/* (at your option) any later version.*/ -/**/ -/* T-COFFEE is distributed in the hope that it will be useful,*/ -/* but WITHOUT ANY WARRANTY; without even the implied warranty of*/ -/* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the*/ -/* GNU General Public License for more details.*/ -/**/ -/* You should have received a copy of the GNU General Public License*/ -/* along with Foobar; if not, write to the Free Software*/ -/* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA*/ -/*............................................... |*/ -/* If you need some more information*/ -/* cedric.notredame@europe.com*/ -/*............................................... |*/ -/**/ -/**/ -/* */ -/*********************************COPYRIGHT NOTICE**********************************/ diff --git a/binaries/src/tcoffee/t_coffee_source/reformat_struc.c b/binaries/src/tcoffee/t_coffee_source/reformat_struc.c deleted file mode 100644 index 10ccc6d..0000000 --- a/binaries/src/tcoffee/t_coffee_source/reformat_struc.c +++ /dev/null @@ -1,1093 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include -#include "io_lib_header.h" -#include "util_lib_header.h" -#include "dp_lib_header.h" -#include "define_header.h" - - - -#define FATAL "fatal:reformat_struc" - -char * process_repeat (char *aln, char *seq, char *pdb) -{ - char *tf, *file, *name; - Alignment *A, *A2; - Sequence *S, *P; - int r1, r2, is_r1, is_r2, l1=0, l2=0, a; - int *pos; - FILE *fp; - - A=main_read_aln (aln, NULL); - for (a=0; anseq; a++)ungap(A->seq_al[a]); - - S=main_read_seq (seq); - P=get_pdb_sequence (pdb); - - - A2=align_two_sequences (S->seq[0], P->seq[0], "pam250mt", -10, -1, "myers_miller_pair_wise"); - - pos=vcalloc ( A2->len_aln+1, sizeof (int)); - - for (l1=0, l2=0,a=0; a< A2->len_aln; a++) - { - - r1=A2->seq_al[0][a]; - r2=A2->seq_al[1][a]; - - is_r1=1-is_gap(r1); - is_r2=1-is_gap(r2); - - l1+=is_r1; - l2+=is_r2; - - if (!is_r1); - else - { - pos[l1]=l2; - } - } - tf=vtmpnam(NULL); - fp=vfopen (tf, "w"); - for (a=0; anseq; a++) - { - int *coor, b, c; - - name=A->name[a]; - file=vtmpnam (NULL); - coor=string2num_list2 (name, "-"); - - //Check the compatibility between the guide sequence and the coordinates - for ( c=0,b=coor[1]-1; bseq_al[a][c])!=tolower(S->seq[0][b])) - printf_exit (EXIT_FAILURE, stderr, "Incompatibility between the repeat [%s] and the master Sequence [%s]\n%s",A->name[a], seq, A->seq_al[a]); - } - - printf_system ( "extract_from_pdb %s -coor %d %d -seq_field SEQRES> %s", pdb,pos[coor[1]-1],pos[coor[2]-1], file); - fprintf (fp, ">%s _P_ %s\n", name, file); - vfree (coor); - } - vfclose (fp); - return tf; -} - - - -char * normalize_pdb_file (char *name_in, char *seq, char *out_file) -{ - char command[1000]; - Sequence *S; - Alignment *A; - int a; - int start, end, npdb, r1, r2; - char name[100]; - - - if ( !name_in) return NULL; - else - { - sprintf ( name, "%s", name_in); - } - - if ( !is_pdb_file(name)) - { - fprintf(stdout, "\nERROR[normalize_pdb_file]: %s is not a pdb file[FATAL:%s]\n", name, PROGRAM); - myexit (EXIT_FAILURE); - } - - S=get_pdb_sequence (name); - A=align_two_sequences (S->seq[0],seq,"idmat",-3,0, "fasta_pair_wise"); - - - - for (start=-1, end=-1,npdb=0,a=0; a< A->len_aln; a++) - { - r1=1-is_gap(A->seq_al[0][a]); - r2=1-is_gap(A->seq_al[1][a]); - - npdb+=r1; - - if (r1 && r2 && start==-1)start=npdb; - if (r1 && r2)end=npdb; - } - - free_aln(A); - free_sequence (S, -1); - - sprintf ( command, "extract_from_pdb -infile %s -atom ALL -chain FIRST -coor %d %d -nodiagnostic > %s", check_file_exists(name), start, end, out_file); - my_system ( command); - return out_file; - } - -Ca_trace * trim_ca_trace (Ca_trace *T, char *seq ) -{ - /*This function: - -removes from Ca trace all the residues that are not in the sequence - -add in the Ca trace residues unmatched in the structure (it gives them a NULL structure) - */ - Alignment *ALN; - Atom *A; - int a,l, s, r, is_r, is_s; - int *seq_cache, *struc_cache; - int tot_l=0; - - char buf1[10000]; - char buf2[10000]; - - /* lower_string (T->seq); - lower_string (seq); - */ - - - sprintf (buf1, "%s", T->seq); - sprintf (buf2, "%s", seq); - lower_string (buf1); - lower_string (buf2); - - - if ( strm (buf1,buf2))return T; - else - { - ALN=align_two_sequences (T->seq,seq, "est_idmat",-1, 0,"fasta_pair_wise"); - struc_cache=vcalloc (ALN->len_aln+1, sizeof (int)); - seq_cache =vcalloc (ALN->len_aln+1, sizeof (int)); - - for ( r=0, s=0,a=0; a< ALN->len_aln; a++) - { - is_r=!is_gap(ALN->seq_al[0][a]); - is_s=!is_gap(ALN->seq_al[1][a]); - - r+=is_r; - s+=is_s; - - if ( is_s && is_r) - { - struc_cache[r-1]=s-1; - seq_cache[s-1]=r-1; - } - else if ( is_s && !is_r) - { - seq_cache[s-1]=-1; - - } - else if ( !is_s && is_r) - { - struc_cache[r-1]=-1; - } - } - - T->ca=vrealloc ( T->ca, sizeof (Atom*)*(ALN->len_aln+1)); - T->peptide_chain=vrealloc ( T->peptide_chain, (sizeof (Amino_acid*))*(ALN->len_aln+1)); - T->seq=vrealloc ( T->seq, ALN->len_aln+1); - - for ( a=T->len; a< ALN->len_aln; a++) - { - T->peptide_chain[a]=vcalloc (1, sizeof (Amino_acid)); - } - - - /*Read every atom*/ - for ( a=0; a< T->n_atom; a++) - { - - A=(T->structure[a]); - if ( struc_cache[A->res_num-1]==-1)continue; - else - { - /*set the struc residue to its sequence index*/ - A->res_num=struc_cache[A->res_num-1]+1; - if (strm (A->type, "CA")) {T->ca[A->res_num-1]=A;tot_l++;} - if ( strm (A->type, "CA"))(T->peptide_chain[A->res_num-1])->CA=A; - if ( strm (A->type, "C"))(T->peptide_chain[A->res_num-1] )->C=A; - if ( strm (A->type, "CB"))(T->peptide_chain[A->res_num-1])->CB=A; - if ( strm (A->type, "N"))(T->peptide_chain[A->res_num-1] )->N=A; - } - } - - l=strlen(seq); - for ( a=0;a< l; a++) - { - - if ( seq_cache[a]==-1) - { - tot_l++; - T->ca[a]=NULL; - - if (!T->peptide_chain[a])T->peptide_chain[a]=vcalloc (1, sizeof (Amino_acid)); - T->peptide_chain[a]->CA=NULL; - T->peptide_chain[a]->C =NULL; - T->peptide_chain[a]->CB=NULL; - T->peptide_chain[a]->N=NULL; - T->seq[a]='x'; - } - else - { - T->seq[a]=seq[a]; - } - } - T->len=ALN->len_aln; - - /* - T->len=tot_l; - */ - - free_aln (ALN); - vfree(seq_cache); - vfree(struc_cache); - - - } - return T; -} - -Ca_trace * read_ca_trace (char *name, char *seq_field ) -{ - char *tp_name=NULL; - char command[10000]; - - - if ( !is_simple_pdb_file (name)) - { - tp_name=vtmpnam (NULL); - sprintf ( command, "extract_from_pdb -seq_field %s -infile %s -atom ALL -chain FIRST -mode simple> %s",seq_field, check_file_exists(name), tp_name); - if ( getenv4debug ("DEBUG_EXTRACT_FROM_PDB"))fprintf ( stderr, "\n[DEBUG_EXTRACT_FROM_PDB:read_ca_trace] %s\n", command); - my_system ( command); - } - else - tp_name=name; - - return simple_read_ca_trace (tp_name); -} - -Ca_trace * simple_read_ca_trace (char *tp_name ) - { - /*This function reads a pdb file into a Ca_trace structure*/ - - - - int a, c, n; - FILE *fp; - Atom *A; - char res; - char *buf; - Ca_trace *T=NULL; - int res_num=0, last_res_num=0; - - - buf=vcalloc ( VERY_LONG_STRING, sizeof (char)); - n=count_n_line_in_file (tp_name ); - - if ( !T) - { - T=vcalloc ( 1, sizeof ( Ca_trace)); - declare_name (T->name); - } - - /*1 Get the complete sequence: replace missing residues with Xs*/ - for (a=0; a< VERY_LONG_STRING; a++)buf[a]='x'; - res=res_num=0; - - fp=vfopen (tp_name, "r"); - while ( (c=fgetc(fp))!='>'); - fscanf ( fp, "%*s" ); - while ( (c=fgetc(fp))!='\n'); - fscanf ( fp, "%*s" ); - while ( (c=fgetc(fp))!='\n'); - while ((c=fgetc(fp))!=EOF) - { - ungetc(c, fp); - - fscanf (fp, "%*s %*s %c %*c %d %*f %*f %*f\n",&res,&res_num); - if (res) - { - - res=tolower (res); - buf[res_num-1]=res; - last_res_num=res_num; - } - res=res_num=0; - } - buf[last_res_num]='\0'; - vfclose (fp); - /*Sequence Read*/ - - - T->len=strlen (buf); - T->seq=vcalloc ( T->len+1, sizeof (char)); - buf=lower_string (buf); - sprintf ( T->seq, "%s", buf); - n+=T->len; - T->structure=vcalloc ( n, sizeof (Atom*)); - for ( a=0; a< n; a++)T->structure[a]=vcalloc ( 1, sizeof (Atom)); - T->ca=vcalloc ( T->len+1, sizeof ( Atom*)); - a=0; - - fp=vfopen (tp_name, "r"); - while ( (c=fgetc(fp))!='>'); - fscanf ( fp, "%*s" ); - while ( (c=fgetc(fp))!='\n'); - fscanf ( fp, "%*s" ); - while ( (c=fgetc(fp))!='\n'); - - while ((c=fgetc(fp))!=EOF) - { - ungetc(c, fp); - A=T->structure[a]; - A->num=a; - fscanf (fp, "%*s %s %s %*c %d %f %f %f\n",A->type, A->res,&A->res_num, &A->x, &A->y, &A->z); - res=A->res[0]; - - res=tolower (res); - - T->seq[A->res_num-1]=res; - - if ( strm ( A->type, "CA")) T->ca[A->res_num-1]=A; - a++; - } - T->n_atom=a; - - T->peptide_chain=vcalloc (T->len+1, sizeof (Amino_acid*)); - for ( a=0; a<=T->len; a++) T->peptide_chain[a]=vcalloc (1, sizeof (Amino_acid)); - for ( a=0; a< T->n_atom; a++) - { - A=T->structure[a]; - - if ( strm (A->type, "CA"))(T->peptide_chain[A->res_num-1])->CA=A; - if ( strm (A->type, "C"))(T->peptide_chain[A->res_num-1] )->C=A; - if ( strm (A->type, "CB"))(T->peptide_chain[A->res_num-1])->CB=A; - if ( strm (A->type, "N"))(T->peptide_chain[A->res_num-1] )->N=A; - } - - - vfclose (fp); - vfree (buf); - return T; - } -Ca_trace * hasch_ca_trace ( Ca_trace *T) - { - - T=hasch_ca_trace_nb (T); - T=hasch_ca_trace_bubble (T); - T=hasch_ca_trace_transversal (T); - return T; - } -Ca_trace * hasch_ca_trace_transversal ( Ca_trace *TRACE) - { - /*This function gets the Coordinates of a protein and computes the distance of each Ca to its - - given a Ca, - Compute the distance between, CA-x and CA+x with x=[1-N_ca] - T->nb[a][0]-->Number of distances. - T->nb[a][1... T->nb[a][0]]-->ngb index with respect to the Ca chain - T->d_nb[a][1... T->d_nb[a][0]]-->ngb index with respect to the Ca chain - */ - - int a, b, d; - float dist; - Atom *A, *B; - - Struct_nb *T; - Pdb_param *PP; - - - TRACE->Transversal=vcalloc ( 1, sizeof (Struct_nb)); - - T=TRACE->Transversal; - PP=TRACE->pdb_param; - - if ( !T->nb)T->nb=declare_int (TRACE->len+1, 1); - if ( !T->d_nb)T->d_nb=declare_float (TRACE->len+1, 1); - - for (d=0,a=0; a< TRACE->len; a++) - { - - for ( b=1; b<=PP->N_ca; b++) - { - if ( (a-b)<0 || (a+b)>=TRACE->len)continue; - A=TRACE->ca[a-b]; - B=TRACE->ca[a+b]; - dist=get_atomic_distance ( A, B); - - T->nb[a]=vrealloc ( T->nb[a], (++T->nb[a][0]+1)*sizeof (int)); - T->nb[a][T->nb[a][0]]=b; - - T->d_nb[a]=vrealloc ( T->d_nb[a], (T->nb[a][0]+1)*sizeof (float)); - T->d_nb[a][T->nb[a][0]]=dist; - - d++; - } - T->max_nb=MAX (T->max_nb, T->nb[a][0]); - } - return TRACE; - - } - -Ca_trace * hasch_ca_trace_nb ( Ca_trace *TRACE) - { - /*This function gets the Coordinates of a protein and computes the distance of each Ca to its - T->N_ca Ngb. - The first Ngb to the left and to the right are excluded - Ngd to the left get negative distances - Ngb to the right receive positive distances - T->nb[a][0]-->Number of ngb. - T->nb[a][1... T->nb[a][0]]-->ngb index with respect to the Ca chain - T->d_nb[a][1... T->d_nb[a][0]]-->ngb index with respect to the Ca chain - */ - - - - int a, b, d; - float dist; - Atom *A, *B; - - Struct_nb *T; - Pdb_param *PP; - - TRACE->Chain=vcalloc ( 1, sizeof (Struct_nb)); - - T=TRACE->Chain; - PP=TRACE->pdb_param; - - if ( !T->nb)T->nb=declare_int (TRACE->len+1, 1); - if ( !T->d_nb)T->d_nb=declare_float (TRACE->len+1, 1); - - for (d=0,a=0; a< TRACE->len; a++) - { - for ( b=MAX(0,a-PP->N_ca); b< MIN( a+PP->N_ca, TRACE->len); b++) - { - if (FABS(a-b)<2)continue; - A=TRACE->ca[a]; - B=TRACE->ca[b]; - if ( !A || !B)continue; - dist=get_atomic_distance ( A, B); - if (bnb[a]=vrealloc ( T->nb[a], (++T->nb[a][0]+1)*sizeof (int)); - T->nb[a][T->nb[a][0]]=b; - - T->d_nb[a]=vrealloc ( T->d_nb[a], (T->nb[a][0]+1)*sizeof (float)); - T->d_nb[a][T->nb[a][0]]=dist; - d++; - } - - T->max_nb=MAX (T->max_nb, T->nb[a][0]); - } - return TRACE; - - } -Ca_trace * hasch_ca_trace_bubble ( Ca_trace *TRACE) - { - - int a, b; - float dist; - Atom *A, *B; - float **list; - Pdb_param *PP; - Struct_nb *T; - - PP=TRACE->pdb_param; - TRACE->Bubble=vcalloc ( 1, sizeof (Struct_nb)); - T=TRACE->Bubble; - - - - if ( !T->nb)T->nb=declare_int (TRACE->len+1, 1); - if ( !T->d_nb)T->d_nb=declare_float (TRACE->len+1, 1); - list=declare_float ( TRACE->n_atom, 3); - - - for (a=0; a< TRACE->len; a++) - { - for ( b=0; b< TRACE->len; b++) - { - A=TRACE->ca[a]; - B=TRACE->ca[b]; - if ( !A || !B)continue; - dist=get_atomic_distance ( A, B); - - if ( distmaximum_distance && FABS((A->res_num-B->res_num))>2) - { - T->nb[a][0]++; - T->nb[a]=vrealloc ( T->nb[a], (T->nb[a][0]+1)*sizeof (int)); - T->nb[a][T->nb[a][0]]=(TRACE->ca[b])->num; - - T->d_nb[a]=vrealloc ( T->d_nb[a], (T->nb[a][0]+1)*sizeof (float)); - T->d_nb[a][T->nb[a][0]]= ((amax_nb=MAX (T->max_nb, T->nb[a][0]); - - } - - for ( a=0; a< TRACE->len; a++) - { - for ( b=0; b< T->nb[a][0]; b++) - { - list[b][0]=T->nb[a][b+1]; - list[b][1]=T->d_nb[a][b+1]; - list[b][2]=(TRACE->structure[T->nb[a][b+1]])->res_num; - } - - sort_float ( list, 3,2, 0, T->nb[a][0]-1); - for ( b=0; b< T->nb[a][0]; b++) - { - T->nb[a][b+1]=list[b][0]; - T->d_nb[a][b+1]=list[b][1]; - } - } - - free_float ( list, -1); - return TRACE; - - } - - -float ** measure_ca_distances(Ca_trace *T) - { - int a, b; - Atom *A, *B; - float **dist; - - dist=declare_float ( T->len, T->len); - - for (a=0; a< T->len-1; a++) - { - for ( b=a+1; b< T->len; b++) - { - - A=T->ca[a]; - B=T->ca[b]; - dist[a][b]=dist[b][a]=get_atomic_distance ( A, B); - - } - } - return dist; - } -float get_atomic_distance ( Atom *A, Atom*B) - { - float dx, dy, dz, d; - - if ( !A || !B) - { - return UNDEFINED; - } - - - dx=A->x - B->x; - dy=A->y - B->y; - dz=A->z - B->z; - - - d=(float) sqrt ( (double) ( dx*dx +dy*dy +dz*dz)); - return d; - } - -char * map_contacts ( char *file1, char *file2, float T) -{ - - Ca_trace *ST1, *ST2; - int *contact_list; - int a; - - - ST1=read_ca_trace (file1, "ATOM"); - ST2=read_ca_trace (file2, "ATOM"); - - contact_list=identify_contacts (ST1, ST2, T); - for ( a=0; alen; a++) - { - ST1->seq[a]=(contact_list[a]==1)?toupper(ST1->seq[a]):tolower(ST1->seq[a]); - } - - return ST1->seq; -} - -float ** print_contacts ( char *file1, char *file2, float T) -{ - - Ca_trace *ST1, *ST2; - float **dist, d; - int a, b; - Atom *A, *B; - char *list; - int *list1=NULL, *list2=NULL; - int *cache1, *cache2; - - - - if ((list=strstr (file1, "_R_"))!=NULL) - { - list[0]='\0'; - list+=3; - list1=string2num_list2(list, "_"); - } - - if ((list=strstr (file2, "_R_"))!=NULL) - { - list[0]='\0'; - list+=3; - list2=string2num_list2(list, "_"); - } - - fprintf ( stdout, "\n#%s (%s) struc2contacts_01", PROGRAM, VERSION); - fprintf ( stdout, "\nStructure %s vs %s", file1, file2); - ST1=read_ca_trace (file1, "SEQRES"); - ST2=read_ca_trace (file2, "SEQRES"); - - - cache1=vcalloc (ST1->len+1, sizeof (int)); - cache2=vcalloc (ST2->len+1, sizeof (int)); - - if (list1)for ( a=1; alen; a++)cache1[a]=1; - - if (list2)for ( a=1; alen; a++)cache2[a]=1; - - - dist=declare_float (ST1->len+1,ST2->len+1); - vfree (list1); vfree(list2); - - for ( a=0; a< ST1->n_atom; a++) - { - A=ST1->structure[a]; - if ( !cache1[A->res_num])continue; - for ( b=0; bn_atom; b++) - { - - B=ST2->structure[b]; - if( !cache2[B->res_num])continue; - - d=get_atomic_distance (A,B); - - if (dist[A->res_num][B->res_num]==0 || dist[A->res_num][B->res_num]>d)dist[A->res_num][B->res_num]=d; - } - } - - for ( a=1; a<=ST1->len; a++) - { - A=ST1->ca[a-1]; - if ( !A || !cache1[A->res_num])continue; - for ( b=1; b<= ST2->len; b++) - { - B=ST2->ca[b-1]; - if( !B || !cache2[B->res_num])continue; - if(dist[a][b]!=0)fprintf ( stdout, "\nResidue %3d [%s] vs %3d [%s] %9.4f Angstrom",A->res_num,A->res,B->res_num,B->res,dist[a][b]); - } - } - fprintf ( stdout, "\n"); - vfree (cache1);vfree(cache2); - free_float (dist, -1); - return NULL; -} - - -int * identify_contacts (Ca_trace *ST1,Ca_trace *ST2, float T) -{ - int a, b; - float d; - int *result; - - - - result=vcalloc ( ST1->len+1, sizeof (int)); - - - for ( a=0; a< ST1->n_atom; a++) - for ( b=0; bn_atom; b++) - - { - - d=get_atomic_distance (ST1->structure[a], ST2->structure[b]); - if (dstructure[a]; B=ST2->structure[b]; - fprintf ( stderr, "\n%d %s %s Vs %d %s %s: %f", A->res_num, A->res, A->type, B->res_num, B->res, B->type, d); */ - result[(ST1->structure[a])->res_num-1]=1; - } - } - return result; -} - -Sequence *seq2contacts ( Sequence *S, float T) -{ - int a; - Sequence *NS; - - - - NS=duplicate_sequence (S); - for ( a=0; a< S->nseq; a++) - { - NS->seq[a]=string2contacts ( S->seq[a], S->name[a], S->seq_comment[a], T); - } - - return NS; -} - -char *string2contacts (char *seq,char *name, char *comment, float T) -{ - char **nlist; - char *r; - char *result; - int a, b, n; - char *struc_name; - static char *struc_file; - static char *ligand_file; - Alignment *A; - char r0, r1; - - int l, ln; - char command[1000]; - /*>seq__struc Ligand1 Chain1 Ligand 2 cahin2 - Chain: index or ANY if unknown - Ligand: name of pdb file - */ - - if ( !struc_file) - { - struc_file=vtmpnam (NULL); - ligand_file=vtmpnam (NULL); - } - - - - result=vcalloc ( strlen (seq)+1, sizeof (char)); - for ( a=0; a< strlen (seq); a++)result[a]='0'; - - nlist=string2list (comment); - if ( !nlist)return result; - else - n=atoi(nlist[0]); - - struc_name=strstr(name, "_S_"); - if (!struc_name && !is_pdb_struc (name)) - { - - return result; - } - else if ( !struc_name && is_pdb_struc (name)) - { - struc_name=name; - } - else - { - struc_name+=3; - if ( check_file_exists (struc_name) && is_simple_pdb_file(struc_name)) - { - sprintf (command, "cp %s %s", name, struc_file); - } - else - { - sprintf ( command, "extract_from_pdb -infile %s -atom ALL -mode simple -force >%s",name, struc_file); - } - my_system (command); - } - - - - for ( a=1, ln=1;a%s", nlist[a], ligand_file); - a++; - } - else - { - sprintf ( command, "extract_from_pdb -infile %s -chain %s -ligand %s -ligand_only -atom ALL -mode simple -force >%s",struc_name, nlist[a+1],nlist[a], ligand_file); - a+=2; - } - my_system (command); - - if ( T>0) - { - r=map_contacts (struc_file,ligand_file,T); - - toggle_case_in_align_two_sequences (KEEP_CASE); - A=align_two_sequences (seq,r,"pam250mt", -10, -1, "myers_miller_pair_wise"); - toggle_case_in_align_two_sequences (CHANGE_CASE); - - - for ( l=0,b=0; b< A->len_aln; b++) - { - r0=A->seq_al[0][b];r1=A->seq_al[1][b]; - if (!is_gap(r0)) - { - if (isupper(r1))result[l]=(result[l]!='0')?'9':'0'+ln; - l++; - } - } - - free_aln (A); - fprintf ( stderr, " [DONE]"); - } - else if ( T==0) - { - print_contacts( struc_file,ligand_file,T); - } - - } - fprintf ( stderr, "\n"); - - return result; -} - - -char **struclist2nb (char *name,char *seq, char *comment, float Threshold, char *atom, char *output) -{ - char *list, **pdb; - int a; - char **R, *tmpf; - - tmpf=vtmpnam (NULL); - - list=strstr ( comment, "_P_")+3; - if ( !strstr (comment, "_P_"))return NULL; - else - { - pdb=string2list ( strstr ( comment, "_P_")+3); - } - - for (a=1; a%s R=%d T=%.2f %s\n%s\n", name, a+1, Threshold, comment, R[a]); - } - else - { - FILE *fp; - char c; - - fp=vfopen (tmpf, "r"); - while ( (c=fgetc(fp))!=EOF)fprintf (stdout, "%c", c); - vfclose (fp); - } - return NULL; -} - -char **struc2nb (char *name,char *seq, char *comment, float Threshold, char *atom, char *output) -{ - char *struc_file; - char *struc_name; - Ca_trace *T; - Atom *A1, *A2; - int a, b; - short **hasch; - FILE *fp; - float d; - char command[10000]; - static char **R; - - struc_file=vtmpnam (NULL); - declare_name (struc_name); - - sscanf ( strstr(comment, "_P_"), "_P_ %s", struc_name); - //struc_name=strstr(name, "_S_"); - - if (!R) - { - int l; - l=strlen (seq); - R=declare_char (l+1, l+1); - for ( a=0; a%s",struc_name,(atom==NULL)?"ALL":atom, struc_file); - } - - my_system (command); - } - T=read_ca_trace (struc_file, "ATOM"); - hasch=declare_short (T->len, T->len); - - if (!R) - { - int l; - l=strlen (seq); - R=declare_char (l+1, l+1); - for ( a=0; an_atom; a++) - for ( b=0; b< T->n_atom; b++) - { - A1=T->structure[a];A2=T->structure[b]; - d=get_atomic_distance (A1, A2); - - if ( dres_num-1][A2->res_num-1]=1; - } - } - fp=vfopen (output, "a"); - fprintf ( fp, "#Distance_map_format_01\n#Sequence %s with T= %.2f", struc_name, Threshold); - for ( a=0; alen; a++) - { - int c; - c=change_residue_coordinate ( T->seq,seq,a); - - if ( c!=-1 && seq) - { - char r1, r2; - r1=(T->seq)[a]; - r2=seq[c]; - r1=tolower(r1);r2=tolower(r2); - if ( r1!=r2) continue; - R[c][c]=toupper (R[c][c]); - fprintf (fp, "\n%s Residue %d ",struc_name,c+1); - for ( b=0; blen; b++) - { - int d; - char r3, r4; - r3=(T->seq)[a];r4=seq[c]; - r3=tolower(r3);r4=tolower(r4); - if ( r3!=r4) continue; - - d=change_residue_coordinate (T->seq,seq,b); - - if ( hasch[a][b] && d!=-1) - { - fprintf (fp, "%d ",d+1); - R[c][d]=toupper (R[c][d]); - } - } - fprintf (fp, ";"); - } - } - free_short (hasch, -1); - fprintf (fp, "\n"); - return R; -} - -short **seq2nb (char *seq, char *pdb, float Threshold, char *atom) -{ - char *struc_file; - char *struc_name; - Ca_trace *T; - Atom *A1, *A2; - int a, b; - short **hasch; - short **result; //Contains the result for every residue of seq - float d; - char command[10000]; - - // get a clean pdb file - struc_file=vtmpnam (NULL); - if ( check_file_exists (struc_file) && is_simple_pdb_file(struc_name)) - { - sprintf (command, "cp %s %s", pdb, struc_file); - } - else - { - sprintf ( command, "extract_from_pdb -infile %s -atom %s -mode simple -force >%s",pdb,(atom==NULL)?"ALL":atom, struc_file); - } - my_system (command); - - //Read and hasch the PDB file - T=read_ca_trace (struc_file, "ATOM"); - hasch=declare_short (T->len, T->len); - result=declare_short (strlen (seq)+1, strlen (seq)+1); - for ( a=0; a< T->n_atom; a++) - for ( b=0; b< T->n_atom; b++) - { - A1=T->structure[a];A2=T->structure[b]; - d=get_atomic_distance (A1, A2); - - if ( dres_num-1][A2->res_num-1]=1; - } - } - - for ( a=0; alen; a++) - { - int c; - c=change_residue_coordinate ( T->seq,seq,a); - if ( c!=-1) - { - for ( b=0; blen; b++) - { - int d; - d=change_residue_coordinate (T->seq,seq,b); - - if ( hasch[a][b] && d!=-1) - { - result[c][result[c][0]++]=d; - } - } - } - } - free_short (hasch, -1); - return result; -} -/*********************************COPYRIGHT NOTICE**********************************/ -/*© Centro de Regulacio Genomica */ -/*and */ -/*Cedric Notredame */ -/*Tue Oct 27 10:12:26 WEST 2009. */ -/*All rights reserved.*/ -/*This file is part of T-COFFEE.*/ -/**/ -/* T-COFFEE is free software; you can redistribute it and/or modify*/ -/* it under the terms of the GNU General Public License as published by*/ -/* the Free Software Foundation; either version 2 of the License, or*/ -/* (at your option) any later version.*/ -/**/ -/* T-COFFEE is distributed in the hope that it will be useful,*/ -/* but WITHOUT ANY WARRANTY; without even the implied warranty of*/ -/* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the*/ -/* GNU General Public License for more details.*/ -/**/ -/* You should have received a copy of the GNU General Public License*/ -/* along with Foobar; if not, write to the Free Software*/ -/* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA*/ -/*............................................... |*/ -/* If you need some more information*/ -/* cedric.notredame@europe.com*/ -/*............................................... |*/ -/**/ -/**/ -/* */ -/*********************************COPYRIGHT NOTICE**********************************/ diff --git a/binaries/src/tcoffee/t_coffee_source/showpair.c b/binaries/src/tcoffee/t_coffee_source/showpair.c deleted file mode 100644 index ed4ae69..0000000 --- a/binaries/src/tcoffee/t_coffee_source/showpair.c +++ /dev/null @@ -1,560 +0,0 @@ -#include -#include -#include -#include - -#include "io_lib_header.h" -#include "util_lib_header.h" -#include "define_header.h" -#include "dp_lib_header.h" - -static void make_p_ptrs(int *tptr, int *pl, int naseq, int l); -static void make_n_ptrs(int *tptr, int *pl, int naseq, int len); -static void put_frag(int fs, int v1, int v2, int flen); -static int frag_rel_pos(int a1, int b1, int a2, int b2); -static void des_quick_sort(int *array1, int *array2, int array_size); -static void pair_align(int seq_no, int l1, int l2); - - -/* -* Prototypes -*/ - -/* -* Global variables -*/ -/*extern int *seqlen_array; - extern char **seq_array; - extern int dna_ktup, dna_window, dna_wind_gap, dna_signif; params for DNA - extern int prot_ktup,prot_window,prot_wind_gap,prot_signif; params for prots - extern int nseqs; - extern Boolean dnaflag; - extern double **tmat; - extern int max_aa; - extern int max_aln_length; -*/ - -static int *seqlen_array; -static char **seq_array; - -static int nseqs; -static int dnaflag; -static int max_aln_length; -static int max_aa; - -static int next; -static int curr_frag,maxsf; -static int **accum; -static int *diag_index; -static char *slopes; - -int ktup,window,wind_gap,signif; /* Pairwise aln. params */ -int *displ; -int *zza, *zzb, *zzc, *zzd; - -static Boolean percent=1; - - -static void make_p_ptrs(int *tptr,int *pl,int naseq,int l) -{ - static int a[10]; - int i,j,limit,code,flag; - int residue; - - /*tptr--> pointer to the last occurence of the same residue or ktuple: - - abcdeabef - - tptr: 0 0 0 0 0 1 2 5 0 - pl[a]=6 - pl[b]=7 - */ - - - for (i=1;i<=ktup;i++) - a[i] = (int) pow((double)(max_aa+1),(double)(i-1)); - - limit = (int) pow((double)(max_aa+1),(double)ktup); - for(i=1;i<=limit;++i) - pl[i]=0; - for(i=1;i<=l;++i) - tptr[i]=0; - - - - for(i=1;i<=(l-ktup+1);++i) { - code=0; - flag=FALSE; - for(j=1;j<=ktup;++j) { - residue = seq_array[naseq][i+j-1]; - if((residue<0) || (residue > max_aa)){ - flag=TRUE; - break; - } - code += ((residue) * a[j]); - } - if(flag) - continue; - ++code; - if(pl[code]!=0)tptr[i]=pl[code]; - pl[code]=i; - } -} - - -static void make_n_ptrs(int *tptr,int *pl,int naseq,int len) -{ - static int pot[]={ 0, 1, 4, 16, 64, 256, 1024, 4096 }; - int i,j,limit,code,flag; - int residue; - - limit = (int) pow((double)4,(double)ktup); - - for(i=1;i<=limit;++i) - pl[i]=0; - for(i=1;i<=len;++i) - tptr[i]=0; - - for(i=1;i<=len-ktup+1;++i) { - code=0; - flag=FALSE; - for(j=1;j<=ktup;++j) { - residue = seq_array[naseq][i+j-1]; - if((residue<0) || (residue>4)){ - flag=TRUE; - break; - } - code += ((residue) * pot[j]); /* DES */ - } - if(flag) - continue; - ++code; - if(pl[code]!=0) - tptr[i]=pl[code]; - pl[code]=i; - } -} - - -static void put_frag(int fs,int v1,int v2,int flen) -{ - int end; - accum[0][curr_frag]=fs; - accum[1][curr_frag]=v1; - accum[2][curr_frag]=v2; - accum[3][curr_frag]=flen; - - if(!maxsf) { - maxsf=1; - accum[4][curr_frag]=0; - return; - } - - if(fs >= accum[0][maxsf]) { - accum[4][curr_frag]=maxsf; - maxsf=curr_frag; - return; - } - else { - next=maxsf; - while(TRUE) { - end=next; - next=accum[4][next]; - if(fs>=accum[0][next]) - break; - } - accum[4][curr_frag]=next; - accum[4][end]=curr_frag; - } -} - - -static int frag_rel_pos(int a1,int b1,int a2,int b2) -{ - int ret; - - ret=FALSE; - if(a1-b1==a2-b2) { - if(a2 0) { - if(lst[p] >= ust[p]) - p--; - else { - i = lst[p] - 1; - j = ust[p]; - pivlin = array1[j]; - while(i < j) { - for(i=i+1; array1[i] < pivlin; i++) - ; - for(j=j-1; j > i; j--) - if(array1[j] <= pivlin) break; - if(i < j) { - temp1 = array1[i]; - array1[i] = array1[j]; - array1[j] = temp1; - - temp2 = array2[i]; - array2[i] = array2[j]; - array2[j] = temp2; - } - } - - j = ust[p]; - - temp1 = array1[i]; - array1[i] = array1[j]; - array1[j] = temp1; - - temp2 = array2[i]; - array2[i] = array2[j]; - array2[j] = temp2; - - if(i-lst[p] < ust[p] - i) { - lst[p+1] = lst[p]; - ust[p+1] = i - 1; - lst[p] = i + 1; - } - else { - lst[p+1] = i + 1; - ust[p+1] = ust[p]; - ust[p] = i - 1; - } - p = p + 1; - } - } - return; - -} - - - - - -static void pair_align(int seq_no,int l1,int l2) -{ - int pot[8],i,j,l,m,flag,limit,pos,tl1,vn1,vn2,flen,osptr,fs; - int tv1,tv2,encrypt,subt1,subt2,rmndr; - int residue; - - if(dnaflag) { - for(i=1;i<=ktup;++i) - pot[i] = (int) pow((double)4,(double)(i-1)); - limit = (int) pow((double)4,(double)ktup); - } - else { - for (i=1;i<=ktup;i++) - pot[i] = (int) pow((double)(max_aa+1),(double)(i-1)); - limit = (int) pow((double)(max_aa+1),(double)ktup); - } - - tl1 = (l1+l2)-1; - - for(i=1;i<=tl1;++i) { - slopes[i]=displ[i]=0; - diag_index[i] = i; - } - - -/* increment diagonal score for each k_tuple match */ -/* Attempt at guessing the best band by looking at identities*/ - - for(i=1;i<=limit;++i) - { - vn1=zzc[i]; - while(TRUE) - { - if(!vn1) break; - vn2=zzd[i]; - while(vn2 != 0) - { - osptr=vn1-vn2+l2; - ++displ[osptr]; /*PLUG THE Pos Dependant Scheme Here!!!! (For Id only)*/ - vn2=zzb[vn2]; - } - vn1=zza[vn1]; - } - } - -/* choose the top SIGNIF diagonals */ - - des_quick_sort(displ, diag_index, tl1); - - j = tl1 - signif + 1; - if(j < 1) j = 1; - -/* flag all diagonals within WINDOW of a top diagonal */ - - for(i=tl1; i>=j; i--) - if(displ[i] > 0) { - pos = diag_index[i]; - l = (1 >pos-window) ? 1 : pos-window; - m = (tl1max_aa)){flag=TRUE; break;}encrypt += ((residue)*pot[j]);} - if(flag) continue; - else flag=FALSE; - - ++encrypt; - vn2=zzd[encrypt]; - - /*now trying to match i-ktup and vn2-ktup*/ - while(TRUE) - { - if(!vn2) - { - flag=TRUE; - break; - } - osptr=i-vn2+l2; /*osptr=Diagonal under investigation*/ - if(slopes[osptr]!=1) /*Get the next diagonal if that one is not flagged*/ - { - vn2=zzb[vn2]; - continue; - } - flen=0; - fs=ktup; - next=maxsf; - - /* A-loop*/ - while(TRUE) - { - if(!next) - { - ++curr_frag; - if(curr_frag>=2*max_aln_length) - { - - return; - } - displ[osptr]=curr_frag; - put_frag(fs,i,vn2,flen); /*sets the coordinates of the fragments*/ - } - else - { - tv1=accum[1][next]; - tv2=accum[2][next]; - if(frag_rel_pos(i,vn2,tv1,tv2)) - { - if(i-vn2==accum[1][next]-accum[2][next]) - { - if(i>accum[1][next]+(ktup-1)) - fs=accum[0][next]+ktup; - else - { - rmndr=i-accum[1][next]; - fs=accum[0][next]+rmndr; - } - flen=next; - next=0; - continue; - } - else - { - if(displ[osptr]==0) - subt1=ktup; - else - { - if(i>accum[1][displ[osptr]]+(ktup-1)) - subt1=accum[0][displ[osptr]]+ktup; - else - { - rmndr=i-accum[1][displ[osptr]]; - subt1=accum[0][displ[osptr]]+rmndr; - } - } - subt2=accum[0][next]-wind_gap+ktup; - if(subt2>subt1) - { - flen=next; - fs=subt2; - } - else - { - flen=displ[osptr]; - fs=subt1; - } - next=0; - continue; - } - - } - else - { - next=accum[4][next]; - continue; - } - } - break; - } - /* - * End of Aloop - */ - - vn2=zzb[vn2]; - } - } - -} - -int ** show_pair(int istart, int iend, int jstart, int jend, int *in_seqlen_array, char **in_seq_array, int dna_ktup, int dna_window, int dna_wind_gap, int dna_signif,int prot_ktup, int prot_window,int prot_wind_gap,int prot_signif, int in_nseqs,int in_dnaflag, int in_max_aa, int in_max_aln_length ) -{ - int i,j,dsr; - double calc_score; - int **tmat; - - seqlen_array=vcalloc ( in_nseqs+1, sizeof(int)); - for ( i=0; i< in_nseqs; i++)seqlen_array[i+1]=in_seqlen_array[i]; - - - seq_array=declare_char ( in_nseqs+1, in_max_aln_length); - for ( i=0; i< in_nseqs; i++)sprintf (seq_array[i+1], "%s",in_seq_array[i]); - - - nseqs=in_nseqs; - dnaflag=in_dnaflag; - max_aa=in_max_aa; - max_aln_length=in_max_aln_length; - - - tmat=declare_int ( nseqs+1, nseqs+1); - accum=declare_int( 5, 2*max_aln_length+1); - - displ = (int *) vcalloc( (2*max_aln_length +1), sizeof (int) ); - slopes = (char *)vcalloc( (2*max_aln_length +1) , sizeof (char)); - diag_index = (int *) vcalloc( (2*max_aln_length +1) , sizeof (int) ); - - zza = (int *)vcalloc( (max_aln_length+1),sizeof (int) ); - zzb = (int *)vcalloc( (max_aln_length+1),sizeof (int) ); - - zzc = (int *)vcalloc( (max_aln_length+1), sizeof (int) ); - zzd = (int *)vcalloc( (max_aln_length+1), sizeof (int) ); - - if(dnaflag) { - ktup = dna_ktup; - window = dna_window; - signif = dna_signif; - wind_gap = dna_wind_gap; - } - else { - ktup = prot_ktup; - window = prot_window; - signif = prot_signif; - wind_gap = prot_wind_gap; - } - - for(i=istart+1;i<=iend;++i) - { - if(dnaflag) - make_n_ptrs(zza,zzc,i,seqlen_array[i]); - else - make_p_ptrs(zza,zzc,i,seqlen_array[i]); - for(j=MAX(jstart+1, i+1);j<=jend;++j) - { - if (i!=j) - { - if(dnaflag) - make_n_ptrs(zzb,zzd,j,seqlen_array[j]); - else - make_p_ptrs(zzb,zzd,j,seqlen_array[j]); - - pair_align(i,seqlen_array[i],seqlen_array[j]); - - if(!maxsf) - calc_score=0.0; - else { - calc_score=(double)accum[0][maxsf]; - if(percent) { - dsr=(seqlen_array[i] %.2f",i, j, (float)calc_score ); - } - } - } - - free_int ( accum, -1); - - vfree(displ); - vfree(slopes); - vfree(diag_index); - - vfree(zza); - vfree(zzb); - vfree(zzc); - vfree(zzd); - return tmat; -} - -/*********************************COPYRIGHT NOTICE**********************************/ -/*© Centro de Regulacio Genomica */ -/*and */ -/*Cedric Notredame */ -/*Tue Oct 27 10:12:26 WEST 2009. */ -/*All rights reserved.*/ -/*This file is part of T-COFFEE.*/ -/**/ -/* T-COFFEE is free software; you can redistribute it and/or modify*/ -/* it under the terms of the GNU General Public License as published by*/ -/* the Free Software Foundation; either version 2 of the License, or*/ -/* (at your option) any later version.*/ -/**/ -/* T-COFFEE is distributed in the hope that it will be useful,*/ -/* but WITHOUT ANY WARRANTY; without even the implied warranty of*/ -/* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the*/ -/* GNU General Public License for more details.*/ -/**/ -/* You should have received a copy of the GNU General Public License*/ -/* along with Foobar; if not, write to the Free Software*/ -/* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA*/ -/*............................................... |*/ -/* If you need some more information*/ -/* cedric.notredame@europe.com*/ -/*............................................... |*/ -/**/ -/**/ -/* */ -/*********************************COPYRIGHT NOTICE**********************************/ diff --git a/binaries/src/tcoffee/t_coffee_source/source_list b/binaries/src/tcoffee/t_coffee_source/source_list deleted file mode 100644 index e0e85dd..0000000 --- a/binaries/src/tcoffee/t_coffee_source/source_list +++ /dev/null @@ -1,50 +0,0 @@ -/home/notredame/distributions/T-COFFEE_distribution_Version_8.14/t_coffee_source/CUSTOM_evaluate_for_struc.c -/home/notredame/distributions/T-COFFEE_distribution_Version_8.14/t_coffee_source/aln_compare.c -/home/notredame/distributions/T-COFFEE_distribution_Version_8.14/t_coffee_source/aln_convertion_util.c -/home/notredame/distributions/T-COFFEE_distribution_Version_8.14/t_coffee_source/dev1.c -/home/notredame/distributions/T-COFFEE_distribution_Version_8.14/t_coffee_source/dev2.c -/home/notredame/distributions/T-COFFEE_distribution_Version_8.14/t_coffee_source/dev3.c -/home/notredame/distributions/T-COFFEE_distribution_Version_8.14/t_coffee_source/dev4.c -/home/notredame/distributions/T-COFFEE_distribution_Version_8.14/t_coffee_source/evaluate.c -/home/notredame/distributions/T-COFFEE_distribution_Version_8.14/t_coffee_source/evaluate_dirichlet.c -/home/notredame/distributions/T-COFFEE_distribution_Version_8.14/t_coffee_source/evaluate_for_domain.c -/home/notredame/distributions/T-COFFEE_distribution_Version_8.14/t_coffee_source/evaluate_for_struc.c -/home/notredame/distributions/T-COFFEE_distribution_Version_8.14/t_coffee_source/fastal.c -/home/notredame/distributions/T-COFFEE_distribution_Version_8.14/t_coffee_source/fsa_dp.c -/home/notredame/distributions/T-COFFEE_distribution_Version_8.14/t_coffee_source/hsearch.c -/home/notredame/distributions/T-COFFEE_distribution_Version_8.14/t_coffee_source/io_func.c -/home/notredame/distributions/T-COFFEE_distribution_Version_8.14/t_coffee_source/parttree.c -/home/notredame/distributions/T-COFFEE_distribution_Version_8.14/t_coffee_source/pavie_dp.c -/home/notredame/distributions/T-COFFEE_distribution_Version_8.14/t_coffee_source/pb_util_read_seq_util.c -/home/notredame/distributions/T-COFFEE_distribution_Version_8.14/t_coffee_source/pb_util_read_sequence.c -/home/notredame/distributions/T-COFFEE_distribution_Version_8.14/t_coffee_source/random.c -/home/notredame/distributions/T-COFFEE_distribution_Version_8.14/t_coffee_source/reformat.c -/home/notredame/distributions/T-COFFEE_distribution_Version_8.14/t_coffee_source/reformat_struc.c -/home/notredame/distributions/T-COFFEE_distribution_Version_8.14/t_coffee_source/showpair.c -/home/notredame/distributions/T-COFFEE_distribution_Version_8.14/t_coffee_source/t_coffee.c -/home/notredame/distributions/T-COFFEE_distribution_Version_8.14/t_coffee_source/tree_util.c -/home/notredame/distributions/T-COFFEE_distribution_Version_8.14/t_coffee_source/util.c -/home/notredame/distributions/T-COFFEE_distribution_Version_8.14/t_coffee_source/util_aln_analyze.c -/home/notredame/distributions/T-COFFEE_distribution_Version_8.14/t_coffee_source/util_analyse_constraints_list.c -/home/notredame/distributions/T-COFFEE_distribution_Version_8.14/t_coffee_source/util_constraints_list.c -/home/notredame/distributions/T-COFFEE_distribution_Version_8.14/t_coffee_source/util_declare.c -/home/notredame/distributions/T-COFFEE_distribution_Version_8.14/t_coffee_source/util_domain_constraints_list.c -/home/notredame/distributions/T-COFFEE_distribution_Version_8.14/t_coffee_source/util_domain_dp.c -/home/notredame/distributions/T-COFFEE_distribution_Version_8.14/t_coffee_source/util_domain_dp_drivers.c -/home/notredame/distributions/T-COFFEE_distribution_Version_8.14/t_coffee_source/util_dp_cdna_fasta_nw.c -/home/notredame/distributions/T-COFFEE_distribution_Version_8.14/t_coffee_source/util_dp_clean_maln.c -/home/notredame/distributions/T-COFFEE_distribution_Version_8.14/t_coffee_source/util_dp_drivers.c -/home/notredame/distributions/T-COFFEE_distribution_Version_8.14/t_coffee_source/util_dp_est.c -/home/notredame/distributions/T-COFFEE_distribution_Version_8.14/t_coffee_source/util_dp_fasta_nw.c -/home/notredame/distributions/T-COFFEE_distribution_Version_8.14/t_coffee_source/util_dp_fasta_sw.c -/home/notredame/distributions/T-COFFEE_distribution_Version_8.14/t_coffee_source/util_dp_generic_fasta_nw.c -/home/notredame/distributions/T-COFFEE_distribution_Version_8.14/t_coffee_source/util_dp_gotoh_nw.c -/home/notredame/distributions/T-COFFEE_distribution_Version_8.14/t_coffee_source/util_dp_gotoh_sw.c -/home/notredame/distributions/T-COFFEE_distribution_Version_8.14/t_coffee_source/util_dp_mm_nw.c -/home/notredame/distributions/T-COFFEE_distribution_Version_8.14/t_coffee_source/util_dp_sim.c -/home/notredame/distributions/T-COFFEE_distribution_Version_8.14/t_coffee_source/util_dp_ssec_pwaln.c -/home/notredame/distributions/T-COFFEE_distribution_Version_8.14/t_coffee_source/util_dp_suboptimal_nw.c -/home/notredame/distributions/T-COFFEE_distribution_Version_8.14/t_coffee_source/util_dps.c -/home/notredame/distributions/T-COFFEE_distribution_Version_8.14/t_coffee_source/util_graph_maln.c -/home/notredame/distributions/T-COFFEE_distribution_Version_8.14/t_coffee_source/util_job_handling.c -/home/notredame/distributions/T-COFFEE_distribution_Version_8.14/t_coffee_source/util_make_tree.c diff --git a/binaries/src/tcoffee/t_coffee_source/t_coffee b/binaries/src/tcoffee/t_coffee_source/t_coffee deleted file mode 100644 index 9cf3447..0000000 Binary files a/binaries/src/tcoffee/t_coffee_source/t_coffee and /dev/null differ diff --git a/binaries/src/tcoffee/t_coffee_source/t_coffee.c b/binaries/src/tcoffee/t_coffee_source/t_coffee.c deleted file mode 100644 index 83f5d58..0000000 --- a/binaries/src/tcoffee/t_coffee_source/t_coffee.c +++ /dev/null @@ -1,5321 +0,0 @@ -#include -#include -#include -#include -#include -#include - -#include "io_lib_header.h" -#include "util_lib_header.h" -#include "dp_lib_header.h" -#include "define_header.h" -#include "t_coffee.h" -static void test(); -static char * get_seq_type_from_cl (int argc, char **argv); -static char *get_defaults(char *buf, char *type); -static char *get_evaluate_defaults(char *buf, char *type); -static char *get_genome_defaults(char *buf, char *type); -static char *get_dali_defaults(char *buf, char *type); -static char *get_mcoffee_defaults(char *buf, char *type); -static char *get_fmcoffee_defaults(char *buf, char *type); -static char *get_t_coffee_defaults(char *buf, char *type); - -static char *get_dmcoffee_defaults(char *buf, char *type); -static char *get_rcoffee_consan_defaults(char *buf, char *type); - -static char *get_rmcoffee_defaults(char *buf, char *type);//Original R-Coffee Paper -static char *get_rcoffee_defaults(char *buf, char *type);//Original R-Coffee Paper -static char *get_rmcoffee_defaults_old(char *buf, char *type);//Original R-Coffee Paper -static char *get_rcoffee_defaults_old(char *buf, char *type);//Original R-Coffee Paper -static char *get_best4RNA_defaults(char *buf, char *type); - -static char *get_very_fast_defaults(char *buf, char *type); -static char *get_precomputed_defaults(char *buf, char *type); -static char *get_3dcoffee_defaults(char *buf, char *type); -static char *get_expresso_defaults(char *buf, char *type); - -static char *get_accurate_defaults(char *buf, char *type); -static char *get_accurate4PROTEIN_defaults(char *buf, char *type); -static char *get_accurate4DNA_defaults(char *buf, char *type); -static char *get_accurate4RNA_defaults(char *buf, char *type); - -static char *get_psicoffee_defaults(char *buf, char *type); -static char *get_dna_defaults(char *buf, char *type); -static char *get_cdna_defaults(char *buf, char *type); -static char *get_repeat_defaults(char *buf, char *type); -static char *get_low_memory_defaults( char *buf, char *type); - -static int set_methods_limits (char **method_limits,int n_methods_limit,char **list_file, int n_list, int *maxnseq, int *maxlen); -static FILE *t_coffee_tip (FILE *fp,char *mode); - -static int run_other_pg(int argc, char *argv[]); -static char* prepare_one2all (char *seq,Sequence *S, char *lib_file); -static char* prepare_subset2all (char *seq,Sequence *S, char *lib_file, Constraint_list *CL); - -#define is_a_seq_file(file) (!is_matrix(file) && !is_matrix(file+1) && !is_method (file) && !is_method (file+1) &&(check_file_exists(file) || check_file_exists(file+1))) -static int NO_METHODS_IN_CL; -int batch_main ( int argc, char **argv); -int main (int argc, char *argv[]) -{ - int r, a; - - if (argc>=2 && strcmp (argv[1], "-batch")==0) - { - char **list; - list=file2lines (argv[2]); - for (a=1; a=3 && strm (argv[1], "-other_pg")) - { - standard_initialisation_start (NULL,NULL); - return run_other_pg (argc-2, argv+2); - } - -/*PARAMETER PROTOTYPE: READ PARAMETER FILE */ - get_cl_param(\ - /*argc*/ argc ,\ - /*argv*/ argv ,\ - /*output*/ &le ,\ - /*Name*/ "-no_error_report" ,\ - /*Flag*/ &garbage ,\ - /*TYPE*/ "D" ,\ - /*OPTIONAL?*/ OPTIONAL ,\ - /*MAX Nval*/ 1 ,\ - /*DOC*/ "Limit the maximum memory usage (in Megabytes). 0: no limit" ,\ - /*Parameter*/ &no_error_report ,\ - /*Def 1*/ "0" ,\ - /*Def 2*/ "1" ,\ - /*Min_value*/ "any" ,\ - /*Max Value*/ "any" \ - ); - -/*PARAMETER PROTOTYPE: READ PARAMETER FILE */ - declare_name (parameters); - get_cl_param(\ - /*argc*/ argc ,\ - /*argv*/ argv ,\ - /*output*/ &le ,\ - /*Name*/ "-parameters" ,\ - /*Flag*/ &garbage ,\ - /*TYPE*/ "R_F" ,\ - /*OPTIONAL?*/ OPTIONAL ,\ - /*MAX Nval*/ 1 ,\ - /*DOC*/ "get bottom parameters" ,\ - /*Parameter*/ ¶meters ,\ - /*Def 1*/ "NULL" ,\ - /*Def 2*/ "stdin" ,\ - /*Min_value*/ "any" ,\ - /*Max Value*/ "any" \ - ); - - - special_mode_list1=declare_char (100, STRING); - - n_special_mode1=get_cl_param( \ - /*argc*/ argc ,\ - /*argv*/ argv ,\ - /*output*/ &le ,\ - /*Name*/ "-mode" ,\ - /*Flag*/ &garbage ,\ - /*TYPE*/ "S" ,\ - /*OPTIONAL?*/ OPTIONAL ,\ - /*MAX Nval*/ 100 ,\ - /*DOC*/ "specifies a special mode: genome, quickaln, dali, 3dcoffee" ,\ - /*Parameter*/ special_mode_list1 ,\ - /*Def 1*/ "unspecified" ,\ - /*Def 2*/ "HARD_CODED" ,\ - /*Min_value*/ "any" ,\ - /*Max Value*/ "any" \ - ); - - - special_mode_list2=declare_char (100, STRING); - n_special_mode2=get_cl_param(\ - /*argc*/ argc ,\ - /*argv*/ argv ,\ - /*output*/ &le ,\ - /*Name*/ "-special_mode" ,\ - /*Flag*/ &garbage ,\ - /*TYPE*/ "S" ,\ - /*OPTIONAL?*/ OPTIONAL ,\ - /*MAX Nval*/ 100 ,\ - /*DOC*/ "[DEPRECATED ** -special_mode is deprected use -mode instead]" ,\ - /*Parameter*/ special_mode_list2 ,\ - /*Def 1*/ "unspecified" ,\ - /*Def 2*/ "HARD_CODED" ,\ - /*Min_value*/ "any" ,\ - /*Max Value*/ "any" \ - ); - - special_mode_list=declare_char (n_special_mode1+n_special_mode2, STRING); - n_special_mode=0; - for (a=0; aprompt>special>parameters>defaults*/ - argv=break_list ( argv, &argc, "=;, \n"); - argv=merge_list ( argv, &argc); - if (argc>1 && argv[1][0]!='-')argv=push_string ("-seq ", argv, &argc, 1); - - if ( name_is_in_list ("-method",argv, argc,100)==-1) - { - NO_METHODS_IN_CL=1; - } - -if (t_coffee_defaults_flag) - { - char *pname=NULL; - - pname=getenv ( "TCOFFEE_DEFAULTS"); - - if (check_file_exists ( t_coffee_defaults))pname=t_coffee_defaults; - else if ( getenv ( "TCOFFEE_DEFAULTS")) - { - pname=getenv ( "TCOFFEE_DEFAULTS"); - if (check_file_exists(pname)); - else pname=NULL; - } - else - { - declare_name(pname);sprintf (pname, "%s/.t_coffee_defaults",getenv ( "HOME") ); - if (!check_file_exists (pname)){vfree(pname);pname=NULL;} - } - - if (pname) - { - argv=push_string (file2string(pname), argv, &argc, 1); - t_coffee_defaults=pname; - } - else - { - t_coffee_defaults=NULL; - } - } - - if ( parameters && parameters[0])argv=push_string (file2string (parameters), argv, &argc, 1); - - - if (n_special_mode && !type_only) - { - char *special_mode; - char *lseq_type; - declare_name(lseq_type); - if (type && !strm (type, "")) - sprintf (lseq_type,"%s",type); - else - sprintf (lseq_type,"%s",get_seq_type_from_cl (argc, argv)); - - for ( a=0; a< n_special_mode; a++) - { - char *new_arg=NULL; - - special_mode=special_mode_list[a]; - - store_mode (special_mode); - - - if (special_mode && !special_mode[0]); - else if ( strm (special_mode, "regular") || strm (special_mode, "regular_fast")|| strm (special_mode, "default"))new_arg=get_defaults (NULL,lseq_type); - else if ( strm (special_mode, "genome"))new_arg=get_genome_defaults(NULL,lseq_type); - else if ( strm (special_mode, "quickaln"))new_arg=get_very_fast_defaults(NULL,lseq_type); - else if ( strm (special_mode, "dali"))new_arg=get_dali_defaults(NULL,lseq_type); - else if ( strm (special_mode, "evaluate"))new_arg=get_evaluate_defaults(NULL,lseq_type); - else if ( strm (special_mode, "precomputed"))new_arg=get_precomputed_defaults(NULL,lseq_type); - else if ( strm (special_mode, "3dcoffee"))new_arg=get_3dcoffee_defaults(NULL,lseq_type); - else if ( strm (special_mode, "expresso"))new_arg=get_expresso_defaults(NULL,lseq_type); - else if ( strm (special_mode, "repeats"))new_arg=get_repeat_defaults(NULL,lseq_type); - else if ( strm (special_mode, "psicoffee"))new_arg=get_psicoffee_defaults(NULL,lseq_type); - - else if ( strm (special_mode, "accurate") || strm (special_mode, "accurate_slow") || strm (special_mode, "psicoffee_expresso"))new_arg=get_accurate_defaults(NULL, lseq_type); - else if ( strm (special_mode, "accurate4DNA"))new_arg=get_accurate4DNA_defaults(NULL,lseq_type); - else if ( strm (special_mode, "accurate4RNA"))new_arg=get_accurate4RNA_defaults(NULL,lseq_type); - else if ( strm (special_mode, "best4RNA"))new_arg=get_best4RNA_defaults(NULL,lseq_type); - else if ( strm (special_mode, "accurate4PROTEIN"))new_arg=get_accurate4PROTEIN_defaults(NULL,lseq_type); - - else if ( strm (special_mode, "low_memory") || strm (special_mode, "memory"))new_arg=get_low_memory_defaults(NULL,lseq_type); - - - else if ( strm (special_mode, "dna"))new_arg=get_dna_defaults(NULL,lseq_type); - else if ( strm (special_mode, "cdna"))new_arg=get_dna_defaults(NULL,lseq_type); - else if ( strm (special_mode, "protein"))new_arg=get_low_memory_defaults(NULL,lseq_type); - else if ( strm (special_mode, "mcoffee"))new_arg=get_mcoffee_defaults(NULL,lseq_type); - else if ( strm (special_mode, "dmcoffee"))new_arg=get_dmcoffee_defaults(NULL,lseq_type); - else if ( strm (special_mode, "fmcoffee"))new_arg=get_fmcoffee_defaults(NULL,lseq_type); - - else if ( strm (special_mode, "rcoffee_consan"))new_arg=get_rcoffee_consan_defaults(NULL,lseq_type); - else if ( strm (special_mode, "rmcoffee") ||strm (special_mode, "mrcoffee") )new_arg=get_rmcoffee_defaults(NULL,lseq_type); - else if ( strm (special_mode, "rcoffee"))new_arg=get_rcoffee_defaults(NULL,lseq_type); - - else if ( strm (special_mode, "rcoffee_slow_accurate"))new_arg=get_rcoffee_consan_defaults(NULL,lseq_type); - else if ( strm (special_mode, "rcoffee_fast_approximate"))new_arg=get_rmcoffee_defaults(NULL,lseq_type); - else if ( strm (special_mode, "t_coffee"))new_arg=get_t_coffee_defaults(NULL,lseq_type); - - - else if ( strm (special_mode, "unspecified")); - else - { - fprintf ( stderr, "\nERROR: special_mode %s is unknown [FATAL:%s]\n",special_mode, PROGRAM); - myexit (EXIT_FAILURE); - } - - if (new_arg)argv=push_string (new_arg, argv, &argc, 1); - } - } - -if ( getenv ("TCOFFEE_EXTRA_PARAM"))argv=push_string (getenv ("TCOFFEE_EXTRA_PARAM"), argv, &argc, argc); - - -argv=break_list ( argv, &argc, "=;, \n"); -argv=merge_list ( argv, &argc); -/*check_cl4t_coffee ( argc, argv); */ - - -/*PARAMETER PROTOTYPE: VERSION */ - get_cl_param(\ - /*argc*/ argc ,\ - /*argv*/ argv ,\ - /*output*/ &le ,\ - /*Name*/ "-version" ,\ - /*Flag*/ &do_version ,\ - /*TYPE*/ "FL" ,\ - /*OPTIONAL?*/ OPTIONAL ,\ - /*MAX Nval*/ 1 ,\ - /*DOC*/ "forces the program to output the version number and exit" ,\ - /*Parameter*/ &do_version ,\ - /*Def 1*/ "0" ,\ - /*Def 2*/ "1" ,\ - /*Min_value*/ "0" ,\ - /*Max Value*/ "1" \ - ); - - -/*PARAMETER PROTOTYPE: DO EVALUATE */ - get_cl_param(\ - /*argc*/ argc ,\ - /*argv*/ argv ,\ - /*output*/ &le ,\ - /*Name*/ "-score" ,\ - /*Flag*/ &do_evaluate ,\ - /*TYPE*/ "FL" ,\ - /*OPTIONAL?*/ OPTIONAL ,\ - /*MAX Nval*/ 1 ,\ - /*DOC*/ "DEPRECATED: use -special_mode evaluate instead " ,\ - /*Parameter*/ &do_evaluate ,\ - /*Def 1*/ "0" ,\ - /*Def 2*/ "1" ,\ - /*Min_value*/ "0" ,\ - /*Max Value*/ "1" \ - ); -if ( !do_evaluate) - { -/*PARAMETER PROTOTYPE: DO EVALUATE */ - get_cl_param(\ - /*argc*/ argc ,\ - /*argv*/ argv ,\ - /*output*/ &le ,\ - /*Name*/ "-evaluate" ,\ - /*Flag*/ &do_evaluate ,\ - /*TYPE*/ "FL" ,\ - /*OPTIONAL?*/ OPTIONAL ,\ - /*MAX Nval*/ 1 ,\ - /*DOC*/ "Use -special_mode evaluate for a default behavior " ,\ - /*Parameter*/ &do_evaluate ,\ - /*Def 1*/ "0" ,\ - /*Def 2*/ "1" ,\ - /*Min_value*/ "0" ,\ - /*Max Value*/ "1" \ - ); - } -/*PARAMETER PROTOTYPE: DO FORMAT */ - get_cl_param(\ - /*argc*/ argc ,\ - /*argv*/ argv ,\ - /*output*/ &le ,\ - /*Name*/ "-convert" ,\ - /*Flag*/ &do_convert ,\ - /*TYPE*/ "FL" ,\ - /*OPTIONAL?*/ OPTIONAL ,\ - /*MAX Nval*/ 1 ,\ - /*DOC*/ "forces the program to make a conversion" ,\ - /*Parameter*/ &do_convert ,\ - /*Def 1*/ "0" ,\ - /*Def 2*/ "1" ,\ - /*Min_value*/ "0" ,\ - /*Max Value*/ "1" \ - ); - - -/*PARAMETER PROTOTYPE*/ - - declare_name (se_name); - get_cl_param(\ - /*argc*/ argc ,\ - /*argv*/ argv ,\ - /*output*/ &le ,\ - /*Name*/ "-quiet" ,\ - /*Flag*/ &quiet ,\ - /*TYPE*/ "W_F" ,\ - /*OPTIONAL?*/ OPTIONAL ,\ - /*MAX Nval*/ 1 ,\ - /*DOC*/ "Defines the file in which the log output is written" ,\ - /*Parameter*/ &se_name ,\ - /*Def 1*/ "stderr" ,\ - /*Def 2*/ "/dev/null" ,\ - /*Min_value*/ "any" ,\ - /*Max Value*/ "any" \ - ); - if (type_only==1)sprintf ( se_name, "/dev/null"); - - /*PARAMETER PROTOTYPE: DO FORMAT */ - get_cl_param(\ - /*argc*/ argc ,\ - /*argv*/ argv ,\ - /*output*/ &le ,\ - /*Name*/ "-check_configuration" ,\ - /*Flag*/ &check_configuration ,\ - /*TYPE*/ "FL" ,\ - /*OPTIONAL?*/ OPTIONAL ,\ - /*MAX Nval*/ 1 ,\ - /*DOC*/ "checks that the required programs are installed" ,\ - /*Parameter*/ &check_configuration ,\ - /*Def 1*/ "0" ,\ - /*Def 2*/ "1" ,\ - /*Min_value*/ "0" ,\ - /*Max Value*/ "1" \ - ); - /*PARAMETER PROTOTYPE: UPDATE */ - get_cl_param(\ - /*argc*/ argc ,\ - /*argv*/ argv ,\ - /*output*/ &le ,\ - /*Name*/ "-update" ,\ - /*Flag*/ &update,\ - /*TYPE*/ "FL" ,\ - /*OPTIONAL?*/ OPTIONAL ,\ - /*MAX Nval*/ 1 ,\ - /*DOC*/ "checks the existence of an updated version" ,\ - /*Parameter*/ &update ,\ - /*Def 1*/ "0" ,\ - /*Def 2*/ "1" ,\ - /*Min_value*/ "0" ,\ - /*Max Value*/ "1" \ - ); - - - - if ( check_configuration) - { - - check_configuration4program(); - return EXIT_SUCCESS; - - } - if ( update) - { - myexit (check_for_update(DISTRIBUTION_ADDRESS)); - } - if ( do_version) - { - fprintf ( stdout, "PROGRAM: %s (%s)\n",PROGRAM,VERSION); - return EXIT_SUCCESS; - } - - - le=vfopen ( se_name, "w"); - fprintf ( le, "\nPROGRAM: %s (%s)\n",PROGRAM,VERSION); - -/*PARAMETER PROTOTYPE: RUN NAME*/ - declare_name (full_log); - get_cl_param(\ - /*argc*/ argc ,\ - /*argv*/ argv ,\ - /*output*/ &le ,\ - /*Name*/ "-full_log" ,\ - /*Flag*/ &garbage ,\ - /*TYPE*/ "S" ,\ - /*OPTIONAL?*/ OPTIONAL ,\ - /*MAX Nval*/ 1 ,\ - /*DOC*/ "Sets the prefix of all the output files" ,\ - /*Parameter*/ &full_log ,\ - /*Def 1*/ "NULL" ,\ - /*Def 2*/ "full_log" ,\ - /*Min_value*/ "any" ,\ - /*Max Value*/ "any" \ - ); - vremove(full_log); -/*PARAMETER PROTOTYPE: RUN NAME*/ - declare_name (run_name); - get_cl_param(\ - /*argc*/ argc ,\ - /*argv*/ argv ,\ - /*output*/ &le ,\ - /*Name*/ "-run_name" ,\ - /*Flag*/ &garbage ,\ - /*TYPE*/ "S" ,\ - /*OPTIONAL?*/ OPTIONAL ,\ - /*MAX Nval*/ 1 ,\ - /*DOC*/ "Sets the prefix of all the output files" ,\ - /*Parameter*/ &run_name ,\ - /*Def 1*/ "NULL" ,\ - /*Def 2*/ "" ,\ - /*Min_value*/ "any" ,\ - /*Max Value*/ "any" \ - ); - -/*PARAMETER PROTOTYPE: MEM MODE*/ - declare_name(mem_mode); - get_cl_param(\ - /*argc*/ argc ,\ - /*argv*/ argv ,\ - /*output*/ &le ,\ - /*Name*/ "-mem_mode" ,\ - /*Flag*/ &garbage ,\ - /*TYPE*/ "S" ,\ - /*OPTIONAL?*/ OPTIONAL ,\ - /*MAX Nval*/ 1 ,\ - /*DOC*/ "Deprecated" ,\ - /*Parameter*/ &mem_mode ,\ - /*Def 1*/ "mem" ,\ - /*Def 2*/ "" ,\ - /*Min_value*/ "any" ,\ - /*Max Value*/ "any" \ - ); - -/*PARAMETER PROTOTYPE: EXTEND */ - get_cl_param(\ - /*argc*/ argc ,\ - /*argv*/ argv ,\ - /*output*/ &le ,\ - /*Name*/ "-extend" ,\ - /*Flag*/ &do_extend ,\ - /*TYPE*/ "D" ,\ - /*OPTIONAL?*/ OPTIONAL ,\ - /*MAX Nval*/ 1 ,\ - /*DOC*/ "Do Library Extention On the Fly" ,\ - /*Parameter*/ &do_extend ,\ - /*Def 1*/ "1" ,\ - /*Def 2*/ "1" ,\ - /*Min_value*/ "any" ,\ - /*Max Value*/ "any" \ - ); -/*PARAMETER PROTOTYPE: EXTEND */ - declare_name (extend_mode); - get_cl_param(\ - /*argc*/ argc ,\ - /*argv*/ argv ,\ - /*output*/ &le ,\ - /*Name*/ "-extend_mode" ,\ - /*Flag*/ &garbage ,\ - /*TYPE*/ "S" ,\ - /*OPTIONAL?*/ OPTIONAL ,\ - /*MAX Nval*/ 1 ,\ - /*DOC*/ "Library extension mode" ,\ - /*Parameter*/ &extend_mode ,\ - /*Def 1*/ "very_fast_triplet" ,\ - /*Def 2*/ "" ,\ - /*Min_value*/ "any" ,\ - /*Max Value*/ "any" \ - ); - /*PARAMETER PROTOTYPE: EXTEND */ - get_cl_param(\ - /*argc*/ argc ,\ - /*argv*/ argv ,\ - /*output*/ &le ,\ - /*Name*/ "-max_n_pair" ,\ - /*Flag*/ &garbage ,\ - /*TYPE*/ "D" ,\ - /*OPTIONAL?*/ OPTIONAL ,\ - /*MAX Nval*/ 1 ,\ - /*DOC*/ "Indicates the Number of Pairs to Compare when making prf Vs prf. 0<=>every pair " ,\ - /*Parameter*/ &max_n_pair ,\ - /*Def 1*/ "10" ,\ - /*Def 2*/ "3" ,\ - /*Min_value*/ "any" ,\ - /*Max Value*/ "any" \ - ); - -/*PARAMETER PROTOTYPE: SEQUENCES TO EXTEND */ - seq_name_for_quadruplet=declare_char ( 200, STRING); - nseq_for_quadruplet=get_cl_param(\ - /*argc*/ argc ,\ - /*argv*/ argv ,\ - /*output*/ &le ,\ - /*Name*/ "-seq_name_for_quadruplet" ,\ - /*Flag*/ &garbage ,\ - /*TYPE*/ "S" ,\ - /*OPTIONAL?*/ OPTIONAL ,\ - /*MAX Nval*/ 200 ,\ - /*DOC*/ "Indicates which sequence must be used to compute quadruplets" ,\ - /*Parameter*/ seq_name_for_quadruplet ,\ - /*Def 1*/ "all",\ - /*Def 2*/ "" ,\ - /*Min_value*/ "any" ,\ - /*Max Value*/ "any" \ - ); -/*PARAMETER PROTOTYPE: COMPACT */ - declare_name (compact_mode); - get_cl_param(\ - /*argc*/ argc ,\ - /*argv*/ argv ,\ - /*output*/ &le ,\ - /*Name*/ "-compact" ,\ - /*Flag*/ &do_compact ,\ - /*TYPE*/ "S" ,\ - /*OPTIONAL?*/ OPTIONAL ,\ - /*MAX Nval*/ 1 ,\ - /*DOC*/ "Deprecated" ,\ - /*Parameter*/ &compact_mode ,\ - /*Def 1*/ "default" ,\ - /*Def 2*/ "default" ,\ - /*Min_value*/ "0" ,\ - /*Max Value*/ "1" \ - ); - - -/*PARAMETER PROTOTYPE: CLEAN*/ - declare_name ( clean_mode); - get_cl_param(\ - /*argc*/ argc ,\ - /*argv*/ argv ,\ - /*output*/ &le ,\ - /*Name*/ "-clean" ,\ - /*Flag*/ &do_clean ,\ - /*TYPE*/ "S" ,\ - /*OPTIONAL?*/ OPTIONAL ,\ - /*MAX Nval*/ 1 ,\ - /*DOC*/ "Deprecated" ,\ - /*Parameter*/ &clean_mode ,\ - /*Def 1*/ "no" ,\ - /*Def 2*/ "shadow" ,\ - /*Min_value*/ "any" ,\ - /*Max Value*/ "any" \ - ); - -/*PARAMETER PROTOTYPE: DO SELF */ - get_cl_param(\ - /*argc*/ argc ,\ - /*argv*/ argv ,\ - /*output*/ &le ,\ - /*Name*/ "-do_self" ,\ - /*Flag*/ &do_self ,\ - /*TYPE*/ "FL" ,\ - /*OPTIONAL?*/ OPTIONAL ,\ - /*MAX Nval*/ 0 ,\ - /*DOC*/ "Make self extension. Used by Mocca" ,\ - /*Parameter*/ &do_self ,\ - /*Def 1*/ "0" ,\ - /*Def 2*/ "1" ,\ - /*Min_value*/ "0" ,\ - /*Max Value*/ "1" \ - ); - -/*PARAMETER PROTOTYPE: DO NORMALISE */ - get_cl_param(\ - /*argc*/ argc ,\ - /*argv*/ argv ,\ - /*output*/ &le ,\ - /*Name*/ "-do_normalise" ,\ - /*Flag*/ &garbage ,\ - /*TYPE*/ "D" ,\ - /*OPTIONAL?*/ OPTIONAL ,\ - /*MAX Nval*/ 1 ,\ - /*DOC*/ "Normalisation factor when computing scores" ,\ - /*Parameter*/ &do_normalise ,\ - /*Def 1*/ "1000" ,\ - /*Def 2*/ "1000" ,\ - /*Min_value*/ "-10000" ,\ - /*Max Value*/ "10000" \ - ); -/*PARAMETER PROTOTYPE: IN */ - template_file_list=declare_char (100, STRING); - n_template_file=get_cl_param( \ - /*argc*/ argc ,\ - /*argv*/ argv ,\ - /*output*/ &le ,\ - /*Name*/ "-template_file" ,\ - /*Flag*/ &garbage ,\ - /*TYPE*/ "S" ,\ - /*OPTIONAL?*/ OPTIONAL ,\ - /*MAX Nval*/ 1000 ,\ - /*DOC*/ "List of templates file for the sequences",\ - /*Parameter*/ template_file_list , \ - /*Def 1*/ "",\ - /*Def 2*/ "stdin" ,\ - /*Min_value*/ "any" ,\ - /*Max Value*/ "any" \ - ); -/*PARAMETER PROTOTYPE: IN */ - template_mode_list=declare_char (100, STRING); - n_template_mode=get_cl_param( \ - /*argc*/ argc ,\ - /*argv*/ argv ,\ - /*output*/ &le ,\ - /*Name*/ "-template_mode" ,\ - /*Flag*/ &garbage ,\ - /*TYPE*/ "S" ,\ - /*OPTIONAL?*/ OPTIONAL ,\ - /*MAX Nval*/ 1000 ,\ - /*DOC*/ "List of template procedures",\ - /*Parameter*/ template_mode_list , \ - /*Def 1*/ "",\ - /*Def 2*/ "stdin" ,\ - /*Min_value*/ "any" ,\ - /*Max Value*/ "any" \ - ); - for (a=0; a_sim1, _sim2, _sim3, _cov, _gap" ,\ - /*Parameter*/ &distance_matrix_sim_mode ,\ - /*Def 1*/ "idmat_sim1" ,\ - /*Def 2*/ "idmat_sim1" ,\ - /*Min_value*/ "any" ,\ - /*Max Value*/ "any" \ - ); -/*PARAMETER PROTOTYPE: OUT_LIB */ - get_cl_param(\ - /*argc*/ argc ,\ - /*argv*/ argv ,\ - /*output*/ &le ,\ - /*Name*/ "-quicktree" ,\ - /*Flag*/ &quicktree ,\ - /*TYPE*/ "FL" ,\ - /*OPTIONAL?*/ OPTIONAL ,\ - /*MAX Nval*/ 0 ,\ - /*DOC*/ "Use distance_matrix_mode=very_fast" ,\ - /*Parameter*/ &quicktree ,\ - /*Def 1*/ "0" ,\ - /*Def 2*/ "1" ,\ - /*Min_value*/ "any" ,\ - /*Max Value*/ "any" \ - ); - if ( quicktree)sprintf ( distance_matrix_mode, "very_fast"); -/*PARAMETER PROTOTYPE: OUTFILE */ - declare_name ( out_aln); - tot_out_aln=declare_char (200, STRING); - get_cl_param(\ - /*argc*/ argc ,\ - /*argv*/ argv ,\ - /*output*/ &le ,\ - /*Name*/ "-outfile" ,\ - /*Flag*/ &garbage ,\ - /*TYPE*/ "W_F" ,\ - /*OPTIONAL?*/ OPTIONAL ,\ - /*MAX Nval*/ 1 ,\ - /*DOC*/ "Name of the output alignment" ,\ - /*Parameter*/ &out_aln ,\ - /*Def 1*/ "default" ,\ - /*Def 2*/ "" ,\ - /*Min_value*/ "any" ,\ - /*Max Value*/ "any" \ - ); - -/*PARAMETER PROTOTYPE: MAXIMISE */ - get_cl_param(\ - /*argc*/ argc ,\ - /*argv*/ argv ,\ - /*output*/ &le ,\ - /*Name*/ "-maximise" ,\ - /*Flag*/ &maximise ,\ - /*TYPE*/ "FL" ,\ - /*OPTIONAL?*/ OPTIONAL ,\ - /*MAX Nval*/ 0 ,\ - /*DOC*/ "Deprecated" ,\ - /*Parameter*/ &maximise ,\ - /*Def 1*/ "1" ,\ - /*Def 2*/ "1" ,\ - /*Min_value*/ "any" ,\ - /*Max Value*/ "any" \ - ); - -/*PARAMETER PROTOTYPE: OUTPUT_FORMAT */ - out_aln_format=declare_char ( 200, STRING); - n_out_aln_format=get_cl_param(\ - /*argc*/ argc ,\ - /*argv*/ argv ,\ - /*output*/ &le ,\ - /*Name*/ "-output" ,\ - /*Flag*/ &garbage ,\ - /*TYPE*/ "S" ,\ - /*OPTIONAL?*/ OPTIONAL ,\ - /*MAX Nval*/ 200 ,\ - /*DOC*/ "Specifies one or many formats that must be output: clustalw_aln, msf_aln. The file extension is the output format" ,\ - /*Parameter*/ out_aln_format,\ - /*Def 1*/ "aln,html" ,\ - /*Def 2*/ "1" ,\ - /*Min_value*/ "any" ,\ - /*Max Value*/ "any" \ - ); -/*PARAMETER PROTOTYPE: INFILE */ - declare_name (infile); - get_cl_param(\ - /*argc*/ argc ,\ - /*argv*/ argv ,\ - /*output*/ &le ,\ - /*Name*/ "-infile" ,\ - /*Flag*/ &garbage ,\ - /*TYPE*/ "R_F" ,\ - /*OPTIONAL?*/ OPTIONAL ,\ - /*MAX Nval*/ 1 ,\ - /*DOC*/ "input a pre-computed alignment, or a file to reformat" ,\ - /*Parameter*/ &infile ,\ - /*Def 1*/ "" ,\ - /*Def 2*/ "" ,\ - /*Min_value*/ "any" ,\ - /*Max Value*/ "any" \ - ); - -/*PARAMETER PROTOTYPE: INFILE */ - declare_name (matrix); - get_cl_param(\ - /*argc*/ argc ,\ - /*argv*/ argv ,\ - /*output*/ &le ,\ - /*Name*/ "-matrix" ,\ - /*Flag*/ &garbage ,\ - /*TYPE*/ "S" ,\ - /*OPTIONAL?*/ OPTIONAL ,\ - /*MAX Nval*/ 1 ,\ - /*DOC*/ "Specifies the substitution matrix.",\ - /*Parameter*/ &matrix ,\ - /*Def 1*/ "default" ,\ - /*Def 2*/ "default" ,\ - /*Min_value*/ "any" ,\ - /*Max Value*/ "any" \ - ); -/*PARAMETER PROTOTYPE: TG_MODE */ - - get_cl_param(\ - /*argc*/ argc ,\ - /*argv*/ argv ,\ - /*output*/ &le ,\ - /*Name*/ "-tg_mode" ,\ - /*Flag*/ &garbage ,\ - /*TYPE*/ "D" ,\ - /*OPTIONAL?*/ OPTIONAL ,\ - /*MAX Nval*/ 1 ,\ - /*DOC*/ "0: Penalise Term gap with gapopen and gapext\n1: gapopen only\n2: No penalty\n",\ - /*Parameter*/ &tg_mode ,\ - /*Def 1*/ "1",\ - /*Def 2*/ "0",\ - /*Min_value*/ "any" ,\ - /*Max Value*/ "any" \ - ); -/*PARAMETER PROTOTYPE: DP_MODE */ - declare_name (profile_mode); - get_cl_param(\ - /*argc*/ argc ,\ - /*argv*/ argv ,\ - /*output*/ &le ,\ - /*Name*/ "-profile_mode" ,\ - /*Flag*/ &garbage ,\ - /*TYPE*/ "S" ,\ - /*OPTIONAL?*/ OPTIONAL ,\ - /*MAX Nval*/ 1 ,\ - /*DOC*/ "Function used to compute profile2profile scores",\ - /*Parameter*/ &profile_mode ,\ - /*Def 1*/ "cw_profile_profile",\ - /*Def 2*/ "cw_profile_profile",\ - /*Min_value*/ "any" ,\ - /*Max Value*/ "any" \ - ); - - declare_name (profile_comparison); - get_cl_param(\ - /*argc*/ argc ,\ - /*argv*/ argv ,\ - /*output*/ &le ,\ - /*Name*/ "-profile_comparison" ,\ - /*Flag*/ &garbage ,\ - /*TYPE*/ "S" ,\ - /*OPTIONAL?*/ OPTIONAL ,\ - /*MAX Nval*/ 1 ,\ - /*DOC*/ "Method used to compare two profiles: full: compares pair of sequence and every pair of structure if a structure method is used,profile: compares only the profiles. ",\ - /*Parameter*/ &profile_comparison ,\ - /*Def 1*/ "profile",\ - /*Def 2*/ "full50",\ - /*Min_value*/ "any" ,\ - /*Max Value*/ "any" \ - ); -/*PARAMETER PROTOTYPE: DP_MODE */ - declare_name (dp_mode); - get_cl_param(\ - /*argc*/ argc ,\ - /*argv*/ argv ,\ - /*output*/ &le ,\ - /*Name*/ "-dp_mode" ,\ - /*Flag*/ &garbage ,\ - /*TYPE*/ "S" ,\ - /*OPTIONAL?*/ OPTIONAL ,\ - /*MAX Nval*/ 1 ,\ - /*DOC*/ "Type of alignment algorithm used by T-Coffee: gotoh_pair_wise, myers_millers_pair_wise, " ,\ - /*Parameter*/ &dp_mode ,\ - /*Def 1*/ "linked_pair_wise",\ - /*Def 2*/ "cfasta_pair_wise",\ - /*Min_value*/ "any" ,\ - /*Max Value*/ "any" \ - ); -/*PARAMETER PROTOTYPE: KTUP */ - get_cl_param(\ - /*argc*/ argc ,\ - /*argv*/ argv ,\ - /*output*/ &le ,\ - /*Name*/ "-ktuple" ,\ - /*Flag*/ &garbage ,\ - /*TYPE*/ "D" ,\ - /*OPTIONAL?*/ OPTIONAL ,\ - /*MAX Nval*/ 1 ,\ - /*DOC*/ "Word size when using the heursitic dynamic programming modes fasta_pair_wise and cfasta_pair_wise " ,\ - /*Parameter*/ &ktup ,\ - /*Def 1*/ "1",\ - /*Def 2*/ "1",\ - /*Min_value*/ "any" ,\ - /*Max Value*/ "any" \ - ); -/*PARAMETER PROTOTYPE: FASTA_STEP */ - get_cl_param(\ - /*argc*/ argc ,\ - /*argv*/ argv ,\ - /*output*/ &le ,\ - /*Name*/ "-ndiag" ,\ - /*Flag*/ &garbage ,\ - /*TYPE*/ "D" ,\ - /*OPTIONAL?*/ OPTIONAL ,\ - /*MAX Nval*/ 1 ,\ - /*DOC*/ "Number of diagonals to consider when using the heursitic dynamic programming modes fasta_pair_wise and cfasta_pair_wise" ,\ - /*Parameter*/ &fasta_step ,\ - /*Def 1*/ "0",\ - /*Def 2*/ "10",\ - /*Min_value*/ "any" ,\ - /*Max Value*/ "any" \ - ); -/*PARAMETER PROTOTYPE: FASTA_STEP */ - get_cl_param(\ - /*argc*/ argc ,\ - /*argv*/ argv ,\ - /*output*/ &le ,\ - /*Name*/ "-diag_threshold" ,\ - /*Flag*/ &garbage ,\ - /*TYPE*/ "D" ,\ - /*OPTIONAL?*/ OPTIONAL ,\ - /*MAX Nval*/ 1 ,\ - /*DOC*/ "ND" ,\ - /*Parameter*/ &diag_threshold ,\ - /*Def 1*/ "0",\ - /*Def 2*/ "10",\ - /*Min_value*/ "any" ,\ - /*Max Value*/ "any" \ - ); -/*PARAMETER PROTOTYPE: diag_mode */ - get_cl_param(\ - /*argc*/ argc ,\ - /*argv*/ argv ,\ - /*output*/ &le ,\ - /*Name*/ "-diag_mode" ,\ - /*Flag*/ &garbage ,\ - /*TYPE*/ "D" ,\ - /*OPTIONAL?*/ OPTIONAL ,\ - /*MAX Nval*/ 1 ,\ - /*DOC*/ "0: Use the whole Diag\n1: Use the best match\n" ,\ - /*Parameter*/ &diag_mode ,\ - /*Def 1*/ "0",\ - /*Def 2*/ "1", - /*Min_value*/ "any" ,\ - /*Max Value*/ "any" \ - ); -/*PARAMETER PROTOTYPE: SIM_MATRIX */ - declare_name (sim_matrix); - get_cl_param(\ - /*argc*/ argc ,\ - /*argv*/ argv ,\ - /*output*/ &le ,\ - /*Name*/ "-sim_matrix" ,\ - /*Flag*/ &garbage ,\ - /*TYPE*/ "S" ,\ - /*OPTIONAL?*/ OPTIONAL ,\ - /*MAX Nval*/ 1 ,\ - /*DOC*/ "Degenerated matrix used to compute a similarity" ,\ - /*Parameter*/ &sim_matrix ,\ - /*Def 1*/ "vasiliky",\ - /*Def 2*/ "idmat",\ - /*Min_value*/ "any" ,\ - /*Max Value*/ "any" \ - ); - -/*PARAMETER PROTOTYPE: INFILE */ - declare_name (transform); - get_cl_param(\ - /*argc*/ argc ,\ - /*argv*/ argv ,\ - /*output*/ &le ,\ - /*Name*/ "-transform" ,\ - /*Flag*/ &garbage ,\ - /*TYPE*/ "S" ,\ - /*OPTIONAL?*/ OPTIONAL ,\ - /*MAX Nval*/ 1 ,\ - /*DOC*/ "dna2rna, rna2dna, dna2prot", \ - /*Parameter*/ &transform ,\ - /*Def 1*/ "" ,\ - /*Def 2*/ "" ,\ - /*Min_value*/ "any" ,\ - /*Max Value*/ "any" \ - ); - -/*PARAMETER PROTOTYPE: INFILE */ - declare_name (outorder); - get_cl_param(\ - /*argc*/ argc ,\ - /*argv*/ argv ,\ - /*output*/ &le ,\ - /*Name*/ "-outorder" ,\ - /*Flag*/ &garbage ,\ - /*TYPE*/ "S" ,\ - /*OPTIONAL?*/ OPTIONAL ,\ - /*MAX Nval*/ 1 ,\ - /*DOC*/ "Specifies the order of the sequences in the msa: input or aligned" ,\ - /*Parameter*/ &outorder ,\ - /*Def 1*/ "input" ,\ - /*Def 2*/ "input" ,\ - /*Min_value*/ "any" ,\ - /*Max Value*/ "any" \ - ); - -/*PARAMETER PROTOTYPE: INFILE */ - declare_name (inorder); - get_cl_param(\ - /*argc*/ argc ,\ - /*argv*/ argv ,\ - /*output*/ &le ,\ - /*Name*/ "-inorder" ,\ - /*Flag*/ &garbage ,\ - /*TYPE*/ "S" ,\ - /*OPTIONAL?*/ OPTIONAL ,\ - /*MAX Nval*/ 1 ,\ - /*DOC*/ "aligned: sort the sequences in alphabetic order before starting thus making the input order irrelevant but delivering a library in arbitratry order, keep: input order is used in the library but results become input order dependant" ,\ - /*Parameter*/ &inorder ,\ - /*Def 1*/ "aligned" ,\ - /*Def 2*/ "input" ,\ - /*Min_value*/ "any" ,\ - /*Max Value*/ "any" \ - ); - -/*PARAMETER PROTOTYPE: INFILE */ - declare_name (output_res_num); - get_cl_param(\ - /*argc*/ argc ,\ - /*argv*/ argv ,\ - /*output*/ &le ,\ - /*Name*/ "-seqnos" ,\ - /*Flag*/ &garbage ,\ - /*TYPE*/ "S" ,\ - /*OPTIONAL?*/ OPTIONAL ,\ - /*MAX Nval*/ 1 ,\ - /*DOC*/ "Adds Residue Numbers to the MSA" ,\ - /*Parameter*/ &output_res_num ,\ - /*Def 1*/ "off" ,\ - /*Def 2*/ "on" ,\ - /*Min_value*/ "any" ,\ - /*Max Value*/ "any" \ - ); -/*PARAMETER PROTOTYPE: INFILE */ - declare_name (residue_case); - get_cl_param(\ - /*argc*/ argc ,\ - /*argv*/ argv ,\ - /*output*/ &le ,\ - /*Name*/ "-case" ,\ - /*Flag*/ &garbage ,\ - /*TYPE*/ "S" ,\ - /*OPTIONAL?*/ OPTIONAL ,\ - /*MAX Nval*/ 1 ,\ - /*DOC*/ "Causes the case to be: kept:lower:upper." ,\ - /*Parameter*/ &residue_case ,\ - /*Def 1*/ "keep" ,\ - /*Def 2*/ "upper" ,\ - /*Min_value*/ "any" ,\ - /*Max Value*/ "any" \ - ); - -/*PARAMETER PROTOTYPE: CPU */ - - get_cl_param(\ - /*argc*/ argc ,\ - /*argv*/ argv ,\ - /*output*/ &le ,\ - /*Name*/ "-cpu" ,\ - /*Flag*/ &garbage ,\ - /*TYPE*/ "D" ,\ - /*OPTIONAL?*/ OPTIONAL ,\ - /*MAX Nval*/ 1 ,\ - /*DOC*/ "Makes it possible to add a pre-specified amount of cpu time to the measured usage" ,\ - /*Parameter*/ &extra_cpu ,\ - /*Def 1*/ "0" ,\ - /*Def 2*/ "0" ,\ - /*Min_value*/ "any" ,\ - /*Max Value*/ "any" \ - ); - -/*PARAMETER PROTOTYPE: MAXNSEQ */ - - get_cl_param(\ - /*argc*/ argc ,\ - /*argv*/ argv ,\ - /*output*/ &le ,\ - /*Name*/ "-maxnseq" ,\ - /*Flag*/ &garbage ,\ - /*TYPE*/ "D" ,\ - /*OPTIONAL?*/ OPTIONAL ,\ - /*MAX Nval*/ 1 ,\ - /*DOC*/ "Maximum number of sequences (-1=no max)" ,\ - /*Parameter*/ &maxnseq ,\ - /*Def 1*/ "1000" ,\ - /*Def 2*/ "0" ,\ - /*Min_value*/ "any" ,\ - /*Max Value*/ "any" \ - ); - -/*PARAMETER PROTOTYPE: MAXLEN */ - - get_cl_param(\ - /*argc*/ argc ,\ - /*argv*/ argv ,\ - /*output*/ &le ,\ - /*Name*/ "-maxlen" ,\ - /*Flag*/ &garbage ,\ - /*TYPE*/ "D" ,\ - /*OPTIONAL?*/ OPTIONAL ,\ - /*MAX Nval*/ 1 ,\ - /*DOC*/ "Maximum length of a sequence (-1=no max)" ,\ - /*Parameter*/ &maxlen ,\ - /*Def 1*/ "-1" ,\ - /*Def 2*/ "-1" ,\ - /*Min_value*/ "any" ,\ - /*Max Value*/ "any" \ - ); - - -/*PARAMETER PROTOTYPE: WEIGHT */ - declare_name ( weight); - get_cl_param(\ - /*argc*/ argc ,\ - /*argv*/ argv ,\ - /*output*/ &le ,\ - /*Name*/ "-weight" ,\ - /*Flag*/ &garbage ,\ - /*TYPE*/ "S" ,\ - /*OPTIONAL?*/ OPTIONAL ,\ - /*MAX Nval*/ 1 ,\ - /*DOC*/ "Defines the library weight: sim OR sim_(matrix) OR winsim" ,\ - /*Parameter*/ &weight ,\ - /*Def 1*/ "default" ,\ - /*Def 2*/ "sim" ,\ - /*Min_value*/ "any" ,\ - /*Max Value*/ "any" \ - ); /*PARAMETER PROTOTYPE: WEIGHT */ - declare_name ( seq_weight); - - get_cl_param(\ - /*argc*/ argc ,\ - /*argv*/ argv ,\ - /*output*/ &le ,\ - /*Name*/ "-seq_weight" ,\ - /*Flag*/ &garbage ,\ - /*TYPE*/ "S" ,\ - /*OPTIONAL?*/ OPTIONAL ,\ - /*MAX Nval*/ 1 ,\ - /*DOC*/ "Defines the sequences weighting scheme t_coffee" ,\ - /*Parameter*/ &seq_weight ,\ - /*Def 1*/ "t_coffee" ,\ - /*Def 2*/ "t_coffee" ,\ - /*Min_value*/ "any" ,\ - /*Max Value*/ "any" \ - ); - -/*PARAMETER PROTOTYPE: DO ALIGN */ - get_cl_param(\ - /*argc*/ argc ,\ - /*argv*/ argv ,\ - /*output*/ &le ,\ - /*Name*/ "-align" ,\ - /*Flag*/ &do_align ,\ - /*TYPE*/ "FL" ,\ - /*OPTIONAL?*/ OPTIONAL ,\ - /*MAX Nval*/ 0 ,\ - /*DOC*/ "forces the program to make the alignment" ,\ - /*Parameter*/ &do_align ,\ - /*Def 1*/ "1" ,\ - /*Def 2*/ "1" ,\ - /*Min_value*/ "any" ,\ - /*Max Value*/ "any" \ - ); -/*PARAMETER PROTOTYPE: DO DOMAIN */ - get_cl_param(\ - /*argc*/ argc ,\ - /*argv*/ argv ,\ - /*output*/ &le ,\ - /*Name*/ "-mocca" ,\ - /*Flag*/ &do_domain ,\ - /*TYPE*/ "FL" ,\ - /*OPTIONAL?*/ OPTIONAL ,\ - /*MAX Nval*/ 0 ,\ - /*DOC*/ "forces the program to extract domains" ,\ - /*Parameter*/ &do_domain ,\ - /*Def 1*/ "0" ,\ - /*Def 2*/ "1" ,\ - /*Min_value*/ "any" ,\ - /*Max Value*/ "any" \ - ); - if ( !do_domain) - { -/*PARAMETER PROTOTYPE: DO DOMAIN */ - get_cl_param( \ - /*argc*/ argc , \ - /*argv*/ argv ,\ - /*output*/ &le ,\ - /*Name*/ "-domain" ,\ - /*Flag*/ &do_domain ,\ - /*TYPE*/ "FL" ,\ - /*OPTIONAL?*/ OPTIONAL ,\ - /*MAX Nval*/ 0 ,\ - /*DOC*/ "forces the program to extract domains" ,\ - /*Parameter*/ &do_domain ,\ - /*Def 1*/ "0" ,\ - /*Def 2*/ "1" ,\ - /*Min_value*/ "any" ,\ - /*Max Value*/ "any" \ - ); - } -/*PARAMETER PROTOTYPE: Domain Param */ - get_cl_param(\ - /*argc*/ argc ,\ - /*argv*/ argv ,\ - /*output*/ &le ,\ - /*Name*/ "-start" ,\ - /*Flag*/ &domain_start ,\ - /*TYPE*/ "D" ,\ - /*OPTIONAL?*/ OPTIONAL ,\ - /*MAX Nval*/ 1 ,\ - /*DOC*/ "start of the master domain in the mocca mode" ,\ - /*Parameter*/ &domain_start ,\ - /*Def 1*/ "0" ,\ - /*Def 2*/ "1" ,\ - /*Min_value*/ "any" ,\ - /*Max Value*/ "any" \ - ); -get_cl_param(\ - /*argc*/ argc ,\ - /*argv*/ argv ,\ - /*output*/ &le ,\ - /*Name*/ "-len" ,\ - /*Flag*/ &domain_len ,\ - /*TYPE*/ "D" ,\ - /*OPTIONAL?*/ OPTIONAL ,\ - /*MAX Nval*/ 1 ,\ - /*DOC*/ "length of the master domain in the mocca mode" ,\ - /*Parameter*/ &domain_len ,\ - /*Def 1*/ "0" ,\ - /*Def 2*/ "1" ,\ - /*Min_value*/ "any" ,\ - /*Max Value*/ "any" \ - ); - -get_cl_param(\ - /*argc*/ argc ,\ - /*argv*/ argv ,\ - /*output*/ &le ,\ - /*Name*/ "-scale" ,\ - /*Flag*/ &domain_scale ,\ - /*TYPE*/ "D" ,\ - /*OPTIONAL?*/ OPTIONAL ,\ - /*MAX Nval*/ 1 ,\ - /*DOC*/ "Decreases the t_coffee score by Scale, so that non match get negative values" ,\ - /*Parameter*/ &domain_scale ,\ - /*Def 1*/ "0" ,\ - /*Def 2*/ "1" ,\ - /*Min_value*/ "any" ,\ - /*Max Value*/ "any" \ - ); -get_cl_param(\ - /*argc*/ argc ,\ - /*argv*/ argv ,\ - /*output*/ &le ,\ - /*Name*/ "-mocca_interactive" ,\ - /*Flag*/ &domain_interactive ,\ - /*TYPE*/ "FL" ,\ - /*OPTIONAL?*/ OPTIONAL ,\ - /*MAX Nval*/ 0 ,\ - /*DOC*/ "Runs Mocca in an interactive manneer" ,\ - /*Parameter*/ &domain_interactive,\ - /*Def 1*/ "0" ,\ - /*Def 2*/ "1" ,\ - /*Min_value*/ "any" ,\ - /*Max Value*/ "any" \ - ); -/*PARAMETER PROTOTYPE: WEIGHT */ - declare_name (method_evaluate_mode); - get_cl_param(\ - /*argc*/ argc ,\ - /*argv*/ argv ,\ - /*output*/ &le ,\ - /*Name*/ "-method_evaluate_mode" ,\ - /*Flag*/ &garbage ,\ - /*TYPE*/ "S" ,\ - /*OPTIONAL?*/ OPTIONAL ,\ - /*MAX Nval*/ 1 ,\ - /*DOC*/ "Specifies which method should be used to evaluate the score at the pairwise level" ,\ - /*Parameter*/ &method_evaluate_mode ,\ - /*Def 1*/ "default" ,\ - /*Def 2*/ "default" ,\ - /*Min_value*/ "any" ,\ - /*Max Value*/ "any" \ - ); - /*PARAMETER PROTOTYPE: WEIGHT */ - declare_name (evaluate_mode); - get_cl_param(\ - /*argc*/ argc ,\ - /*argv*/ argv ,\ - /*output*/ &le ,\ - /*Name*/ "-evaluate_mode" ,\ - /*Flag*/ &garbage ,\ - /*TYPE*/ "S" ,\ - /*OPTIONAL?*/ OPTIONAL ,\ - /*MAX Nval*/ 1 ,\ - /*DOC*/ "Mode used to produce the color output:t_coffee_fast,t_coffee_slow " ,\ - /*Parameter*/ &evaluate_mode ,\ - /*Def 1*/ "t_coffee_fast" ,\ - /*Def 2*/ "dali" ,\ - /*Min_value*/ "any" ,\ - /*Max Value*/ "any" \ - ); - get_cl_param(\ - /*argc*/ argc ,\ - /*argv*/ argv ,\ - /*output*/ &le ,\ - /*Name*/ "-get_type" ,\ - /*Flag*/ &get_type ,\ - /*TYPE*/ "FL" ,\ - /*OPTIONAL?*/ OPTIONAL ,\ - /*MAX Nval*/ 1 ,\ - /*DOC*/ "forces t_coffee top get the type of the sequences" ,\ - /*Parameter*/ &get_type ,\ - /*Def 1*/ "0" ,\ - /*Def 2*/ "1" ,\ - /*Min_value*/ "0" ,\ - /*Max Value*/ "1" \ - ); - -get_cl_param(\ - /*argc*/ argc ,\ - /*argv*/ argv ,\ - /*output*/ &le ,\ - /*Name*/ "-clean_aln" ,\ - /*Flag*/ &clean_aln ,\ - /*TYPE*/ "D" ,\ - /*OPTIONAL?*/ OPTIONAL ,\ - /*MAX Nval*/ 1 ,\ - /*DOC*/ "Forces weak portion of aln to be realigned" ,\ - /*Parameter*/ &clean_aln ,\ - /*Def 1*/ "0" ,\ - /*Def 2*/ "1" ,\ - /*Min_value*/ "0" ,\ - /*Max Value*/ "1" \ - ); -get_cl_param(\ - /*argc*/ argc ,\ - /*argv*/ argv ,\ - /*output*/ &le ,\ - /*Name*/ "-clean_threshold" ,\ - /*Flag*/ &clean_threshold ,\ - /*TYPE*/ "D" ,\ - /*OPTIONAL?*/ OPTIONAL ,\ - /*MAX Nval*/ 1 ,\ - /*DOC*/ "Threshold for the portions of the MSA that will are realigned by '-clean_evaluate_mode'. The threshold refers to the CORE score set by '-evaluate_mode'" ,\ - /*Parameter*/ &clean_threshold ,\ - /*Def 1*/ "1" ,\ - /*Def 2*/ "1" ,\ - /*Min_value*/ "any" ,\ - /*Max Value*/ "any" \ - ); -get_cl_param(\ - /*argc*/ argc ,\ - /*argv*/ argv ,\ - /*output*/ &le ,\ - /*Name*/ "-clean_iteration" ,\ - /*Flag*/ &clean_iteration ,\ - /*TYPE*/ "D" ,\ - /*OPTIONAL?*/ OPTIONAL ,\ - /*MAX Nval*/ 1 ,\ - /*DOC*/ "Number of rounds for '-clean_aln'" ,\ - /*Parameter*/ &clean_iteration ,\ - /*Def 1*/ "1" ,\ - /*Def 2*/ "1" ,\ - /*Min_value*/ "any" ,\ - /*Max Value*/ "any" \ - ); -declare_name (clean_evaluate_mode); -get_cl_param(\ - /*argc*/ argc ,\ - /*argv*/ argv ,\ - /*output*/ &le ,\ - /*Name*/ "-clean_evaluate_mode" ,\ - /*Flag*/ &garbage ,\ - /*TYPE*/ "S" ,\ - /*OPTIONAL?*/ OPTIONAL ,\ - /*MAX Nval*/ 1 ,\ - /*DOC*/ "Mode used to score residues (see evaluate_mode)" ,\ - /*Parameter*/ &clean_evaluate_mode ,\ - /*Def 1*/ "t_coffee_fast" ,\ - /*Def 2*/ "t_coffee_fast" ,\ - /*Min_value*/ "any" ,\ - /*Max Value*/ "any" \ - ); - -/*PARAMETER PROTOTYPE: DO EXTENDED MATRIX */ - get_cl_param(\ - /*argc*/ argc ,\ - /*argv*/ argv ,\ - /*output*/ &le ,\ - /*Name*/ "-extend_matrix" ,\ - /*Flag*/ &do_extended_matrix ,\ - /*TYPE*/ "FL" ,\ - /*OPTIONAL?*/ OPTIONAL ,\ - /*MAX Nval*/ 0 ,\ - /*DOC*/ "Deprecated" ,\ - /*Parameter*/ &do_extended_matrix ,\ - /*Def 1*/ "0" ,\ - /*Def 2*/ "1" ,\ - /*Min_value*/ "any" ,\ - /*Max Value*/ "any" \ - ); -get_cl_param(\ - /*argc*/ argc ,\ - /*argv*/ argv ,\ - /*output*/ &le ,\ - /*Name*/ "-prot_min_sim" ,\ - /*Flag*/ &prot_min_sim ,\ - /*TYPE*/ "D" ,\ - /*OPTIONAL?*/ OPTIONAL ,\ - /*MAX Nval*/ 1 ,\ - /*DOC*/ "Minimum similarity between a sequence and its PDB target" ,\ - /*Parameter*/ &prot_min_sim ,\ - /*Def 1*/ "0" ,\ - /*Def 2*/ "20" ,\ - /*Min_value*/ "any" ,\ - /*Max Value*/ "any" \ - ); - set_int_variable ("prot_min_sim", prot_min_sim); - -get_cl_param(\ - /*argc*/ argc ,\ - /*argv*/ argv ,\ - /*output*/ &le ,\ - /*Name*/ "-prot_max_sim" ,\ - /*Flag*/ &prot_max_sim ,\ - /*TYPE*/ "D" ,\ - /*OPTIONAL?*/ OPTIONAL ,\ - /*MAX Nval*/ 1 ,\ - /*DOC*/ "Maximum similarity between a sequence and its BLAST relatives" ,\ - /*Parameter*/ &prot_max_sim ,\ - /*Def 1*/ "90" ,\ - /*Def 2*/ "100" ,\ - /*Min_value*/ "any" ,\ - /*Max Value*/ "any" \ - ); - set_int_variable ("prot_max_sim", prot_max_sim); - -get_cl_param(\ - /*argc*/ argc ,\ - /*argv*/ argv ,\ - /*output*/ &le ,\ - /*Name*/ "-prot_min_cov" ,\ - /*Flag*/ &prot_min_cov ,\ - /*TYPE*/ "D" ,\ - /*OPTIONAL?*/ OPTIONAL ,\ - /*MAX Nval*/ 1 ,\ - /*DOC*/ "Minimum coverage of a sequence by its BLAST relatives" ,\ - /*Parameter*/ &prot_min_cov ,\ - /*Def 1*/ "0" ,\ - /*Def 2*/ "0" ,\ - /*Min_value*/ "any" ,\ - /*Max Value*/ "any" \ - ); -set_int_variable ("prot_min_cov", prot_min_cov); - -get_cl_param(\ - /*argc*/ argc ,\ - /*argv*/ argv ,\ - /*output*/ &le ,\ - /*Name*/ "-pdb_min_sim" ,\ - /*Flag*/ &pdb_min_sim ,\ - /*TYPE*/ "D" ,\ - /*OPTIONAL?*/ OPTIONAL ,\ - /*MAX Nval*/ 1 ,\ - /*DOC*/ "Minimum similarity between a sequence and its PDB target" ,\ - /*Parameter*/ &pdb_min_sim ,\ - /*Def 1*/ "35" ,\ - /*Def 2*/ "35" ,\ - /*Min_value*/ "any" ,\ - /*Max Value*/ "any" \ - ); - - set_int_variable ("pdb_min_sim", pdb_min_sim); - get_cl_param( \ - /*argc*/ argc ,\ - /*argv*/ argv ,\ - /*output*/ &le ,\ - /*Name*/ "-pdb_max_sim" ,\ - /*Flag*/ &pdb_max_sim ,\ - /*TYPE*/ "D" ,\ - /*OPTIONAL?*/ OPTIONAL ,\ - /*MAX Nval*/ 1 ,\ - /*DOC*/ "Maximum similarity between a sequence and its PDB target" ,\ - /*Parameter*/ &pdb_max_sim ,\ - /*Def 1*/ "100" ,\ - /*Def 2*/ "0" ,\ - /*Min_value*/ "any" ,\ - /*Max Value*/ "any" \ - ); - set_int_variable ("pdb_max_sim", pdb_max_sim); - get_cl_param( \ - /*argc*/ argc ,\ - /*argv*/ argv ,\ - /*output*/ &le ,\ - /*Name*/ "-pdb_min_cov" ,\ - /*Flag*/ &pdb_min_cov ,\ - /*TYPE*/ "D" ,\ - /*OPTIONAL?*/ OPTIONAL ,\ - /*MAX Nval*/ 1 ,\ - /*DOC*/ "Minimum coverage of a sequence by its PDB target" ,\ - /*Parameter*/ &pdb_min_cov ,\ - /*Def 1*/ "50" ,\ - /*Def 2*/ "25" ,\ - /*Min_value*/ "any" ,\ - /*Max Value*/ "any" \ - ); -set_int_variable ("pdb_min_cov", pdb_min_cov); - - - -declare_name (pdb_blast_server); - get_cl_param(\ - /*argc*/ argc ,\ - /*argv*/ argv ,\ - /*output*/ &le ,\ - /*Name*/ "-pdb_blast_server" ,\ - /*Flag*/ &garbage ,\ - /*TYPE*/ "W_F" ,\ - /*OPTIONAL?*/ OPTIONAL ,\ - /*MAX Nval*/ 1 ,\ - /*DOC*/ "ND" ,\ - /*Parameter*/&pdb_blast_server ,\ - /*Def 1*/ "EBI" ,\ - /*Def 2*/ "default" ,\ - /*Min_value*/ "any" ,\ - /*Max Value*/ "any" \ - ); -declare_name (prot_blast_server); - get_cl_param(\ - /*argc*/ argc ,\ - /*argv*/ argv ,\ - /*output*/ &le ,\ - /*Name*/ "-blast" ,\ - /*Flag*/ &garbage ,\ - /*TYPE*/ "W_F" ,\ - /*OPTIONAL?*/ OPTIONAL ,\ - /*MAX Nval*/ 1 ,\ - /*DOC*/ "ND" ,\ - /*Parameter*/&prot_blast_server ,\ - /*Def 1*/ "" ,\ - /*Def 2*/ "" ,\ - /*Min_value*/ "any" ,\ - /*Max Value*/ "any" \ - ); - //make sure that -blast and -blast_server are both supported blast>blast_server - if ( !prot_blast_server[0]) - { - get_cl_param( \ - /*argc*/ argc ,\ - /*argv*/ argv ,\ - /*output*/ &le ,\ - /*Name*/ "-blast_server" ,\ - /*Flag*/ &garbage ,\ - /*TYPE*/ "W_F" ,\ - /*OPTIONAL?*/ OPTIONAL ,\ - /*MAX Nval*/ 1 ,\ - /*DOC*/ "ND" ,\ - /*Parameter*/&prot_blast_server ,\ - /*Def 1*/ "EBI" ,\ - /*Def 2*/ "default" ,\ - /*Min_value*/ "any" ,\ - /*Max Value*/ "any" \ - ); - } - // HERE ("%s", blast_server); - if ( strm (prot_blast_server, "env"))prot_blast_server=get_env_variable ("blast_server_4_TCOFFEE",IS_FATAL); - set_string_variable ("blast_server", prot_blast_server); - - - - declare_name (pdb_db); - get_cl_param(\ - /*argc*/ argc ,\ - /*argv*/ argv ,\ - /*output*/ &le ,\ - /*Name*/ "-pdb_db" ,\ - /*Flag*/ &garbage ,\ - /*TYPE*/ "W_F" ,\ - /*OPTIONAL?*/ OPTIONAL ,\ - /*MAX Nval*/ 1 ,\ - /*DOC*/ "Non Redundant PDB database" ,\ - /*Parameter*/&pdb_db ,\ - /*Def 1*/ "pdb" ,\ - /*Def 2*/ "default" ,\ - /*Min_value*/ "any" ,\ - /*Max Value*/ "any" \ - ); - if ( strm (pdb_db, "env"))pdb_db=get_env_variable ("pdb_db_4_TCOFFEE", IS_FATAL); - set_string_variable ("pdb_db", pdb_db); - - -declare_name (prot_db); - get_cl_param(\ - /*argc*/ argc ,\ - /*argv*/ argv ,\ - /*output*/ &le ,\ - /*Name*/ "-protein_db" ,\ - /*Flag*/ &garbage ,\ - /*TYPE*/ "W_F" ,\ - /*OPTIONAL?*/ OPTIONAL ,\ - /*MAX Nval*/ 1 ,\ - /*DOC*/ "ND" ,\ - /*Parameter*/&prot_db ,\ - /*Def 1*/ "uniprot" ,\ - /*Def 2*/ "default" ,\ - /*Min_value*/ "any" ,\ - /*Max Value*/ "any" \ - ); - if ( strm (prot_db, "env"))prot_db=get_env_variable ("protein_db_4_TCOFFEE", IS_FATAL); - set_string_variable ("prot_db", prot_db); - - declare_name (method_log); - get_cl_param( \ - /*argc*/ argc ,\ - /*argv*/ argv ,\ - /*output*/ &le ,\ - /*Name*/ "-method_log" ,\ - /*Flag*/ &garbage ,\ - /*TYPE*/ "W_F" ,\ - /*OPTIONAL?*/ OPTIONAL ,\ - /*MAX Nval*/ 1 ,\ - /*DOC*/ "ND" ,\ - /*Parameter*/&method_log ,\ - /*Def 1*/ "no" ,\ - /*Def 2*/ "default" ,\ - /*Min_value*/ "any" ,\ - /*Max Value*/ "any" \ - ); -/*PARAMETER PROTOTYPE: IN */ - struc_to_use=declare_char ( 200, STRING); - n_struc_to_use=get_cl_param(\ - /*argc*/ argc ,\ - /*argv*/ argv ,\ - /*output*/ &le ,\ - /*Name*/ "-struc_to_use" ,\ - /*Flag*/ &garbage ,\ - /*TYPE*/ "S" ,\ - /*OPTIONAL?*/ OPTIONAL ,\ - /*MAX Nval*/ 200 ,\ - /*DOC*/ "Specifies the structures that must be used when combining sequences and structures. The default is to use all the structures." ,\ - /*Parameter*/ struc_to_use ,\ - /*Def 1*/ "",\ - /*Def 2*/ "stdin" ,\ - /*Min_value*/ "any" ,\ - /*Max Value*/ "any" \ - ); - -declare_name (cache); - get_cl_param(\ - /*argc*/ argc ,\ - /*argv*/ argv ,\ - /*output*/ &le ,\ - /*Name*/ "-cache" ,\ - /*Flag*/ &garbage ,\ - /*TYPE*/ "W_F" ,\ - /*OPTIONAL?*/ OPTIONAL ,\ - /*MAX Nval*/ 1 ,\ - /*DOC*/ "Specifies that a cache must be used to save the structures and their comparison, as well as the blast searches.\navailable modes are: use,ignore,update,local, directory name" ,\ - /*Parameter*/ &cache ,\ - /*Def 1*/ "use" ,\ - /*Def 2*/ "update" ,\ - /*Min_value*/ "any" ,\ - /*Max Value*/ "any" \ - ); -declare_name (align_pdb_param_file); - get_cl_param(\ - /*argc*/ argc ,\ - /*argv*/ argv ,\ - /*output*/ &le ,\ - /*Name*/ "-align_pdb_param_file" ,\ - /*Flag*/ &garbage ,\ - /*TYPE*/ "W_F" ,\ - /*OPTIONAL?*/ OPTIONAL ,\ - /*MAX Nval*/ 1 ,\ - /*DOC*/ "parameter_file" ,\ - /*Parameter*/ &align_pdb_param_file ,\ - /*Def 1*/ "no" ,\ - /*Def 2*/ "no" ,\ - /*Min_value*/ "any" ,\ - /*Max Value*/ "any" \ - ); -declare_name (align_pdb_hasch_mode); - get_cl_param(\ - /*argc*/ argc ,\ - /*argv*/ argv ,\ - /*output*/ &le ,\ - /*Name*/ "-align_pdb_hasch_mode" ,\ - /*Flag*/ &garbage ,\ - /*TYPE*/ "W_F" ,\ - /*OPTIONAL?*/ OPTIONAL ,\ - /*MAX Nval*/ 1 ,\ - /*DOC*/ "parameter_file" ,\ - /*Parameter*/ &align_pdb_hasch_mode ,\ - /*Def 1*/ "hasch_ca_trace_bubble" ,\ - /*Def 2*/ "hasch_ca_trace_bubble" ,\ - /*Min_value*/ "any" ,\ - /*Max Value*/ "any" \ - ); -declare_name (use_seqan); - get_cl_param(\ - /*argc*/ argc ,\ - /*argv*/ argv ,\ - /*output*/ &le ,\ - /*Name*/ "-external_aligner" ,\ - /*Flag*/ &garbage ,\ - /*TYPE*/ "S" ,\ - /*OPTIONAL?*/ OPTIONAL ,\ - /*MAX Nval*/ 1 ,\ - /*DOC*/ "Use seqan to compute the MSA",\ - /*Parameter*/ &use_seqan ,\ - /*Def 1*/ "NO" ,\ - /*Def 2*/ "seqan_tcoffee" ,\ - /*Min_value*/ "any" ,\ - /*Max Value*/ "any" \ - ); -declare_name (msa_mode); - get_cl_param(\ - /*argc*/ argc ,\ - /*argv*/ argv ,\ - /*output*/ &le ,\ - /*Name*/ "-msa_mode" ,\ - /*Flag*/ &garbage ,\ - /*TYPE*/ "S" ,\ - /*OPTIONAL?*/ OPTIONAL ,\ - /*MAX Nval*/ 1 ,\ - /*DOC*/ "Algorithm used to compute the MSA: tree | graph" ,\ - /*Parameter*/ &msa_mode ,\ - /*Def 1*/ "tree" ,\ - /*Def 2*/ "tree" ,\ - /*Min_value*/ "any" ,\ - /*Max Value*/ "any" \ - ); -declare_name (one2all); - get_cl_param(\ - /*argc*/ argc ,\ - /*argv*/ argv ,\ - /*output*/ &le ,\ - /*Name*/ "-one2all" ,\ - /*Flag*/ &garbage ,\ - /*TYPE*/ "S" ,\ - /*OPTIONAL?*/ OPTIONAL ,\ - /*MAX Nval*/ 1 ,\ - /*DOC*/ "Align all the sequences to the master sequence" ,\ - /*Parameter*/ &one2all ,\ - /*Def 1*/ "NULL" ,\ - /*Def 2*/ "1" ,\ - /*Min_value*/ "any" ,\ - /*Max Value*/ "any" \ - ); -declare_name (subset2all); - get_cl_param(\ - /*argc*/ argc ,\ - /*argv*/ argv ,\ - /*output*/ &le ,\ - /*Name*/ "-subset2all" ,\ - /*Flag*/ &garbage ,\ - /*TYPE*/ "S" ,\ - /*OPTIONAL?*/ OPTIONAL ,\ - /*MAX Nval*/ 1 ,\ - /*DOC*/ "Align all the sequences to the master sequence" ,\ - /*Parameter*/ &subset2all ,\ - /*Def 1*/ "NULL" ,\ - /*Def 2*/ "1" ,\ - /*Min_value*/ "any" ,\ - /*Max Value*/ "any" \ - ); - - get_cl_param(\ - /*argc*/ argc ,\ - /*argv*/ argv ,\ - /*output*/ &le ,\ - /*Name*/ "-lalign_n_top" ,\ - /*Flag*/ &garbage ,\ - /*TYPE*/ "D" ,\ - /*OPTIONAL?*/ OPTIONAL ,\ - /*MAX Nval*/ 1 ,\ - /*DOC*/ "Number of local alignments reported by the local method (lalign) when building the library" ,\ - /*Parameter*/ &lalign_n_top ,\ - /*Def 1*/ "10" ,\ - /*Def 2*/ "10" ,\ - /*Min_value*/ "any" ,\ - /*Max Value*/ "any" \ - ); -get_cl_param(\ - /*argc*/ argc ,\ - /*argv*/ argv ,\ - /*output*/ &le ,\ - /*Name*/ "-iterate" ,\ - /*Flag*/ &garbage ,\ - /*TYPE*/ "D" ,\ - /*OPTIONAL?*/ OPTIONAL ,\ - /*MAX Nval*/ 1 ,\ - /*DOC*/ "NUmber of iteration on the progressive alignment [0: no iteration, -1: Nseq iterations]",\ - /*Parameter*/ &iterate ,\ - /*Def 1*/ "0" ,\ - /*Def 2*/ "100" ,\ - /*Min_value*/ "any" ,\ - /*Max Value*/ "any" \ - ); - -get_cl_param(\ - /*argc*/ argc ,\ - /*argv*/ argv ,\ - /*output*/ &le ,\ - /*Name*/ "-trim" ,\ - /*Flag*/ &garbage ,\ - /*TYPE*/ "D" ,\ - /*OPTIONAL?*/ OPTIONAL ,\ - /*MAX Nval*/ 1 ,\ - /*DOC*/ "trim dataset",\ - /*Parameter*/ &trim ,\ - /*Def 1*/ "0" ,\ - /*Def 2*/ "1" ,\ - /*Min_value*/ "any" ,\ - /*Max Value*/ "any" \ - ); -get_cl_param(\ - /*argc*/ argc ,\ - /*argv*/ argv ,\ - /*output*/ &le ,\ - /*Name*/ "-split" ,\ - /*Flag*/ &garbage ,\ - /*TYPE*/ "D" ,\ - /*OPTIONAL?*/ OPTIONAL ,\ - /*MAX Nval*/ 1 ,\ - /*DOC*/ "split dataset",\ - /*Parameter*/ &split ,\ - /*Def 1*/ "0" ,\ - /*Def 2*/ "1" ,\ - /*Min_value*/ "any" ,\ - /*Max Value*/ "any" \ - ); -declare_name(trimfile); -get_cl_param(\ - /*argc*/ argc ,\ - /*argv*/ argv ,\ - /*output*/ &le ,\ - /*Name*/ "-trimfile" ,\ - /*Flag*/ &garbage ,\ - /*TYPE*/ "S" ,\ - /*OPTIONAL?*/ OPTIONAL ,\ - /*MAX Nval*/ 1 ,\ - /*DOC*/ "trim dataset filename",\ - /*Parameter*/ &trimfile ,\ - /*Def 1*/ "default" ,\ - /*Def 2*/ "" ,\ - /*Min_value*/ "any" ,\ - /*Max Value*/ "any" \ - ); -get_cl_param(\ - /*argc*/ argc ,\ - /*argv*/ argv ,\ - /*output*/ &le ,\ - /*Name*/ "-split" ,\ - /*Flag*/ &garbage ,\ - /*TYPE*/ "D" ,\ - /*OPTIONAL?*/ OPTIONAL ,\ - /*MAX Nval*/ 1 ,\ - /*DOC*/ "split dataset",\ - /*Parameter*/ &split ,\ - /*Def 1*/ "0" ,\ - /*Def 2*/ "1" ,\ - /*Min_value*/ "any" ,\ - /*Max Value*/ "any" \ - ); - -if (trim && !split)split=trim; - -get_cl_param(\ - /*argc*/ argc ,\ - /*argv*/ argv ,\ - /*output*/ &le ,\ - /*Name*/ "-split_nseq_thres" ,\ - /*Flag*/ &garbage ,\ - /*TYPE*/ "D" ,\ - /*OPTIONAL?*/ OPTIONAL ,\ - /*MAX Nval*/ 1 ,\ - /*DOC*/ "Maximum Number of sequences within a subgroup",\ - /*Parameter*/ &split_nseq_thres ,\ - /*Def 1*/ "0" ,\ - /*Def 2*/ "1" ,\ - /*Min_value*/ "any" ,\ - /*Max Value*/ "any" \ - ); -get_cl_param(\ - /*argc*/ argc ,\ - /*argv*/ argv ,\ - /*output*/ &le ,\ - /*Name*/ "-split_score_thres" ,\ - /*Flag*/ &garbage ,\ - /*TYPE*/ "D" ,\ - /*OPTIONAL?*/ OPTIONAL ,\ - /*MAX Nval*/ 1 ,\ - /*DOC*/ "Minimum score within a split dataset",\ - /*Parameter*/ &split_score_thres ,\ - /*Def 1*/ "0" ,\ - /*Def 2*/ "1" ,\ - /*Min_value*/ "any" ,\ - /*Max Value*/ "any" \ - ); -get_cl_param(\ - /*argc*/ argc ,\ - /*argv*/ argv ,\ - /*output*/ &le ,\ - /*Name*/ "-check_pdb_status" ,\ - /*Flag*/ &garbage ,\ - /*TYPE*/ "D" ,\ - /*OPTIONAL?*/ OPTIONAL ,\ - /*MAX Nval*/ 1 ,\ - /*DOC*/ "Reports the existance of a PDB file",\ - /*Parameter*/ &check_pdb_status ,\ - /*Def 1*/ "0" ,\ - /*Def 2*/ "1" ,\ - /*Min_value*/ "any" ,\ - /*Max Value*/ "any" \ - ); -get_cl_param(\ - /*argc*/ argc ,\ - /*argv*/ argv ,\ - /*output*/ &le ,\ - /*Name*/ "-clean_seq_name" ,\ - /*Flag*/ &garbage ,\ - /*TYPE*/ "D" ,\ - /*OPTIONAL?*/ OPTIONAL ,\ - /*MAX Nval*/ 1 ,\ - /*DOC*/ "Remove Special Char from sequence names",\ - /*Parameter*/ &clean_seq_name ,\ - /*Def 1*/ "0" ,\ - /*Def 2*/ "1" ,\ - /*Min_value*/ "any" ,\ - /*Max Value*/ "any" \ - ); - - -/*PARAMETER PROTOTYPE: SEQ TO ALIGN */ - seq_to_keep=declare_char ( 2000, STRING); - n_seq_to_keep=get_cl_param(\ - /*argc*/ argc ,\ - /*argv*/ argv ,\ - /*output*/ &le ,\ - /*Name*/ "-seq_to_keep",\ - /*Flag*/ &garbage ,\ - /*TYPE*/ "S" ,\ - /*OPTIONAL?*/ OPTIONAL ,\ - /*MAX Nval*/ 200 ,\ - /*DOC*/ "File containing the name of the sequences to keep when triming OR a list of names)",\ - /*Parameter*/ seq_to_keep ,\ - /*Def 1*/ "NULL" ,\ - /*Def 2*/ "" ,\ - /*Min_value*/ "any" ,\ - /*Max Value*/ "any" \ - ); -/*******************************************************************************************************/ -/* */ -/* TCoffee_dpa Parameter:START */ -/* */ -/*******************************************************************************************************/ -/*PARAMETER PROTOTYPE: dpa_master_aln */ - declare_name (dpa_master_aln); - get_cl_param(\ - /*argc*/ argc ,\ - /*argv*/ argv ,\ - /*output*/ &le ,\ - /*Name*/ "-dpa_master_aln",\ - /*Flag*/ &garbage ,\ - /*TYPE*/ "S" ,\ - /*OPTIONAL?*/ OPTIONAL ,\ - /*MAX Nval*/ 1 ,\ - /*DOC*/ "Approximate Alignment: File|method",\ - /*Parameter*/ &dpa_master_aln ,\ - /*Def 1*/ "" ,\ - /*Def 2*/ "" ,\ - /*Min_value*/ "any" ,\ - /*Max Value*/ "any" \ - ); - /*PARAMETER PROTOTYPE: dpa_maxnseq */ - - get_cl_param(\ - /*argc*/ argc ,\ - /*argv*/ argv ,\ - /*output*/ &le ,\ - /*Name*/ "-dpa_maxnseq",\ - /*Flag*/ &garbage ,\ - /*TYPE*/ "D" ,\ - /*OPTIONAL?*/ OPTIONAL ,\ - /*MAX Nval*/ 1 ,\ - /*DOC*/ "Maximum number of sequences to be aligned with DPA",\ - /*Parameter*/ &dpa_maxnseq ,\ - /*Def 1*/ "0" ,\ - /*Def 2*/ "50" ,\ - /*Min_value*/ "any" ,\ - /*Max Value*/ "any" \ - ); -/*PARAMETER PROTOTYPE: dpa_min_score1 */ - - get_cl_param(\ - /*argc*/ argc ,\ - /*argv*/ argv ,\ - /*output*/ &le ,\ - /*Name*/ "-dpa_min_score1",\ - /*Flag*/ &garbage ,\ - /*TYPE*/ "D" ,\ - /*OPTIONAL?*/ OPTIONAL ,\ - /*MAX Nval*/ 200 ,\ - /*DOC*/ "Minimum percent ID to merge sequences in the approximate alignment",\ - /*Parameter*/ &dpa_min_score1 ,\ - /*Def 1*/ "" ,\ - /*Def 2*/ "" ,\ - /*Min_value*/ "0" ,\ - /*Max Value*/ "100" \ - ); -/*PARAMETER PROTOTYPE: dpa_min_score2 */ - - get_cl_param(\ - /*argc*/ argc ,\ - /*argv*/ argv ,\ - /*output*/ &le ,\ - /*Name*/ "-dpa_min_score2",\ - /*Flag*/ &garbage ,\ - /*TYPE*/ "D" ,\ - /*OPTIONAL?*/ OPTIONAL ,\ - /*MAX Nval*/ 200 ,\ - /*DOC*/ "Threshold for aligning a group in the slow double progressive alignment (automatically readjusted)",\ - /*Parameter*/ &dpa_min_score2 ,\ - /*Def 1*/ "" ,\ - /*Def 2*/ "" ,\ - /*Min_value*/ "0" ,\ - /*Max Value*/ "100" \ - ); -/*PARAMETER PROTOTYPE: dpa_keep_tmp_file */ - get_cl_param(\ - /*argc*/ argc ,\ - /*argv*/ argv ,\ - /*output*/ &le ,\ - /*Name*/ "-dpa_keep_tmpfile" ,\ - /*Flag*/ &dpa_keep_tmpfile ,\ - /*TYPE*/ "FL" ,\ - /*OPTIONAL?*/ OPTIONAL ,\ - /*MAX Nval*/ 1 ,\ - /*DOC*/ "Prevents deletion of the tmpfile generated by t_coffee_dpa",\ - /*Parameter*/ &do_version ,\ - /*Def 1*/ "0" ,\ - /*Def 2*/ "1" ,\ - /*Min_value*/ "0" ,\ - /*Max Value*/ "1" \ - - ); -/*PARAMETER PROTOTYPE: dpa_debug */ - get_cl_param(\ - /*argc*/ argc ,\ - /*argv*/ argv ,\ - /*output*/ &le ,\ - /*Name*/ "-dpa_debug" ,\ - /*Flag*/ &dpa_debug ,\ - /*TYPE*/ "D" ,\ - /*OPTIONAL?*/ OPTIONAL ,\ - /*MAX Nval*/ 1 ,\ - /*DOC*/ "DEbug mode for DPA ( causes dpa tmp files to be kept)",\ - /*Parameter*/ &do_version ,\ - /*Def 1*/ "0" ,\ - /*Def 2*/ "1" ,\ - /*Min_value*/ "0" ,\ - /*Max Value*/ "1" \ - - ); - -/*PARAMETER PROTOTYPE: multi_core */ - declare_name (multi_core); - get_cl_param(\ - /*argc*/ argc ,\ - /*argv*/ argv ,\ - /*output*/ &le ,\ - /*Name*/ "-multi_core",\ - /*Flag*/ &garbage ,\ - /*TYPE*/ "S" ,\ - /*OPTIONAL?*/ OPTIONAL ,\ - /*MAX Nval*/ 1 ,\ - /*DOC*/ "Multi core: template_jobs_relax_msa",\ - /*Parameter*/ &multi_core ,\ - /*Def 1*/ "templates_jobs_relax_msa" ,\ - /*Def 2*/ "templates_jobs_relax_msa" ,\ - /*Min_value*/ "0" ,\ - /*Max Value*/ "100" \ - ); - if (multi_core[0])set_string_variable ("multi_core",multi_core); -/*PARAMETER PROTOTYPE: multi_core */ - get_cl_param( \ - /*argc*/ argc ,\ - /*argv*/ argv ,\ - /*output*/ &le ,\ - /*Name*/ "-n_core",\ - /*Flag*/ &garbage ,\ - /*TYPE*/ "D" ,\ - /*OPTIONAL?*/ OPTIONAL ,\ - /*MAX Nval*/ 1 ,\ - /*DOC*/ "Number of cores to be used by machine [default=0 => all those defined in the environement]",\ - /*Parameter*/ &n_core ,\ - /*Def 1*/ "0" ,\ - /*Def 2*/ "0" ,\ - /*Min_value*/ "0" ,\ - /*Max Value*/ "100" \ - ); - if (n_core)set_int_variable ("n_core",n_core); - - -/*PARAMETER PROTOTYPE: lib_list */ - declare_name (lib_list); - get_cl_param(\ - /*argc*/ argc ,\ - /*argv*/ argv ,\ - /*output*/ &le ,\ - /*Name*/ "-lib_list",\ - /*Flag*/ &garbage ,\ - /*TYPE*/ "S" ,\ - /*OPTIONAL?*/ OPTIONAL ,\ - /*MAX Nval*/ 1 ,\ - /*DOC*/ "A File that contains every pair/group of sequence to process when computing the lib, Format: ",\ - /*Parameter*/ &lib_list ,\ - /*Def 1*/ "" ,\ - /*Def 2*/ "default" ,\ - /*Min_value*/ "0" ,\ - /*Max Value*/ "100" \ - ); - - /*PARAMETER PROTOTYPE: lib_list */ - declare_name (prune_lib_mode); - get_cl_param(\ - /*argc*/ argc ,\ - /*argv*/ argv ,\ - /*output*/ &le ,\ - /*Name*/ "-prune_lib_mode",\ - /*Flag*/ &garbage ,\ - /*TYPE*/ "S" ,\ - /*OPTIONAL?*/ OPTIONAL ,\ - /*MAX Nval*/ 1 ,\ - /*DOC*/ "A File that contains every pair/group of sequence to process when computing the lib, Format: ",\ - /*Parameter*/ &prune_lib_mode ,\ - /*Def 1*/ "5" ,\ - /*Def 2*/ "5" ,\ - /*Min_value*/ "0" ,\ - /*Max Value*/ "100" \ - ); - set_string_variable ("prune_lib_mode",prune_lib_mode); - - /*PARAMETER PROTOTYPE: multi_thread */ - declare_name (tip); - get_cl_param(\ - /*argc*/ argc ,\ - /*argv*/ argv ,\ - /*output*/ &le ,\ - /*Name*/ "-tip",\ - /*Flag*/ &garbage ,\ - /*TYPE*/ "S" ,\ - /*OPTIONAL?*/ OPTIONAL ,\ - /*MAX Nval*/ 1 ,\ - /*DOC*/ "Controls The Output of A TIP When Computation is over [one,all,none]",\ - /*Parameter*/ &tip ,\ - /*Def 1*/ "one" ,\ - /*Def 2*/ "all" ,\ - /*Min_value*/ "any" ,\ - /*Max Value*/ "any" \ - ); - /*PARAMETER PROTOTYPE: RNA LIB */ - declare_name (rna_lib); - get_cl_param(\ - /*argc*/ argc ,\ - /*argv*/ argv ,\ - /*output*/ &le ,\ - /*Name*/ "-rna_lib",\ - /*Flag*/ &garbage ,\ - /*TYPE*/ "S" ,\ - /*OPTIONAL?*/ OPTIONAL ,\ - /*MAX Nval*/ 1 ,\ - /*DOC*/ "",\ - /*Parameter*/ &rna_lib ,\ - /*Def 1*/ "" ,\ - /*Def 2*/ "" ,\ - /*Min_value*/ "any" ,\ - /*Max Value*/ "any" \ - ); - get_cl_param( \ - /*argc*/ argc ,\ - /*argv*/ argv ,\ - /*output*/ &le ,\ - /*Name*/ "-no_warning",\ - /*Flag*/ &garbage ,\ - /*TYPE*/ "D" ,\ - /*OPTIONAL?*/ OPTIONAL ,\ - /*MAX Nval*/ 1 ,\ - /*DOC*/ "Suppresses all Warnings",\ - /*Parameter*/ &no_warning ,\ - /*Def 1*/ "0" ,\ - /*Def 2*/ "1" ,\ - /*Min_value*/ "0" ,\ - /*Max Value*/ "1" \ - ); - get_cl_param( \ - /*argc*/ argc ,\ - /*argv*/ argv , \ - /*output*/ &le ,\ - /*Name*/ "-run_local_script",\ - /*Flag*/ &garbage , \ - /*TYPE*/ "D" , \ - /*OPTIONAL?*/ OPTIONAL , \ - /*MAX Nval*/ 1 , \ - /*DOC*/ "Run Local Script if in current directory", \ - /*Parameter*/ &run_local_script , \ - /*Def 1*/ "0" , \ - /*Def 2*/ "1" , \ - /*Min_value*/ "0" , \ - /*Max Value*/ "1" \ - ); - set_int_variable ("run_local_script", run_local_script); - declare_name (plugins); - get_cl_param( \ - /*argc*/ argc ,\ - /*argv*/ argv , \ - /*output*/ &le ,\ - /*Name*/ "-plugins",\ - /*Flag*/ &garbage , \ - /*TYPE*/ "S" , \ - /*OPTIONAL?*/ OPTIONAL , \ - /*MAX Nval*/ 1 , \ - /*DOC*/ "Set the directory containing the plugins", \ - /*Parameter*/ &plugins , \ - /*Def 1*/ "default" , \ - /*Def 2*/ "" , \ - /*Min_value*/ "any" , \ - /*Max Value*/ "any" \ - ); - if ( !strm (plugins, "default")) - { - set_path_4_plugins (plugins); - } - - - declare_name (proxy); - get_cl_param(\ - /*argc*/ argc ,\ - /*argv*/ argv ,\ - /*output*/ &le ,\ - /*Name*/ "-proxy",\ - /*Flag*/ &garbage ,\ - /*TYPE*/ "S" ,\ - /*OPTIONAL?*/ OPTIONAL ,\ - /*MAX Nval*/ 1 ,\ - /*DOC*/ "proxy used to access to webservices, when required",\ - /*Parameter*/ &proxy ,\ - /*Def 1*/ "unset" ,\ - /*Def 2*/ " " ,\ - /*Min_value*/ "any" ,\ - /*Max Value*/ "any" \ - ); - if ( !strm (proxy, "unset"))set_string_variable ("cl_proxy",proxy); - declare_name (email); - get_cl_param(\ - /*argc*/ argc ,\ - /*argv*/ argv ,\ - /*output*/ &le ,\ - /*Name*/ "-email",\ - /*Flag*/ &garbage ,\ - /*TYPE*/ "S" ,\ - /*OPTIONAL?*/ OPTIONAL ,\ - /*MAX Nval*/ 1 ,\ - /*DOC*/ "email provided to webservices, when required",\ - /*Parameter*/ &email ,\ - /*Def 1*/ "" ,\ - /*Def 2*/ "" ,\ - /*Min_value*/ "any" ,\ - /*Max Value*/ "any" \ - ); - if ( strstr (email, "@")) - { - set_string_variable ("email", email); - set_string_variable ("cl_email", email); - } - - get_cl_param( \ - /*argc*/ argc ,\ - /*argv*/ argv ,\ - /*output*/ &le ,\ - /*Name*/ "-clean_overaln",\ - /*Flag*/ &garbage ,\ - /*TYPE*/ "D" ,\ - /*OPTIONAL?*/ OPTIONAL ,\ - /*MAX Nval*/ 1 ,\ - /*DOC*/ "Ratio between overaligned exon id Vs legitimates *100",\ - /*Parameter*/ &clean_overaln ,\ - /*Def 1*/ "0" ,\ - /*Def 2*/ "1" ,\ - /*Min_value*/ "any" ,\ - /*Max Value*/ "any" \ - ); - overaln_param=declare_char ( 10, STRING); - n_overaln_param=get_cl_param( \ - /*argc*/ argc ,\ - /*argv*/ argv ,\ - /*output*/ &le ,\ - /*Name*/ "-overaln_param",\ - /*Flag*/ &garbage ,\ - /*TYPE*/ "S" ,\ - /*OPTIONAL?*/ OPTIONAL ,\ - /*MAX Nval*/ 10 ,\ - /*DOC*/ "Parameters for the overaln",\ - /*Parameter*/ overaln_param ,\ - /*Def 1*/ "NULL" ,\ - /*Def 2*/ "" ,\ - /*Min_value*/ "any" ,\ - /*Max Value*/ "any" \ - ); - declare_name (overaln_mode); - get_cl_param(\ - /*argc*/ argc ,\ - /*argv*/ argv ,\ - /*output*/ &le ,\ - /*Name*/ "-overaln_mode",\ - /*Flag*/ &garbage ,\ - /*TYPE*/ "S" ,\ - /*OPTIONAL?*/ OPTIONAL ,\ - /*MAX Nval*/ 1 ,\ - /*DOC*/ "lower || uanlaign",\ - /*Parameter*/ &overaln_mode ,\ - /*Def 1*/ "" ,\ - /*Def 2*/ "" ,\ - /*Min_value*/ "any" ,\ - /*Max Value*/ "any" \ - ); - if (overaln_mode[0])set_string_variable ("overaln_mode", overaln_mode); - declare_name (overaln_model); - get_cl_param(\ - /*argc*/ argc ,\ - /*argv*/ argv ,\ - /*output*/ &le ,\ - /*Name*/ "-overaln_model",\ - /*Flag*/ &garbage ,\ - /*TYPE*/ "S" ,\ - /*OPTIONAL?*/ OPTIONAL ,\ - /*MAX Nval*/ 1 ,\ - /*DOC*/ "fsa1 (no exon boundaries), fsa2 (exon boundaries)",\ - /*Parameter*/ &overaln_model ,\ - /*Def 1*/ "" ,\ - /*Def 2*/ "" ,\ - /*Min_value*/ "any" ,\ - /*Max Value*/ "any" \ - ); - if (overaln_mode[0])set_string_variable ("overaln_model", overaln_model); - - get_cl_param( \ - /*argc*/ argc ,\ - /*argv*/ argv ,\ - /*output*/ &le ,\ - /*Name*/ "-overaln_threshold",\ - /*Flag*/ &garbage ,\ - /*TYPE*/ "D" ,\ - /*OPTIONAL?*/ OPTIONAL ,\ - /*MAX Nval*/ 1 ,\ - /*DOC*/ "Ratio between overaligned exon id Vs legitimates *100",\ - /*Parameter*/ &overaln_threshold ,\ - /*Def 1*/ "0" ,\ - /*Def 2*/ "" ,\ - /*Min_value*/ "any" ,\ - /*Max Value*/ "any" \ - ); - set_int_variable ("overaln_threshold", overaln_threshold); - - get_cl_param( \ - /*argc*/ argc ,\ - /*argv*/ argv ,\ - /*output*/ &le ,\ - /*Name*/ "-overaln_target",\ - /*Flag*/ &garbage ,\ - /*TYPE*/ "D" ,\ - /*OPTIONAL?*/ OPTIONAL ,\ - /*MAX Nval*/ 1 ,\ - /*DOC*/ "Ratio between overaligned exon id Vs legitimates *100",\ - /*Parameter*/ &overaln_target ,\ - /*Def 1*/ "0" ,\ - /*Def 2*/ "" ,\ - /*Min_value*/ "any" ,\ - /*Max Value*/ "any" \ - ); - set_int_variable ("overaln_target", overaln_threshold); - - get_cl_param( \ - /*argc*/ argc ,\ - /*argv*/ argv ,\ - /*output*/ &le ,\ - /*Name*/ "-overaln_P1",\ - /*Flag*/ &garbage ,\ - /*TYPE*/ "D" ,\ - /*OPTIONAL?*/ OPTIONAL ,\ - /*MAX Nval*/ 1 ,\ - /*DOC*/ "Ratio between overaligned exon id Vs legitimates *100",\ - /*Parameter*/ &overaln_P1 ,\ - /*Def 1*/ "0" ,\ - /*Def 2*/ "" ,\ - /*Min_value*/ "any" ,\ - /*Max Value*/ "any" \ - ); - if (overaln_P1)set_int_variable ("overaln_P1", overaln_P1); - - get_cl_param( \ - /*argc*/ argc ,\ - /*argv*/ argv ,\ - /*output*/ &le ,\ - /*Name*/ "-overaln_P2",\ - /*Flag*/ &garbage ,\ - /*TYPE*/ "D" ,\ - /*OPTIONAL?*/ OPTIONAL ,\ - /*MAX Nval*/ 1 ,\ - /*DOC*/ "Ratio between overaligned exon id Vs legitimates *100",\ - /*Parameter*/ &overaln_P2 ,\ - /*Def 1*/ "0" ,\ - /*Def 2*/ "" ,\ - /*Min_value*/ "any" ,\ - /*Max Value*/ "any" \ - ); - if (overaln_P2)set_int_variable ("overaln_P2", overaln_P2); - - get_cl_param( \ - /*argc*/ argc ,\ - /*argv*/ argv ,\ - /*output*/ &le ,\ - /*Name*/ "-overaln_P3",\ - /*Flag*/ &garbage ,\ - /*TYPE*/ "D" ,\ - /*OPTIONAL?*/ OPTIONAL ,\ - /*MAX Nval*/ 1 ,\ - /*DOC*/ "Ratio between overaligned exon id Vs legitimates *100",\ - /*Parameter*/ &overaln_P3 ,\ - /*Def 1*/ "0" ,\ - /*Def 2*/ "" ,\ - /*Min_value*/ "any" ,\ - /*Max Value*/ "any" \ - ); - if (overaln_P3)set_int_variable ("overaln_P3", overaln_P3); - - get_cl_param( \ - /*argc*/ argc ,\ - /*argv*/ argv ,\ - /*output*/ &le ,\ - /*Name*/ "-overaln_P4",\ - /*Flag*/ &garbage ,\ - /*TYPE*/ "D" ,\ - /*OPTIONAL?*/ OPTIONAL ,\ - /*MAX Nval*/ 1 ,\ - /*DOC*/ "Ratio between overaligned exon id Vs legitimates *100",\ - /*Parameter*/ &overaln_P4 ,\ - /*Def 1*/ "0" ,\ - /*Def 2*/ "" ,\ - /*Min_value*/ "any" ,\ - /*Max Value*/ "any" \ - ); - if (overaln_P4)set_int_variable ("overaln_P4", overaln_P4); - - - declare_name (exon_boundaries); - get_cl_param(\ - /*argc*/ argc ,\ - /*argv*/ argv ,\ - /*output*/ &le ,\ - /*Name*/ "-exon_boundaries",\ - /*Flag*/ &garbage ,\ - /*TYPE*/ "S" ,\ - /*OPTIONAL?*/ OPTIONAL ,\ - /*MAX Nval*/ 1 ,\ - /*DOC*/ "exon_boundaries [EBI boj format]",\ - /*Parameter*/ &exon_boundaries ,\ - /*Def 1*/ "" ,\ - /*Def 2*/ "" ,\ - /*Min_value*/ "any" ,\ - /*Max Value*/ "any" \ - ); - if ( exon_boundaries[0])set_string_variable ("exon_boundaries", exon_boundaries); - - - - -/*******************************************************************************************************/ -/* */ -/* Standard Initialization:END */ -/* */ -/*******************************************************************************************************/ - standard_initialisation_end ( argv, &argc); -/*******************************************************************************************************/ -/* */ -/* TCoffee_dpa Parameter:END */ -/* */ -/*******************************************************************************************************/ - - - - if (argc==1 || name_is_in_list("-help", argv, argc, STRING)!=-1 ) - { - display_method_names ("display", stdout); - return EXIT_SUCCESS; - } - get_cl_param( argc, argv,&le, NULL,NULL,NULL,0,0,NULL); - prepare_cache (cache); -/*******************************************************************************************************/ -/* */ -/* FILL list_file (contains seq, aln and meth) */ -/* */ -/*******************************************************************************************************/ - - - -/*Re-introduce the sequences introduced with -infile*/ -/*Standard*/ - - if ( infile[0] && !do_evaluate) - { - sprintf ( list_file[n_list++], "%s",infile); - } -/*DO EVALUATE: The aln to evaluate must be provided via -infile*/ - else if (do_evaluate) - { - if (!infile[0] || !(main_read_aln ( infile, NULL))) - { - fprintf ( stderr,"\nERROR: When using -evaluate, Provide a multiple sequence alignment via the -infile flag [FATAL:%s]\n", PROGRAM); - myexit (EXIT_FAILURE); - } - else if (! main_read_aln ( infile,NULL)) - { - fprintf ( stderr,"\nERROR: FILE %s is NOT a valid alignment [FATAL:%s]\n", infile, PROGRAM); - myexit (EXIT_FAILURE); - } - else if ( infile[0]=='A' ||infile[0]=='S') - { - sprintf ( list_file[n_list++], "S%s",infile+1); - } - else sprintf ( list_file[n_list++], "S%s",infile); - } - - - -/*Make Sure -infile is set*/ - if (!infile[0]&& (do_evaluate || do_convert)) - { - - if ( do_evaluate || do_convert)sprintf ( infile, "%s",seq_list[0]); - } - -/*EXPAND -in*/ - /*Introduce the sequences from the -profile flag*/ - if ( profile1 && profile1[0]) - { - sprintf ( list_file[n_list++], "R%s",profile1); - } - if ( profile2 && profile2[0]) - { - sprintf ( list_file[n_list++], "R%s",profile2); - } - - for ( a=0; a< n_profile_list; a++) - { - FILE *fp; - if ( (fp=find_token_in_file (profile_list[a], NULL, "FILE_LIST"))!=NULL) - { - int z; - char rname[1000]; - vfclose (fp); - fp=vfopen (profile_list[a], "r"); - - while ( (z=fgetc(fp))!=EOF) - { - ungetc(z, fp); - fscanf (fp, "%s\n", rname); - if ( check_file_exists(rname))sprintf ( list_file[n_list++], "R%s", rname); - } - vfclose (fp); - } - else if (format_is_conc_aln (profile_list[a])) - { - Alignment *P; - char *cname; - - P=input_conc_aln (profile_list[a],NULL); - while (P) - { - cname=vtmpnam (NULL); - output_fasta_aln (cname, P); - P=P->A; - sprintf ( list_file[n_list++], "R%s",cname); - } - free_aln (P); - } - - else - { - sprintf ( list_file[n_list++], "R%s",profile_list[a]); - } - } - /*Introduce the sequences from the -seq flag*/ - for (a=0; anseq); - free_aln (ExA); - } - /*FETCH THE STRUCTURES INTRODUCED WITH -pdb and add them to -in*/ - if ( n_pdb) - { - for ( a=0; a< n_pdb; a++) - { - if ( is_number (pdb_list[a])); - else - { - pdb_start=pdb_end=0; - if ( a+1< n_pdb && is_number (pdb_list[a+1]))pdb_start=atoi (pdb_list[a+1]); - if ( a+2< n_pdb && is_number (pdb_list[a+2]))pdb_end=atoi (pdb_list[a+2]); - - pdb_name=get_pdb_struc ( pdb_list[a],pdb_start, pdb_end); - if (pdb_name){sprintf (list_file[n_list++], "P%s", pdb_name);} - /*Warning: do not free pdb_name: it is statically allocated by get_pdb_struc*/ - } - } - } - - /*Check That Enough Methods/Libraries/Alignments Have been Chiped in*/ - - if (list_file) - { - int *nn; - nn=vcalloc ( 256, sizeof (int)); - for (a=0; alocal_stderr, "\nWARNING: File %s was not properly tag. Potential ambiguity\n",list_file[a]); - } - } - - - if ( (nn['A']+nn['L']+nn['M'])==0) - { - sprintf ( list_file[n_list++], "Mproba_pair"); //new default - //sprintf ( list_file[n_list++], "Mlalign_id_pair"); - //sprintf ( list_file[n_list++], "Mslow_pair"); - } - vfree (nn); - } - -/*FILL THE F STRUCTURE (Contains Information for Output names For the defaults)*/ - if (n_list==0 || argc<=1) - { - fprintf ( stderr, "\nERROR: You have NOT provided enough arguments [FATAL:%s]", PROGRAM); - myexit (EXIT_FAILURE); - } - - - else if ( argv[1][0]!='-' && (check_file_exists( argv[1]) || check_file_exists(argv[1]+1))) - { - if (check_file_exists(argv[1]))F=parse_fname(argv[1]); - else if ( check_file_exists(argv[1]+1))F=parse_fname(argv[1]+1); - - } - else if (infile[0]) - { - - if ( check_file_exists (infile))F=parse_fname(infile); - else if (check_file_exists (infile+1))F =parse_fname(infile+1); - } - else - { - - for ( a=0; a< n_list; a++) - { - if (!is_method(list_file[a])) - { - - - if ( check_file_exists( list_file[a])){F=parse_fname(list_file[a]);break;} - else if ( is_in_set ( list_file[a][0], "ASLX") && check_file_exists( list_file[a]+1)){F=parse_fname(list_file[a]+1);break;} - else if ( is_in_set ( list_file[a][0], "R") && check_file_exists( list_file[a]+1)) - { - char lname[100]; - F=parse_fname(list_file[a]+1); - sprintf ( lname, "%s_1", F->name); - sprintf ( F->name, "%s", lname); - break; - } - - else if ( is_in_set ( list_file[a][0], "P") && is_pdb_struc (list_file[a]+1)) - { - F=parse_fname(is_pdb_struc (list_file[a]+1));break; - - } - } - } - - } - - - /*Get Structures*/ - for ( a=0; a< n_list; a++) - { - if ( list_file[a][0]=='P' && !check_file_exists(list_file[a])) - { - char buf[1000]; - sprintf(buf, "%s", list_file[a]+1); - sprintf(list_file[a], "P%s",is_pdb_struc (buf)); - } - } - - /*FATAL: NO SEQUENCES*/ - if (!F) - { - fprintf ( stderr, "\nERROR: You have not provided any sequence [FATAL:%s]\n",PROGRAM); - myexit (EXIT_FAILURE); - } - if (run_name)F=parse_fname(run_name); - else F->path[0]='\0'; - - - identify_list_format (list_file, n_list); - - - fprintf (le, "\nINPUT FILES\n"); - for ( a=0; a< n_list; a++) - { - fprintf (le, "\tInput File (%c) %s ",list_file[a][0],list_file[a]+1); - if ( list_file[a][0]=='A' || list_file[a][0]=='S' || list_file[a][0]=='P'|| list_file[a][0]=='R' ) - { - fprintf (le, " Format %s\n", f=identify_seq_format ( list_file[a]+1)); - - if (!f || f[0]=='\0') - { - fprintf ( stderr, "\nERROR: The format of %s is not supported[FATAL:%s]", list_file[a]+1,PROGRAM); - myexit (EXIT_FAILURE); - } - vfree (f); - } - else fprintf (le, "\n"); - } - - -/*CONVERT, ALIGN OR EVALUATE: CHOSE THE RIGHT VERB*/ - /*Set the Hierarchy of the verbs*/ - /*The first one decides...*/ - - - do_list=vcalloc ( 100, sizeof (int*)); - n_do=0; - do_list[n_do++]=&do_extended_matrix; - do_list[n_do++]=&do_convert; - do_list[n_do++]=&do_evaluate; - do_list[n_do++]=&do_domain; - do_list[n_do++]=&do_align; - - - for ( a=0; a< n_do; a++) - { - if ( do_list[a][0]) - { - for ( b=0; b< n_do; b++)if ( b!=a)do_list[b][0]=0; - break; - } - } - - - -/*SET THE DEFAULT NAMES*/ - if ( do_convert) - { - if ( strm (tree_file, "default"))sprintf ( tree_file, "no"); - } - - - - if ( do_evaluate) - { - sprintf ( out_lib, "no"); - sprintf ( tree_file, "no"); - clean_aln=0; - } - - - if ( F && strm ( tree_file, "default"))sprintf ( tree_file ,"%s%s.dnd",F->path ,F->name); - if ( F && strm ( ph_tree_file, "default"))sprintf ( ph_tree_file ,"%s%s.ph",F->path ,F->name); - - for (a=0; a< n_out_aln_format; a++) - { - if (is_out_format_list (out_aln_format[a])); - else - { - fprintf (stderr, "\n%s is not a valid format [FATAL:%s]\n", out_aln_format[a], PROGRAM); - myexit (EXIT_FAILURE); - } - } - - for (a=0; apath,F->name,out_aln_format[a]); - } - } - else - { - sprintf ( tot_out_aln[0], "%s", out_aln); - for (a=1; a< n_out_aln_format; a++) - sprintf ( tot_out_aln[a] ,"%s%s.%s", F->path ,out_aln, out_aln_format[a]); - } - - - - if ( F && strm ( out_lib , "default"))sprintf ( out_lib ,"%s%s.tc_lib",F->path , F->name); - - if ( type && type[0]) - { - if (strm2 (type,"Protein", "protein"))sprintf ( type, "PROTEIN"); - if (strm2 (type,"DNA", "dna"))sprintf ( type, "DNA"); - if (strm2 (type,"RNA", "rna"))sprintf ( type, "RNA"); - - } - - - if ( !use_tree && check_file_exists (tree_file))vremove (tree_file); - else if ( !use_tree || (use_tree && strm (use_tree, "default"))); - else sprintf ( tree_file, "%s", use_tree); - -/*******************************************************************************************************/ -/* */ -/* Input Sequences and Library */ -/* */ -/*******************************************************************************************************/ - - set_methods_limits (method_limits,n_method_limits,list_file, n_list, &maxnseq, &maxlen); - /*Set Global Values*/ - - - -/*START*/ - /*1 READ THE SEQUENCES*/ - S=read_seq_in_n_list (list_file, n_list, type,seq_source); - if ( check_type) - { - if (!strm (S->type, get_array_type (S->nseq, S->seq))) - { - fprintf ( stderr, "\nINCORRECT SEQUENCE TYPE (USE %s ONLY) [FATAL:%s]", S->type, PROGRAM); - myexit (EXIT_FAILURE); - } - } - - if (S->nseq<=1 && !do_domain) - { - printf_exit (EXIT_FAILURE,stderr, "\nERROR: Your Dataset Contains %d Sequence. For multiple alignments you need at least 2 sequences[FATAL:%s]", S->nseq,PROGRAM); - } - - store_seq_type (S->type); - - if ( type_only==1) - { - fprintf ( stdout, "%s", S->type); - return EXIT_SUCCESS; - } - /*Translate Sequences*/ - if ( transform && transform[0]) - { - S=transform_sequence (S, transform); - } - - /*Abort if the sequences are too long */ - if (maxlen!=-1 && S->max_len>maxlen) - { - fprintf ( stderr, "\nSEQUENCES TOO LONG [Longuest=%d][MAX=%d][FATAL:%s]\n", S->max_len,maxlen, PROGRAM); - myexit (EXIT_FAILURE); - - } - - if (dpa) - { - list_file=list_file2dpa_list_file (list_file,&n_list,maxnseq,S); - S=read_seq_in_n_list (list_file, n_list, type,seq_source); - } - else if ( maxnseq!=-1 && S->nseq>maxnseq) - { - fprintf ( stderr, "\nTOO MANY SEQUENCES [N=%d][MAX=%d][FATAL:%s]\n", S->nseq,maxnseq, PROGRAM); - myexit (EXIT_FAILURE); - - } - - - S=seq2template_seq(S, "SELF_S_",F); - /* Get the Templates*/ - if ( n_template_file) - { - fprintf ( le, "\nLooking For Sequence Templates:\n"); - for ( a=0; a< n_template_file; a++) - { - //correct for missing extension modes - if (strm (template_file_list[a],"RNA") && !strstr (extend_mode, "rna"))sprintf ( extend_mode, "rna2"); - - - fprintf ( le, "\n\tTemplate Type: [%s] Mode Or File: [%s] [Start", template_type2type_name(template_file_list[a]), template_file_list[a]); - S=seq2template_seq(S, template_file_list[a], F); - fprintf ( le, "]"); - - if (S==NULL) - { - add_warning (stderr, "\nImpossible to find %s Templates\nCheck that your blast server is properly installed [See documentation][FATAL:%s]\n", template_file_list[a],PROGRAM); - exit (EXIT_FAILURE); - } - } - if (seq2n_X_template ( S, "_*_"))sprintf (S->template_file, "%s",seq2template_file (S, NULL)); - } - else - { - int ptf=0; - for ( a=0; anseq; a++) - { - if ( seq_has_template ( S, a, "_P_"))ptf=1; - } - if (ptf) - { - int j; - sprintf ( S->template_file ,"%s%s.template_file",F->path , F->name); - seq2template_file (S,S->template_file); - display_output_filename ( stdout, "Template_List","fasta_seq", S->template_file, STORE); - } - } - - - if (n_profile_template_file) - { - fprintf ( le, "\nLooking For Profile Templates"); - for ( a=0; a< n_profile_template_file; a++) - { - fprintf ( le, "\n\tTemplate Type: [%s] Mode Or File: [%s] [Start", template_type2type_name(profile_template_file_list[a]), profile_template_file_list[a]); - S=profile_seq2template_seq(S, profile_template_file_list[a], F); - fprintf ( le, "]"); - if (S==NULL) - { - add_warning(stderr, "Impossible to find %s Templates\nCheck that your blast server is properly installed [See documentation][FATAL:%s]\n",profile_template_file_list[a], PROGRAM); - exit (EXIT_FAILURE); - } - } - } - - S=seq2template_type (S); - le=display_sequences_names ( S, le, check_pdb_status, TEMPLATES); - - - - - - - if ( get_type) - { - S=get_sequence_type (S); - fprintf ( stdout , "%s\n", S->type); - free_sequence(S, S->nseq); - return 1; - } - - - /*Reorder the sequences*/ - new_order=duplicate_char (S->name, -1, -1); - if ( strm (inorder, "aligned"))new_order=sort_string_array (new_order, S->nseq); - - initial_order=duplicate_char (S->name, -1, -1); - S=reorder_seq(S,new_order,S->nseq); - free_char (new_order, -1); - - - - /*3 PREPARE THE CONSTRAINT LIST*/ - - CL=declare_constraint_list ( S,NULL, NULL, 0,(strm(mem_mode, "disk"))?tmpfile():NULL, NULL); - sprintf ( CL->method_evaluate_mode, "%s", method_evaluate_mode); - (CL->TC)->use_seqan=use_seqan; - CL->local_stderr=le; - - /*Estimate the distance Matrix*/ - CL->DM=cl2distance_matrix ( CL,NOALN,distance_matrix_mode, distance_matrix_sim_mode,1); - - /*one to all alignment*/ - if (one2all && one2all[0])prepare_one2all (one2all,S, lib_list); - else if ( subset2all) - { - prepare_subset2all (subset2all,S, lib_list,CL); - } - - if ( matrix && matrix[0]) - { - sprintf ( CL->method_matrix,"%s", matrix); - - } - /*Set the filtering*/ - CL->filter_lib=filter_lib; - /*Set the evaluation Functions*/ - CL->profile_mode=get_profile_mode_function (profile_mode, NULL); - sprintf ( CL->profile_comparison, "%s", profile_comparison); - if ( n_struc_to_use) - { - CL->STRUC_LIST=declare_sequence (1,1,n_struc_to_use); - CL->STRUC_LIST->nseq=0; - for ( a=0; a< n_struc_to_use; a++) - { - - sprintf ( (CL->STRUC_LIST)->name[(CL->STRUC_LIST)->nseq++],"%s",struc_to_use[a]); - } - } - sprintf (CL->align_pdb_param_file, "%s", align_pdb_param_file); - sprintf (CL->align_pdb_hasch_mode, "%s", align_pdb_hasch_mode); - - - - /*Blast Parameters*/ - (CL->Prot_Blast)->min_id=prot_min_sim; - (CL->Prot_Blast)->max_id=prot_max_sim; - (CL->Prot_Blast)->min_cov=prot_min_cov; - sprintf ( (CL->Prot_Blast)->blast_server, "%s", prot_blast_server); - sprintf ( (CL->Prot_Blast)->db, "%s", prot_db); - - (CL->Pdb_Blast)->min_id=pdb_min_sim; - - (CL->Pdb_Blast)->max_id=pdb_max_sim; - (CL->Pdb_Blast)->min_cov=pdb_min_cov; - sprintf ( (CL->Pdb_Blast)->blast_server, "%s", pdb_blast_server); - sprintf ( (CL->Pdb_Blast)->db, "%s", pdb_db); - CL->check_pdb_status=check_pdb_status; - /*split parameters */ - CL->split=split; - CL->split_nseq_thres=split_nseq_thres; - CL->split_score_thres=split_score_thres; - /*Blast Parameters - (CL->DNA_Blast)->min_id=dna_min_sim; - (CL->DNA_Blast)->max_id=dna_max_sim; - (CL->DNA_Blast)->min_cov=dna_min_cov; - sprintf ( (CL->DNA_Blast)->blast_server, "%s", dna_blast_server); - sprintf ( (CL->DNA_Blast)->db, "%s", dna_db); - */ - - if ( method_log) - { - if ( strm (method_log, "default")) - { - sprintf ( CL->method_log, "%s%s.method_log",F->path, F->name); - } - else if ( !strm (method_log, "no")) - { - sprintf ( CL->method_log, "%s", method_log); - } - set_string_variable ("method_log", method_log); - } - - - CL->lalign_n_top=lalign_n_top; - sprintf ( CL->multi_thread, "%s", multi_core); - sprintf ( CL->lib_list, "%s", lib_list); - sprintf (CL->rna_lib, "%s", rna_lib); -/* Important: This is where the library is compiled!!!!*/ - - if ((CL->S)->nseq>1 && !do_convert) - { - CL=read_n_constraint_list (list_file,n_list,NULL, mem_mode,weight,type, le, CL, seq_source); - - } - else if ( do_convert && out_lib[0]) - { - if ( infile[0]) - {sprintf (list_file[0], "%s", name2type_name(infile)); - CL=read_n_constraint_list (list_file,1,NULL, mem_mode,weight,type, le, CL, seq_source); - } - else - { - CL=read_n_constraint_list (list_file,n_list,NULL, mem_mode,weight,type, le, CL, seq_source); - } - } - if ( CL->M)clean_aln=0; - - if ( is_number (weight)) - { - int weight_value; - weight_value=atoi(weight); - for (a=0; ane; a++) - { - vwrite_clist(CL, a, WE, weight_value); - } - } - - free_pair_wise ();//Free ststic memory allocated in some of the pairwise functions - - - //Shrink: re-run slow_pair using the library, remove everything - - - /*If the List is empty*/ - if ( (CL->S)->nseq>1 && CL->ne==0 && !CL->M &&!(do_convert && n_list>0)) - { - fprintf ( stderr, "\n******************ERROR*****************************************\n"); - - fprintf ( stderr, "\nYou have not provided any method or enough Sequences[FATAL]"); - fprintf ( stderr, "\nIf you have used the '-in' Flag, ADD the methods you wish to use:"); - fprintf ( stderr, "\n\t-in Mlalign_id_pair Mfast_pair\n"); - fprintf ( stderr, "\nAnd make sure you provide at least TWO sequences\n"); - for ( a=0; a< argc; a++)fprintf ( stderr, "%s ", argv[a]); - fprintf ( stderr, "\n*****************************************************************\n"); - myexit(EXIT_FAILURE); - } - - - CL->normalise=do_normalise; - - if ( type && type[0])sprintf ( (CL->S)->type, "%s", type); - CL->extend_jit=(do_extend>0)?1:0; - - CL->extend_threshold=(do_extend==1)?0:do_extend; - CL->do_self=do_self; - sprintf (CL->extend_clean_mode, "%s", clean_mode); - sprintf (CL->extend_compact_mode, "%s", compact_mode); - if ( CL->extend_jit && CL->extend_threshold !=0)filter_list (CL,0, CL->ne, CL->extend_threshold); - CL->pw_parameters_set=1; - - - - CL->nomatch=nomatch; - set_int_variable ("nomatch", nomatch); - /*Gep and Gop*/ - if ( !gep && !gop && CL->M) - { - CL->gop=get_avg_matrix_mm ( CL->M, (strm3((CL->S)->type,"PROTEIN", "Protein", "protein")?AA_ALPHABET:"gcuta"))*10; - CL->gep=CL->gop/10; - fprintf ( CL->local_stderr, "\nAUTOMATIC PENALTIES: gapopen=%d gapext=%d", CL->gop, CL->gep); - } - else if ( !CL->M && cosmetic_penalty && !gep && !gop) - { - CL->gep=0; - CL->gop=cosmetic_penalty; - } - else - { - CL->gep=gep; - CL->gop=gop; - fprintf ( CL->local_stderr, "\nMANUAL PENALTIES: gapopen=%d gapext=%d", CL->gop, CL->gep); - } - - /*Frame Penalties*/ - CL->f_gep=f_gep; - CL->f_gop=f_gop; - - - CL->maximise=maximise; - - if (strm(retrieve_seq_type(),"DNA")|| strm(retrieve_seq_type(),"RNA") ) - CL->ktup=MAX(2,ktup); - else - CL->ktup=ktup; - - CL->use_fragments=diag_mode; - CL->fasta_step=fasta_step; - CL->diagonal_threshold=diag_threshold; - - sprintf ( CL->matrix_for_aa_group, "%s", sim_matrix); - sprintf ( CL->dp_mode, "%s", dp_mode); - CL->TG_MODE=tg_mode; - - sprintf ( CL->evaluate_mode, "%s", evaluate_mode); - fprintf (le, "\n\n\tLibrary Total Size: [%d]\n", CL->ne); - - - CL=choose_extension_mode (extend_mode, CL); - CL->max_n_pair=max_n_pair; - - processed_lib=0; - if (CL->ne>0 && out_lib[0]!='\0' && !strm (out_lib, "no")) - { - - if (strstr (out_lib_mode, "extended")) - { - char emode[1000]; - - //Do the processing before saving the extended lib*/ - processed_lib=1; - if ( filter_lib) CL=filter_constraint_list (CL,CL->weight_field, filter_lib); - for (a=0; ane, out_lib, NULL, "ascii",CL->S); - } - vfclose (OUT); - CL->local_stderr=display_output_filename (le, "TCLIB","tc_lib_format_01",out_lib, CHECK); - } - - /* - check for the connectivity of the constraint list - if ( !constraint_list_is_connected (CL)) - { - add_warning ( stderr, "ERROR: unconnected sequences in the constraint_list [FATAL:%s]\n", PROGRAM); - myexit (EXIT_FAILURE); - } - */ - if ( lib_only)return EXIT_SUCCESS; - - - if (!processed_lib) - { - if ( filter_lib) CL=filter_constraint_list (CL,CL->weight_field, filter_lib); - for (a=0; adistance_matrix_mode, "%s", distance_matrix_mode); - sprintf ( CL->distance_matrix_sim_mode, "%s", distance_matrix_sim_mode); - - sprintf ( CL->tree_mode, "%s", tree_mode); - //Re-estim�ate the distance matrix with consistency// - if ( strm ("cscore", distance_matrix_mode)) - { - CL->DM=cl2distance_matrix ( CL,NOALN,distance_matrix_mode, distance_matrix_sim_mode,1); - } - /*WEIGHT CONSTRAINT LIST*/ - - if ( !do_convert) - { - - CL->DM=cl2distance_matrix (CL, NOALN, NULL, NULL,0); - - CL=weight_constraint_list(CL, seq_weight); - - if (output_seq_weights (CL->W, outseqweight)) - CL->local_stderr=display_output_filename( CL->local_stderr,"WEIGHT","tc_weight",outseqweight, CHECK); - le=display_weights(CL->W, le); - } - - - - /*Prepare quadruplets*/ - if ( nseq_for_quadruplet && !strm(seq_name_for_quadruplet[0], "all")) - { - CL->nseq_for_quadruplet=nseq_for_quadruplet; - CL->seq_for_quadruplet=vcalloc ((CL->S)->nseq, sizeof (int)); - for (a=0; a< CL->nseq_for_quadruplet; a++) - { - printf ( "\nquad: %s", seq_name_for_quadruplet[a]); - if ( (b=name_is_in_list (seq_name_for_quadruplet[a],(CL->S)->name,(CL->S)->nseq, 100))!=-1)CL->seq_for_quadruplet[b]=1; - else add_warning ( stderr, "\nWARNING: Sequence %s is not in the set and cannot be used for quadruplet extension\n",seq_name_for_quadruplet[a]); - } - } - else if ( nseq_for_quadruplet && strm(seq_name_for_quadruplet[0], "all")) - { - - CL->nseq_for_quadruplet=(CL->S)->nseq; - CL->seq_for_quadruplet=vcalloc ((CL->S)->nseq, sizeof (int)); - for (a=0; a< CL->nseq_for_quadruplet; a++) - { - CL->seq_for_quadruplet[a]=1; - } - } - -/*******************************************************************************************************/ -/* */ -/* Prepare The Alignment */ -/* */ -/*******************************************************************************************************/ - - if ( do_align ) - { - A=seq2aln ((CL->S),NULL,1); - ungap_array(A->seq_al,A->nseq); - - /*Chose the right Mode for evaluating Columns*/ - - if ( A->nseq==1); - else if ( strm ( msa_mode, "seq_aln")) - { - A=seq_aln (A,(CL->S)->nseq, CL); - } - else if ( strm ( msa_mode, "sorted_aln")) - { - A=sorted_aln (A, CL); - } - else if ( strm ( msa_mode, "full_sorted_aln")) - { - full_sorted_aln (A, CL); - output_constraints (out_lib, "sim", A); - CL->local_stderr=display_output_filename (le, "TCLIB","tc_lib_format_01",out_lib, CHECK); - return EXIT_SUCCESS; - } - - else if ( strm ( msa_mode, "profile_aln")) - { - A=iterative_tree_aln (A, 0, CL); - A=profile_aln (A, CL); - } - else if ( strm ( msa_mode, "iterative_aln")) - { - A=iterative_tree_aln (A, 0, CL); - A=iterative_aln (A,10, CL); - } - else if ( strm ( msa_mode, "iterative_tree_aln")) - { - A=iterative_tree_aln (A,1, CL); - } - else if ( strm ( msa_mode, "dpa_aln")) - { - A=dpa_aln (A, CL); - } - else if ( strm ( msa_mode, "new_dpa_aln")) - { - A=new_dpa_aln (A, CL); - } - else if ( strm ( msa_mode, "delayed_tree_aln")) - { - A=make_delayed_tree_aln (A,2, CL); - } - else if ( strm ( msa_mode, "groups")) - { - A=seq2aln_group (A,dpa_maxnseq, CL); - out_aln_format[0]="conc_aln"; - n_out_aln_format=1; - } - else if ( strm ( msa_mode, "upgma")) - { - A=upgma_tree_aln (A, A->nseq, CL); - } - else if ( strm ( msa_mode, "graph")) - { - fprintf ( stderr, "\nDO GRAPH ALIGNMENT"); - A=graph_aln ( A, CL, (CL->S)); - } - else if ( strm ( msa_mode, "tsp")) - { - fprintf ( stderr, "\nDO TSP ALIGNMENT"); - A=tsp_aln ( A, CL, (CL->S)); - } - else if ( strm ( msa_mode, "precomputed")) - { - if (infile[0]) {free_aln (A);A=main_read_aln ( infile, declare_aln(CL->S));} - else{fprintf ( stderr, "\nERROR: distance_matrix_mode=aln requires an aln passed via the -infile flag [FATAL:%s]", PROGRAM);crash ("");} - - sprintf ( CL->dp_mode, "precomputed_pair_wise"); - sprintf ( CL->distance_matrix_mode, "aln"); - CL->tree_aln=A=reorder_aln ( A, (CL->S)->name,(CL->S)->nseq); - - pc=tree_file; - if ( strm (tree_file, "default") || !check_file_exists (tree_file)) - T=make_tree ( A,CL,gop, gep,(CL->S),pc, maximise); - else if ( strm (tree_file, "no")) - T=make_tree ( A,CL,gop, gep,(CL->S),NULL, maximise); - else - { - T=read_tree (pc,&tot_node,(CL->S)->nseq, (CL->S)->name); - } - - SNL=tree_aln ((T[3][0])->left,(T[3][0])->right,A,(CL->S)->nseq, CL); - } - else if ( strm ( msa_mode, "tree")) - { - if ( strm (CL->distance_matrix_mode, "aln")) - { - if (infile[0]) {free_aln (A);A=main_read_aln ( infile, declare_aln(CL->S));} - else{fprintf ( stderr, "\nERROR: distance_matrix_mode=aln requires an aln passed via the -infile flag [FATAL:%s]", PROGRAM);crash ("");} - CL->tree_aln=A; - } - pc=tree_file; - if ( strm (tree_file, "default") || !check_file_exists (tree_file)) - T=make_tree ( A,CL,gop, gep,(CL->S),pc,maximise); - else if ( strm (tree_file, "no")) - T=make_tree ( A,CL,gop, gep,(CL->S),NULL, maximise); - else - { - fprintf ( le, "\nREAD PRECOMPUTED TREE: %s\n", pc); - T=read_tree (pc,&tot_node,(CL->S)->nseq, (CL->S)->name); - } - SNL=tree_aln ((T[3][0])->left,(T[3][0])->right,A,(CL->S)->nseq, CL); - A->nseq=(CL->S)->nseq; - } - - else - { - fprintf ( stderr, "\nERROR: msa_mode %s is unknown [%s:FATAL]\n", msa_mode, PROGRAM); - crash (""); - } - - } - else if ( (do_evaluate || do_convert)) - { - - - A=(infile[0])?main_read_aln ( infile, declare_aln(CL->S)):NULL; - - if (!A)A=seq2aln((CL->S), NULL,0); - - - A->S=CL->S; - A->nseq=(CL->S)->nseq; - - - } - - - else if (do_domain) - { - CL->moca=vcalloc ( 1, sizeof ( Moca)); - if (strm ( "cfasta_pair_wise", dp_mode))sprintf (CL->dp_mode, "%s","domain_pair_wise"); - (CL->moca)->moca_start=domain_start; - (CL->moca)->moca_len =domain_len; - (CL->moca)->moca_scale=(domain_scale==0)?-(CL->normalise/20):domain_scale; - (CL->moca)->moca_interactive=domain_interactive; - - - - if (!cosmetic_penalty && !gep && !gop) - { - CL->gop=-200; - CL->gep=-100; - } - - CL=prepare_cl_for_moca (CL); - aln_list=moca_aln (CL); - free_int ( CL->packed_seq_lu, -1); - CL->packed_seq_lu=NULL; - - a=0; - while ( aln_list[a]) - { - for ( b=0; b< n_out_aln_format; b++) - { - - output_format_aln (out_aln_format[b],aln_list[a],EA=fast_coffee_evaluate_output(aln_list[a], CL), tot_out_aln[b]); - le=display_output_filename( le,"MSA",out_aln_format[b], tot_out_aln[b], CHECK); - } - a++; - } - return EXIT_SUCCESS; - } - else if ( do_extended_matrix) - { - A=seq2aln(CL->S, NULL, 1); - A->CL=CL; - for ( a=0; a< n_out_aln_format; a++) - { - output_format_aln (out_aln_format[a],A,EA, tot_out_aln[a]); - le=display_output_filename( le,"MSA",out_aln_format[a], tot_out_aln[a], CHECK); - } - - return EXIT_SUCCESS; - } - - -/*******************************************************************************************************/ -/* */ -/* PREPARE THE ALIGNMENT FOR OUTPUT */ -/* */ -/*******************************************************************************************************/ - - if (A) - { - /* - for ( a=0; a< A->nseq; a++) - { - for ( b=0; b< A->len_aln ; b++) - if ( A->seq_al[a][b]=='O' || A->seq_al[a][b]=='o')A->seq_al[a][b]='-'; - } - */ - - - - - if ( check_file_exists(outorder)) - { - Sequence *OS; - OS=get_fasta_sequence (outorder, NULL); - if ( prf_in_seq (CL->S))A->expanded_order=OS->name; - else A=reorder_aln ( A,OS->name,A->nseq); - } - else if ( strm(outorder, "aligned") && T) - { - A=reorder_aln ( A,A->tree_order,A->nseq); - - } - else - { - - A=reorder_aln ( A, (CL->S)->name,(CL->S)->nseq); - A=reorder_aln ( A, initial_order,(CL->S)->nseq); - - } - - A->output_res_num=strm3 ( output_res_num, "on", "On", "ON"); - - if ( strm2 (residue_case, "keep", "retain"))A->residue_case=KEEP_CASE; - else if (strm3 (residue_case, "upper", "Upper", "UPPER"))A->residue_case=UPPER_CASE; - else if (strm3 (residue_case, "lower", "Lower", "LOWER"))A->residue_case=LOWER_CASE; - else A->residue_case=1; - - - - - - - if ( iterate) - { - A=iterate_aln (A, iterate, CL); - A=ungap_aln(A); - } - - if ( clean_aln) - { - EA=main_coffee_evaluate_output(A, CL,clean_evaluate_mode); - A=clean_maln(A, EA,clean_threshold,clean_iteration); - free_aln (EA); - A=ungap_aln(A); - } - - //overaln - if (clean_overaln) - { - char *over_aln_tmp; - over_aln_tmp=vtmpnam(NULL); - output_format_aln ("overaln", A, NULL, over_aln_tmp); - A=main_read_aln (over_aln_tmp,A); - } - - EA=main_coffee_evaluate_output(A, CL, evaluate_mode); - - //correct ascii file - if (clean_overaln) - { - EA=overlay_alignment_evaluation (A,EA); - } - - - if (A->A)A=A->A; - if (!strm2(out_aln, "stdout", "stderr") && le==stderr && !do_convert)output_format_aln ("aln",A,NULL,"stdout"); - - - A->CL=CL; - for ( a=0; a< n_out_aln_format; a++) - if ( !strstr ( out_aln_format[a], "expand"))output_format_aln (out_aln_format[a],A,EA, tot_out_aln[a]); - for ( a=0; a< n_out_aln_format; a++) - if ( strstr (out_aln_format[a], "expand"))output_format_aln (out_aln_format[a],A,EA, tot_out_aln[a]); - - - - fprintf (le, "\n\nOUTPUT RESULTS"); - le=display_output_filename (le, "GUIDE_TREE","newick", tree_file, CHECK); - - for ( a=0; a< n_out_aln_format; a++) - le=display_output_filename( le,"MSA",out_aln_format[a], tot_out_aln[a], CHECK); - - if (!strm (ph_tree_file, "NO") && A->nseq>2) - { - NT_node T; - FILE *tfp; - char **tmode; - tmode=declare_char (2, 100); - sprintf (tmode[0], "nj"); - T=tree_compute (A, 1, tmode); - tfp=vfopen (ph_tree_file, "w"); - tfp=print_tree (T, "newick", tfp); - vfclose (tfp); - le=display_output_filename (le, "PHYLOGENIC_TREE","newick", ph_tree_file, CHECK); - } - - } - - if (split) - { - - if (trim && n_seq_to_keep) - { - if (n_seq_to_keep==1 && check_file_exists (seq_to_keep[0])) - { - - SEQ_TO_KEEP=read_sequences (seq_to_keep[0]); - } - else - { - - SEQ_TO_KEEP=declare_sequence ( 1, 1,n_seq_to_keep); - for ( a=0; a< n_seq_to_keep; a++)sprintf ( SEQ_TO_KEEP->name[a], "%s", seq_to_keep[a]); - } - } - - sprintf ( CL->dp_mode, "precomputed_pair_wise"); - sprintf ( CL->distance_matrix_mode, "aln"); - - - - CL->tree_aln=A=reorder_aln ( A, (CL->S)->name,(CL->S)->nseq); - CL->S=aln2seq ( A); - - if (!T) - { - - pc=tree_file; - if ( strm (tree_file, "default") || !check_file_exists (tree_file)) - T=make_tree ( A,CL,gop, gep,(CL->S),pc, maximise); - else if ( strm (tree_file, "no")) - T=make_tree ( A,CL,gop, gep,(CL->S),NULL, maximise); - else - { - T=read_tree (pc,&tot_node,(CL->S)->nseq, (CL->S)->name); - } - } - - SNL=tree_aln ((T[3][0])->left,(T[3][0])->right,A,(CL->S)->nseq, CL); - - - for ( a=0, b=0; anseq; a++)b+=(SNL[a])?1:0; - fprintf ( le, "\n\nSPLIT DATASET: %d Groups\n", b); - /*Display Group Names*/ - - if ( trim && SEQ_TO_KEEP) - { - for ( a=0; a< SEQ_TO_KEEP->nseq; a++) - { - - trim_subS=extract_one_seq(SEQ_TO_KEEP->name[a],0,0,A,KEEP_NAME); - trim_S=add_sequence (trim_subS,trim_S,0); - } - } - for ( a=0, b=0; anseq; a++) - { - - if ( SNL[a]) - { - b++; - fprintf ( le, "\n\tSPLIT_GROUP %d ; Nseq %d ; Score %d ; List ",b, (SNL[a])->nseq, (int)(SNL[a])->score); - for ( c=0; c< (SNL[a])->nseq; c++) - { - fprintf ( le, "%s ",(CL->S)->name[(SNL[a])->lseq[c]]); - } - - SPLIT_ALN=extract_sub_aln (A, (SNL[a])->nseq,(SNL[a])->lseq); - SPLIT_ALN->S=A->S; - ungap_aln (SPLIT_ALN); - - if (!trim) - { - sprintf ( split_format, "%s", "clustalw"); - sprintf ( split_name, "%s.split.%d.%s", F->name, b,split_format); - fprintf ( le, " ; File %s", split_name); - output_format_aln (split_format,SPLIT_ALN,NULL,split_name); - le=display_output_filename( le,"SPLIT_SEQ",split_format,split_name, CHECK); - } - else if (trim) - { - t=aln2most_similar_sequence(SPLIT_ALN, "idmat"); - trim_subS=extract_one_seq(SPLIT_ALN->name[t],0,0,SPLIT_ALN,KEEP_NAME); - trim_S=add_sequence (trim_subS,trim_S,0); - fprintf ( le, "\n\tTRIM_SEQ: Kept sequence %s",SPLIT_ALN->name[t]); - } - free_aln (SPLIT_ALN); - fprintf (le, "\n"); - } - } - - if (trim) - { - - - SPLIT_ALN=seq2aln (trim_S,NULL, KEEP_GAP); - ungap_aln (SPLIT_ALN); - sprintf ( trim_format, "%s", "fasta_aln"); - if ( strm (trimfile, "default"))sprintf ( trimfile, "%s.trim.%s", F->name,trim_format); - - output_format_aln (trim_format,SPLIT_ALN,NULL,trimfile); - le=display_output_filename( le,"TRIM_SEQ",trim_format,trimfile, CHECK); - } - } - - if (remove_template_file){S=vremove_seq_template_files(S);} - else - { - S=display_seq_template_files (S); - } - - //fLUSH OUT THE NAME OF ALL THE FILES THAT HAVE BEEN PRODUCED - le=display_output_filename (le, NULL, NULL, NULL, FLUSH); - - - fprintf (le, "\n\n"); - - free_char (list_file, -1); - free_Alignment (A); - free_Alignment (EA); - - - S=free_constraint_list (CL); - free_sequence (S, S->nseq); - - - - vremove ( "core"); - - vfree_all(); - - le=t_coffee_tip (le, tip); - le=print_command_line ( le); - le=print_mem_usage (le, PROGRAM); - le=print_cpu_usage(le, PROGRAM); - le=print_program_information (le, NULL); - - - if (full_log && full_log[0])log_function(full_log); - - return EXIT_SUCCESS; - } - -/*Specialized set of Parameters*/ -char *get_defaults (char *buf, char *type) -{ - return NULL; -} -char *get_precomputed_defaults(char *buf, char *type) - { - - if (buf==NULL)buf=vcalloc (1000, sizeof (char)); - - buf=strcat (buf," -msa_mode=precomputed "); - buf=strcat (buf," -seq_weight=no "); - buf=strcat (buf," -evaluate_mode no "); - buf=strcat (buf," -in Xpam250mt "); - return buf; - } -char *get_evaluate_defaults(char *buf, char *type) - { - - if (buf==NULL)buf=vcalloc (1000, sizeof (char)); - - buf=strcat (buf," -quiet=stdout "); - /*buf=strcat (buf," -seq_weight=no ");*/ - buf=strcat (buf," -output score_ascii html "); - buf=strcat (buf," -iterate 0 "); - - buf=strcat (buf," -evaluate "); - - - - return buf; - } -char *get_genome_defaults(char *buf, char *type) - { - - if (buf==NULL)buf=vcalloc (1000, sizeof (char)); - - buf=strcat (buf," -seq_weight=no "); - buf=strcat (buf," -dp_mode sim_pair_wise_lalign "); - buf=strcat (buf," -output glalign "); - buf=strcat (buf," -iterate 0 "); - buf=strcat (buf," -distance_matrix_mode ktup "); - buf=strcat (buf," -evaluate_mode t_coffee_slow "); - buf=strcat (buf," -gapopen 100 -gapext 20 -nomatch 30 "); - buf=strcat (buf," -clean_aln 0 "); - buf=strcat (buf,"-output clustalw,score_ascii "); - - - return buf; - } -char *get_dali_defaults(char *buf, char *type) - { - - if (buf==NULL)buf=vcalloc (1000, sizeof (char)); - - buf=strcat (buf,"-cosmetic_penalty=-50 "); - buf=strcat (buf,"-distance_matrix_mode=slow "); - buf=strcat (buf,"-output clustalw,score_ascii "); - buf=strcat (buf,"-evaluate_mode=non_extended_t_coffee "); - buf=strcat (buf,"-clean_aln 0 "); - - return buf; - } - -char *get_very_fast_defaults(char *buf, char *type) - { - if (buf==NULL)buf=vcalloc (1000, sizeof (char)); - - buf=strcat (buf,"-in Xblosum62mt "); - buf=strcat (buf,"-distance_matrix_mode ktup "); - buf=strcat (buf,"-maxnseq 10000 "); - buf=strcat (buf,"-dpa_maxnseq 0 "); - buf=strcat (buf,"-dp_mode fasta_pair_wise "); - buf=strcat (buf,"-extend_mode matrix "); - buf=strcat (buf,"-gapopen -10 "); - buf=strcat (buf,"-gapext -1 "); - buf=strcat (buf,"-iterate 0 "); - /*buf=strcat (buf,"-in ");*/ - - return buf; - } - -char *get_low_memory_defaults(char *buf, char *type) - { - if (buf==NULL)buf=vcalloc (1000, sizeof (char)); - - if (NO_METHODS_IN_CL)buf=strcat (buf,"-distance_matrix_mode=idscore -method lalign_id_pair slow_pair -dp_mode=linked_pair_wise "); - else buf=strcat (buf,"-distance_matrix_mode=idscore -dp_mode=linked_pair_wise "); - return buf; - } -char *get_dna_defaults(char *buf, char *type) -{ - - return buf; -} -char *get_cdna_defaults(char *buf, char *type) -{ - buf=strcat (buf,"-distance_matrix_mode=idscore -dp_mode=fasta_cdna_pair_wise "); - return buf; -} -char *get_3dcoffee_defaults(char *buf, char *type) - { - if (buf==NULL)buf=vcalloc (1000, sizeof (char)); - - buf=strcat (buf,"-in Msap_pair -template_file SELF_P_ "); - /*buf=strcat (buf,"-in ");*/ - - return buf; - } -char *get_expresso_defaults(char *buf, char *type) - { - - if (buf==NULL)buf=vcalloc (1000, sizeof (char)); - - buf=strcat (buf,"-in Msap_pair -template_file EXPRESSO"); - - /*buf=strcat (buf,"-in ");*/ - - return buf; - } -char *get_psicoffee_defaults(char *buf, char *type) - { - - if (buf==NULL)buf=vcalloc (1000, sizeof (char)); - - buf=strcat (buf,"-in Mproba_pair -template_file BLAST "); - /*buf=strcat (buf,"-in ");*/ - - return buf; - } -char *get_accurate_defaults ( char *buf, char *type) -{ - - if ( strm (type, "PROTEIN")) return get_accurate4PROTEIN_defaults(buf, type); - else if ( strm (type, "DNA")) return get_accurate4DNA_defaults(buf, type); - else if ( strm (type, "RNA")) return get_accurate4RNA_defaults(buf, type); - else return get_defaults(buf, type); -} -char *get_accurate4PROTEIN_defaults(char *buf, char *type) - { - if (buf==NULL)buf=vcalloc (1000, sizeof (char)); - if (NO_METHODS_IN_CL)buf=strcat (buf,"-in Mbest_pair4prot -template_file BLAST -template_file EXPRESSO "); - else buf=strcat (buf,"-template_file BLAST -template_file EXPRESSO "); - buf=strcat (buf,"-output aln, expanded_fasta_aln "); - - return buf; - } - - - - -char *get_accurate4DNA_defaults(char *buf, char *type) -{ - return get_low_memory_defaults (buf,type); -} -char *get_accurate4RNA_defaults(char *buf, char *type) -{ - return get_rcoffee_defaults (buf,type); -} -char *get_t_coffee_defaults(char *buf, char *type) -{ - return buf; -} -char *get_fmcoffee_defaults(char *buf, char *type) - { - //Fast Mcoffee - if (buf==NULL)buf=vcalloc (1000, sizeof (char)); - - if (NO_METHODS_IN_CL) buf=strcat (buf,"-in Mclustalw2_msa Mmuscle_msa Mmafft_msa -multi_core methods_relax_msa"); - - /*buf=strcat (buf,"-in ");*/ - - return buf; - } - -char *get_mcoffee_defaults(char *buf, char *type) - { - - if (buf==NULL)buf=vcalloc (1000, sizeof (char)); - - - if (NO_METHODS_IN_CL) buf=strcat (buf,"-in Mclustalw2_msa Mt_coffee_msa Mpoa_msa Mmuscle_msa Mmafft_msa Mdialignt_msa Mpcma_msa Mprobcons_msa -multi_core methods_relax_msa "); - /*buf=strcat (buf,"-in ");*/ - return buf; - } -char *get_dmcoffee_defaults(char *buf, char *type) - { - - if (buf==NULL)buf=vcalloc (1000, sizeof (char)); - - if (NO_METHODS_IN_CL)buf=strcat (buf,"-in Mkalign_msa Mt_coffee_msa Mpoa_msa Mmuscle_msa Mmafft_msa Mdialignt_msa Mprobcons_msa Mamap_msa -multi_core methods_relax_msa"); - /*buf=strcat (buf,"-in ");*/ - - return buf; - } -char *get_rcoffee_consan_defaults(char *buf, char *type) - { - - if (buf==NULL)buf=vcalloc (1000, sizeof (char)); - - check_program_is_installed (RNAPLFOLD_4_TCOFFEE,NULL, NULL,RNAPLFOLD_ADDRESS, INSTALL_OR_DIE); - if (NO_METHODS_IN_CL)buf=strcat (buf,"-in Mconsan_pair -dp_mode myers_miller_pair_wise -extend_mode rna2 -template_file RCOFFEE -transform dna2rna -type DNA -relax_lib 0"); - else buf=strcat (buf,"-dp_mode myers_miller_pair_wise -extend_mode rna2 -template_file RCOFFEE -transform dna2rna -type DNA -relax_lib 0"); - /*buf=strcat (buf,"-in ");*/ - - return buf; - } -char *get_rmcoffee_defaults(char *buf, char *type) - { - - if (buf==NULL)buf=vcalloc (1000, sizeof (char)); - - check_program_is_installed (RNAPLFOLD_4_TCOFFEE,NULL, NULL,RNAPLFOLD_ADDRESS, INSTALL_OR_DIE); - if (NO_METHODS_IN_CL)buf=strcat (buf,"-in Mprobcons_msa Mmafft_msa Mmuscle_msa -extend_mode rna2 -template_file RCOFFEE -transform dna2rna -check_type -type DNA -relax_lib 0"); - else buf=strcat (buf,"-extend_mode rna2 -template_file RCOFFEE -transform dna2rna -check_type -type DNA -relax_lib 0"); - /*buf=strcat (buf,"-in ");*/ - - return buf; - } -char *get_rmcoffee_defaults_old(char *buf, char *type) - { - - if (buf==NULL)buf=vcalloc (1000, sizeof (char)); - - check_program_is_installed (RNAPLFOLD_4_TCOFFEE,NULL, NULL,RNAPLFOLD_ADDRESS, INSTALL_OR_DIE); - buf=strcat (buf,"-in Mprobcons_msa Mmafft_msa Mmuscle_msa -dp_mode myers_miller_pair_wise -extend_mode rna2 -template_file RCOFFEE -transform dna2rna -check_type -type DNA -relax_lib 0"); - /*buf=strcat (buf,"-in ");*/ - - return buf; - } - -// if (NO_METHODS_IN_CL)buf=strcat (buf,"-in Mbest_pair4prot -template_file BLAST -template_file EXPRESSO "); - char *get_best4RNA_defaults(char *buf, char *type) - { - - if (buf==NULL)buf=vcalloc (1000, sizeof (char)); - - check_program_is_installed (RNAPLFOLD_4_TCOFFEE,NULL, NULL,RNAPLFOLD_ADDRESS, INSTALL_OR_DIE); - buf=strcat (buf," -extend_mode rna2 -template_file PDB,RNA -in Mbest_pair4rna"); - /*buf=strcat (buf,"-in ");*/ - - return buf; - } - -char *get_rcoffee_defaults(char *buf, char *type) - { - - if (buf==NULL)buf=vcalloc (1000, sizeof (char)); - - check_program_is_installed (RNAPLFOLD_4_TCOFFEE,NULL, NULL,RNAPLFOLD_ADDRESS, INSTALL_OR_DIE); - buf=strcat (buf," -extend_mode rna2 -template_file RCOFFEE -transform dna2rna -check_type -type DNA -relax_lib 0"); - /*buf=strcat (buf,"-in ");*/ - - return buf; - } -char *get_rcoffee_defaults_old(char *buf, char *type) - { - - if (buf==NULL)buf=vcalloc (1000, sizeof (char)); - - check_program_is_installed (RNAPLFOLD_4_TCOFFEE,NULL, NULL,RNAPLFOLD_ADDRESS, INSTALL_OR_DIE); - buf=strcat (buf,"-dp_mode myers_miller_pair_wise -extend_mode rna2 -template_file RCOFFEE -transform dna2rna -check_type -type DNA -relax_lib 0"); - /*buf=strcat (buf,"-in ");*/ - - return buf; - } - -char *get_repeat_defaults(char *buf, char *type) - { - - if (buf==NULL)buf=vcalloc (1000, sizeof (char)); - - buf=strcat (buf,"-in slow_pair -matrix idmat -out_lib -profile_comparison profile -profile_mode channel_profile_profile -dp_mode myers_miller_pair_wise "); - /*buf=strcat (buf,"-in ");*/ - return buf; - } - - -int check_configuration4program() - { - return 1; - - } - -/*Chose the right Mode for comparing residues*/ - -void test () -{ - char command[1000]; - char *c2; - - c2=vcalloc ( 100, sizeof (char)); - - sprintf (command, "cat hmgt_mouseVsnrl3d.blast_result |blast_aln2fasta_aln.pl | fasta_aln2fasta_aln_unique_name.pl > my_test"); - - fprintf ( stderr, "C1: %d, C2:%d", is_dynamic_memory (c2), is_dynamic_memory (c2)); - - - myexit (0); -} - -int run_other_pg ( int argc, char *argv[]) -{ - //make minimum initialization - get_t_coffee_environement (NULL); - if ( strm (argv[0], "seq_reformat") || strm (argv[0], "saltt")) - { - return seq_reformat (argc, argv); - } - else if ( strm (argv[0], "aln_compare")) - { - return aln_compare (argc, argv); - } - else if ( strm (argv[0], "analyse_pdb") || strm (argv[0], "apdb") || strm (argv[0], "irmsd") || strm (argv[0], "trmsd")) - { - return apdb ( argc, argv); - } - else if ( strm (argv[0], "quantile")) - { - return quantile ( argc, argv); - } - else if ( strstr ( argv[0], "unpack_")) - { - unpack_all_perl_script (argv[0]+strlen ("unpack_")); - } - else if ( strstr ( argv[0], "fastal")) - { - return fastal(argc, argv); - } - else - { - return my_system_cl (argc, argv); - } - return EXIT_FAILURE; -} - -FILE * t_coffee_tip (FILE *fp,char *mode) -{ - static char **tip; - static int n; - int a; - if ( !tip) - { - tip=declare_char ( 100, 300); - sprintf ( tip[n++],"Get the most accurate protein alignments with: t_coffee -special_mode accurate [Slow]\n"); - sprintf ( tip[n++],"Change the Width of your MSA with the environement variable ALN_LINE_LENGTH (all formats)"); - sprintf ( tip[n++],"Align 2 or more profiles with -profiles= aln1, aln2"); - sprintf ( tip[n++],"-special_mode=expresso to fetch your structures automatically"); - sprintf ( tip[n++],"-special_mode=psicoffee to expand your sequences"); - sprintf ( tip[n++],"-special_mode=accurate The best we can do [slow]"); - - sprintf ( tip[n++],"-special_mode=3dcoffee to combine sequences and structures"); - sprintf ( tip[n++],"-special_mode=mcoffee to combine alternative msa methods"); - sprintf ( tip[n++],"-special_mode=dmcoffee to combine alternative msa methods on debian"); - - sprintf ( tip[n++],"-usetree= to use your own guide tree"); - sprintf ( tip[n++],"-infile= -special_mode=evaluate to evaluate your own alignment"); - sprintf ( tip[n++],"-other_pg seq_reformat to access seq_reformat"); - sprintf ( tip[n++],"-other_pg extract_from_pdb to use our pdb retriever"); - sprintf ( tip[n++],"All the latest versions on www.tcoffee.org"); - sprintf ( tip[n++],"-version to check for updates"); - sprintf ( tip[n++],"-output=html will produce a colored output"); - sprintf ( tip[n++],"-outorder=aligned will order the sequences according to the guide tree in newick"); - sprintf ( tip[n++],"-special_mode=quickaln will produce fast/low accuracy alignments"); - sprintf ( tip[n++],"-other_pg seq_reformat -in -action +trim %%50 Will reduce the redundancy of your MSA"); - sprintf ( tip[n++],"-tip=all to see all the tips, tip=no will prevent them all"); - sprintf ( tip[n++],"-other_pg unpack_all will unpack all the perl scripts within t_coffee"); - } - - if ( strm (mode, "none"))return fp; - - fprintf ( fp, "\n# TIP :See The Full Documentation on www.tcoffee.org\n"); - - if (strm ( mode, "all")) - { - for ( a=0; a< n; a++) - { - fprintf (fp, "# TIP %d: %s\n", a+1,tip[a]); - } - } - else if ( strm ( mode, "one")) - { - int b; - vsrand(0); - b=(rand()%(n-1))+1; - fprintf (fp, "# TIP %2s: %s","1", tip[0]); - fprintf (fp, "# TIP %2d: %s\n", b+1, tip[b]); - - } - - fprintf ( fp, "\n"); - return fp; -} - -char* prepare_one2all (char *seq,Sequence *S, char *lib_file) - { - int a, n, i; - FILE *fp; - char **name, *use_tree; - - - if ( S->nseq==2) return NULL; - - - if ((i=name_is_in_list (seq,S->name,S->nseq, 100))!=-1); - else if ( is_number (seq)) - i=atoi(seq)-1; - else - return NULL; - - - declare_name (use_tree); - - - name=declare_char (S->nseq+1, 100); - for (a=0; anseq; a++) - sprintf (name[a], "%s", S->name[a]); - n=S->nseq; - - if (i!=0) - { - sprintf (name[n], "%s", name[i]); - sprintf (name[i], "%s", name[0]); - sprintf (name[0], "%s", name[n]); - } - sprintf (lib_file, "%s", vtmpnam (NULL)); - fp=vfopen (lib_file, "w"); - for ( a=1; anseq==2) return NULL; - name=declare_char (S->nseq+1, 100); - done=declare_int (S->nseq, S->nseq); - for (a=0; anseq; a++)done[a][a]=1; - - if ( check_file_exists (mode)) - { - Sequence *L; - L=main_read_seq (mode); - for (a=0; a< L->nseq; a++) - if ( (b=name_is_in_list (L->name[a], S->name,S->nseq, 100))!=-1) - { - sprintf ( name[nseq++], "%s", L->name[a]); - } - } - else if ( strm (mode, "_P_")) - { - for (a=0; anseq; a++) - { - if (seq_has_template (S, a, "_P_")) - { - sprintf (name[nseq++], "%s", name [a]); - } - } - } - else if ( is_number (mode)) - { - Sequence *LS; - - nseq=atoi (mode); - if ( nseq<0) - nseq=((float)S->nseq*((float)nseq/(float)100.0)*(float)-1); - - nseq=MIN(nseq,S->nseq); - if ( nseq>=S->nseq)LS=S; - else - { - Alignment *A, *SA; - char tmode[1000]; - A=very_fast_aln (seq2aln (S, NULL, RM_GAP), 0, NULL); - sprintf (tmode, "_aln_n%d", nseq); - SA=simple_trimseq (A, NULL, tmode, NULL); - LS=aln2seq(SA); - free_aln (A); - free_aln (SA); - } - for (a=0; anseq; a++) - { - sprintf (name[a], "%s", LS->name[a]); - fprintf ( stderr, "\n\tMaster Sequence: %s", name[a]); - } - if (LS!=S)free_sequence (LS, LS->nseq); - } - else - { - printf_exit (EXIT_FAILURE, stderr, "ERROR: %s is neither a file nor a method nor a number for subset2all [FATAL:%s]\n",mode,PROGRAM); - } - - sprintf (lib_file, "%s", vtmpnam (NULL)); - fp=vfopen (lib_file, "w"); - for (a=0; anseq; b++) - { - if(!done[a][b] && !strm (name[a],S->name[b]))fprintf ( fp, "2 %s %s\n", name[a],S->name[b]); - done[a][b]=done[b][a]=1; - } - } - vfclose (fp); - - return NULL; - } -int set_methods_limits (char ** method,int nl,char **list, int n, int *maxnseq, int *maxlen) -{ - int a,ns, ml, nm=0; - char string[1000]; - - nl/=3; - for (a=0; ans))maxnseq[0]=ns; - if (ml!=-1 && (maxlen[0]==-1 || maxlen[0]>ml))maxlen[0]=ml; - nm++; - } - } - return nm; -} - - - -char * get_seq_type_from_cl (int argc, char **argv) -{ - char *buf, *r; - char file[100]; - int a; - int seq=0; - - sprintf (file, "%d.tmp", rand()%10000); - buf=vcalloc ( 1000, sizeof (char)); - sprintf ( buf, "%s ", get_string_variable ("t_coffee")); - for (a=1, seq=0; a"); - buf=strcat (buf, file); - my_system ( buf); - - r=file2string (file); - vremove (file); - return r; -} -/*********************************COPYRIGHT NOTICE**********************************/ -/*© Centro de Regulacio Genomica */ -/*and */ -/*Cedric Notredame */ -/*Tue Oct 27 10:12:26 WEST 2009. */ -/*All rights reserved.*/ -/*This file is part of T-COFFEE.*/ -/**/ -/* T-COFFEE is free software; you can redistribute it and/or modify*/ -/* it under the terms of the GNU General Public License as published by*/ -/* the Free Software Foundation; either version 2 of the License, or*/ -/* (at your option) any later version.*/ -/**/ -/* T-COFFEE is distributed in the hope that it will be useful,*/ -/* but WITHOUT ANY WARRANTY; without even the implied warranty of*/ -/* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the*/ -/* GNU General Public License for more details.*/ -/**/ -/* You should have received a copy of the GNU General Public License*/ -/* along with Foobar; if not, write to the Free Software*/ -/* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA*/ -/*............................................... |*/ -/* If you need some more information*/ -/* cedric.notredame@europe.com*/ -/*............................................... |*/ -/**/ -/**/ -/* */ -/*********************************COPYRIGHT NOTICE**********************************/ diff --git a/binaries/src/tcoffee/t_coffee_source/t_coffee.h b/binaries/src/tcoffee/t_coffee_source/t_coffee.h deleted file mode 100644 index 261a386..0000000 --- a/binaries/src/tcoffee/t_coffee_source/t_coffee.h +++ /dev/null @@ -1,31 +0,0 @@ -int run_default (char *pg,char *filename); -//int check_configuration4program(); -/*********************************COPYRIGHT NOTICE**********************************/ -/*© Centro de Regulacio Genomica */ -/*and */ -/*Cedric Notredame */ -/*Tue Oct 27 10:12:26 WEST 2009. */ -/*All rights reserved.*/ -/*This file is part of T-COFFEE.*/ -/**/ -/* T-COFFEE is free software; you can redistribute it and/or modify*/ -/* it under the terms of the GNU General Public License as published by*/ -/* the Free Software Foundation; either version 2 of the License, or*/ -/* (at your option) any later version.*/ -/**/ -/* T-COFFEE is distributed in the hope that it will be useful,*/ -/* but WITHOUT ANY WARRANTY; without even the implied warranty of*/ -/* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the*/ -/* GNU General Public License for more details.*/ -/**/ -/* You should have received a copy of the GNU General Public License*/ -/* along with Foobar; if not, write to the Free Software*/ -/* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA*/ -/*............................................... |*/ -/* If you need some more information*/ -/* cedric.notredame@europe.com*/ -/*............................................... |*/ -/**/ -/**/ -/* */ -/*********************************COPYRIGHT NOTICE**********************************/ diff --git a/binaries/src/tcoffee/t_coffee_source/tree_util.c b/binaries/src/tcoffee/t_coffee_source/tree_util.c deleted file mode 100644 index df50107..0000000 --- a/binaries/src/tcoffee/t_coffee_source/tree_util.c +++ /dev/null @@ -1,5231 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include "io_lib_header.h" -#include "util_lib_header.h" -#include "dp_lib_header.h" -#include "define_header.h" - -#define TOPOLOGY 1 -#define WEIGHTED 2 -#define LENGTH 3 -#define RECODE 4 - -int distance_tree; -int rooted_tree; -int tot_nseq; -static NT_node compute_fj_tree (NT_node T, Alignment *A, int limit, char *mode); -static NT_node compute_cw_tree (Alignment *A); -static NT_node compute_std_tree (Alignment *A, int n, char **arg); -static NT_node tree2fj_tree (NT_node T); -int tree_contains_duplicates (NT_node T); -int display_tree_duplicates (NT_node T); - -static int compare_node1 ( int *b1, int *b2, int n); -static int compare_node2 ( int *b1, int *b2, int n); -static int find_seq_chain (Alignment *A, int **sim,int *used,int seq0,int seq1, int seq2,int chain_length, int limit, int max_chain, int *nseq); -int new_display_tree (NT_node T, int n); -NT_node display_code (NT_node T, int nseq, FILE *fp); -NT_node display_dist (NT_node T, int n, FILE *fp); -/*********************************************************************/ -/* */ -/* dpa_tree_manipulation */ -/* */ -/*********************************************************************/ -static NT_node code_dpa_tree ( NT_node T, int **D); -NT_node collapse_sub_tree ( NT_node T,int nseq, int *list, char *new_name); -NT_node seq2dpa_tree (Sequence *S, char *mode) -{ - Constraint_list *CL; - NT_node **T; - NT_node Tree; - CL=declare_constraint_list_simple (S); - CL->local_stderr=NULL; - - - CL->DM=cl2distance_matrix (CL,NOALN,(mode==NULL)?"ktup":mode, NULL, 0); - - T=int_dist2nj_tree ( (CL->DM)->similarity_matrix, S->name, S->nseq, vtmpnam (NULL)); - Tree=T[3][0]; - - Tree=recode_tree (Tree, S); - Tree=reset_dist_tree (Tree, -1); - - Tree=code_dpa_tree (Tree, (CL->DM)->similarity_matrix); - free_distance_matrix (CL->DM); - return Tree; -} - -NT_node tree2dpa_tree (NT_node T, Alignment *A, char *mode) -{ - /*This Function sets the branches with Length values used by DP*/ - /*The tree must be rooted*/ - Sequence *S; - int **D; - - S=aln2seq (A); - T=recode_tree (T, S); - T=reset_dist_tree (T, -1); - D=get_sim_aln_array (A,mode); - - T=code_dpa_tree (T, D); - return T; -} - -NT_node code_dpa_tree ( NT_node T, int **D) -{ - if ( !T) return T; - else if ( T->leaf==1) - { - T->dist=100; - return T; - } - else - { - int nl, *ll; - int nr, *lr; - int a, b, min=100; - float tot, n=0; - - nl=(T->left)->nseq;ll=(T->left)->lseq; - nr=(T->right)->nseq;lr=(T->right)->lseq; - - for (tot=0,n=0, a=0; a< nl; a++) - for ( b=0; b< nr; b++, n++) - { - tot+=D[ll[a]][lr[b]]; - min=MIN(min,(D[ll[a]][lr[b]])); - } - /* T->dist=(mode==AVERAGE)?(tot/n):min:;*/ - T->dist=(n>0)?tot/n:0; - T->dist=min; - code_dpa_tree ( T->right, D); - code_dpa_tree ( T->left, D); - return T; - } -} -static int group_number; -char *tree2Ngroup (Alignment *A, NT_node T, int max_n, char *fname, char *mat) -{ - double top, bot, mid, pmid; - Sequence *S; - int n; - - - - if (!T) - { - char **list; - - list=declare_char ( 2, 100); - sprintf (list[0], "%s",mat); - - fprintf ( stderr, "\nCompute Phylogenetic tree [Matrix=%s]", mat); - T=compute_std_tree(A,1, list); - fprintf ( stderr, "\nCompute dpa tree"); - T=tree2dpa_tree (T,A, mat); - } - - S=tree2seq(T, NULL); - - if ( max_n<0) - { - max_n*=-1; - n=tree2group_file (T,S,0, max_n, fname); - fprintf ( stderr, "\n#TrimTC: Split in %d Groups at a minimum of %d%% ID\n",n, (int)max_n); - return fname; - - } - else if ( max_n>0) - { - if ( max_n>S->nseq)max_n=S->nseq; - - top=100; bot=0; - pmid=0; mid=50; - n=tree2group_file(T, S,0, (int)mid,fname); - mid=dichotomy((double)n, (double)max_n,(pmid=mid), &bot, &top); - while (n!=max_n && (int)pmid!=(int)mid) - { - n=tree2group_file(T, S,0, (int)mid, fname); - mid=dichotomy((double)n, (double)max_n,(pmid=mid), &bot, &top); - } - fprintf ( stderr, "\nDONE2"); - fprintf ( stderr, "\n#TrimTC: Split in %d Groups at a minimum of %d%% ID\n",n, (int)mid); - return fname; - } - return NULL; -} -static int group_number; -int tree2group_file ( NT_node T,Sequence *S, int maxnseq, int minsim, char *name) - { - FILE *fp; - - - fp=vfopen (name, "w"); - vfclose (tree2group (T, S,maxnseq,minsim, "tree2ngroup",fp)); - - return count_n_line_in_file(name); - } - - -FILE * tree2group ( NT_node T,Sequence *S, int maxnseq, int minsim,char *name, FILE *fp) -{ - if ( !T)return fp; - else - { - int m,d; - - m=(maxnseq==0)?S->nseq:maxnseq; - d=minsim; - - - - if ( T->nseq<=m && T->dist>=d) - { - int a; - fprintf ( fp, ">%s_%d ", (name)?name:"", ++group_number); - for ( a=0; a< T->nseq; a++) - fprintf ( fp, "%s ", S->name[T->lseq[a]]); - fprintf (fp, "\n"); - if (!T->parent)group_number=0; - return fp; - } - else - { - fp=tree2group (T->right, S, maxnseq, minsim, name,fp); - fp=tree2group (T->left, S, maxnseq, minsim, name,fp); - if (!T->parent)group_number=0; - return fp; - } - - } -} - - -NT_node tree2collapsed_tree (NT_node T, int n, char **string) -{ - char ***list; - Sequence *A; - int a, *nlist; - - - A=tree2seq(T, NULL); - T=recode_tree(T, A); - list=vcalloc (A->nseq, sizeof (char***)); - nlist=vcalloc (A->nseq, sizeof (int)); - if ( n==0)return T; - else if (n>1) - { - int l; - char *buf; - - for (l=0,a=0; a< n; a++)l+=strlen (string[a]); - buf=vcalloc ( 2*n+l+1, sizeof (char)); - for (a=0; a< n; a++){buf=strcat (buf,string[a]), buf=strcat ( buf, " ");} - list[0]=string2list (buf); - vfree (buf); - } - else if ( file_exists (NULL,string[0])) - { - list=read_group (string[0]); - } - else - { - fprintf (stderr, "\nERROR: file <%s> does not exist [FATAL:%s]\n",string[0], PROGRAM); - myexit (EXIT_FAILURE); - } - - - a=0; - while (list[a]) - { - int i, b; - n=atoi (list[a][0]); - for (b=0; bnseq; b++)nlist[b]=0; - for (b=2; bname, A->nseq, MAXNAMES); - nlist[i]=1; - } - T=collapse_sub_tree ( T,A->nseq,nlist,list[a][1]); - free_char (list[a], -1); - a++; - } - vfree (list); - return T; -} - -NT_node collapse_sub_tree ( NT_node T,int nseq, int *list, char *new_name) -{ - if (!T) return T; - else - { - int a=0; - - - while (alseq2[a]){a++;} - if (a==nseq) - { - sprintf ( T->name, "%s", new_name); - T->leaf=T->isseq=1; - T->left=T->right=NULL; - return T; - } - else - { - collapse_sub_tree (T->right, nseq, list, new_name); - collapse_sub_tree (T->left, nseq, list, new_name); - return T; - } - } -} - -/*********************************************************************/ -/* */ -/* tree pruning */ -/* */ -/* */ -/*********************************************************************/ -NT_node remove_leaf ( NT_node T); -NT_node prune_root (NT_node T); -NT_node main_prune_tree ( NT_node T, Sequence *S) -{ - T=prune_tree ( T, S); - return T; -} - -NT_node prune_tree ( NT_node T, Sequence *S) -{ - - if (!T ) return T; - - if (T->leaf && T->isseq && name_is_in_list (T->name,S->name, S->nseq, 100)==-1) - { - NT_node C, P, PP; - - P=T->parent; - if ( !P) - { - int a; - for (a=0; a< S->nseq; a++) - { - HERE ("prune pb ---%s", S->name[a]); - } - exit (EXIT_FAILURE); - } - C=(P->right==T)?P->left:P->right; - PP=C->parent=P->parent; - - if (PP && PP->right==P)PP->right=C; - else if (PP)PP->left=C; - else - { - if (T==P->right)P->right=NULL; - else P->left=NULL; - T=C; - - } - } - else - { - prune_tree (T->left, S); - prune_tree (T->right, S); - } - return prune_root(T); -} - -NT_node prune_root (NT_node T) -{ - //This function prunes the root if needed (and frees it). - if (T->parent)return T; - - if (!T->right && T->left) - { - return prune_root (T->left); - } - else if (T->right && !T->left) - { - - return prune_root (T->right); - } - else - { - return T; - } -} -/*********************************************************************/ -/* */ -/* tree comparison */ -/* */ -/* */ -/*********************************************************************/ -int main_compare_cog_tree (NT_node T1, char *cogfile) -{ - char ***array; - int a, nbac, n=0, p, c, b; - Alignment *A; - - array=file2list(cogfile, ";\n"); - nbac=atoi(array[0][0])-2; - - A=declare_aln2 (nbac+1, 10); - for (a=0; aname[a], "%s", array[0][a+2]); - A->seq_al[a][0]='a'; - A->seq_al[a][1]='\0'; - - } - sprintf ( A->name[nbac], "cons"); - - A->nseq=nbac+1; - A->len_aln=1; - - - n=3; - while (array[n]!=NULL) - { - for (b=0; bseq_al[b][0]=p; - A->seq_al[b][1]=c; - A->seq_al[b][2]='\0'; - - } - sprintf (A->file[0], "%s", array[n][1]); - A->len_aln=2; - main_compare_aln_tree (T1, A, stdout); - n++; - } - return n; -} - - -int main_compare_aln_tree (NT_node T1, Alignment *A, FILE *fp) -{ - int n=0; - - fprintf ( fp, "\nTOT_CLASH COG %s N %d", A->file[0], compare_aln_tree (T1, A, &n, fp)); - vfclose (fp); - return n; -} - -int compare_aln_tree (NT_node T, Alignment *A, int *n, FILE *fp) -{ - if (T->leaf) - { - int i; - i=name_is_in_list (T->name, A->name, A->nseq, 100); - T->seqal=A->seq_al[i]; - return 0; - } - else - { - char *seq1, *seq2; - if (!(T->left )->seqal)compare_aln_tree (T->left, A,n, fp); - if (!(T->right)->seqal)compare_aln_tree (T->right, A,n, fp); - - seq1=(T->left)->seqal; - seq2=(T->right)->seqal; - (T->left)->seqal=(T->right)->seqal=NULL; - if ( seq1 && seq2) - { - if (strm (seq1, seq2)) - { - T->seqal=seq1; - - } - else - { - - if (seq1[0]!=seq2[0] && seq1[1]!=seq2[1]) - { - fprintf ( fp, "\nNODE_CLASH: COG %s (%s,%s):(",A->file[0],seq1,seq2 ); - display_leaf_below_node (T->left, fp); - fprintf ( fp, ");("); - display_leaf_below_node (T->right, fp); - fprintf ( fp, ")"); - n[0]++; - } - } - } - } - return n[0]; -} -//********************************************************************** - -int compare_split (int *s1, int *s2, int l); -int get_split_size (int *s, int l); - -int main_compare_splits ( NT_node T1, NT_node T2, char *mode,FILE *fp) -{ - Sequence *S1, *S2, *S; - int a, b; - - - - int **sl1, n1; - int **sl2, n2; - if ( tree_contains_duplicates (T1)) - { - display_tree_duplicates (T1); - printf_exit (EXIT_FAILURE, stderr, "\nFirst Tree Contains Duplicated Sequences [main_compare_trees][FATAL:%s]", PROGRAM); - - } - else if ( tree_contains_duplicates (T2)) - { - display_tree_duplicates (T2); - printf_exit (EXIT_FAILURE, stderr, "\nSecond Tree Contains Duplicated Sequences [main_compare_trees]"); - - } - - //Identify the commom Sequence Set - S1=tree2seq(T1, NULL); - - - S2=tree2seq(T2, NULL); - - - S=trim_seq ( S1, S2); - - //Prune the trees and recode the subtree list - T1=prune_tree (T1, S); - T1=recode_tree(T1, S); - - T2=prune_tree (T2, S); - T2=recode_tree(T2, S); - HERE ("1"); - sl1=declare_int (10*S->nseq, S->nseq); - sl2=declare_int (10*S->nseq, S->nseq); - - HERE ("2"); - n1=n2=0; - tree2split_list (T1, S->nseq, sl1, &n1); - tree2split_list (T2, S->nseq, sl2, &n2); - - for (a=0; anseq); - for (best=0,b=0; bnseq); - best=MAX(s,best); - } - fprintf ( fp, "\n%4d %4d ", MIN(n,(S->nseq)), best); - for (b=0; bnseq; b++)fprintf ( fp, "%d", sl1[a][b]); - } - - free_sequence (S, -1); - free_sequence (S1, -1); - free_sequence (S2, -1); - exit (EXIT_SUCCESS); - return 1; -} -int compare_split (int *s1, int *s2, int l) -{ - int n1, n2, score1, score2, a; - n1=get_split_size (s1, l); - n2=get_split_size (s2, l); - - for (score1=0,a=0; a< l; a++) - { - score1+=(s1[a]==1 && s2[a]==1)?1:0; - } - score1=(score1*200)/(n1+n2); - - for ( score2=0, a=0; a SCORE_MAX) - SCORE_MAX = score[i]; - } - for(i = 0; i < len; i++) - score[i] = (9*(score[i]-SCORE_MIN)/(SCORE_MAX-SCORE_MIN)); -} - -int new_compare_trees ( NT_node T1, NT_node T2, int nseq, Tree_sim *TS); -NT_node new_search_split (NT_node T, NT_node B, int nseq); -int new_compare_split ( int *b1, int *b2, int n); -Tree_sim* tree_scan_pos (Alignment *A, int start, int end, char *ptree, NT_node RT); -Tree_sim* tree_scan_pos_woble (Alignment *A, int center, int max, char *ptree, NT_node RT, int *br, int *bl ); -Tree_sim* tree_scan_pair_pos (Alignment *A, int start, int end, int start2, int end2,char *ptree, NT_node RT); -Tree_sim* tree_scan_multiple_pos (int *poslist, int *wlist,int nl, Alignment *A, char *ptree, NT_node RT); -NT_node aln2std_tree(Alignment *A, int ipara1, int ipara2, char *mode); - -NT_node tree_scan (Alignment *A,NT_node RT, char *pscan, char *ptree) -{ - int l, a,ax, c, cx, b; - char mode[100]; - int start, w; - int nl, *poslist; - char posfile[100]; - - char *pcFileName = A->file[0]; - char prefix[200] ={0}; - int len = (strrchr(pcFileName,'.')?strrchr(pcFileName,'.')-pcFileName:strlen(pcFileName)); - strncpy(prefix, pcFileName, len); - - float *fascore; - char out_format[100]; - - char *score_csv_file = vcalloc(200, sizeof (char)); - char *score_html_file = vcalloc(200, sizeof (char)); - char *hit_matrix_file = vcalloc(200, sizeof (char)); - char *hit_html_file = vcalloc(200, sizeof (char)); - char *tree_file = vcalloc(200, sizeof (char)); - - sprintf(score_csv_file, "%s%s", prefix, ".score_csv"); - sprintf(score_html_file, "%s%s", prefix, ".ts_html"); - sprintf(hit_matrix_file, "%s%s", prefix, ".hit_matrix"); - sprintf(hit_html_file, "%s%s", prefix, ".hit_html"); - sprintf(tree_file, "%s%s", prefix, ".trees_txt"); - - if ( pscan && strstr ( pscan, "help")) - { - fprintf ( stdout, "\n+tree_scan| _W_ : Window size for the tree computation|STD size in norscan mode"); - fprintf ( stdout, "\n+tree_scan| _MODE_ : Mode for the number of windows (single, double, list, scan, pairscan, norscan, hit, norhit)"); - fprintf ( stdout, "\n+tree_scan| _MINW_ : Minimum Window size when using the scan mode (4)"); - fprintf ( stdout, "\n+tree_scan| _OUTTREE_ : specify the format of outputing tree in every position (default: not ouput)"); - exit (EXIT_SUCCESS); - } - - strget_param (pscan, "_W_", "5", "%d",&w); - strget_param (pscan, "_MODE_", "single", "%s",mode); - strget_param (pscan, "_MINW_", "1", "%d",&start); - strget_param (pscan, "_POSFILE_", "NO", "%s", posfile); - strget_param (pscan, "_OUTTREE_", "", "%s", &out_format); - - if(strlen(out_format) > 1) - unlink(tree_file); - - l=intlen (A->len_aln); - - poslist=vcalloc ( A->len_aln, sizeof (int)); - nl=0; - fascore = vcalloc(A->len_aln, sizeof (float)); - - if ( strm (posfile, "NO")) - { - - for ( a=0; a< A->len_aln; a++)poslist[nl++]=a+1; - } - else - { - int *p; - p=file2pos_list (A,posfile); - poslist=pos2list (p, A->len_aln, &nl); - for (a=0; auw); - vfree (TS); - } - } - else if ( strm (mode, "single")) - { - for (ax=0; axA->len_aln)continue; - if (pend<1 || pend>A->len_aln)continue; - TS=tree_scan_pos (A, pstart,pend, ptree, RT); - fprintf ( stdout, "P: %*d I: %*d %*d SIM: %6.2f L: %2d\n", l,a,l,pstart,l,pend,TS->uw, (w*2)+1); - vfree (TS); - } - } - else if (strm (mode, "scan")||strm (mode, "hit")) - { - FILE *fp_ts; - fp_ts=vfopen (score_csv_file, "w"); - fprintf ( fp_ts, "Position,Win_Beg,Win_End,Similarity,Win_Len\n"); - for ( ax=0; axA->len_aln)continue; - if (pend<1 || pend>A->len_aln)continue; - TS=tree_scan_pos (A, pstart,pend, ptree, RT); - if (TS->uw>=best_score) - {best_score=TS->uw;best_w=b;best_start=pstart; best_end=pend;} - vfree (TS); - } - fprintf (fp_ts, "%*d,%*d,%*d,%6.2f,%2d\n", l,best_pos, l,best_start, l,best_end, best_score,(best_w*2)+1); - fascore[ax]=(float)best_score; - if(strlen(out_format) > 1) - vfclose (print_tree (aln2std_tree(A, best_start, best_end, mode), out_format, vfopen (tree_file, "a+"))); - if(strm (mode, "hit")) - TreeArray[ax] = aln2std_tree(A, best_start, best_end, mode); - } - vfclose(fp_ts); - } -//tree scan by using normal distribution window -//or -//generate hit matrix - else if ( strm (mode, "norscan")||strm (mode, "norhit")) - { - FILE *fp_ts; - ptree=vcalloc(100, sizeof (char)); - fp_ts=vfopen (score_csv_file, "w"); - fprintf ( fp_ts, "Position,Similarity,STD_Len\n"); - for ( ax=0; axuw>=best_score) - {best_score=TS->uw;best_STD=b;} - vfree (TS); - } - fascore[ax]=best_score; - fprintf ( fp_ts, "%*d,%6.2f,%d\n", l,a, fascore[ax], best_STD); - if(strlen(out_format) > 1) - vfclose (print_tree (aln2std_tree(A, best_STD, a, mode), out_format, vfopen (tree_file, "a+"))); - if(strm (mode, "norhit")) - TreeArray[ax] = aln2std_tree(A, best_STD, a, mode); - } - vfclose(fp_ts); - } -//generate hit matrix - if (strm (mode, "hit")||strm (mode, "norhit")) - { -//Compute the pair score of tree scan segqtion - fprintf (stdout, "[STRAT] Calculate the hit matrix of the tree scan\n"); - float **ffpHitScoreMatrix; - - ffpHitScoreMatrix=vcalloc (nl, sizeof (float*)); - int i, j; - for(i = 0; i < nl; i++) - ffpHitScoreMatrix[i]=vcalloc (nl-i, sizeof (float)); - - fprintf (stdout, "Process positions\n", i); - for(i = 0; i < nl; i++) - { - fprintf (stdout, "%d, ", i); - for(j = i; j < nl; j++) - { - Tree_sim *TS; - TS=tree_cmp (TreeArray[i], TreeArray[j]); - ffpHitScoreMatrix[i][j-i] = TS->uw; - vfree (TS); - } - } - vfree(TreeArray); - fprintf (stdout, "\n"); - output_hit_matrix(hit_matrix_file, ffpHitScoreMatrix, nl); - fprintf (stdout, "[END]Calculate the hit matrix of the tree scan\n"); - -//Output Hit Score into color html - output_hit_color_html (A, ffpHitScoreMatrix, nl, hit_html_file); - vfree(ffpHitScoreMatrix); - } - else if ( strm (mode, "pairscan")) - { - int d, set; - - for ( ax=0; axA->len_aln)continue; - if (pend<1 || pend>A->len_aln)continue; - if (p2start<1 || p2start>A->len_aln)continue; - if (p2end<1 || p2end>A->len_aln)continue; - if (pstart<=p2start && pend>=p2start) continue; - if (pstart<=p2end && pend>=p2end) continue; - TS=tree_scan_pair_pos (A, pstart,pend,p2start, p2end, ptree, RT); - - if (TS->uw>=best_score){best_score=TS->uw; best_pos=a;best_w=b;best_start=pstart; best_end=pend; best_pos2=c, best_w2=d, best_start2=p2start, best_end2=p2end;set=1;} - vfree (TS); - } - } - if (set)fprintf ( stdout, "P1: %*d I1: %*d %*d P2: %*d I2: %*d %*d SIM: %6.2f L: %2d\n", l,best_pos, l,best_start, l,best_end, l, best_pos2, l, best_start2, l, best_end2, best_score,(best_w*2)+1 ); - - set=0; - } - } - } - else if ( strm (mode, "multiplescan")) - { - int n, **wlist, best_pos; - float best_score; - Tree_sim *TS; - wlist=generate_array_int_list (nl*2,start, w,1, &n, NULL); - HERE ("Scan %d Possibilities", n); - - for (best_score=best_pos=0,a=0; auw>best_score) - { - best_score=TS->uw; - fprintf ( stdout, "\n"); - for (b=0; bS; - ST=copy_aln (A, NULL); - for (a=0; anseq; a++) - { - i=name_is_in_list (ST->name[a],S->name, S->nseq, 100); - if ( i!=-1) - { - for (b=0; blen_aln; b++) - { - r1=ST->seq_al[a][b]; - if ( r1!='-') - r1 = (int)fascore[b] + 48; - ST->seq_al[a][b]=r1; - } - } - } - output_color_html ( A, ST, score_html_file); - -//free memory - free_aln(ST); - vfree(fascore); - vfree(score_csv_file); - vfree(score_html_file); - vfree(hit_matrix_file); - vfree(hit_html_file); - - exit(EXIT_SUCCESS); -} - -NT_node aln2std_tree(Alignment *A, int ipara1, int ipara2, char *mode) -{ - Alignment *B; - NT_node T; - char *cpSet = vcalloc(100, sizeof (char)); - - if(strm (mode, "norhit")) - { - B=extract_aln (A, 1, A->len_aln); - sprintf ( cpSet, "+aln2tree _COMPARE_nordisaln__STD_%d__CENTER_%d_", ipara1, ipara2); - } - else - B=extract_aln (A, ipara1, ipara2); - - T=compute_std_tree (B, (cpSet)?1:0, (cpSet)?&cpSet:NULL); - free_aln(B); - return T; -} - -Tree_sim* tree_scan_multiple_pos (int *poslist, int *wlist,int nl, Alignment *A, char *ptree, NT_node RT) -{ - static Alignment *B; - static int *pos; - int a, b, n, s, p, left, right; - Tree_sim *TS; - NT_node T=NULL; - - - //poslist positions come [1..n] - vfree(pos); - free_aln (B); - - pos=vcalloc ( A->len_aln+1, sizeof (int)); - B=copy_aln (A, NULL); - - for (a=0; aA->len_aln) return NULL; - else pos[b]++; - - if (pos[b]>1) return NULL; - } - } - - for (s=0; snseq; s++) - { - for (n=0,a=1; a<=A->len_aln; a++) - { - if (pos[a])B->seq_al[s][n++]=A->seq_al[s][a-1]; - } - } - - B->len_aln=n; - for (s=0; snseq; s++)B->seq_al[s][B->len_aln]='\0'; - - T=compute_std_tree (B, (ptree)?1:0, (ptree)?&ptree:NULL); - - TS=tree_cmp (T, RT); - - free_tree(T); - return TS; - } - -Tree_sim* tree_scan_pair_pos (Alignment *A, int start, int end, int start2, int end2,char *ptree, NT_node RT) - { - Tree_sim *TS; - Alignment *B,*B1, *B2; - NT_node T=NULL; - int a; - - - B=copy_aln (A, NULL); - B1=extract_aln (A,start,end); - B2=extract_aln (A,start2, end2); - - - for ( a=0; a< B->nseq;a++) - sprintf (B->seq_al[a], "%s%s", B1->seq_al[a], B2->seq_al[a]); - B->len_aln=strlen (B->seq_al[0]); - - T=compute_std_tree (B, (ptree)?1:0, (ptree)?&ptree:NULL); - TS=tree_cmp (T, RT); - - free_tree(T); - free_aln (B);free_aln(B1); free_aln(B2); - - return TS; - } - -Tree_sim* tree_scan_pos (Alignment *A, int start, int end, char *ptree, NT_node RT) - { - Tree_sim *TS; - Alignment *B; - NT_node T; - - if ( start<1 || start>A->len_aln) return NULL; - if ( end<1 || end>A->len_aln) return NULL; - - B=extract_aln (A,start,end); - T=compute_std_tree (B, (ptree)?1:0, (ptree)?&ptree:NULL); - TS=tree_cmp (T, RT); - free_tree(T);free_aln (B); - return TS; - } -Tree_sim* tree_scan_pos_woble (Alignment *A, int center, int max, char *ptree, NT_node RT, int *br, int *bl ) - { - Tree_sim *TS,*BTS; - - - int left, right; - float best_score=0; - int start, end; - - br[0]=bl[0]=0; - BTS=vcalloc (1, sizeof (Tree_sim)); - - for (left=0; leftuw >best_score) - { - best_score=TS->uw; - BTS[0]=TS[0]; - br[0]=right; bl[0]=left; - vfree(TS); - } - } - return BTS; - } - -Tree_sim* tree_cmp( NT_node T1, NT_node T2) -{ - Sequence *S1, *S2, *S; - int n; - int a; - - Tree_sim *TS1, *TS2; - - if ( tree_contains_duplicates (T1)) - { - display_tree_duplicates (T1); - printf_exit (EXIT_FAILURE, stderr, "\nFirst Tree Contains Duplicated Sequences [main_compare_trees][FATAL:%s]", PROGRAM); - - } - else if ( tree_contains_duplicates (T2)) - { - display_tree_duplicates (T2); - printf_exit (EXIT_FAILURE, stderr, "\nSecond Tree Contains Duplicated Sequences [main_compare_trees]"); - - } - - //Identify the commom Sequence Set - S1=tree2seq(T1, NULL); - S2=tree2seq(T2, NULL); - - S=trim_seq ( S1, S2); - - if ( S->nseq<=2) - { - fprintf ( stderr, "\nERROR: Your two do not have enough common leaf to be compared [FATAL:PROGRAM]"); - } - - //Prune the trees and recode the subtree list - T1=prune_tree (T1, S); - T1=recode_tree(T1, S); - - T2=prune_tree (T2, S); - T2=recode_tree(T2, S); - - TS1=vcalloc (1, sizeof (Tree_sim)); - TS2=vcalloc (1, sizeof (Tree_sim)); - - new_compare_trees ( T1, T2, S->nseq, TS1); - new_compare_trees ( T2, T1, S->nseq, TS2); - - - TS1->n=tree2nnode (T1); - TS1->nseq=S->nseq; - - TS2->n=tree2nnode (T2); - /*if (TS1->n !=TS2->n) - printf_exit (EXIT_FAILURE, stderr,"\nERROR: Different number of Nodes in the two provided trees after prunning [FATAL: %s]", PROGRAM); - */ - - free_sequence (S, -1); - free_sequence (S1, -1); - free_sequence (S2, -1); - - TS1->uw=(TS1->uw+TS2->uw)*100/(TS1->max_uw+TS2->max_uw); - TS1->w=(TS1->w+TS2->w)*100/(TS1->max_w+TS2->max_w); - TS1->d=(TS1->d+TS2->d)*100/(TS1->max_d+TS2->max_d); - TS1->rf=(TS1->rf+TS2->rf)/2; - vfree (TS2); - return TS1; -} - -NT_node main_compare_trees ( NT_node T1, NT_node T2, FILE *fp) -{ - Tree_sim *T; - - T=tree_cmp (T1, T2); - fprintf ( fp, "\n#tree_cmp|T: %.f W: %.2f L: %.2f RF: %d N: %d S: %d", T->uw, T->w, T->d, T->rf, T->n, T->nseq); - fprintf ( fp, "\n#tree_cmp_def|T: ratio of identical nodes"); - fprintf ( fp, "\n#tree_cmp_def|W: ratio of identical nodes weighted with the min Nseq below node"); - fprintf ( fp, "\n#tree_cmp_def|L: average branch length similarity"); - fprintf ( fp, "\n#tree_cmp_def|RF: Robinson and Foulds"); - fprintf ( fp, "\n#tree_cmp_def|N: number of Nodes in T1 [unrooted]"); - fprintf ( fp, "\n#tree_cmp_def|S: number of Sequences in T1\n"); - - vfree (T); - return T1; -} - -int new_compare_trees ( NT_node T1, NT_node T2, int nseq, Tree_sim *TS) -{ - int n=0; - NT_node N; - float t1, t2; - - if (!T1 || !T2) return 0; - - n+=new_compare_trees (T1->left, T2, nseq,TS); - n+=new_compare_trees (T1->right, T2, nseq,TS); - - //Exclude arbitrary splits (dist==0) - if ((T1->dist==0) && !(T1->parent))return n; - - N=new_search_split (T1, T2, nseq); - t1=FABS(T1->dist); - t2=(N)?FABS(N->dist):0; - TS->max_d+=MAX(t1, t2); - - if (!N)TS->rf++; - if (T1->nseq>1) - { - int w; - w=MIN((nseq-T1->nseq),T1->nseq); - TS->max_uw++; - TS->max_w+=w; - - if (N) - { - TS->uw++; - TS->w+=w; - TS->d+=MIN(t1, t2); - //T1->dist=T1->nseq; - } - else - { - //T1->dist=T1->nseq*-1; - ; - } - } - else - { - TS->d+=MIN(t1, t2); - //T1->dist=1; - } - return ++n; -} -NT_node new_search_split (NT_node T, NT_node B, int nseq) -{ - NT_node N; - if (!T || !B) return NULL; - else if ( new_compare_split (T->lseq2, B->lseq2, nseq)==1)return B; - else if ( (N=new_search_split (T, B->right, nseq)))return N; - else return new_search_split (T, B->left, nseq); -} -int new_compare_split ( int *b1, int *b2, int n) -{ - int a, flag; - - for (flag=1, a=0; aparent && T1->nseq>1)n+=search_node ( T1, T2, nseq, mode); - - n+=compare_trees ( T1->left, T2, nseq, mode); - n+=compare_trees ( T1->right, T2, nseq, mode); - - return n; -} - -float search_node ( NT_node B, NT_node T, int nseq, int mode) -{ - int n=0; - if ( !B || !T) return -1; - if (getenv4debug("DEBUG_TREE_COMPARE"))display_node ( T, "\n\t", nseq); - - n=compare_node ( B->lseq2, T->lseq2, nseq ); - - if ( n==1) - { - if (getenv4debug("DEBUG_TREE_COMPARE"))fprintf ( stderr, "[1][%d]", (int)evaluate_node_similarity ( B, T, nseq, mode)); - if (mode==RECODE)B->dist=B->leaf; - return evaluate_node_similarity ( B, T, nseq, mode); - } - else if ( n==-1) - { - if (getenv4debug("DEBUG_TREE_COMPARE"))fprintf ( stderr, "[-1]"); - if (mode==RECODE)B->dist=-B->leaf; - return 0; - } - else - { - if (getenv4debug("DEBUG_TREE_COMPARE"))fprintf ( stderr, "[0]"); - n=search_node ( B, T->left, nseq, mode); - if ( n>0) return n; - n=search_node ( B, T->right, nseq, mode); - if ( n>0) return n; - n=search_node ( B, T->bot, nseq, mode); - if ( n>0) return n; - } - return n; -} - -float evaluate_node_similarity ( NT_node B, NT_node T, int nseq, int mode) -{ -int a, c; - - if ( mode==TOPOLOGY || mode ==RECODE) - { - for ( a=0; a< nseq; a++) - if ( B->lseq2[a]!=T->lseq2[a]) return 0; - return 1; - } - else if ( mode == WEIGHTED) - { - for (c=0, a=0; a< nseq; a++) - { - if ( B->lseq2[a]!=T->lseq2[a]) return 0; - else c+=B->lseq2[a]; - } - return (float)(MIN(c,nseq)); - } - else if ( mode == LENGTH ) - { - float d1, d2; - - for (c=0, a=0; a< nseq; a++) - { - if ( B->lseq2[a]!=T->lseq2[a]) return 0; - } - d1=FABS((B->dist-T->dist)); - d2=MAX(B->dist, T->dist); - return (d2>0)?(d1*100)/d2:0; - } - else - { - return 0; - } -} -int compare_node ( int *b1, int *b2, int nseq) -{ - int n1, n2; - - n1=compare_node1 ( b1, b2, nseq); - /*fprintf ( stderr, "[%d]", n1);*/ - if ( n1==1) return 1; - - n2=compare_node2 ( b1, b2, nseq); - /* fprintf ( stderr, "[%d]", n2);*/ - if ( n2==1)return 1; - else if ( n2==-1 && n1==-1) return -1; - else return 0; -} -int compare_node1 ( int *b1, int *b2, int n) -{ - int a; - int l1, l2; - int r=1; - for ( a=0; a< n; a++) - { - l1=b1[a]; - l2=b2[a]; - if ( l1==1 && l2==0) return -1; - if ( l1!=l2)r=0; - } - return r; -} -int compare_node2 ( int *b1, int *b2, int n) -{ - int a; - int l1, l2; - int r=1; - - for ( a=0; a< n; a++) - { - l1=1-b1[a]; - l2=b2[a]; - if ( l1==1 && l2==0) return -1; - if ( l1!=l2) r=0; - } - return r; -} -void display_node (NT_node N, char *string,int nseq) -{ - int a; - fprintf ( stderr, "%s", string); - for (a=0; a< nseq; a++)fprintf ( stderr, "%d", N->lseq2[a]); -} - - -/*********************************************************************/ -/* */ -/* FJ_tree Computation */ -/* */ -/* */ -/*********************************************************************/ - - -NT_node tree_compute ( Alignment *A, int n, char ** arg_list) -{ - if (n==0 || strm (arg_list[0], "cw")) - { - return compute_cw_tree (A); - } - else if ( strm (arg_list[0], "fj")) - { - return compute_fj_tree ( NULL, A, (n>=1)?atoi(arg_list[1]):8, (n>=2)?arg_list[2]:NULL); - } - - else if ( ( strm (arg_list[0], "nj"))) - { - return compute_std_tree (A, n, arg_list); - } - else - return compute_std_tree (A, n, arg_list); -} - -NT_node compute_std_tree (Alignment *A, int n, char **arg_list) -{ - return compute_std_tree_2 (A, NULL, list2string (arg_list, n)); -} -NT_node compute_std_tree_2 (Alignment *A, int **s, char *cl) -{ - - NT_node T, **BT=NULL; - char *tree_name; - - char matrix[100]; - char score [100]; - char compare[100]; - char tmode[100]; - int free_s=0; - - tree_name =vtmpnam (NULL); - - if (strstr (cl, "help")) - { - fprintf ( stdout, "\n+aln2tree| _MATRIX_ : matrix used for the comparison (idmat, sarmat, pam250mt..)\n"); - fprintf ( stdout, "\n+aln2tree| _SCORE_ : score mode used for the distance (sim, raw)\n"); - fprintf ( stdout, "\n+aln2tree| _COMPARE_: comparison mode (aln, ktup, align, nordisaln)\n"); - fprintf ( stdout, "\n+aln2tree| _TMODE_ : tree mode (nj, upgma)\n"); - exit (EXIT_SUCCESS); - } - - - //matrix: idmat, ktup,sarmat, sarmat2 - strget_param (cl, "_MATRIX_", "idmat", "%s",matrix); - - //score: sim, raw - strget_param (cl, "_SCORE_", "sim", "%s",score); - - //compare: aln, ktup, align - strget_param (cl, "_COMPARE_", "aln", "%s",compare); - - //compare: aln, ktup, align - strget_param (cl, "_TMODE_", "nj", "%s",tmode); - - int STD, CENTER; - if ( strm (compare, "nordisaln")) - { - strget_param (cl, "_STD_", "1", "%d", &STD); - strget_param (cl, "_CENTER_", "5", "%d", &CENTER); - } - //Use external msa2tree methods - if ( strm (tmode, "cw")) - { - free_int (s, -1); - return compute_cw_tree (A); - } - - //compute distance matrix if needed - if ( !s) - { - free_s=1; - if ( strm (compare, "ktup")) - { - ungap_array (A->seq_al, A->nseq); - s=get_sim_aln_array ( A,cl); - } - else if ( strm ( compare, "aln")) - { - if (strm (score, "sim")) - s=get_sim_aln_array(A, matrix); - else if ( strm (score, "raw")) - { - s=get_raw_sim_aln_array (A,matrix); - } - } - else if ( strm ( compare, "nordisaln")) - { - s=get_sim_aln_array_normal_distribution(A, matrix, &STD, &CENTER); - } - s=sim_array2dist_array(s, 100); - } - - //Compute the tree - if (strm (tmode, "nj")) - { - - BT=int_dist2nj_tree (s, A->name, A->nseq, tree_name); - T=main_read_tree (tree_name); - free_read_tree(BT); - } - else if (strm (tmode, "upgma")) - { - BT=int_dist2upgma_tree (s,A, A->nseq, tree_name); - T=main_read_tree (tree_name); - free_read_tree(BT); - } - - if ( strm ( cl, "dpa")) - { - s=dist_array2sim_array(s, 100); - T=code_dpa_tree (T,s); - } - - if (free_s)free_int (s, -1); - return T; -} - -NT_node similarities_file2tree (char *mat) -{ - int **s; - Alignment *A; - char *tree_name; - NT_node T; - - - - tree_name =vtmpnam (NULL); - - s=input_similarities (mat,NULL, NULL); - - - A=similarities_file2aln(mat); - s=sim_array2dist_array(s, 100); - - - int_dist2nj_tree (s, A->name, A->nseq, tree_name); - T=main_read_tree(tree_name); - free_int (s, -1); - return T; -} - -NT_node compute_cw_tree (Alignment *A) -{ - char *tmp1, *tmp2, tmp3[1000]; - char command[1000]; - - tmp1=vtmpnam (NULL); - tmp2=vtmpnam (NULL); - - sprintf ( tmp3, "%s.ph", tmp1); - output_clustal_aln (tmp1, A); - sprintf ( command, "clustalw -infile=%s -tree -newtree=%s %s ", tmp1,tmp3, TO_NULL_DEVICE); - my_system ( command); - sprintf ( command, "mv %s %s", tmp3, tmp2); - my_system ( command); - return main_read_tree(tmp2); -} - -NT_node compute_fj_tree (NT_node T, Alignment *A, int limit, char *mode) -{ - static int in_fj_tree; - if (!in_fj_tree)fprintf ( stderr, "\nComputation of an NJ tree using conserved positions\n"); - - in_fj_tree++; - if (T && T->leaf<=2); - else - { - T=aln2fj_tree(T,A,limit, mode); - T->right=compute_fj_tree ( T->right, A, limit, mode); - T->left=compute_fj_tree ( T->left, A, limit, mode); - } - in_fj_tree--; - return T; -} - - -NT_node aln2fj_tree(NT_node T, Alignment *A, int limit_in, char *mode) -{ - NT_node NT; - Sequence *S=NULL; - Alignment *subA=NULL; - int fraction_gap; - int l, limit; - - if (T) - S=tree2seq (T,NULL); - else - S=aln2seq (A); - - - l=0; - for ( fraction_gap=100; fraction_gap<=100 && l<1; fraction_gap+=10) - for ( limit=limit_in; limit>0 && l<1; limit--) - { - fprintf ( stderr, "\n%d %d", limit, fraction_gap); - free_aln (subA); - subA=extract_sub_aln2 (A,S->nseq,S->name); - subA=filter_aln4tree (subA,limit,fraction_gap, mode); - l=subA->len_aln; - } - - /* while ( subA->len_aln<1) - { - subA=extract_sub_aln2 (A,S->nseq,S->name); - subA=filter_aln4tree (subA,limit,fraction_gap,mode); - free_aln (subA); - subA=extract_sub_aln2 (A,S->nseq,S->name); - subA=filter_aln4tree (subA,--limit,fraction_gap, mode); - } - */ - NT=aln2tree (subA); - NT=tree2fj_tree (NT); - - NT=realloc_tree (NT,A->nseq); - fprintf ( stderr, "Limit:%d Gap: %d Columns: %4d Left: %4d Right %4d BL:%4.2f\n",limit,fraction_gap, subA->len_aln, (NT->right)->leaf,(NT->left)->leaf, (NT->left)->dist+(NT->right)->dist); - - if ( T) - { - NT->dist=T->dist; - NT->parent=T->parent; - } - free_tree(T); - free_aln (subA); - free_sequence (S, -1); - return NT; -} - -Alignment * filter_aln4tree (Alignment *A, int n,int fraction_gap,char *mode) -{ - char *aln_file; - char *ungaped_aln_file; - char *scored_aln_file; - char *filtered_aln_file; - char command[1000]; - - - aln_file=vtmpnam(NULL); - ungaped_aln_file=vtmpnam (NULL); - scored_aln_file=vtmpnam (NULL); - scored_aln_file=vtmpnam(NULL); - filtered_aln_file=vtmpnam(NULL); - - - - output_clustal_aln (aln_file, A); - /* 1: remove columns with too many gaps*/ - sprintf ( command, "t_coffee -other_pg seq_reformat -in %s -action +rm_gap %d -output clustalw > %s", aln_file,fraction_gap, ungaped_aln_file); - my_system ( command); - /* 2: evaluate the alignment*/ - - sprintf ( command, "t_coffee -other_pg seq_reformat -in %s -action +evaluate %s -output clustalw > %s", ungaped_aln_file,(mode)?mode:"categories", scored_aln_file); - my_system ( command); - - /*3 extract the high scoring columns*/ - sprintf ( command, "t_coffee -other_pg seq_reformat -in %s -struc_in %s -struc_in_f number_aln -action +use_cons +keep '[%d-8]' +rm_gap -output clustalw > %s", ungaped_aln_file, scored_aln_file,n, filtered_aln_file); - my_system ( command); - - free_aln (A); - - A=main_read_aln ( filtered_aln_file, NULL); - print_aln (A); - - return A; -} - -NT_node tree2fj_tree (NT_node T) -{ - NT_node L; - - return T; - - L=find_longest_branch (T, NULL); - T=reroot_tree (T, L); - return T; -} - - -/*********************************************************************/ -/* */ -/* Tree Filters and MAnipulation */ -/* */ -/* */ -/*********************************************************************/ -int tree2star_nodes (NT_node R, int n_max) -{ - if ( !R) return 0; - else if (!R->left && !R->right) - { - if (n_max>=1)R->dist=0; - return 1; - } - else - { - int n=0; - - n+=tree2star_nodes (R->right, n_max); - n+=tree2star_nodes (R->left, n_max); - - if (ndist=0; - return n; - } -} - -NT_node aln2tree (Alignment *A) -{ - NT_node **T=NULL; - - - T=make_nj_tree (A, NULL, 0, 0, A->seq_al, A->name, A->nseq, NULL, NULL); - tree2nleaf (T[3][0]); - - return T[3][0]; -} -NT_node realloc_tree ( NT_node R, int n) -{ - if ( !R)return R; - if ( !R->leaf) - { - R->right=realloc_tree (R->right,n); - R->left=realloc_tree (R->left,n); - R->bot=realloc_tree (R->bot,n); - } - R->lseq=vrealloc (R->lseq, n*sizeof (int)); - R->lseq2=vrealloc (R->lseq2, n*sizeof (int)); - return R; -} - -NT_node reset_boot_tree ( NT_node R, int n) -{ - if ( !R)return R; - if ( !R->leaf) - { - - R->right=reset_boot_tree (R->right,n); - R->left=reset_boot_tree (R->left,n); - R->bot=reset_boot_tree (R->bot,n); - } - R->bootstrap=(float)n; - - return R; -} -NT_node tree_dist2normalized_tree_dist ( NT_node R, float max) -{ - if (!R)return R; - else - { - tree_dist2normalized_tree_dist ( R->right, max); - tree_dist2normalized_tree_dist ( R->left, max); - R->bootstrap=(int)((R->dist*100)/max); - } - return R; -} -NT_node reset_dist_tree ( NT_node R, float n) -{ - if ( !R)return R; - if ( !R->leaf) - { - - R->right=reset_dist_tree (R->right,n); - R->left=reset_dist_tree (R->left,n); - R->bot=reset_dist_tree (R->bot,n); - } - if (R->parent && !(R->parent)->parent && !(R->parent)->bot)R->dist=n/2; - else R->dist=n; - - return R; -} - - -NT_node* free_treelist (NT_node *L) -{ - int n=0; - while (L[n])free_tree (L[n++]); - vfree (L); - return NULL; -} -NT_node free_tree ( NT_node R) -{ - if ( !R)return R; - - - - if ( R->leaf!=1) - { - R->right=free_tree (R->right); - R->left=free_tree (R->left); - R->bot=free_tree (R->bot); - } - - free_tree_node (R); - return R; -} - -NT_node free_tree_node ( NT_node R) -{ - if (!R)return NULL; - - vfree (R->seqal); - vfree (R->idist); - vfree (R->ldist); - vfree (R->file); - vfree ( R->name); - vfree ( R->lseq); vfree ( R->lseq2); - vfree (R); - return NULL; -} - -NT_node rename_seq_in_tree ( NT_node R, char ***list) -{ - if ( !R || !list) return R; - - if ( R->leaf!=1) - { - R->right=rename_seq_in_tree (R->right, list); - R->left=rename_seq_in_tree (R->left, list); - R->bot=rename_seq_in_tree (R->bot, list); - } - else - { - int n=0; - while ( list[n][0][0]) - { - if ( strm (list[n][0], R->name))sprintf (R->name, "%s",list[n][1]); - n++; - } - } - return R; -} -Sequence * tree2seq (NT_node R, Sequence *S) -{ - - if ( !R)return S; - if ( !S) - { - S=declare_sequence (10, 10, tree2nseq (R)); - S->nseq=0; - } - - if (R->leaf==1) - { - sprintf ( S->name[S->nseq++], "%s", R->name); - } - else - { - S=tree2seq (R->left, S); - S=tree2seq (R->right, S); - } - return S; -} - -NT_node balance_tree (NT_node T) -{ - static int **list; - NT_node NL[3]; - - if ( !T) return T; - else if ( T->leaf<=2)return T; - else - { - if (!list)list=declare_int (3, 2); - - NL[0]=T->left; - NL[1]=T->right; - NL[2]=T->bot; - - list[0][0]=(T->left)?(T->left)->leaf:0; - list[0][1]=0; - list[1][0]=(T->right)?(T->right)->leaf:0; - list[1][1]=1; - list[2][0]=(T->bot)?(T->bot)->leaf:0; - list[2][1]=2; - - sort_int (list,2,0,0,2); - - T->left=NL[list[2][1]]; - T->right=NL[list[1][1]]; - T->bot=NL[list[0][1]]; - - T->left=balance_tree (T->left); - T->right=balance_tree (T->right); - T->bot=balance_tree (T->bot); - return T; - } -} -FILE * display_tree (NT_node R, int nseq, FILE *fp) -{ - int a; - - if ( !R); - else - { - /* - if ( R->nseq==1)fprintf (stderr,"\n[%s] ", R->name); - else fprintf ( stderr, "\n[%d Node] ",R->nseq); - for ( a=0; a< R->nseq; a++) fprintf ( stderr, "[%d]", R->lseq[a]); - */ - fprintf (fp, "\n %10s N ", R->name); - for ( a=0; a< nseq; a++)fprintf (fp, "%d", R->lseq2[a]); - fprintf (fp, "\n %10s D ", R->name); - for ( a=0; a< nseq; a++)fprintf (fp, "%d", R->idist[a]); - - - if (R->leaf==1) fprintf (fp, " %s", R->name); - fprintf (fp, " :%.4f", R->dist); - HERE ("\nGo Left");fp=display_tree (R->left, nseq, fp); - HERE ("\nGo Right");fp=display_tree (R->right, nseq, fp); - HERE ("\nGo Bot");fp=display_tree (R->bot, nseq, fp); - } - return fp; -} -int tree2nnode_unresolved (NT_node R, int *l) -{ - if ( !R)return 0; - else if (R->leaf && R->dist==0){return 1;} - else - { - int n=0; - n+=tree2nnode_unresolved (R->right, l); - n+=tree2nnode_unresolved (R->left, l); - if (R->dist==0) - { - return n; - } - else - { - if (n)l[n]++; - return 0; - } - } - -} - -int tree2nnode ( NT_node R) -{ - int n; - if ( !R)n=0; - else if ( R->leaf==1){R->node=1;n=1;} - else - { - n=1; - n+=tree2nnode (R->right); - n+=tree2nnode (R->left); - n+=tree2nnode (R->bot); - R->node=n; - } - return n; -} -int tree2nleaf (NT_node R) -{ - if ( !R)return 0; - else if (R->leaf==1){return 1;} - else if (R->right==NULL && R->left==NULL && R->bot==NULL){R->leaf=1; return 1;} - else - { - int n=0; - n+=tree2nleaf (R->right); - n+=tree2nleaf (R->left); - n+=tree2nleaf (R->bot); - - R->leaf=n; - return n; - } -} - -int tree2nseq ( NT_node R) -{ - return tree2nleaf(R); -} - -int tree_file2nseq (char *fname) -{ - FILE *fp; - char *string; - int p, a, b, c, n; - - string=vcalloc (count_n_char_in_file(fname)+1, sizeof (char)); - - fp=vfopen (fname, "r"); - n=0; - while ( (c=fgetc(fp))!=EOF){if (c=='(' || c==')' || c==',' || c==';') string[n++]=c;} - vfclose (fp);string[n]='\0'; - - for (n=0, p=1; pvisited=0; - - if ( R->leaf==1); - else - { - clear_tree ( R->right); - clear_tree ( R->left); - clear_tree ( R->bot); - } -} -int display_leaf_below_node (NT_node T, FILE *fp) -{ - int n=0; - if ( !T)return 0; - - if ( T->leaf==1) - { - fprintf (fp, " %s", T->name); - return 1; - } - else - { - n+=display_leaf_below_node ( T->right, fp); - n+=display_leaf_below_node ( T->left, fp); - return n; - } -} -int display_leaf ( NT_node T, FILE *fp) -{ - int n=0; - if ( !T)return 0; - else if ( T->visited)return 0; - else T->visited=1; - - if ( T->leaf==1) - { - fprintf (fp, " %s", T->name); - return 1; - } - else - { - n+=display_leaf ( T->right, fp); - n+=display_leaf ( T->left, fp); - n+=display_leaf ( T->bot, fp); - return n; - } -} - - - - -NT_node find_longest_branch ( NT_node T, NT_node L) - { - - if ( !L || T->dist>L->dist) - { - - L=T; - } - - if ( T->leaf==1)return L; - else - { - L=find_longest_branch ( T->right, L); - L=find_longest_branch ( T->left, L); - return L; - } - } -int node2side (NT_node N); -int test_print (NT_node T); -NT_node straighten_node (NT_node N); -NT_node EMPTY; -NT_node Previous; -NT_node reroot_tree ( NT_node TREE, NT_node Right) -{ - /*ReRoots the tree between Node R and its parent*/ - NT_node NR; - int n1, n2; - - if (!EMPTY)EMPTY=vcalloc (1, sizeof (NT_node)); - if ( !Right->parent)return Right; - - TREE=unroot_tree (TREE); - if (Right->parent==NULL && Right->bot) - Right=Right->bot; - - n1=tree2nleaf (TREE); - - NR=declare_tree_node(TREE->maxnseq); - - NR->right=Right; - NR->left=Right->parent; - Right->parent=NR; - - Right->dist=Right->dist/2; - - if ((NR->left)->right==Right)(NR->left)->right=EMPTY; - else if ( (NR->left)->left==Right) (NR->left)->left=EMPTY; - - Previous=NULL; - - - NR->left=straighten_node (NR->left); - - - - (NR->left)->parent=NR; - (NR->left)->dist=Right->dist; - - - - n2=tree2nleaf(NR); - - if ( n1!=n2){fprintf ( stderr, "\n%d %d", n1, n2);myexit (EXIT_FAILURE);} - return NR; -} - -NT_node straighten_node ( NT_node N) -{ - NT_node Child; - - - if ( N->parent) - { - if (N->right==EMPTY)N->right=N->parent; - else if ( N->left==EMPTY) N->left=N->parent; - - Child=N->parent; - if (Child->right==N) - { - Child->right=EMPTY; - } - else if (Child->left==N) - { - Child->left=EMPTY; - } - - Previous=N; - Child=straighten_node (Child); - Child->parent=N; - Child->dist=N->dist; - return N; - } - else if ( N->bot && N->bot!=Previous) - { - if ( N->right==EMPTY)N->right=N->bot; - else if ( N->left==EMPTY)N->left=N->bot; - - N->bot=NULL; - return N; - } - else - { - N->bot=NULL; - return N; - } -} -int test_print (NT_node T) -{ - if ( !T) - { - fprintf ( stderr, "\nEMPTY"); - } - else if ( !T->left && !T->right) - { - fprintf ( stderr, "\n%s",T->name); - } - else - { - fprintf ( stderr, "\nGoing Right"); - test_print (T->right); - fprintf ( stderr, "\nGoing Left"); - test_print (T->left); - } - return 1; -} -int node2side (NT_node C) -{ - if ( !C->parent) return UNKNOWN; - else if ( (C->parent)->left==C)return LEFT; - else if ( (C->parent)->right==C)return RIGHT; - else return UNKNOWN; -} -NT_node straighten_tree ( NT_node P, NT_node C, float new_dist) -{ - float dist; - - if ( C==NULL)return NULL; - - - dist=C->dist; - C->dist=new_dist; - C->bot=NULL; - - if (C->left && C->right) - { - C->parent=P; - } - else if (!C->left) - { - C->left=C->parent; - C->parent=P; - } - - if ( C->parent==P); - else if ( C->left==NULL && C->right==NULL) - { - C->parent=P; - } - else if ( C->right==P) - { - C->right=C->parent; - C->parent=P; - - C=straighten_tree(C, C->right, dist); - } - else if ( C->left==P) - { - C->left=C->parent; - C->parent=P; - C=straighten_tree (C, C->left, dist); - } - else if ( C->parent==NULL) - { - C->parent=P; - } - - return C; -} - - -NT_node unroot_tree ( NT_node T) -{ - - if (!T || T->visited) return T; - else T->visited=1; - - if (T->parent==NULL) - { - - (T->right)->dist=(T->left)->dist=(T->right)->dist+(T->left)->dist; - (T->right)->parent=T->left; - (T->left)->parent=T->right; - T=T->left; - T->leaf=0; - vfree (T->parent); - } - else - { - T->parent=unroot_tree (T->parent); - T->right=unroot_tree (T->right); - T->left=unroot_tree (T->left); - } - T->visited=0; - return T; -} - -FILE * print_tree_list ( NT_node *T, char *format,FILE *fp) -{ - int a=0; - while ( T[a]) - { - fp=print_tree (T[a], format, fp); - a++; - } - return fp; -} -char * tree2string (NT_node T) -{ - if (!T) return NULL; - else - { - static char *f; - FILE *fp; - - if (!f)f=vtmpnam (NULL); - fp=vfopen (f, "w"); - print_tree (T, "newick", fp); - vfclose (fp); - return file2string (f); - } -} -char * tree2file (NT_node T, char *name, char *mode) -{ - if (!name)name=vtmpnam (NULL); - string2file (tree2string (T), name, mode); - return name; -} -FILE * print_tree ( NT_node T, char *format,FILE *fp) -{ - Sequence *S; - - tree2nleaf(T); - S=tree2seq(T, NULL); - - recode_tree (T, S); - - free_sequence (S, -1); - if ( format && strm (format, "binary")) - fp=display_tree ( T,S->nseq, fp); - else if ( ! format || strm2 (format, "newick_tree","newick")) - { - /*T=balance_tree (T);*/ - fp=rec_print_tree (T, fp); - fprintf ( fp, ";\n"); - } - else - { - fprintf ( stderr, "\nERROR: %s is an unknown tree format [FATAL:%s]\n", format, PROGRAM); - myexit (EXIT_FAILURE); - } - return fp; -} -int print_newick_tree ( NT_node T, char *name) -{ - FILE *fp; - fp=vfopen (name, "w"); - fp=rec_print_tree (T,fp); - fprintf (fp, ";\n"); - vfclose (fp); - return 1; -} -FILE * rec_print_tree ( NT_node T, FILE *fp) -{ - - - - if (!T)return fp; - - if ( T->isseq) - { - fprintf ( fp, " %s:%.5f",T->name, T->dist); - } - else - { - if (T->left && T->right) - { - fprintf ( fp, "(");fp=rec_print_tree ( T->left, fp); - fprintf ( fp, ",");fp=rec_print_tree ( T->right, fp); - fprintf ( fp, ")"); - if (T->parent || T->dist) - { - if ( T->bootstrap!=0)fprintf (fp, " %d", (int)T->bootstrap); - fprintf (fp, ":%.5f", T->dist); - } - } - else if (T->left)fp=rec_print_tree (T->left, fp); - else if (T->right)fp=rec_print_tree(T->right, fp); - } - - return fp; -} - - - - - -/*********************************************************************/ -/* */ -/* Tree Functions */ -/* */ -/* */ -/*********************************************************************/ - -int ** make_sub_tree_list ( NT_node **T, int nseq, int n_node) - { - - -/*This function produces a list of all the sub trees*/ - - -/* /A */ -/* -* */ -/* \ /B */ -/* \ / */ -/* ---* */ -/* \ */ -/* *--C */ -/* \ */ -/* \D */ - -/* Contains 4 i_nodes */ -/* 8 nodes (internal nodes +leaves) */ -/* 8 sub trees: */ -/* ABCD */ -/* 1111 */ -/* 0111 */ -/* 1000 */ -/* 0100 */ -/* 0011 */ -/* 0001 */ -/* 0010 */ - - int **sub_tree_list; - int a, n=0; - - - if (T) - { - sub_tree_list=declare_int ( (n_node), nseq); - make_all_sub_tree_list (T[3][0],sub_tree_list, &n); - - } - else - { - sub_tree_list=declare_int (nseq, nseq); - for ( a=0; a< nseq; a++)sub_tree_list[a][a]=1; - } - - return sub_tree_list; - } - -void make_all_sub_tree_list ( NT_node N, int **list, int *n) - { - make_one_sub_tree_list (N, list[n[0]++]); - if (N->leaf!=1) - { - make_all_sub_tree_list (N->left , list, n); - make_all_sub_tree_list (N->right, list, n); - } - return; - } - -void make_one_sub_tree_list ( NT_node T,int *list) - { - if (T->leaf==1) - { - - list[T->seq]=1; - } - else - { - make_one_sub_tree_list(T->left , list); - make_one_sub_tree_list(T->right, list); - } - return; - } - - -NT_node old_main_read_tree(char *treefile) -{ - /*Reads a tree w/o needing the sequence file*/ - NT_node **T; - T=simple_read_tree (treefile); - return T[3][0]; -} - - - -NT_node** simple_read_tree(char *treefile) -{ - int tot_node=0; - NT_node **T; - T=read_tree ( treefile, &tot_node,tree_file2nseq (treefile),NULL); - return T; -} - -void free_read_tree ( NT_node **BT) -{ - int a, s; - - - if (!BT) return; - - for (s=0,a=0; a<3; a++) - { - vfree (BT[a]); - } - free_tree (BT[3][0]); - vfree (BT); - return; -} - -NT_node** read_tree(char *treefile, int *tot_node,int nseq, char **seq_names) - { - - /*The Tree Root is in the TREE[3][0]...*/ - /*TREE[0][ntot]--> pointer to each node and leave*/ - char ch; - int a,b; - - FILE *fp; - int nseq_read = 0; - int nnodes = 0;/*Number of Internal Nodes*/ - int ntotal = 0;/*Number of Internal Nodes + Number of Leaves*/ - int flag; - int c_seq; - NT_node **lu_ptr; - NT_node seq_tree, root,p; - - - tot_nseq=nseq; - rooted_tree=distance_tree=TRUE; - - fp = vfopen(treefile, "r"); - fp=skip_space(fp); - ch = (char)getc(fp); - if (ch != '(') - { - fprintf(stderr, "Error: Wrong format in tree file %s\n", treefile); - myexit (EXIT_FAILURE); - } - rewind(fp); - - - lu_ptr=(NT_node **)vcalloc(4,sizeof(NT_node*)); - lu_ptr[0] = (NT_node *)vcalloc(10*nseq,sizeof(NT_node)); - lu_ptr[1] = (NT_node *)vcalloc(10*nseq,sizeof(NT_node)); - lu_ptr[2] = (NT_node *)vcalloc(10*nseq,sizeof(NT_node)); - lu_ptr[3] =(NT_node *) vcalloc(1,sizeof(NT_node)); - - seq_tree =(NT_node) declare_tree_node(nseq); - - set_info(seq_tree, NULL, 0, " ", 0.0, 0); - - - fp=create_tree(seq_tree,NULL,&nseq_read, &ntotal, &nnodes, lu_ptr, fp); - fclose (fp); - - - if (nseq != tot_nseq) - { - fprintf(stderr," Error: tree not compatible with alignment (%d sequences in alignment and %d in tree\n", nseq,nseq_read); - myexit (EXIT_FAILURE); - } - - if (distance_tree == FALSE) - { - if (rooted_tree == FALSE) - { - fprintf(stderr,"Error: input tree is unrooted and has no distances, cannot align sequences\n"); - myexit (EXIT_FAILURE); - } - } - - if (rooted_tree == FALSE) - { - root = reroot(seq_tree, nseq,ntotal,nnodes, lu_ptr); - lu_ptr[1][nnodes++]=lu_ptr[0][ntotal++]=root; - - } - else - { - root = seq_tree; - } - - lu_ptr[3][0]=root; - tot_node[0]=nnodes; - - - - for ( a=0; a< ntotal; a++) - { - (lu_ptr[0][a])->isseq=(lu_ptr[0][a])->leaf; - (lu_ptr[0][a])->dp=(lu_ptr[0][a])->dist; - } - - - for ( a=0; a< nseq; a++) - { - if (!seq_names) - { - flag=1; - (lu_ptr[2][a])->order=(lu_ptr[2][a])->seq=a; - } - else - { - for ( flag=0,b=0; bname, seq_names[b], MAXNAMES)==0) - { - flag=1; - - (lu_ptr[2][a])->order=(lu_ptr[2][a])->seq=b; - /*vfree ( (lu_ptr[2][a])->name);*/ - sprintf ((lu_ptr[2][a])->name, "%s", seq_names[b]); - } - } - } - /* - if ( flag==0 && (lu_ptr[0][a])->leaf==1) - { - fprintf ( stderr, "\n%s* not in tree",(lu_ptr[2][a])->name); - for ( a=0; a< ntotal; a++) - { - fprintf ( stderr, "\n%d %s",(lu_ptr[2][a])->leaf, (lu_ptr[2][a])->name); - } - } - */ - } - - if (seq_names) - { - int tnseq; - char *s; - char **tree_names; - int fail_flag=0; - tnseq=tree_file2nseq(treefile); - tree_names=vcalloc ( tnseq, sizeof (char*)); - for (a=0; aname; - tree_names[a]=s; - if ( name_is_in_list(s, seq_names, nseq, MAXNAMES+1)==-1) - { - fprintf (stderr, "\nERROR: Sequence %s in the tree [%s] is not in the alignment[FATAL:%s]\n", s, treefile, PROGRAM); - fail_flag=1; - } - } - for (a=0; aseq; - - while ( p!=NULL) - { - p->lseq[p->nseq]=c_seq; - p->nseq++; - p=p->parent; - } - } - - - return lu_ptr; - } - -FILE * create_linear_tree ( char **name, int n, FILE *fp) -{ - - if (!name || n==0 ||!fp) return NULL; - - - if (n==2) - fprintf ( fp, "(%s,%s);",name[0],name[1]); - else if ( n==3) - fprintf ( fp, "((%s,%s),%s);",name[0],name[1], name[2]); - else - { - int a; - for (a=0; aleft, ptree, nseq,ntotal,nnodes,lu,fp); - ch = (char)getc(fp); - if ( ch == ',') - { - fp=create_tree(ptree->right, ptree,nseq,ntotal,nnodes,lu,fp); - ch = (char)getc(fp); - if ( ch == ',') - { - - ptree = insert_tree_node(ptree); - lu[0][ntotal[0]] = lu[1][nnodes[0]] = ptree; - ntotal[0]++; - nnodes[0]++; - fp=create_tree(ptree->right, ptree,nseq,ntotal,nnodes,lu,fp); - rooted_tree = FALSE; - if ( getenv4debug ( "DEBUG_TREE")){fprintf ( stderr, "\n[DEBUG_TREE:create_tree] Unrooted Tree");} - } - } - - fp=skip_space(fp); - ch = (char)getc(fp); - } - else - { - type=LEAF; - lu[0][ntotal[0]] = lu[2][nseq[0]] = ptree; - ntotal[0]++; - nseq[0]++; - name[0] = ch; - i=1; - ch = (char)getc(fp); - if ( name[0]=='\'') - { - /*This protects names that are between single quotes*/ - while ( ch!='\'') - { - if (i < MAXNAMES) name[i++] = ch; - ch = (char)getc(fp); - } - if (i < MAXNAMES) name[i++] = ch; - while ((ch != ':') && (ch != ',') && (ch != ')'))ch = (char)getc(fp); - } - else - { - while ((ch != ':') && (ch != ',') && (ch != ')')) - { - if (i < MAXNAMES) name[i++] = ch; - ch = (char)getc(fp); - } - } - - name[i] = '\0'; - - if ( i>=(MAXNAMES+1)){fprintf (stderr, "\nName is too long");myexit (EXIT_FAILURE);} - if (ch != ':' && !isdigit(ch)) - { - /*distance_tree = FALSE*/; - } - } - if (ch == ':') - { - fp=skip_space(fp); - fscanf(fp,"%f",&dist); - fp=skip_space(fp); - bootstrap=0; - } - /*Tree with Bootstrap information*/ - else if (isdigit (ch)) - { - ungetc(ch,fp); - fscanf(fp,"%f",&bootstrap); - if ( fscanf(fp,":%f",&dist)==1); - else dist=0; - fp=skip_space(fp); - } - else - { - ungetc ( ch, fp); - skip_space(fp); - } - - set_info(ptree, parent, type, name, dist, bootstrap); - - - vfree (name); - return fp; - } - -NT_node declare_tree_node (int nseq) - { - NT_node p; - - p= (NT_node)vcalloc (1, sizeof ( Treenode)); - p->left = NULL; - p->right = NULL; - p->parent = NULL; - p->dist = 0.0; - p->leaf = 0; - p->order = 0; - p->maxnseq=nseq; - p->name=(char*)vcalloc (MAXNAMES+1,sizeof (char)); - p->name[0]='\0'; - p->lseq=(int*)vcalloc ( nseq, sizeof (int)); - return p; - - } - -void set_info(NT_node p, NT_node parent, int pleaf, char *pname, float pdist, float bootstrap) - { - p->parent = parent; - p->leaf = pleaf; - p->dist = pdist; - p->bootstrap=bootstrap; - p->order = 0; - - - sprintf (p->name, "%s", pname); - - if (pleaf ==1) - { - p->left = NULL; - p->right = NULL; - } - - } -NT_node insert_tree_node(NT_node pptr) - { - - NT_node newnode; - - newnode = declare_tree_node( pptr->maxnseq); - create_tree_node(newnode, pptr->parent); - - newnode->left = pptr; - pptr->parent = newnode; - - set_info(newnode, pptr->parent, 0, "", 0.0, 0); - - return(newnode); - } - -void create_tree_node(NT_node pptr, NT_node parent) - { - pptr->parent = parent; - pptr->left =declare_tree_node(pptr->maxnseq) ; - (pptr->left)->parent=pptr; - - pptr->right =declare_tree_node(pptr->maxnseq) ; - (pptr->right)->parent=pptr; - } - -FILE * skip_space(FILE *fp) - { - int c; - - do - c = getc(fp); - while(isspace(c)); - if ( c==EOF) - { - fprintf ( stderr, "\nEOF"); - myexit (EXIT_FAILURE); - } - ungetc(c, fp); - return fp; - } - - -NT_node reroot(NT_node ptree, int nseq, int ntotal, int nnodes, NT_node **lu) - { - NT_node p, rootnode, rootptr; - float diff, mindiff=0, mindepth = 1.0, maxdist; - int i; - int first = TRUE; - - - - rootptr = ptree; - - for (i=0; iparent == NULL) - diff = calc_root_mean(p, &maxdist, nseq, lu); - else - diff = calc_mean(p, &maxdist, nseq, lu); - - if ((diff == 0) || ((diff > 0) && (diff < 2 * p->dist))) - { - if ((maxdist < mindepth) || (first == TRUE)) - { - first = FALSE; - rootptr = p; - mindepth = maxdist; - mindiff = diff; - } - } - - } - if (rootptr == ptree) - { - mindiff = rootptr->left->dist + rootptr->right->dist; - rootptr = rootptr->right; - } - - rootnode = insert_root(rootptr, mindiff); - diff = calc_root_mean(rootnode, &maxdist, nseq, lu); - return(rootnode); - } - - -float calc_root_mean(NT_node root, float *maxdist, int nseq, NT_node **lu) - { - float dist , lsum = 0.0, rsum = 0.0, lmean,rmean,diff; - NT_node p; - int i; - int nl, nr; - int direction; - - - dist = (*maxdist) = 0; - nl = nr = 0; - for (i=0; i< nseq; i++) - { - p = lu[2][i]; - dist = 0.0; - while (p->parent != root) - { - dist += p->dist; - p = p->parent; - } - if (p == root->left) direction = LEFT; - else direction = RIGHT; - dist += p->dist; - - if (direction == LEFT) - { - lsum += dist; - nl++; - } - else - { - rsum += dist; - nr++; - } - - if (dist > (*maxdist)) *maxdist = dist; - } - - lmean = lsum / nl; - rmean = rsum / nr; - - diff = lmean - rmean; - return(diff); - } - -float calc_mean(NT_node nptr, float *maxdist, int nseq,NT_node **lu) - { - float dist , lsum = 0.0, rsum = 0.0, lmean,rmean,diff; - NT_node p, *path2root; - float *dist2node; - int depth = 0, i,j , n; - int nl , nr; - int direction, found; - - - path2root = (NT_node *)vcalloc(nseq,sizeof(Treenode)); - dist2node = (float *)vcalloc(nseq,sizeof(float)); - - depth = (*maxdist) = dist = 0; - nl = nr = 0; - p = nptr; - while (p != NULL) - { - path2root[depth] = p; - dist += p->dist; - dist2node[depth] = dist; - p = p->parent; - depth++; - } - -/*************************************************************************** - *nl = *nr = 0; - for each leaf, determine whether the leaf is left or right of the node. - (RIGHT = descendant, LEFT = not descendant) -****************************************************************************/ - for (i=0; i< nseq; i++) - { - p = lu[2][i]; - if (p == nptr) - { - direction = RIGHT; - dist = 0.0; - } - else - { - direction = LEFT; - dist = 0.0; - - found = FALSE; - n = 0; - while ((found == FALSE) && (p->parent != NULL)) - { - for (j=0; j< depth; j++) - if (p->parent == path2root[j]) - { - found = TRUE; - n = j; - } - dist += p->dist; - p = p->parent; - } - - if (p == nptr) direction = RIGHT; - - } - if (direction == LEFT) - { - lsum += dist; - lsum += dist2node[n-1]; - nl++; - } - else - { - rsum += dist; - nr++; - } - - if (dist > (*maxdist)) *maxdist = dist; - } - - vfree(dist2node); - vfree(path2root); - - - - if ( nl==0 || nr==0) - { - myexit (EXIT_FAILURE); - } - lmean = lsum / nl; - rmean = rsum / nr; - - diff = lmean - rmean; - return(diff); -} - -NT_node insert_root(NT_node p, float diff) -{ - NT_node newp, prev, q, t; - float dist, prevdist,td; - - - newp = declare_tree_node( p->maxnseq); - t = p->parent; - - - prevdist = t->dist; - p->parent = newp; - - dist = p->dist; - - p->dist = diff / 2; - if (p->dist < 0.0) p->dist = 0.0; - if (p->dist > dist) p->dist = dist; - - t->dist = dist - p->dist; - - newp->left = t; - newp->right = p; - newp->parent = NULL; - newp->dist = 0.0; - newp->leaf = NODE; - - if (t->left == p) t->left = t->parent; - else t->right = t->parent; - - prev = t; - q = t->parent; - - t->parent = newp; - - while (q != NULL) - { - if (q->left == prev) - { - q->left = q->parent; - q->parent = prev; - td = q->dist; - q->dist = prevdist; - prevdist = td; - prev = q; - q = q->left; - } - else - { - q->right = q->parent; - q->parent = prev; - td = q->dist; - q->dist = prevdist; - prevdist = td; - prev = q; - q = q->right; - } - } - -/* - remove the old root node -*/ - q = prev; - if (q->left == NULL) - { - dist = q->dist; - q = q->right; - q->dist += dist; - q->parent = prev->parent; - if (prev->parent->left == prev) - prev->parent->left = q; - else - prev->parent->right = q; - prev->right = NULL; - } - else - { - dist = q->dist; - q = q->left; - q->dist += dist; - q->parent = prev->parent; - if (prev->parent->left == prev) - prev->parent->left = q; - else - prev->parent->right = q; - prev->left = NULL; - } - - return(newp); -} - - - - -/*********************************************************************/ -/* */ -/* TrimTC3 */ -/* */ -/* */ -/*********************************************************************/ - -int *aln2seq_chain (Alignment *A, int **sim,int seq1, int seq2, int limit, int max_chain); - -Alignment *seq2seq_chain (Alignment *A,Alignment*T, char *arg) -{ - int **sim=NULL; - int *buf=NULL, *seq2keep, *list, *tname; - int a, b, c, nl; - int sim_limit; - int min_sim=15; - int max_chain=20; - - /*Estimate Similarity within the incoming sequences*/ - sim=seq2comp_mat (aln2seq(A), "blosum62mt", "sim2"); - - /*Read and store the list of sequences to keep*/ - seq2keep=vcalloc (A->nseq, sizeof (int)); - tname=vcalloc (T->nseq, sizeof (int)); - for ( a=0; a< T->nseq; a++) - { - tname[a]=name_is_in_list ( T->name[a], A->name, A->nseq, 100); - if (tname[a]>=0)seq2keep[tname[a]]=1; - } - - /*Consider Every Pair of Sequences within the list of sequences to keep*/ - - fprintf ( stderr, "\n"); - for ( a=0; a< T->nseq-1; a++) - { - if (tname[a]<0) continue; - for ( b=a+1;bnseq; b++) - { - - if (tname[b]<0) continue; - - buf=NULL;sim_limit=90; - while (!buf && sim_limit>min_sim) - { - buf=aln2seq_chain ( A, sim,tname[a],tname[b],sim_limit, max_chain); - sim_limit-=5; - } - - if ( buf) - { - for (c=0; c< A->nseq; c++)seq2keep[c]+=buf[c]; - vfree (buf); - } - else - { - fprintf ( stderr, "\n#Could Not Find any Intermediate sequence [MAx chain %d MinID %d\n", max_chain, min_sim); - } - } - } - - list=vcalloc (A->nseq, sizeof (int)); - for ( nl=0,a=0; a< A->nseq; a++) - if ( seq2keep[a]) - list[nl++]=a; - - A=extract_sub_aln (A, nl, list); - - free_int (sim, -1); - vfree (list); - return A; -} -int max_explore=10000000;/*Limits the number of explorations that tends to increase when id is small*/ -int n_explore; - -int *aln2seq_chain (Alignment *A, int **sim, int seq1, int seq2, int limit, int max_chain) -{ - int *used; - int **chain; - char output1[10000]; - char output2[10000]; - int a; - int *list; - int n, nseq=0; - - - output1[0]=output2[0]='\0'; - used=vcalloc (A->nseq, sizeof(int)); - used[seq1]=1; - - if (find_seq_chain ( A, sim,used,seq1,seq1, seq2,1,limit, max_chain, &nseq)) - { - list=vcalloc (A->nseq, sizeof (int)); - chain=declare_int (A->nseq, 2); - for (n=0, a=0; a< A->nseq; a++) - { - if ( used[a]) - { - chain[n][0]=used[a]; - chain[n][1]=a; - list[used[a]-1]=a;n++; - } - } - - sprintf ( output2, "#%s %s N: %d Lower: %d Sim: %d DELTA: %d\n", A->name[list[0]], A->name[list[n-1]],n, limit,sim[list[0]][list[n-1]],limit-sim[list[0]][list[n-1]]);strcat (output1, output2); - - sort_int ( chain, 2, 0, 0, n-1); - sprintf ( output2, "#");strcat(output1, output2); - - for ( a=0; a< n-1; a++) - { - sprintf (output2, "%s -->%d -->", A->name[chain[a][1]],sim[chain[a][1]][chain[a+1][1]]);strcat ( output1, output2); - } - sprintf ( output2, "%s\n", A->name[chain[n-1][1]]);strcat (output1, output2); - - free_int (chain, -1); - vfree (list); - } - else - { - vfree (used); - used=NULL; - } - /* fprintf ( stdout, "%s", output1);*/ - fprintf ( stderr, "%s", output1); - n_explore=0; - return used; -} -static int ***pw_sim; -int find_seq_chain (Alignment *A, int **sim,int *used,int seq0,int seq1, int seq2,int chain_length, int limit, int max_chain, int *nseq) -{ - int a,b, seq, seq_sim; - - n_explore++; - if ( n_explore>=max_explore) - { - return 0; - } - if (!pw_sim) - { - pw_sim=declare_arrayN(3, sizeof (int), A->nseq, A->nseq, 3); - for ( a=0; a< A->nseq; a++) - { - for ( b=0; bnseq; b++) - { - pw_sim[a][b][0]=b; - pw_sim[a][b][1]=sim[a][b]; - pw_sim[a][b][2]=sim[b][seq2]; - } - sort_int_inv ( pw_sim[a],3, 1, 0, A->nseq-1); - } - } - - if ( chain_length>max_chain)return 0; - else if ( sim[seq1][seq2]>=limit) - { - used[seq2]=chain_length+1; - nseq[0]++; - return 1; - } - else - { - int delta_seq2; - for ( a=0; a< A->nseq; a++) - { - seq=pw_sim[seq1][a][0]; - seq_sim=pw_sim[seq1][a][1]; - delta_seq2=pw_sim[seq1][a][2]-sim[seq1][seq2]; - - - - if ( used[seq])continue; - else if ( seq_simnseq); - vfree (T->file); - T->file=vcalloc ( strlen (treefile)+1, sizeof (char)); - sprintf ( T->file, "%s", treefile); - return T; -} - -//This function codes the tree into lseq and lseq2 -//lseq: list of the N->nseq child sequences of the node -//lsseq2:Array of size Nseq, with lseq[a]=1 if sequence a is child of node N -static int node_index; -NT_node index_tree_node (NT_node T) -{ - if (!T)return T; - if (!T->parent){node_index=tree2nseq (T)+1;} - - index_tree_node(T->left); - index_tree_node(T->right); - - if (!T->left && !T->right)T->index=T->lseq[0]+1; - else T->index=node_index++; - return T; -} - - - -NT_node simple_recode_tree (NT_node T, int nseq) -{ - - //recodes atree wher the leafs are already coded - if (!T) return T; - - - - - T->nseq=0; - - if ( T->isseq) - { - - ; - - } - else - { - NT_node R,L; - int a; - vfree (T->lseq); T->lseq=vcalloc (nseq, sizeof (int)); - vfree (T->lseq2); T->lseq2=vcalloc (nseq, sizeof (int)); - vfree (T->idist); T->idist=vcalloc (nseq, sizeof (int)); - vfree (T->ldist); T->ldist=vcalloc (nseq, sizeof (int)); - - R=simple_recode_tree (T->left,nseq); - - L=simple_recode_tree (T->right,nseq); - - if (R)for (a=0; anseq; a++) - { - T->lseq2[R->lseq[a]]=1; - } - - if (L)for (a=0; anseq; a++) - { - T->lseq2[L->lseq[a]]=1; - } - - for (a=0; alseq2[a])T->lseq[T->nseq++]=a; - if (T->lseq2[a])T->idist[a]=(!R)?0:R->idist[a]+((!L)?0:L->idist[a])+1; - if (T->lseq2[a])T->ldist[a]=(!R)?0:R->ldist[a]+((!L)?0:L->ldist[a])+(int)(T->dist*10000); - } - } - return T; -} - -NT_node recode_tree (NT_node T, Sequence *S) -{ - - - if (!T) return T; - - - vfree (T->lseq); T->lseq=vcalloc (S->nseq, sizeof (int)); - vfree (T->lseq2); T->lseq2=vcalloc (S->nseq, sizeof (int)); - vfree (T->idist); T->idist=vcalloc (S->nseq, sizeof (int)); - vfree (T->ldist); T->ldist=vcalloc (S->nseq, sizeof (int)); - T->nseq=0; - - if ( T->isseq) - { - - int i; - i=name_is_in_list (T->name, S->name, S->nseq, -1); - - if (i!=-1) - { - T->lseq[T->nseq++]=i; - T->lseq2[i]=1; - T->idist[i]=1; - T->ldist[i]=(int)(T->dist*10000);; - } - else - { - printf_exit ( EXIT_FAILURE, stderr, "\nERROR: Sequence %s is in the Tree but Not in the Sequence dataset [code_lseq][FATAL:%s]", T->name, PROGRAM); - } - - } - else - { - NT_node R,L; - int a; - - R=recode_tree (T->left, S); - - L=recode_tree (T->right, S); - - if (R) - for (a=0; anseq; a++) - { - T->lseq2[R->lseq[a]]=1; - } - - if (L)for (a=0; anseq; a++) - { - T->lseq2[L->lseq[a]]=1; - } - - for (a=0; anseq; a++) - { - //don't count the root - int d; - - if ( !(T->parent) || !(T->parent)->parent)d=0; - else if ( T->dist==0)d=0; - else d=1; - - if (T->lseq2[a])T->lseq[T->nseq++]=a; - if (T->lseq2[a])T->idist[a]=(!R)?0:(R->idist[a]+((!L)?0:L->idist[a])+d); - if (T->lseq2[a])T->ldist[a]=(!R)?0:R->ldist[a]+((!L)?0:L->ldist[a])+(int)(T->dist*10000); - - } - } - return T; -} -int tree2split_list (NT_node T, int ns,int **sl, int* n) -{ - if (!T) return 0; - if (!sl) return 0; - - tree2split_list (T->right, ns, sl, n); - tree2split_list (T->left , ns, sl, n); - - if (!T->right) return 1; - else if (T->parent && !(T->parent)->parent)return 1; - else if ( T->dist==0)return 1; - else - { - int t=0,t2=0, c=0, a; - - for (a=0; a< ns; a++) - { - t2+=(a+1)*T->lseq2[a]; - t+=T->lseq2[a]; - } - - if (t2==0) HERE ("0"); - c=(t>(ns-t))?1:0; - sl[n[0]][ns]=t2;//Hash value for quick comparison; - - for (a=0; a< ns; a++)sl[n[0]][a]=(c==0)?T->lseq2[a]:(1-T->lseq2[a]); - n[0]++; - - } - return 1; -} - -NT_node display_splits (NT_node T,Sequence *S, FILE *fp) -{ - int a; - if (!T) return T; - - if (!S)S=tree2seq (T,NULL); - - display_splits (T->right,S, fp); - display_splits (T->left, S, fp); - - - - if (!T->right); - else if (T->parent && !(T->parent)->parent); - else - { - int t=0; - for (a=0; a< S->nseq; a++) - { - fprintf (fp, "%d", T->lseq2[a]); - t+=T->lseq2[a]; - } - - fprintf ( fp, " %5d \n", MIN(t,((S->nseq)-t))); - } - return T; -} -NT_node display_leaf_nb (NT_node T, int n, FILE *fp, char * name) -{ - int a; - if (!T) return T; - - - display_leaf_nb (T->right, n, fp, name); - display_leaf_nb (T->left, n, fp, name); - - - if (!T->isseq); - else - { - NT_node P; - - P=T->parent; - fprintf (fp, "%s ", T->name); - for (a=0; a< n; a++)fprintf (fp, "%d", P->lseq2[a]); - fprintf ( fp," %s\n", name); - } - return T; -} -static int root4dc; -NT_node display_code (NT_node T, int n, FILE *fp) -{ - int a, debug=0, t=0; - if (!T) return T; - - if (!T->parent) - root4dc=0; - - - - if (!T->parent && debug) fprintf ( fp, "\nDISPLAY TREE: START"); - display_code (T->right, n, fp); - display_code (T->left, n, fp); - - fprintf ( fp, "\n"); - if (!T->parent) return T; - else if ( !(T->parent)->parent && root4dc==1)return T; - else if ( !(T->parent)->parent && root4dc==0)root4dc=1; - - for (a=0; a< n; a++) - t+=T->lseq2[a]; - if ( t<=n/2) - for (a=0; a< n; a++)fprintf (fp, "%d", T->lseq2[a]); - else - for (a=0; a< n; a++)fprintf (fp, "%d", 1-T->lseq2[a]); - if (T->isseq && debug)fprintf (fp, "%s", T->name); - - if (!T->parent && debug) fprintf (fp, "\nDISPLAY TREE: FINISHED"); - return T; -} -NT_node display_dist (NT_node T, int n, FILE *fp) -{ - int a; - if (!T) return T; - - if (!T->parent) - root4dc=0; - - display_dist (T->right, n, fp); - display_dist (T->left, n, fp); - - fprintf ( stdout, "\n"); - for ( a=0; a< n; a++) - fprintf ( stdout, " %2d ", T->idist[a]); - fprintf ( stdout, "\n"); - - return T; -} - -NT_node check_tree (NT_node T) -{ - if (T) HERE("CHECK %s", T->name); - if (!T) - { - HERE ("ERROR: Empty Group"); - } - - else if (T->isseq)return T; - else - { - HERE ("R"); - check_tree (T->right); - HERE ("L"); - check_tree (T->left); - return NULL; - } - return 0;} - -NT_node new_reroot_tree( NT_node T) -{ - T=unroot_tree (T); - return T; -} -NT_node new_get_node (NT_node T, FILE *fp) -{ - NT_node NN; - int c; - static int n; - - c=fgetc (fp); - if (!T)T=declare_tree_node (100); - - - if ( c==';') - { - - if (!T->right)T=T->left; - else if (!T->left)T=T->right; - vfree (T->parent);T->parent=NULL; - return T; - } - else if ( c==')') - { - --n; - scan_name_and_dist (fp, T->name, &T->dist); - return new_get_node (T->parent, fp); - } - else if ( c==',') - { - return new_get_node (T, fp); - } - else - { - NN=new_insert_node (T); - - if ( c=='(') - { - ++n; - return new_get_node (NN, fp); - } - else - { - ungetc (c, fp); - scan_name_and_dist (fp, NN->name, &NN->dist); - - NN->leaf=1; - NN->isseq=1; - return new_get_node (T, fp); - } - } -} -int scan_name_and_dist ( FILE *fp, char *name, float *dist) -{ - int a, c; - char number [1000]; - - a=0; - c=fgetc (fp);ungetc (c, fp); - - - if ( c==';')return 0; - - while ((c=fgetc(fp))!=':' && c!=EOF && c!=')' && c!=';' && c!=',') - { - name[a++]=c; - } - name [a]='\0'; - - if ( c!=':') - { - ungetc (c, fp); - dist[0]=FLT_MIN; - return 1; - } - a=0; - while (isdigit((c=fgetc(fp))) || c=='.' || c=='-') - { - number[a++]=c; - } - - ungetc (c, fp); - number[a]='\0'; - - dist[0]=atof (number); - return 2; -} -NT_node new_insert_node (NT_node T) -{ - NT_node NN; - - - NN=new_declare_tree_node (); - NN->parent=T; - if (!T) - { - return NN; - } - else if (T->left==NULL) - { - T->left=NN; - - } - else if ( T->right==NULL) - { - T->right=NN; - } - else - { - NT_node NN2; - NN2=new_declare_tree_node (); - NN2->left=T->left; - NN2->right=T->right; - NN2->parent=T; - - T->left=NN2; - T->right=NN; - } - - /* - - else - { - NN->right=T->right; - (T->right)->parent=NN; - - NN->parent=T; - T->right=NN; - NN->left=new_declare_tree_node (); - (NN->left)->parent=NN; - return NN->left; - } - */ - /* - This caused a crash when internal undefined nodes, removed 19/02/08 - else - { - NT_node P; - NN->right=T; - P=NN->parent=T->parent; - T->parent=NN; - - if (P && P->right==T)P->right=NN; - else if ( P && P->left==T)P->left=NN; - - NN->left=new_declare_tree_node (); - (NN->left)->parent=NN; - return NN->left; - } - */ - return NN; -} - -NT_node new_declare_tree_node () -{ - NT_node p; - static int node_index; - p= (NT_node)vcalloc (1, sizeof ( Treenode)); - p->left = NULL; - p->right = NULL; - p->parent = NULL; - p->dist = 0.0; - p->leaf = 0; - p->order = 0; - p->index=++node_index; - p->maxnseq=1000; - p->name=(char*)vcalloc (MAXNAMES+1,sizeof (char)); - p->name[0]='\0'; - return p; - - } -int new_display_tree (NT_node T, int n) -{ - int in; - - in=n; - - - if ( T->parent)fprintf (stdout, "\nNode %d: has parents)", in); - else fprintf (stdout, "\nNode %d: NO parents)", in); - - if ( T->right) - { - fprintf (stdout, "\nNode %d has Right Child", in); - n=new_display_tree (T->right, n+1); - } - else fprintf ( stdout, "\nNode %d No Right\n", in); - - if ( T->left) - { - fprintf (stdout, "\nNode %d has Left Child", in); - n=new_display_tree (T->left, n+1); - } - else fprintf ( stdout, "\nNode %d No Left\n", in); - - if ( T->bot) - { - fprintf (stdout, "\nNode %d has Bot Child", in); - n=new_display_tree (T->bot, n+1); - } - else fprintf ( stdout, "\nNode %d No Bot\n", in); - - - if (T->isseq) - { - fprintf (stdout, "\nNode %d is %s", in, T->name); - return in; - } - else return 0;} -int display_tree_duplicates (NT_node T) -{ - static Sequence *S; - static int *dup; - int a, b; - - free_sequence (S, -1); - vfree (dup); - - S=tree2seq (T, NULL); - dup=vcalloc ( S->nseq, sizeof (int)); - - for (a=0; a< S->nseq-1; a++) - for ( b=a+1; bnseq; b++) - { - if ( strm (S->name[a], S->name[b])) - { - dup[a]++; - } - } - for (a=0; a< S->nseq-1; a++) - for ( b=a+1; bnseq; b++) - { - if ( strm (S->name[a], S->name[b]) && dup[a]) - { - fprintf ( stderr, "\nSequence %s is duplicated %d Times in the tree", S->name[a], dup[a]); - dup[a]=0; - } - } - return 0; -} -int tree_contains_duplicates (NT_node T) -{ - static Sequence *S; - int a, b; - - free_sequence (S, -1); - - S=tree2seq (T, NULL); - for (a=0; a< S->nseq-1; a++) - for ( b=a+1; bnseq; b++) - { - if ( strm (S->name[a], S->name[b]))return 1; - } - return 0; -} - -float display_avg_bootstrap ( NT_node T) -{ - float tot; - int n; - - tot=tree2tot_dist (T, BOOTSTRAP); - n=tree2n_branches (T, BOOTSTRAP); - fprintf ( stdout, "\nAVERAGE BOOTSRAP: %.3f on %d Branches\n", (n>0)?tot/n:0, n); - return (n>0)?tot/n:0; -} - - -int tree2n_branches(NT_node T, int mode) -{ - int n=0; - - if (!T) return 0; - if (!T->parent); - else if ((T->isseq && mode !=BOOTSTRAP) || !T->isseq) - { - n++; - } - n+=tree2n_branches(T->right, mode); - n+=tree2n_branches(T->left, mode); - - return n; -} - -float tree2tot_dist ( NT_node T, int mode) -{ - float t=0; - - - if ( !T)return 0; - - if ( !T->parent); - else if ((T->isseq && mode !=BOOTSTRAP) || !T->isseq) - { - t+=T->dist; - } - - t+=tree2tot_dist(T->right, mode); - t+=tree2tot_dist(T->left, mode); - return t; -} - -//This function displays all the sequences within the tree sorted by node label -int cmp_tree_array ( const void *vp, const void *vq); -int node_sort ( char *name, NT_node T) -{ - NT_node N; - int nseq; - int **array, a; - Sequence *S; - while (T->parent)T=T->parent; - - nseq=tree2nseq (T); - array=declare_int (nseq, 2); - N=tree2node (name, T); - - if (N==NULL)printf_exit (EXIT_FAILURE, stderr, "ERROR: %s is not in the tree [FATAL:%s]\n", name, PROGRAM); - array=display_tree_from_node (N,0,0, array); - qsort ( array, nseq, sizeof (int**), cmp_tree_array); - S=tree2seq(T, NULL); - for (a=0; a%s %d %d\n", S->name[array[a][0]], array[a][1], array[a][2]); - exit (EXIT_SUCCESS); -} - -NT_node tree2root ( NT_node R) -{ - if (R)while (R->parent)R=R->parent; - return R; -} - -NT_node tree2node (char *name, NT_node T) -{ - NT_node T1, T2; - if ( !T) return T; - else if (T->leaf && strm (T->name, name)) return T; - else - { - - T1=tree2node ( name, T->right); - T2=tree2node ( name, T->left); - return (T1>T2)?T1:T2; - } - -} -NT_node * tree2node_list (NT_node T, NT_node *L) -{ - if (!T) return NULL; - if (!L) L=vcalloc (T->node+1, sizeof (NT_node)); - - tree2node_list (T->left, L); - tree2node_list (T->right, L); - L[T->index]=T; - return L; -} - - - -int ** display_tree_from_node (NT_node T, int up, int down, int **array) -{ - - if (!T || T->visited)return array; - - T->visited=1; - if (T->isseq) - { - array[T->lseq[0]][0]=T->lseq[0]; - array[T->lseq[0]][1]=up; - array[T->lseq[0]][2]=down; - - } - else - { - array=display_tree_from_node ( T->left ,up, down+1, array); - array=display_tree_from_node ( T->right,up, down+1, array); - } - array=display_tree_from_node ( T->parent,up+1, 0, array); - T->visited=0; - return array; - -} - -int cmp_tree_array ( const void *p, const void *q) -{ - const int **vp=(const int**)p; - const int **vq=(const int**)q; - if (vp[0][1]>vq[0][1])return 1; - else if ( vp[0][1]vq[0][2]) return 1; - else if ( vp[0][2]nseq+1, sizeof (NT_node)); - - for ( a=0; anseq; a++) - { - char *fname; - if (S->seq && S->seq[a] && strlen (S->seq[a])<2) - fname=S->name[a]; - else - fname=string2file (S->seq[a], NULL, "w"); - - T[a]=main_read_tree (fname); - T[a]->file=vcalloc (strlen (S->name[a])+1, sizeof (char)); - sprintf (T[a]->file, "%s", S->name[a]); - } - return T; -} - -int treelist2dmat ( Sequence *S) -{ - NT_node *T; - int n=0, a, b; - float v; - Sequence *TS; - - - - n=S->nseq; - T=read_tree_list (S); - TS=tree2seq(T[0], NULL); - fprintf (stdout, "\n%d", S->nseq); - for (a=0; aname[a]); - for ( b=0; bnseq*10, sizeof (char)); - ref_group=vcalloc (TS->nseq*10, sizeof (char)); - list=vcalloc (100*S->nseq, sizeof (char)); - split_file=vtmpnam (NULL); - sorted_split_file =vtmpnam (NULL); - - n=S->nseq; - used=vcalloc (n, sizeof (int)); - - T=read_tree_list (S); - if (!TS)TS=tree2seq(T[0], NULL); - nseq=TS->nseq; - fp=vfopen (split_file, "w"); - - for ( a=0; a< S->nseq; a++) - { - - T[a]=prune_tree (T[a], TS); - T[a]=recode_tree (T[a], TS); - display_leaf_nb (T[a], TS->nseq,fp, S->name[a]); - } - vfclose (fp); - - - for (s=0; s< TS->nseq; s++) - { - int i; - - - if (taxon && !(strm (taxon, TS->name[s]) ))continue; - else - printf_system ( "cat %s | grep %s| sort > %s", split_file,TS->name[s], sorted_split_file); - - vfopen (sorted_split_file, "r"); - ref_group[0]=group[0]='\0'; - - while ( (c=fgetc (fp))!=EOF) - { - - ungetc (c, fp); - buf=vfgets (buf, fp); - sscanf (buf, "%s %s %s\n", name, group, fname); - - if ( !ref_group[0]|| !strm (group, ref_group)) - { - if (ref_group[0]) - - {fprintf (stdout, "%s %6.2f %s",name, (((float)n*100)/(float)S->nseq), ref_group); - for (i=0,a=0; aname[a]); - fprintf ( stdout, "\n"); - fprintf (stdout, "\nLIST: %s\n", list); - } - list[0]='\0'; - sprintf ( ref_group, "%s", group); - list=strcatf (list, " %s", fname); - n=1; - } - else - { - - list=strcatf (list, " %s", fname); - n++; - } - } - - fprintf (stdout, "%s %6.2f %s",name, (((float)n*100)/(float)S->nseq), group); - for (i=0,a=0; aname[a]); - fprintf (stdout, "\nLIST %s\n", list); - fprintf ( stdout, "\n"); - vfclose (fp); - } - - exit (0); -} -int count_tree_groups( Sequence *LIST, char *group_file) -{ - NT_node *T; - Sequence *S; - int a, b, c,n, w, wo, ng=0; - int **list, ***rlist, **blist; - char ***l; - int *gs; - - - - T=read_tree_list (LIST); - S=tree2seq(T[0], NULL); - for ( a=0; a< LIST->nseq; a++) - { - T[a]=prune_tree (T[a], S); - T[a]=recode_tree (T[a], S); - } - - - - gs=vcalloc (2, sizeof (int)); - list=declare_int (LIST->nseq*S->nseq*2, S->nseq+1); - - blist=declare_int (2, S->nseq+1); - for ( n=0, a=0; a< LIST->nseq; a++) - { - int n2=0; - tree2split_list (T[a], S->nseq, list+n, &n2); - n+=n2; - - - for (b=0; bnseq; c++) - list[n+b][c]=1-list[n-n2+b][c]; - } - n+=n2; - } - - if ( group_file) - { - rlist=declare_arrayN(3, sizeof (int), 2,LIST->nseq*S->nseq, S->nseq+1); - l=file2list (group_file, " "); - - while (l[ng]) - { - int i, b, g; - if (!strstr (l[ng][1], "group")){ng++;continue;} - g=(strm (l[ng][1], "group2"))?0:1; - - for (b=2; bname, S->nseq, 100))!=-1)rlist[g][gs[g]][i]=1; - } - gs[g]++; - ng++; - } - } - else - { - rlist=vcalloc ( 2, sizeof (int**)); - rlist[1]=count_int_strings (list, n, S->nseq); - gs[1]=read_array_size_new (rlist[1]); - - rlist[0]=declare_int (S->nseq, S->nseq); - gs[0]=S->nseq; - for ( a=0; anseq; a++)rlist[0][a][a]=1; - } - - - for (wo=w=0,a=0; anseq; b++) - { - blist[0][b]=blist[1][b]=rlist[1][c][b]; - blist[0][b]=(rlist[0][a][b]==1)?1:blist[0][b]; //WITH GROUP 1 - blist[1][b]=(rlist[0][a][b]==1)?0:blist[1][b]; //wiTHOUT gROUP 1 - - } - for (b=0; bnseq)==0)?1:0; - w+=x1; - x2=(memcmp (blist[1], list[b], sizeof (int)*S->nseq)==0)?1:0; - wo+=x2; - - } - fprintf ( stdout, "\n%d ", MIN(wo, w)); - fprintf ( stdout, "("); - for (b=0; bnseq; b++)if (rlist[1][c][b])fprintf ( stdout, "%s ",S->name[b]); - fprintf ( stdout, ") +/- ("); - for (b=0; bnseq; b++)if (rlist[0][a][b])fprintf ( stdout, "%s ",S->name[b]); - - fprintf (stdout , ") + %d - %d Delta %d", w, wo, FABS((wo-w))); - } - } - exit (0); -} -Split * print_split ( int n, int **list, Sequence *LIST, Sequence *S, char *buf, char *file); - -NT_node split2tree ( NT_node RT,Sequence *LIST, char *param) -{ - Split **S; - Alignment *A; - S=count_splits (RT, LIST, param); - A=seq2aln ((S[0])->S,NULL, KEEP_GAP); - - return split2upgma_tree (S,A, A->nseq, "no"); -} - -Split** count_splits( NT_node RT,Sequence *LIST, char *param) -{ - NT_node *T, OrderT; - Sequence *S=NULL; - int a, b, c, d, n1, n2; - int **list1, **list2; - Split **SL; - int nb, tlist; - char *main_buf; - char *in=NULL,*in2=NULL, *out=NULL, order[100], filter[100]; - FILE *fp, *fp2; - char *def_param; - char *cache=NULL; - //+count_splits _NB_x_FILTER_ - //_ - if (!def_param)def_param=vcalloc ( 10, sizeof (char)); - - - - if (!param)param=def_param; - - - strget_param (param, "_NB_", "0", "%d", &nb); - strget_param (param, "_TLIST_", "0", "%d", &tlist); - strget_param (param, "_ORDER_", "NO", "%s", order); - strget_param (param, "_FILTER_", "NO", "%s", filter); - - fprintf ( stderr, "\nREAD TREE LIST [%d Trees...", LIST->nseq); - T=read_tree_list (LIST); - fprintf ( stderr, "..]"); - - if ( !(strm (order, "NO"))) - { - if (is_newick (order)) - { - OrderT=main_read_tree (order); - } - else - { - S=main_read_seq (order); - } - } - else - { - OrderT=(RT)?RT:T[0]; - } - fprintf ( stderr, "\nTrees Ordered according to: %s", (strm (order, "NO"))?"First Tree":order); - - - if (!S)S=tree2seq(OrderT, NULL); - - for (a=0; anseq; a++) - { - fprintf ( stdout, "\n#ORDER %15s : %3d", S->name[a], a+1); - } - if ( !strm (filter, "NO")) - { - Sequence *F; - int i; - - F=main_read_seq (filter); - cache=vcalloc (S->nseq, sizeof (int)); - for ( a=0; anseq; a++) - { - if ( (i=name_is_in_list (F->name[a], S->name, S->nseq, 100))!=-1) - cache[i]=1; - } - free_sequence (F, -1); - } - - main_buf=vcalloc ( S->nseq*(STRING+1), sizeof(int)); - - list1=declare_int (S->nseq*3, S->nseq+1); - list2=declare_int (S->nseq*3, S->nseq+1); - - for ( a=0; a< LIST->nseq; a++) - { - T[a]=prune_tree (T[a], S); - T[a]=recode_tree (T[a], S); - } - - - - if (!RT) - { - char *buf; - int i,nl; - - in=vtmpnam (NULL);in2=vtmpnam(NULL); out=vtmpnam (NULL); - - fp=vfopen (in, "w"); - fp2=vfopen (in2, "w"); - for ( a=0; a< LIST->nseq; a++) - { - n2=0; - tree2split_list (T[a], S->nseq, list2, &n2); - for ( b=0; bnseq; c++) - {fprintf (fp, "%d", list2[b][c]);} - fprintf (fp, "\n"); - for (c=0; c< S->nseq; c++) - {fprintf (fp, "%d", 1-list2[b][c]);} - fprintf (fp, "\n"); - - for (c=0; c< S->nseq; c++) - {fprintf (fp2, "%d", list2[b][c]);} - fprintf (fp2, " "); - for (c=0; c< S->nseq; c++) - {fprintf (fp2, "%d", 1-list2[b][c]);} - fprintf (fp2, " %s\n",LIST->name[a]); - } - } - vfclose (fp2); - vfclose (fp); - - count_strings_in_file (in, out); - nl=count_n_line_in_file(out); - list1=declare_int (nl+1, S->nseq+2); - - fp=vfopen (out, "r"); - n1=0; - buf=vcalloc (measure_longest_line_in_file (out)+1, sizeof (char)); - while ( fscanf (fp, "%s %d",buf, &i)==2) - { - for (a=0; anseq; a++)list1[n1][a]=buf[a]-'0'; - list1[n1++][S->nseq+1]=i; - } - vfclose (fp); - vfree (buf); - } - else - { - - RT=prune_tree (RT, S); - RT=recode_tree (RT, S); - n1=0; - tree2split_list (RT, S->nseq, list1,&n1); - for ( a=0; a< LIST->nseq; a++) - { - n2=0; - tree2split_list (T[a], S->nseq, list2, &n2); - for (b=0; bnseq; d++) - { - if (list1[b][d]!=list2[c][d])di++; - } - list1[b][S->nseq+1]+=(di==0 || di== S->nseq)?1:0; - } - - } - } - } - SL=vcalloc ( n1+1, sizeof (Split*)); - - for (a=0; anseq; b++)if (cache[b]!=list1[a][b])cont=0; - if (!cont) continue; - - SL[a]=print_split (a, list1, LIST, S, main_buf, (tlist==1)?in2:NULL); - for (b=0; bnseq; c++) - { - s1+=list1[b][c]; - s2+=list1[a][c]; - d+=(list1[a][c]!=list1[b][c])?1:0; - } - - } - if (d<=nb &&((s1==s2)|| ((S->nseq-s1)==s2)))print_split (b, list1, LIST, S, main_buf, (tlist==1)?in2:NULL); - } - } - a=0; - vfree (cache); - return SL; -} -Split * declare_split (int nseq, int ntrees); -Split* print_split ( int a, int **list1, Sequence *LIST, Sequence *S, char *buf, char *split_file) - { - int f1,t,b; - Split *SP=NULL; - - - - SP=declare_split (S->nseq, LIST->nseq); - - fprintf ( stdout, "\n>"); - for (t=0,b=0; bnseq; b++){fprintf ( stdout, "%d", list1[a][b]);t+=list1[a][b];SP->split[b]='0'+list1[a][b];} - fprintf ( stdout, " NumberSplit %5d SplitSize %5d Score %5.2f %s ", list1[a][S->nseq+1],t, (float)(list1[a][S->nseq+1]*100)/LIST->nseq, (buf)?buf:""); - SP->n= list1[a][S->nseq+1]; - SP->score=(float)(list1[a][S->nseq+1]*100)/LIST->nseq; - SP->S=S; - - for (f1=1,b=0; b< S->nseq; b++) - { - - if (list1[a][b]) - { - if (f1==1)fprintf ( stdout, "("); - else fprintf (stdout, ","); - f1=0; - fprintf ( stdout, "%s", S->name [b]); - } - } - fprintf ( stdout, ")"); - if (split_file) - { - char *buf=NULL; - FILE *fp; - - char c; - fp=vfopen (split_file, "r"); - while ( (c=fgetc(fp))!=EOF) - { - - c=ungetc (c, fp); - buf=vfgets (buf, fp); - if ( strstr (buf, SP->split)) - { - char **list; - list=string2list (buf); - fprintf ( stdout, "\n\t%s %s", SP->split, list[3]); - free_char (list, -1); - } - } - vfclose (fp); - } - - return SP; - } -Split * declare_split (int nseq, int ntrees) -{ - Split *S; - S=vcalloc (1, sizeof (Split)); - S->split=vcalloc ( nseq+1, sizeof (char)); - return S; -} -int treelist2splits( Sequence *S, Sequence *TS) -{ - NT_node *T; - int n=0,nseq, a, c; - - int *used; - - char *split_file, *sorted_split_file; - char *buf=NULL, *ref_buf=NULL; - FILE *fp; - - split_file=vtmpnam (NULL); - sorted_split_file =vtmpnam (NULL); - - n=S->nseq; - used=vcalloc (n, sizeof (int)); - - T=read_tree_list (S); - if (!TS)TS=tree2seq(T[0], NULL); - nseq=TS->nseq; - fp=vfopen (split_file, "w"); - - - for ( a=0; a< S->nseq; a++) - { - - T[a]=prune_tree (T[a], TS); - T[a]=recode_tree (T[a], TS); - display_splits (T[a], TS,fp); - } - - vfclose (fp); - printf_system ("cp %s split_file", split_file); - - printf_system ( "cat %s | grep 1| sort > %s", split_file, sorted_split_file); - - fp=vfopen (sorted_split_file, "r"); - fprintf (stdout, "LEGEND: <#occurences> <(group1,)> <(group2,>\n"); - - for ( a=0; anseq; a++)fprintf ( stdout, "SEQ_INDEX %d %s\n", a+1, TS->name[a]); - while ( (c=fgetc (fp))!=EOF) - { - - ungetc (c, fp); - buf=vfgets (buf, fp); - buf [strlen(buf)-1]='\0'; - - if ( ref_buf==NULL) - { - ref_buf=vcalloc (strlen (buf)+1, sizeof (char)); - sprintf ( ref_buf, "%s", buf); - n=1; - } - else if ( !strm (buf, ref_buf)) - { - int i; - fprintf ( stdout, "SPLIT_COUNT %3d %s (", n, ref_buf); - for (i=0,a=0; aname[a]); - i=1; - } - fprintf ( stdout, "),("); - for (i=0,a=0; aname[a]); - i=1; - } - - fprintf (stdout, ")\n"); - sprintf ( ref_buf, "%s", buf); - n=1; - } - else - { - n++; - } - } - vfclose (fp); - - - exit (0); -} - -int treelist2splits_old ( Sequence *S, Sequence *TS) -{ - NT_node *T; - int n=0,nseq, a,c; - - int *used; - - char *split_file, *sorted_split_file; - char *buf=NULL, *ref_buf=NULL; - FILE *fp; - - split_file=vtmpnam (NULL); - sorted_split_file =vtmpnam (NULL); - - n=S->nseq; - used=vcalloc (n, sizeof (int)); - - T=read_tree_list (S); - if (!TS)TS=tree2seq(T[0], NULL); - nseq=TS->nseq; - fp=vfopen (split_file, "w"); - - for ( a=0; a< S->nseq; a++) - { - - T[a]=prune_tree (T[a], TS); - T[a]=recode_tree (T[a], TS); - display_leaf_nb (T[a], TS->nseq,fp, S->name[a]); - } - vfclose (fp); - printf_system ("cp %s split_file", split_file);exit (0); - - printf_system ( "cat %s | grep 1| sort > %s", split_file, sorted_split_file); - - vfopen (sorted_split_file, "r"); - - while ( (c=fgetc (fp))!=EOF) - { - - ungetc (c, fp); - buf=vfgets (buf, fp); - buf [strlen(buf)-1]='\0'; - - if ( ref_buf==NULL) - { - ref_buf=vcalloc (strlen (buf)+1, sizeof (char)); - sprintf ( ref_buf, "%s", buf); - n=1; - } - else if ( !strm (buf, ref_buf)) - { - int i; - fprintf ( stdout, "%3d %s(", n, ref_buf); - for (i=0,a=0; aname[a]); - i=1; - } - fprintf ( stdout, "),("); - for (i=0,a=0; aname[a]); - i=1; - } - - fprintf (stdout, ")\n"); - sprintf ( ref_buf, "%s", buf); - n=1; - } - else - { - n++; - } - } - vfclose (fp); - - - exit (0); -} - -NT_node *treelist2prune_treelist (Sequence *S, Sequence *TS, FILE *out) -{ - NT_node *T; - int a, b, c; - - T=read_tree_list (S); - T=vrealloc (T, (S->nseq+1)*sizeof (NT_node)); - for (b=0,a=0; anseq; a++) - { - T[a]=prune_tree (T[a], TS); - if (tree2nleaf(T[a])nseq) - { - ; - } - else - { - char *s; - T[b]=T[a]; - T[b]=recode_tree (T[b], TS); - sprintf ( S->name[b], "%s", S->name[a]); - s=tree2string (T[a]); - S->seq[b]=vrealloc (S->seq[b], (strlen (s)+1)*sizeof (char)); - sprintf (S->seq[b], "%s",s); - sprintf (S->seq_comment[b], " NSPECIES: %d", TS->nseq); - vfree (s); - - b++; - } - - } - - S->nseq=b; - T[S->nseq]=NULL; - - if (out) - { - for (a=0; anseq; a++) - { - print_tree (T[a], "newick", out); - } - } - return T; -} -int** treelist2lti2 ( Sequence *S, Sequence *TS, int ngb, FILE *out); -int treelist2frame (Sequence *S, Sequence *TS) -{ - int n, a, b, c,d, **r, **order; - Sequence *temp; - - temp=duplicate_sequence (S); - order= treelist2lti (temp, TS,0,stdout); - - TS=reorder_seq_2 (TS, order, 0, TS->nseq); - n=TS->nseq; - - for (a=3; anseq=a+1; - temp=duplicate_sequence (S); - r=treelist2groups (temp,TS, NULL, NULL); - fprintf ( stdout, "\n>Tree_%d [%d %%]\n ", a+1,r[0][1]); - tree=main_read_tree (temp->name[r[0][0]]); - tree=prune_tree (tree, TS); - print_tree (tree, "newick",stdout); - - free_int (r, -1); - free_sequence (temp,-1); - } - exit (EXIT_SUCCESS); -} -int** treelist2lti2 ( Sequence *S, Sequence *TS, int ngb, FILE *out) -{ - NT_node *T; - int a,b, c, d, ****dist, i; - int **score, **order; - - score=declare_int (TS->nseq, 3); - order=declare_int (TS->nseq, 2); - vsrand (0); - - for (a=0; a<50; a++) - { - Sequence *seq, *trees; - int **r; - trees=duplicate_sequence (S); - seq=duplicate_sequence (TS); - for (b=0; bnseq; b++){order[b][0]=b;order[b][1]=rand()%10000;} - sort_int (order, 2, 1, 0, TS->nseq-1); - seq=reorder_seq_2(seq, order, 0,5); - r=treelist2groups (trees,seq, NULL, NULL); - - for (b=0; b<5; b++) - { - score[order[b][0]][1]+=r[0][1]; - score[order[b][0]][2]++; - } - HERE ("Score=%d", r[0][1]); - free_int (r, -1); - free_sequence (seq, -1); - free_sequence (trees, -1); - - } - - for ( a=0; a< TS->nseq; a++) - { - score[a][0]=a; - HERE ("%s => %d [%d]",TS->name[a], score[a][1]/score[a][2], score[a][2]); - score[a][1]/=(score[a][2])?score[a][2]:1; - } - sort_int_inv (score, 3, 1, 0, TS->nseq-1); - - return score; -} - - -int** treelist2lti ( Sequence *S, Sequence *TS, int ngb, FILE *out) -{ - NT_node *T; - int a,b, c, d, ****dist, i; - float score0=0, score1=0; - int **result; - - - i=S->nseq; - T=treelist2prune_treelist (S, TS,NULL); - - if (!ngb)ngb=TS->nseq*2; - dist=vcalloc ( S->nseq, sizeof (int****)); - result=declare_int (TS->nseq, 2); - for (a=0; anseq; a++) - { - float score_seq=0; - float n_seq=0; - for (b=0; bnseq;b++) - { - float score_pair=0; - float n_pair=0; - for (c=0; cnseq; c++) - { - if (!dist[c])dist[c]=tree2dist(T[c], TS, NULL); - for (d=0; dnseq; d++) - { - float score, d1, d2; - - if (!dist[d])dist[d]=tree2dist(T[d], TS, NULL); - d1=dist[c][0][a][b]; - d2=dist[d][0][a][b]; - score=FABS((d1-d2)); - if (d1>ngb || d2>ngb); - else - { - score_seq+=score; - score_pair+=score; - n_seq++; - n_pair++; - } - // if (d1 && d2) HERE ("%d %d", (int)d1, (int)d2); - } - } - score_pair=(score_pair*100)/(float)n_pair; - if (out)fprintf ( stdout, "\n>%-20s %-20s LTI: %7.3f [Kept %d Trees Out of %d] ", TS->name[a],TS->name[b], score_pair, S->nseq,i); - } - - score_seq=(score_seq*100)/n_seq; - result[a][0]=a; - result[a][1]=(int)(100*score_seq); - if (out)fprintf ( stdout, "\n>%-20s %-20s LTI: %7.3f [Kept %d Trees Out of %d] ", TS->name[a],"*", score_seq, S->nseq, i); - } - sort_int (result,2,1,0, TS->nseq-1); - return result; -} - - -int ***tree2dist (NT_node T, Sequence *S, int ***d) -{ - int *l0, *r0,*l1, *r1, a, b; - - - if (!T) return d; - if (!S)S=tree2seq(T, NULL); - if (!d) - { - d=declare_arrayN (3, sizeof (float),2, S->nseq, S->nseq); - T=prune_tree(T, S); - T=recode_tree (T, S); - } - - if (!T->left)return d; - if (!T->right) return d; - - l0=(T->left)->idist; - r0=(T->right)->idist; - - l1=(T->left)->ldist; - r1=(T->right)->ldist; - - - - for (a=0; a< S->nseq; a++) - for (b=0; bnseq; b++) - { - if (l0[a]>0 && r0[b]>0)d[0][a][b]=d[0][b][a]=l0[a]+r0[b]; - if (l0[a]>0 && r0[b]>0)d[1][a][b]=d[1][b][a]=l1[a]+r1[b]; - } - - d=tree2dist (T->left, S, d); - d=tree2dist (T->right, S, d); - - - return d; -} - - - -int **tree2dist_split ( NT_node T, Sequence *S, int **dist) -{ - - FILE *fp; - int a, b, c, n=0; - char *buf=NULL, **list=NULL, *split_file; - - - if (!S)S=tree2seq(T, NULL); - - T=prune_tree (T, S); - T=recode_tree (T, S); - - split_file=vtmpnam (NULL); - fp=vfopen (split_file, "w"); - display_code (T, S->nseq,fp); - vfclose (fp); - - list=declare_char (2*S->nseq, S->nseq+1); - fp=vfopen (split_file, "r"); - - while ((buf=vfgets (buf,fp))!=NULL) - { - if (buf[0]=='1' || buf[0]=='0')sprintf (list[n++], "%s", buf); - } - vfclose (fp); - dist=declare_int ( S->nseq, S->nseq); - for (a=0; a< S->nseq; a++) - for ( b=0; bnseq; b++) - for (c=0; cnseq; - T=treelist2prune_treelist (S, TS,NULL); - nsn=(star_node)?atoi(star_node):0; - - results=declare_int (S->nseq+1, 2); - - if (nsn) - { - for (a=0; a< S->nseq; a++)tree2star_nodes(T[a],nsn); - } - - used=vcalloc (S->nseq, sizeof (int)); - for (ntop=0,a=0; anseq; a++) - { - - if (used[a]==0) - { - ntop++; - if (out)fprintf ( out, "\nTree %s:",S->name[a]); - used[a]=1; - } - else continue; - tot=1; - for ( b=0; bnseq; b++) - { - v=0; - - v=(int)simple_tree_cmp (T[a], T[b], TS, 1); - if ( v==100) - { - used[b]=1; - used[a]++; - if (out)fprintf (stdout," %s ", S->name[b]); - tot++; - } - } - - if (out)fprintf ( stdout, "__ N=%d\n", tot-1); - } - - - for (n=0,a=0; anseq; a++) - { - if ( used[a]>1) - { - if (out)fprintf ( out, "\n>%-15s %4d %6.2f TOPOLOGY_LIST\n", S->name[a], used[a]-1, (float)(((float)used[a]-1)*100/(float)S->nseq)); - if (out)print_tree (T[a], "newick_tree", out); - results[n][0]=a; - results[n][1]=((used[a]-1)*100)/i; - n++; - } - } - - for (a=0; anseq; a++) free_tree(T[a]); - vfree (T); - - if (out)fprintf ( stdout, "\nTotal Number of different topologies: %d\n", ntop); - results[n][0]=-1; - sort_int_inv (results,2,1,0, n-1); - for (a=0; anseq; a++) free_tree(T[a]); - vfree (T); - return results; - } -float simple_tree_cmp (NT_node T1, NT_node T2,Sequence *S, int mode) -{ - Tree_sim *TS1, *TS2; - float t, w, l, n; - - TS1=vcalloc (1, sizeof (Tree_sim)); - TS2=vcalloc (1, sizeof (Tree_sim)); - - - T1=recode_tree(T1, S); - T2=recode_tree(T2, S); - - n=new_compare_trees ( T1, T2, S->nseq, TS1); - new_compare_trees ( T2, T1, S->nseq, TS2); - - - - t=(TS1->uw+TS2->uw)*100/(TS1->max_uw+TS2->max_uw); - w=(TS1->w+TS2->w)*100/(TS1->max_w+TS2->max_w); - l=(TS1->d+TS2->d)*100/(TS1->max_d+TS2->max_d); - - vfree (TS1); vfree (TS2); - if ( mode ==1)return t; - else if (mode ==2) return w; - else return l; -} -int treelist2n (NT_node *L) -{ - int n=0; - while (L[n])n++; - return n; -} -int **treelist2avg_treecmp (NT_node *L, char *file) -{ - int a, b, n; - int **score; - - if (file) L=read_tree_list (main_read_seq(file)); - n=treelist2n (L); - - score=declare_int (n, 2); - for (a=0; auw; - score[b][1]+=ts->uw; - vfree (ts); - } - } - sort_int_inv (score, 2, 1, 0, n-1); - if (file)free_treelist(L); - return score; -} -NT_node treelist2filtered_bootstrap ( NT_node *L,char *file, int **score, float t) -{ - NT_node BT, *L2; - int n,a; - - if (t==1 || t==0 || !score)return treelist2bootstrap (L, file); - - if (file)L=read_tree_list (main_read_seq(file)); - - n=treelist2n(L)*t; - - if (n==0) return NULL; - - L2=vcalloc ( n+1, sizeof (NT_node)); - for (a=0; a/dev/null 2>/dev/null", file, outfile); - - T=main_read_tree (outfile); - T=tree_dist2normalized_tree_dist (T,treelist2n(L)); - - - return T; -} - - - -Sequence * treelist2seq (Sequence *S) -{ - int a, b, c, n, i; - char **name; - NT_node *T; - Sequence *TS; - char *fname; - FILE *fp; - - name=vcalloc (1, sizeof (char*)); - fp=vfopen ((fname=vtmpnam (NULL)), "w"); - - T=read_tree_list (S); - for (n=0,a=0; a< S->nseq; a++) - { - TS=tree2seq(T[a], NULL); - for (b=0; bnseq; b++) - { - if ( (i=name_is_in_list (TS->name[b], name, n, 100))==-1) - { - name[n]=vcalloc (100, sizeof (int)); - sprintf ( name[n], "%s", TS->name[b]); - n++; - name=vrealloc (name, (n+1)*sizeof (char*)); - fprintf ( fp, ">%s\n", TS->name[b]); - } - } - free_sequence(TS, TS->nseq); - free_tree (T[a]); - } - - vfclose (fp); - vfree (T); - return get_fasta_sequence (fname, NULL); -} - - -Sequence * treelist2sub_seq ( Sequence *S, int f) -{ - NT_node *T; - int a,b,c, s, i, n, maxnseq, tot; - int **count, **grid; - char *fname; - Sequence *FS, *TS; - FILE *fp; - if (!f)return treelist2seq(S); - - - //keep as many taxons as possible so that f% of the trees are kept - //1: count the frequency of each taxon - - FS=treelist2seq (S); - maxnseq=FS->nseq; - - count=declare_int (maxnseq, 3); - grid=declare_int (S->nseq,maxnseq+1); - T=read_tree_list (S); - - - - for (a=0; anseq; a++){count[a][0]=a;count[a][2]=1;} - for (n=0,a=0; a< S->nseq; a++) - { - TS=tree2seq(T[a], NULL); - for (b=0; bnseq; b++) - { - i=name_is_in_list (TS->name[b], FS->name, FS->nseq, 100); - if ( i==-1){exit (EXIT_FAILURE);} - count[i][1]++; - grid[a][i]=1; - } - free_sequence(TS, TS->nseq); - free_tree (T[a]); - } - vfree (T); - sort_int ( count,3,1, 0, maxnseq-1); - - for (a=0; anseq; b++)grid[b][maxnseq]=1;//prepare to keep everything - for ( tot=S->nseq, b=0; b< S->nseq; b++) - { - for (c=0; cnseq; - if ( tot>=f)break; - } - if (tot%s LIMIT: %d %%\n", FS->name[count[a][0]], f); - - } - } - vfclose (fp); - free_int (grid, -1); free_int (count, -1); - free_sequence (FS, FS->nseq); - - return get_fasta_sequence (fname, NULL); -} -/*********************************COPYRIGHT NOTICE**********************************/ -/*© Centro de Regulacio Genomica */ -/*and */ -/*Cedric Notredame */ -/*Tue Oct 27 10:12:26 WEST 2009. */ -/*All rights reserved.*/ -/*This file is part of T-COFFEE.*/ -/**/ -/* T-COFFEE is free software; you can redistribute it and/or modify*/ -/* it under the terms of the GNU General Public License as published by*/ -/* the Free Software Foundation; either version 2 of the License, or*/ -/* (at your option) any later version.*/ -/**/ -/* T-COFFEE is distributed in the hope that it will be useful,*/ -/* but WITHOUT ANY WARRANTY; without even the implied warranty of*/ -/* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the*/ -/* GNU General Public License for more details.*/ -/**/ -/* You should have received a copy of the GNU General Public License*/ -/* along with Foobar; if not, write to the Free Software*/ -/* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA*/ -/*............................................... |*/ -/* If you need some more information*/ -/* cedric.notredame@europe.com*/ -/*............................................... |*/ -/**/ -/**/ -/* */ -/*********************************COPYRIGHT NOTICE**********************************/ diff --git a/binaries/src/tcoffee/t_coffee_source/util.c b/binaries/src/tcoffee/t_coffee_source/util.c deleted file mode 100644 index f6e90a1..0000000 --- a/binaries/src/tcoffee/t_coffee_source/util.c +++ /dev/null @@ -1,7986 +0,0 @@ -#define FILE_CHECK 1 -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - - -#include "io_lib_header.h" -#include "util_lib_header.h" -#include "define_header.h" -#include "perl_header_lib.h" - -int my_vsscanf(char *buf, char *fmt, va_list parms); -static int get_vtmpnam2_root(); - -static char CACHE_4_TCOFFEE[1000]; -static char TMP_4_TCOFFEE[1000]; -static char DIR_4_TCOFFEE[1000]; -static int global_exit_signal; -static int no_error_report; -/*********************************************************************/ -/* */ -/* DICHOTOMY */ -/* */ -/* */ -/*********************************************************************/ -double dichotomy (double value, double target_value, double middle, double *bottom,double *top) -{ - if ( value> target_value)top[0]=middle; - else if ( value= 0; sp--) { - char *lb, *ub, *m; - char *P, *i, *j; - - lb = lbStack[sp]; - ub = ubStack[sp]; - - while (lb < ub) { - - /* select pivot and exchange with 1st element */ - offset = (ub - lb) >> 1; - P = lb + offset - offset % size; - exchange (lb, P, size); - - /* partition into two segments */ - i = lb + size; - j = ub; - while (1) { - while (i < j && compar(lb, i) > 0) i += size; - while (j >= i && compar(j, lb) > 0) j -= size; - if (i >= j) break; - exchange (i, j, size); - j -= size; - i += size; - } - - /* pivot belongs in A[j] */ - exchange (lb, j, size); - m = j; - - /* keep processing smallest segment, and stack largest */ - if (m - lb <= ub - m) { - if (m + size < ub) { - lbStack[sp] = m + size; - ubStack[sp++] = ub; - } - ub = m - size; - } else { - if (m - size > lb) { - lbStack[sp] = lb; - ubStack[sp++] = m - size; - } - lb = m + size; - } - } - } -} - -int pstrcmp(char *p1, char *p2); - - - -/*********************************************************************/ -/* */ -/* HEAPSORT */ -/* */ -/* */ -/*********************************************************************/ -FILE * hsort_file ( FILE *fp,int n,int len, size_t size,int first_comp_field, int n_comp_fields,int (*compare)(const void *, const void*,int, int, size_t),void * (*copy)(void *,void*, size_t)) - { - unsigned long i, ir, j, l; - void *rra, *rrb, *ra_j, *ra_j_1; - void *tp; - long start; - int FE=1; - - - - start=ftell(fp); - rra =vcalloc ( len, size); - rrb =vcalloc ( len, size); - ra_j =vcalloc ( len, size); - ra_j_1=vcalloc ( len, size); - - if ( n<2)return fp; - l=(n >>1)+1; - ir=n; - - for (;;) - { - if ( l>FE) - { - l--; - fseek( fp, start+(((l-1)*len)*size), SEEK_SET); - fread( rra, size, len,fp); /*rra=ra[--l]*/ - } - else - { - fseek( fp, start+((ir-1)*len*size), SEEK_SET); - fread( rra, size, len,fp); /*rra=ra[ir]*/ - - fseek( fp, start, SEEK_SET); - fread( rrb, size, len,fp); /*rrb=ra[0]*/ - - fseek( fp, start+((ir-1)*len*size), SEEK_SET); - fwrite(rrb,size, len, fp); /*ra[ir]=rrb=ra[0]*/ - - if (--ir ==FE) - { - fseek ( fp,start, SEEK_SET); - fwrite(rra,size, len, fp); /*ra[0]=rra*/ - break; - } - } - i=l; - j=l+l; - while ( j<=ir) - { - fseek ( fp, start+((j-1)*len*size), SEEK_SET); - fread (ra_j, size, len, fp); - - if ( j>1)+1; - ir=n; - - for (;;) - { - if ( l>FE) - { - copy ( rra, ra[--l],len); - } - else - { - copy ( rra, ra[ir],len); - copy ( ra[ir], ra[FE], len); - if (--ir ==FE) - { - copy ( ra[FE],rra,len); - break; - } - } - i=l; - j=l+l; - while ( j<=ir) - { - if ( j1) - { - i=(lower+upper) >> 1; - - fseek ( fp,start+(i*el_size*entry_len), SEEK_SET); - fread ( key2, el_size, entry_len,fp); - c=compare(key2,key, comp_first, comp_len,el_size); - - if ( c==0){p[0]=i;return key2;} - else if ( c< 0)upper=i; - else if ( c> 0)lower=i; - } - return NULL; - } - -void * bsearch_array ( const void *key,int *p,int comp_first, int comp_len,void**list,int len, int entry_len,size_t el_size, int (*compare)(const void *, const void*,int, int, size_t)) - { - int upper, lower, c, i; - void *key2; - - upper=-1; - lower=len; - while ((lower-upper)>1) - { - i=(lower+upper) >>1; - key2=list[i]; - c=compare(key2,key, comp_first,comp_len,el_size); - - if ( c==0){p[0]=i;return key2;} - else if ( c< 0)upper=i; - else if ( c> 0)lower=i; - } - return NULL; - } - -/**********************************************************************/ -/* */ -/* HSORT/BSEARCH WRAPPERS */ -/* */ -/* */ -/**********************************************************************/ -void **search_in_list_file ( void *key, int *p,int comp_len,FILE *fp, int len, size_t size, int entry_len) - { - static void **l; - - - if ( l==NULL)l=vcalloc ( 1, sizeof (int*)); - - l[0]=bsearch_file (key,p,0,comp_len,fp,len,entry_len,size,hsort_cmp); - if (l[0]==NULL)return NULL; - else return l; - } -void **search_in_list_array ( void *key,int *p, int comp_len,void **L, int len, size_t size, int entry_len) - { - static void **l; - - if ( l==NULL)l=vcalloc ( 1, sizeof (int*)); - - l[0]=bsearch_array (key,p,0,comp_len,L,len,entry_len,size,hsort_cmp); - if (l[0]==NULL)return NULL; - else return l; - } -void **hsort_list_array ( void **L, int len, size_t size, int entry_len, int first_comp_field, int n_comp_fields) - { - return hsort_array (L, len,entry_len, size,first_comp_field, n_comp_fields,hsort_cmp , hsort_cpy); - } -FILE *hsort_list_file ( FILE*fp , int len, size_t size, int entry_len, int first_comp_field, int n_comp_fields) - { - - return hsort_file (fp, len,entry_len, size,first_comp_field, n_comp_fields,hsort_cmp , hsort_cpy); - } - -int hsort_cmp ( const void *a, const void *b, int first, int clen, size_t size) - { - int*ax; - int*bx; - int p; - - ax=(int*)a; - bx=(int*)b; - for ( p=first; pb[0][c])return 1; - else if ( a[0][c]b[0][c])return 1; - else if ( a[0][c]=max_len[0]) - list=vrealloc ( list, (n[0]+100)*sizeof (int)); - max_len[0]=(n[0]+100); - - fscanf ( fp, "%d",&list[n[0]++]); - } - } - return fp; - } -/*********************************************************************/ -/* */ -/* QUANTILE */ -/* */ -/* */ -/*********************************************************************/ -int quantile (int argc, char *argv[]) -{ - FILE *fp; - int a,n,c, t; - int **list; - char *** string_list; - char *name, s1[1000], s2[1000]; - - - if ( argc<2) - { - - fprintf (stderr, "\nquantile []"); - fprintf (stderr, "\nSplits your data in two according to the quantile"); - fprintf (stderr, "\nReturns the top quantile or the bottom quantile"); - fprintf (stderr, "\nData must be in with two fields/line: Field1=index, Field2=value\n"); - fprintf (stderr, "\n1 27\n2 46\n3 5\n...\n"); - fprintf (stderr, "\nValue can either be integer or float"); - - - myexit (EXIT_FAILURE); - } - - if (strm (argv[1], "stdin")) - { - name=vtmpnam(NULL); - fp=vfopen (name, "w"); - while ( (c=fgetc(stdin))!=EOF) - { - fprintf ( fp, "%c", c); - } - vfclose (fp); - } - else - name=argv[1]; - - - - - n=count_n_line_in_file (name); - list=declare_int (n, 2); - string_list=declare_arrayN(3,sizeof (char), n, 2, 10); - - fp=vfopen (name, "r"); - n=0; - while ( (c=fgetc (fp))!=EOF) - { - ungetc(c,fp); - fscanf ( fp, "%s %s\n", s1, s2); - list[n][0]=(int)(atof(s1)*1000); - list[n][1]=(int)(atof(s2)*1000); - list[n][2]=n; - - sprintf (string_list[n][0],"%s",s1); - sprintf (string_list[n][1],"%s",s2); - - - n++; - } - sort_int_inv ( list,3, 1, 0, n-1); - t=quantile_rank ( list,1,n, atof (argv[2])); - if ( argc!=4 || (argc==4 && strm (argv[3], "bottom"))) - { - for (a=t; a=q)nr++; - vfree(l); - } - return nr; -} -/*********************************************************************/ -/* */ -/* DUPLICATION */ -/* */ -/* */ -/*********************************************************************/ -short* ga_memcpy_short ( short *array1, short *array2, int n) - { - int a; - - for ( a=0; a< n; a++) - array2[a]=array1[a]; - - return array2; - } -int * ga_memcpy_int ( int *array1, int *array2, int n) - { - int a; - - for ( a=0; a< n; a++) - array2[a]=array1[a]; - - return array2; - } - -float* ga_memcpy_float ( float *array1, float *array2, int n) - { - int a; - - for ( a=0; a< n; a++) - array2[a]=array1[a]; - - return array2; - } -double* ga_memcpy_double (double *array1, double*array2, int n) - { - int a; - - for ( a=0; a< n; a++) - array2[a]=array1[a]; - - return array2; - } - - - -/*recycle: get the bottom pointer on the top of the heap*/ - -void ** recycle (void **A, int l, int cycle) -{ - void **B; - int a,b,c; - B=vcalloc (l, sizeof (void*)); - - for ( c=0; c< cycle; c++) - { - for ( a=1, b=0; amax)max=index[i];\ - }\ - va_end(ap);\ - if (list==NULL)list=vcalloc ( max+1, sizeof (type));\ - for ( i=0; inseq; - rec_list=realloc_aln_array ( rec_list, end-first); - for ( a=first, b=rec_list_start; a, UNDEFINED_SHORT) -RETURN_MAX_COOR(char,write_size_char,read_size_char,return_max_coor_char,>, UNDEFINED_CHAR) -RETURN_MAX_COOR(int,write_size_int,read_size_int,return_max_coor_int,>, UNDEFINED_INT) -RETURN_MAX_COOR(float,write_size_float,read_size_float,return_max_coor_float,>, UNDEFINED_FLOAT) -RETURN_MAX_COOR(double,write_size_double,read_size_double,return_max_coor_double,>, UNDEFINED_DOUBLE) -RETURN_MAX_COOR(short,write_size_short,read_size_short,return_min_coor_short,<, UNDEFINED_SHORT) -RETURN_MAX_COOR(char,write_size_char,read_size_char,return_min_coor_char,<, UNDEFINED_CHAR) -RETURN_MAX_COOR(int,write_size_int,read_size_int,return_min_coor_int,<, UNDEFINED_INT) -RETURN_MAX_COOR(float,write_size_float,read_size_float,return_min_coor_float,<, UNDEFINED_FLOAT) -RETURN_MAX_COOR(double,write_size_double,read_size_double,return_min_coor_double,<, UNDEFINED_DOUBLE) -#define RETURN_MAX(type,wf,rf,function,comparison,undef)\ -type function ( type ** array, int len_array, int field)\ - {\ - type max;\ - int a;\ -\ - if (array==NULL || len_array==0)return 0;\ - else\ - {\ - if (len_array==-1)len_array=rf(array,sizeof (type*));\ - max=array[0][field];\ - for ( a=1; a< len_array; a++)\ - if ( max==undef)max=array[a][field];\ - else if ( array[a][field]!=undef)max=( array[a][field] comparison max)?array[a][field]:max;\ - }\ - return (max==undef)?0:max;\ - } - -RETURN_MAX(short,write_size_short,read_size_short,return_max_short,>,UNDEFINED_SHORT) -RETURN_MAX(char,write_size_char,read_size_char,return_max_char,>,UNDEFINED_CHAR) -RETURN_MAX(int,write_size_int,read_size_int,return_max_int,>,UNDEFINED_INT) -RETURN_MAX(float,write_size_float,read_size_float,return_max_float,>,UNDEFINED_FLOAT) -RETURN_MAX(double,write_size_double,read_size_double,return_max_double,>,UNDEFINED_DOUBLE) -RETURN_MAX(short,write_size_short,read_size_short,return_min_short,<,UNDEFINED_SHORT) -RETURN_MAX(char,write_size_char,read_size_char,return_min_char,<,UNDEFINED_CHAR) -RETURN_MAX(int,write_size_int,read_size_int,return_min_int,<,UNDEFINED_INT) -RETURN_MAX(float,write_size_float,read_size_float,return_min_float,<,UNDEFINED_FLOAT) -RETURN_MAX(double,write_size_double,read_size_double,return_min_double,<,UNDEFINED_DOUBLE) - - - -#define RETURN_2DMAX(type,wf,rf,function,comparison,undef)\ -type function ( type ** array, int start, int len_array, int first_field, int number_field)\ - {\ - type max;\ - int a,b;\ - if (array==NULL || len_array==0 || first_field<0 || number_field==0)return 0;\ - else\ - {max=array[start][first_field];\ - for ( a=start; a< start+len_array; a++)\ - for (b=first_field; b< first_field+number_field; b++)\ - if (array[a][b]!=undef)max=( array[a][b] comparison max)?array[a][b]:max;\ - }\ - return max;\ - } -RETURN_2DMAX(short,write_size_short,read_size_short,return_2Dmax_short,>, UNDEFINED_SHORT) -RETURN_2DMAX(char,write_size_char,read_size_char,return_2Dmax_char,>,UNDEFINED_CHAR) -RETURN_2DMAX(int,write_size_int,read_size_int,return_2Dmax_int,>,UNDEFINED_INT) -RETURN_2DMAX(float,write_size_float,read_size_float,return_2Dmax_float,>,UNDEFINED_FLOAT) -RETURN_2DMAX(double,write_size_double,read_size_double,return_2Dmax_double,>,UNDEFINED_DOUBLE) -RETURN_2DMAX(short,write_size_short,read_size_short,return_2Dmin_short,<,UNDEFINED_SHORT) -RETURN_2DMAX(char,write_size_char,read_size_char,return_2Dmin_char,<,UNDEFINED_CHAR) -RETURN_2DMAX(int,write_size_int,read_size_int,return_2Dmin_int,<,UNDEFINED_INT) -RETURN_2DMAX(float,write_size_float,read_size_float,return_2Dmin_float,<,UNDEFINED_FLOAT) -RETURN_2DMAX(double,write_size_double,read_size_double,return_2Dmin_double,<,UNDEFINED_DOUBLE) - -#define RETURN_2DMAX_COOR(type,wf,rf,function,compare,undef)\ -type function ( type **array, int start1 , int end1, int start2, int end2,int *i, int *j)\ - {\ - int a, b;\ - double max=undef;\ - if ( start1==-1)start1=0;\ - if ( start2==-1)start2=0;\ - if ( end1==-1)end1=rf(array,sizeof (type*));\ - if ( end2==-1)end2=rf(array[0],sizeof (type));\ - if ( array==NULL || (end1-start1)==0 || (end1-start1)>rf ( array,sizeof (type*)) || (end2-start2)==0)\ - {\ - return 0;\ - i[0]=0;\ - j[0]=0;\ - }\ - i[0]=0;\ - j[0]=0;\ - for ( a=start1; a,UNDEFINED_SHORT) -RETURN_2DMAX_COOR(char,write_size_char,read_size_char,return_2Dmax_coor_char,>,UNDEFINED_CHAR) -RETURN_2DMAX_COOR(int,write_size_int,read_size_int,return_2Dmax_coor_int,>,UNDEFINED_INT) -RETURN_2DMAX_COOR(float,write_size_float,read_size_float,return_2Dmax_coor_float,>,UNDEFINED_FLOAT) -RETURN_2DMAX_COOR(double,write_size_double,read_size_double,return_2Dmax_coor_double,>,UNDEFINED_DOUBLE) -RETURN_2DMAX_COOR(short,write_size_short,read_size_short,return_2Dmin_coor_short,<,UNDEFINED_SHORT) -RETURN_2DMAX_COOR(char,write_size_char,read_size_char,return_2Dmin_coor_char,<,UNDEFINED_CHAR) -RETURN_2DMAX_COOR(int,write_size_int,read_size_int,return_2Dmin_coor_int,<,UNDEFINED_INT) -RETURN_2DMAX_COOR(float,write_size_float,read_size_float,return_2Dmin_coor_float,<,UNDEFINED_FLOAT) -RETURN_2DMAX_COOR(double,write_size_double,read_size_double,return_2Dmin_coor_double,<,UNDEFINED_DOUBLE) - -#define RETURN_WMEAN(type,wf,rf,function,sum_function,undef)\ -double function ( type **array, int len, int wfield,int sfield)\ - {\ - double b;\ - int a, c;\ - if ( len==0 ||array==NULL || len>rf ( array,sizeof (type*)))return 0;\ - else\ - {\ - if ( len==-1)len=rf(array,sizeof (type*));\ - for ( b=0, c=0,a=0; a< len; a++)\ - {\ - if (array[a][sfield]!=undef && array[a][wfield]!=undef )\ - {\ - b+=array[a][sfield];\ - c+=array[a][wfield];\ - }\ - }\ - }\ - return (c==0)?0:(b/c);\ - } -RETURN_WMEAN(short,write_size_short,read_size_short,return_wmean_short, return_sum_short,UNDEFINED_SHORT) -RETURN_WMEAN(char,write_size_char,read_size_char, return_wmean_char,return_sum_char,UNDEFINED_CHAR) -RETURN_WMEAN(int,write_size_int,read_size_int,return_wmean_int,return_sum_int,UNDEFINED_INT) -RETURN_WMEAN(float,write_size_float,read_size_float,return_wmean_float,return_sum_float,UNDEFINED_FLOAT) -RETURN_WMEAN(double,write_size_double,read_size_double,return_wmean_double,return_sum_double,UNDEFINED_DOUBLE) - - -#define RETURN_MEAN(type,wf,rf,function,sum_function,undef)\ -double function ( type **array, int len, int field)\ - {\ - double b;\ - int a, c;\ - if ( len==0 ||array==NULL || len>rf ( array,sizeof(type*)))return 0;\ - else\ - {\ - for ( b=0, c=0,a=0; a< len; a++)\ - {\ - if (array[a][field]!=undef)\ - {\ - b+=array[a][field];\ - c++;\ - }\ - }\ - }\ - return (c==0)?0:(b/c);\ - } -RETURN_MEAN(short,write_size_short,read_size_short,return_mean_short, return_sum_short,UNDEFINED_SHORT) -RETURN_MEAN(char,write_size_char,read_size_char, return_mean_char,return_sum_char,UNDEFINED_CHAR) -RETURN_MEAN(int,write_size_int,read_size_int,return_mean_int,return_sum_int,UNDEFINED_INT) -RETURN_MEAN(float,write_size_float,read_size_float,return_mean_float,return_sum_float,UNDEFINED_FLOAT) -RETURN_MEAN(double,write_size_double,read_size_double,return_mean_double,return_sum_double,UNDEFINED_DOUBLE) - -#define RETURN_SUM(type,wf,rf,function,undef)\ -type function(type **array, int len, int field)\ -{\ - int a;\ - type b=0;\ - if ( len==0 ||array==NULL)return 0;\ - else\ - {\ - if ( len==-1)len=rf ( array,sizeof (type*));\ - for ( a=0; a< len; a++)\ - if ( array[a][field]!=undef)b+=array[a][field];\ - }\ - return b;\ - } -RETURN_SUM(short,write_size_short,read_size_short, return_sum_short,UNDEFINED_SHORT) -RETURN_SUM(char,write_size_char,read_size_char,return_sum_char,UNDEFINED_CHAR) -RETURN_SUM(int,write_size_int,read_size_int,return_sum_int,UNDEFINED_INT) -RETURN_SUM(float,write_size_float,read_size_float,return_sum_float,UNDEFINED_FLOAT) -RETURN_SUM(double,write_size_double,read_size_double,return_sum_double,UNDEFINED_DOUBLE) - -#define RETURN_SD(type,wf,rf,function,undef)\ - type function ( type **array, int len, int field,type mean) \ - {\ - int a;\ - double c=0;\ - if ( len==0 ||array==NULL || len>rf ( array,sizeof(type*)))return 0;\ - else\ - {\ - for ( a=0; a< len; a++)\ - {\ - if ((array[a][field]!=undef) && (mean-array[a][field])!=0)\ - c+=((double)mean-array[a][field])*((double)mean-array[a][field]);\ - }\ - c=sqrt(c)/(double)len;\ - return (type)MAX(c,1);\ - }\ - } -RETURN_SD(short,write_size_short,read_size_short, return_sd_short,UNDEFINED_SHORT) -RETURN_SD(char,write_size_char,read_size_char,return_sd_char,UNDEFINED_CHAR) -RETURN_SD(int,write_size_int,read_size_int,return_sd_int,UNDEFINED_INT) -RETURN_SD(float,write_size_float,read_size_float,return_sd_float,UNDEFINED_FLOAT) -RETURN_SD(double,write_size_double,read_size_double,return_sd_double,UNDEFINED_DOUBLE) -double return_z_score( double x,double sum, double sum2, double n) - { - double sd; - double avg; - double z; - - - sd=(n==0)?0:sqrt(sum2*n -sum*sum)/n; - avg=(n==0)?0:(sum/n); - z=(sd==0)?0:(x-avg)/sd; - return z; - } - -double* return_r (double **list, int n) -{ - double Sy, Sx, Sxy, Sx2, Sy2,r_up, r_low, x, y; - double *r; - int a; - - r=vcalloc ( 3, sizeof (double)); - Sy=Sx=Sxy=Sx2=Sy2=0; - - for ( a=0; a,UNDEFINED_SHORT) -RETURN_MAX_HORIZ(char,write_size_char,read_size_char,return_max_char_hor,>,UNDEFINED_CHAR) -RETURN_MAX_HORIZ(int,write_size_int,read_size_int,return_max_int_hor,>,UNDEFINED_INT) -RETURN_MAX_HORIZ(float,write_size_float,read_size_float,return_max_float_hor,>,UNDEFINED_FLOAT) -RETURN_MAX_HORIZ(double,write_size_double,read_size_double,return_max_double_hor,>,UNDEFINED_DOUBLE) - -RETURN_MAX_HORIZ(short,write_size_short,read_size_short,return_min_short_hor,<,UNDEFINED_SHORT) -RETURN_MAX_HORIZ(char,write_size_char,read_size_char,return_min_char_hor,<,UNDEFINED_CHAR) -RETURN_MAX_HORIZ(int,write_size_int,read_size_int,return_min_int_hor,<,UNDEFINED_INT) -RETURN_MAX_HORIZ(float,write_size_float,read_size_float,return_min_float_hor,<,UNDEFINED_FLOAT) -RETURN_MAX_HORIZ(double,write_size_double,read_size_double,return_min_double_hor,<,UNDEFINED_DOUBLE) - - - -#define BEST_OF_MANY(type,wf,rf,function,undef)\ -type function (int n, ...)\ - {\ - va_list ap;\ - int *fop,a;\ - type v, best;\ - int maximise;\ - /*first Arg: number of values\ - 2nd Arg: maximise(1)/minimise(0)\ - 3rd Arg: *int contains the indice of the best value\ - ... Arg: n type values\ - */\ - va_start (ap, n);\ - maximise=va_arg (ap, int);\ - fop=va_arg (ap, int*);\ - best=va_arg (ap, type);\ - fop[0]=0;\ - for ( a=1; abest)\ - {\ - fop[0]=a;\ - best=v;\ - }\ - else if ( maximise==0 && vmax)?strlen ( array[a]):max; - - return max; - } - - -int return_minlen ( char ** array, int number) - { - int a; - int min; - - min=strlen( array[0]); - for ( a=1; a< number; a++) - min=( strlen ( array[a])>min)?strlen ( array[a]):min; - - return min; - } - - - -float return_mean_diff_float ( float **array, int len, int field,float mean) - { - int a; - float b=0; - - for ( a=0; a< len; a++) - { - if ( (mean-array[a][field])!=0) - b+=sqrt((double)((float) ( mean-array[a][field])*(float)(mean-array[a][field]))); - } - - return ((float)b/(float)len); - } - - - -void inverse_int ( int**array, int len, int field, int max, int min) - { - int a; - for ( a=0; a< len; a++) - array[a][field]=max-array[a][field]+min; - } -void inverse_float ( float**array, int len, int field, int max, int min) - { - int a; - for ( a=0; a< len; a++) - array[a][field]=max-array[a][field]+min; - } -void inverse_2D_float ( float **array, int start, int len, int start_field, int number_field, float max,float min) - { - int a, b; - for ( a=0; a< start+len; a++) - for ( b=start_field; b< start_field+ number_field; b++) - array[a][b]=max-array[a][b]+min; - } - -int max_int (int*i, ...) -{ - va_list ap; \ - int index, best_value=0, value; - int a=0; - // expects n values : n, &index, i1, v1, i2, v2...., -1 - va_start(ap, i); - while ((index=va_arg(ap,int))!=-1) - { - value=va_arg(ap, int); - if ( a==0 || value>best_value) - { - i[0]=index; - best_value=value; - a=1; - } - } - va_end (ap); - return best_value; -} - -/*********************************************************************/ -/* */ -/* SHELL INTERFACES */ -/* */ -/* */ -/*********************************************************************/ -char* getenv4debug (const char * val) -{ - /*efficient mean of getting an environment variable: checks only if one DEBUG is on*/ - static int check; - - if ( !check) - { - - if (getenv ("DEBUG_BLAST"))check=1; - else if ( getenv ("DEBUG_TREE_COMPARE"))check=1; - else if ( getenv ("DEBUG_MALN"))check=1; - else if ( getenv ("DEBUG_EXTRACT_FROM_PDB"))check=1; - else if ( getenv ("DEBUG_LIBRARY"))check=1; - else if ( getenv ("DEBUG_FUGUE"))check=1; - - else if ( getenv ("DEBUG_REFORMAT"))check=1; - else if ( getenv ("DEBUG_RECONCILIATION"))check=1; - else if ( getenv ("DEBUG_TMP_FILE"))check=1; - else if ( getenv ("DEBUG_TREE"))check=1; - - else if ( getenv ("DEBUG_SEQ_REFORMAT") && strm (PROGRAM, "SEQ_REFORMAT"))check=2; - else if ( getenv ("DEBUG_TCOFFEE") && strm (PROGRAM, "T-COFFEE"))check=2; - else check=-1; - } - - if ( check>0 && strm ( val, "DEBUG_TMP_FILE")) - { - return "1"; - } - - else if ( check==1) - { - return getenv (val); - } - else if ( check==2) - { - return "1"; - } - else - return NULL; -} - -char* get_env_variable ( const char *var, int mode) - { - /*mode 0: return NULL if variable not set*/ - /*mode 1: crash if variable not set*/ - if ( !getenv (var)) - { - if (mode==NO_REPORT)return NULL; - else if ( mode ==IS_NOT_FATAL) - { - fprintf ( stderr, "\nYou must set the variable %s [FATAL]\n", var); - return NULL; - } - else - { - fprintf ( stderr, "\nYou must set the variable %s [FATAL]\n", var); - myexit (EXIT_FAILURE); - return NULL; - } - } - else return getenv (var); - } - -void get_pwd ( char *name) - { - char *string; - char command[1000]; - FILE *fp; - - - string=vtmpnam(NULL); - sprintf ( command, "pwd > %s", string); - my_system (command); - fp=vfopen ( string, "r"); - fscanf ( fp, "%s",name); - vfclose (fp); - sprintf ( command, "rm %s", string); - my_system ( command); - } -int pg_is_installed ( char *pg) - { - char *fname; - char command[1000]; - FILE *fp; - int r=0; - - return 1; - - fname= vtmpnam(NULL); - - sprintf ( command, "which %s > %s", pg, fname); - my_system ( command); - - - if ((fp=find_token_in_file ( fname, NULL, "Command"))){r=1;vfclose(fp);} - - - return r; - - } - - -/*********************************************************************/ -/* */ -/* MISC */ -/* */ -/*********************************************************************/ -char *num2plot (int value, int max, int line_len) - { - int len; - int value_len; - char *buf; - static char *string; - - if ( string==NULL)string=vcalloc (1000, sizeof(char)); - - if ( line_len==-1)len=30; - else len=line_len; - - value_len=((float)value/(float)max)*(float)len; - if ( value==0) - sprintf ( string, "|"); - else - { - buf=generate_string(value_len, '*'); - sprintf ( string,"%s", buf); - vfree(buf); - } - return string; - } - -int perl_strstr ( char *string, char *pattern) -{ - char *tmp; - FILE *fp; - int r; - char command[10000]; - char *string2; - - if (!string) return 0; - if (!pattern) return 0; - - - - string2=vcalloc ( strlen (string)+1, sizeof (char)); - sprintf ( string2,"%s", string); - string2=substitute (string2, "(", " "); - string2=substitute (string2, ")", " "); - string2=substitute (string2, "'", " "); - tmp=vtmpnam(NULL); - sprintf (command, "perl -e '$s=\"%s\";$x=($s=~/%s/);$x=($x==1)?1:0;print $x;'>%s", string2, pattern,tmp); - system ( command); - if (check_file_exists(tmp)) - { - fp=vfopen (tmp, "r"); - fscanf (fp, "%d", &r); - vfclose (fp); - } - else - { - fprintf ( stderr, "COM: %s\n"); - r=0; - } - vfree (string2); - return r; -} -float grep_function ( char *pattern, char *file) - { - char command [1000]; - int a, b, l; - char buf1[100]; - char buf2[100]; - FILE*fp; - char *s; - float f; - - - - s=vtmpnam(NULL); - - sprintf ( command, "grep %s %s > %s",pattern,file, s); - my_system ( command); - if ((fp=vfopen (s, "r"))==NULL )return 0; - else - { - fgets ( command, 900, fp); - l=strlen ( command); - while ( !isdigit (command[l]))l--; - a=0; - while ( isdigit (command[l]) || command[l]=='.') - { - buf1[a++]=command[l]; - l--; - } - buf1[a]='\0'; - l=strlen (buf1); - for ( a=0, b=l-1;a< l; a++, b--) - buf2[b]=buf1[a]; - buf2[l]='\0'; - - sscanf ( buf2, "%f", &f); - sprintf ( command,"rm %s", s); - my_system ( command); - vfclose (fp); - return f; - } - } - -void crash_if ( int val, char *s) - { - if ( val==0)crash(s); - } -void crash ( char *s) - { - int *a; - - - - fprintf ( stderr, "%s",s); - a=vcalloc ( 10, sizeof (int)); - a[20]=1; - error_exit(); - } - -static int *local_table; -int ** make_recursive_combination_table ( int tot_n_param, int *n_param, int *nc, int**table, int field) - { - int a, b, c; - - /* makes a table of all possible combinations*/ - - if ( tot_n_param==0) - { - nc[0]=1; - fprintf ( stderr, "\nNULL RETURNED"); - return NULL; - } - if (table==NULL) - { - if ( local_table!=NULL)vfree (local_table); - local_table=vcalloc ( tot_n_param, sizeof (int)); - field=0; - for ( a=0; a< tot_n_param; a++)local_table[a]=-1; - for ( a=0; a< tot_n_param; a++)nc[0]=nc[0]*n_param[a]; - - - table=declare_int ( nc[0],tot_n_param); - nc[0]=0; - } - - for ( b=0; bsuffix)sprintf ( name, "%s.%s", F->name, F->suffix); - else sprintf (name, "%s", F->name); - free_fname (F); - return name; -} -Fname* parse_fname ( char *array) - { - int l; - Fname *F; - - - - F=declare_fname (sizeof (array)); - - sprintf ( F->full, "%s", array); - sprintf ( F->path, "%s", array); - l=strlen (array); - while (l!=-1 && (F->path)[l]!='/')(F->path)[l--]='\0'; - - sprintf ( F->name, "%s", array+l+1); - l=strlen (F->name); - while (l!=-1) - { - if((F->name)[l]=='.') - { - F->name[l]='\0'; - sprintf ( F->suffix, "%s", F->name+l+1); - break; - } - else l--; - } - - return F; - } -char *filename2path (char *name) -{ - char *nname; - int x; - if (isdir (name))return name; - - x=strlen (name)-1; - nname=vcalloc (x+2, sizeof (char)); - sprintf ( nname, "%s", name); - while ( x >=0 && nname[x]!='/')nname[x--]='\0'; - - if ( !isdir (nname) || !nname[0]){vfree (nname); return NULL;} - return nname; -} - - - - - -char *extract_suffixe ( char *array) - { - int l; - char *new_string; - char *x; - l=strlen (array); - new_string=vcalloc ( l+1, sizeof (char)); - sprintf (new_string, "%s",array); - - x=new_string+l; - while (x!=new_string && x[0]!='.' && x[0]!='/' )x--; - if ( x[0]=='.')x[0]='\0'; - else if (x[0]=='/')return x+1; - - while ( x!=new_string && x[0]!='/')x--; - - return (x[0]=='/')?x+1:x; - } -void string_array_upper ( char **string, int n) - { - int a; - for ( a=0; a< n; a++)upper_string (string[a]); - } -void string_array_lower ( char **string, int n) - { - int a; - for ( a=0; a< n; a++)lower_string (string[a]); - } - -char *upper_string ( char *string) - { - int len, a; - - len=strlen ( string); - for ( a=0; a< len; a++)string[a]=toupper ( string[a]); - return string; - } -char *lower_string ( char *string) - { - int len, a; - - len=strlen ( string); - for ( a=0; a< len; a++)string[a]=tolower ( string[a]); - return string; - } -void string_array_convert ( char **array, int n_strings, int ns, char **sl) - { - int a; - - for ( a=0; a< n_strings; a++)string_convert ( array[a], ns, sl); - } -void string_convert( char *string, int ns, char **sl) - { - int a, l; - l=strlen ( string); - for ( a=0; a< l; a++) - string[a]=convert(string[a], ns, sl); - } -int convert ( char c, int ns, char **sl) - { - int a; - int return_char; - - for ( a=0; a< ns; a++) - { - if ((return_char=convert2 ( c, sl[a]))!=-1) - return return_char; - } - return c; - - - } -int convert2 ( char c, char *list) - { - int a; - int l1; - int return_char; - - l1=strlen ( list); - - return_char=(list[l1-1]=='#')?c:list[l1-1]; - - for ( a=0; a< l1; a++) - if (list[a]=='#')return return_char; - else if ( list[a]==c)return return_char; - - return -1; - } -char* substitute_old ( char *string_in, char *t, char *r) -{ - char *string_out; - char *p, *heap_in; - int delta, l; - /*REplaces every occurence of token t with token r in string_in*/ - - if ( string_in==NULL || t==NULL || r==NULL) return string_in; - - heap_in=string_in; - - l=read_array_size_new ((void*)string_in)+1; - - string_out=vcalloc (l, sizeof (char)); - delta=strlen(r)-strlen (t); - delta=(delta<0)?0:delta; - - while ( (p=strstr ( string_in, t))!=NULL) - { - - p[0]='\0'; - if ( delta) - { - l+=delta; - string_out=vrealloc(string_out, sizeof (char)*l); - } - - strcat ( string_out, string_in); - strcat ( string_out, r); - string_in=p+strlen (t); - } - strcat ( string_out, string_in); - if (l0)?(lr-lt):0; - nt=0; - while ( (p=strstr (string_in, t))!=NULL) - { - string_in=p+lt; - nt++; - } - string_in=heap_in; - - lso=nt*delta+lsi; - string_out=vcalloc (lso+1, sizeof (char)); - - while ((N==0 ||n end1)string1[a]=x; - for ( a=0; a< l2; a++)if ( a end2)string2[a]=x; - - free_int ( array, l1); - - return max_val; - } - } - -int get_string_line ( int start, int n_lines, char *in, char *out) - { - int nl=0; - int a=0; - int c=0; - - while ( nlA\nthecat\n>B\nthecat\n"); - vfclose (fp); - result=safe_system (command); - printf_system ( "rm %s.*", name); - vfree (name); - if (result) {myexit (EXIT_FAILURE);return 0;} - else return 1; - } -} - -char** merge_list ( char **argv, int *argc) - { - int a, b; - int n_in; - char **out; - char current [STRING]; - - out=declare_char (argc[0], STRING); - n_in=argc[0]; - argc[0]=0; - - a=0; - while (a< n_in && !is_parameter ( argv[a])) - { - sprintf (out[argc[0]++], "%s", argv[a]); - argv[a][0]='\0'; - a++; - } - - - for ( a=0; a< n_in; a++) - { - if ( is_parameter (argv[a])) - { - sprintf ( out[argc[0]++], "%s", argv[a]); - sprintf ( current, "%s", argv[a]); - - for ( b=0; b< n_in;) - { - if ( is_parameter (argv[b]) && strm (current, argv[b])) - { - argv[b][0]='\0'; - b++; - while ( b=MAX_N_PARAM) - { - fprintf ( stderr, "\nERROR: too many parameters, recompile with MAX_N_PARAM set at a higher velue [FATAL:%s]\n", PROGRAM);\ - myexit (EXIT_FAILURE); - } - - for ( a=0; a< n_in; a++) - { - - - - if (cont)ar=get_list_of_tokens( argv[a], separators,&n_ar); - else ar=get_list_of_tokens( argv[a],"",&n_ar); - - - for ( b=0; b< n_ar; b++) - { - out[argc[0]]=vcalloc( strlen (ar[b])+1, sizeof (char)); - sprintf (out[argc[0]++], "%s", ar[b]); - } - free_char (ar, -1); - ar=NULL; - if ( strstr (argv[a], "-other_pg"))cont=0; - } - free_char (ar, -1); - return out; - } - -char *invert_string2 (char *string) -{ - char *buf; - int a, b, l; - - l=strlen (string); - buf=vcalloc ( l+1, sizeof (char)); - for ( a=l-1, b=0; a>=0; a--, b++) - buf[b]=string[a]; - sprintf (string, "%s", buf); - vfree (buf); - return string; -} -char *invert_string (char *string) -{ - return string2inverted_string(string); -} -char* string2inverted_string(char *string) -{ - char *buf; - int a, b, l; - - l=strlen (string); - buf=vcalloc ( l+1, sizeof (char)); - for ( a=l-1, b=0; a>=0; a--, b++) - buf[b]=string[a]; - return buf; -} - -char ** get_list_of_tokens ( char *in_string, char *separators, int *n_tokens) -{ - char **list=NULL; - char *p=NULL; - char *string; - - - n_tokens[0]=0; - if ( in_string==NULL || strm(in_string, "")); - else if ( in_string[0]=='[') - { - list=declare_char (1, strlen ( in_string)+1); - sprintf ( list[n_tokens[0]], "%s",in_string); - n_tokens[0]++; - } - else - { - list=declare_char (strlen ( in_string)+1, 1); - string=vcalloc ( strlen(in_string)+1, sizeof (char)); - sprintf ( string, "%s", in_string); - - while ( (p=strtok ((p==NULL)?string:NULL, ((separators==NULL)?SEPARATORS:separators)))!=NULL) - { - list[n_tokens[0]]=vrealloc ( list[n_tokens[0]], sizeof (char) *strlen (p)+1); - sprintf ( list[n_tokens[0]], "%s", p); - n_tokens[0]++; - } - - vfree (string); - } - return list; - } - -char **ungap_array ( char **array, int n) - { - int a; - for ( a=0; a< n; a++)ungap(array[a]); - return array; - } - -void ungap ( char *seq) -{ - remove_charset ( seq, "ungap"); -} -int seq2len (char *seq, char *pset,char *nset) -{ - int a, l, t=0; - //count all the residues in pset and NOT in nset - if ( !seq) return 0; - - l=strlen (seq); - //returns the len of the string - for (a=0; a< l; a++) - { - char c=seq[a]; - if ( pset && nset && strchr (pset, c) && !strchr (nset, c))t++; - else if ( pset && strchr (pset, c))t++; - else if ( nset && !strchr (nset, c))t++; - } - return t; -} -int seq2res_len (char *seq) -{ - return seq2len (seq, NULL, GAP_LIST); -} -char* remove_charset_from_file (char *fname, char *set) -{ - char *tmp; - char c; - FILE *fp1; - FILE *fp2; - - fp1=vfopen (fname, "r"); - fp2=vfopen (tmp=vtmpnam (NULL), "w"); - while ( (c=fgetc(fp1))!=EOF) - { - if (!strchr ( set,c))fprintf ( fp2, "%c", c); - } - vfclose (fp1); - vfclose (fp2); - return tmp; -} - -void remove_charset ( char *seq, char *set) - { - int a, b, l; - char *set2; - - set2=vcalloc (256, sizeof (char)); - if ( strm (set, "!alnum")) - { - for ( b=0,a=1;a< 256; a++)if ( !isalnum (a))set2[b++]=a; - } - else if ( strm ( set, "ungap")) - { - sprintf ( set2, "%s", GAP_LIST); - } - else - { - sprintf ( set2, "%s", set); - } - - l=strlen ( seq); - for (b=0, a=0; a<=l; a++) - { - if ( strchr ( set2, seq[a])); - else seq[b++]=seq[a]; - } - seq[b]='\0'; - vfree (set2); - } - - -char **char_array2number ( char ** array, int n) - { - int a; - for ( a=0; a< n; a++)array[a]=char2number(array[a]); - return array; - } -char *char2number ( char * array) - { - int a, l; - - - l=strlen ( array); - for ( a=0; a< l; a++) - { - if ( isdigit(array[a]) && array[a]!=NO_COLOR_RESIDUE && array[a]!=NO_COLOR_GAP )array[a]-='0'; - else if ( array[a]<9); - else if ( array[a]==NO_COLOR_RESIDUE || array[a]==NO_COLOR_GAP)array[a]=NO_COLOR_RESIDUE; - } - return array; - } -long atop (char*p) -{ - /*turns a char into a pointer*/ - if ( p==NULL) return 0; - else return atol(p); -} - -char *mark_internal_gaps(char *seq, char symbol) -{ - int l, a, gap; - int in_seq; - char *cache_seq; - - l=strlen(seq); - cache_seq=vcalloc ( l+1, sizeof (char)); - sprintf ( cache_seq, "%s", seq); - - for ( gap=0, in_seq=0,a=0; a< l; a++) - { - gap=is_gap(seq[a]); - if ( !gap && !in_seq)in_seq=1; - if (gap && in_seq)seq[a]=symbol; - } - - for (gap=0, in_seq=0,a=l-1; a>=0; a--) - { - gap=is_gap(seq[a]); - if ( !gap && !in_seq)break; - if (gap && !in_seq)seq[a]=cache_seq[a]; - } - vfree(cache_seq); - return seq; -} - -void splice_out ( char *seq, char x) - - { - int a, b, l; - - l=strlen ( seq); - for (b=0, a=0; a<=l; a++) - if ( seq[a]==x); - else seq[b++]=seq[a]; - seq[b]='\0'; - } -char *splice_out_seg ( char *seq, int pos, int len) -{ - int l, a; - - if (seq==NULL || pos<0) return seq; - l=strlen (seq); - if ( l<(pos+len)) - printf_exit ( EXIT_FAILURE, stderr, "Splice_out_seg out of bound: Length %d seg: [%d %d] [splice_out_seg::util.c][FATAL:%s]\n", l, pos, pos+len, PROGRAM); - l-=len; - for (a=pos; a< l; a++) - seq[a]=seq[a+len]; - seq[a]='\0'; - return seq; -} - -int isblanc ( char *buf) - { - int a, l; - - if ( buf==NULL)return 0; - l=strlen (buf); - for ( a=0; a< l; a++) - if (isalnum (buf[a]))return 0; - return 1; - } - - - -int is_number ( char *num) - { - int a, l; - l=strlen (num); - - for (a=0;amax_len) - { - max_len=l; - max_index=a; - } - } - if (index!=NULL)index[0]=max_index; - if (len!=NULL)len[0]=max_len; - } - - return max_len; - } - -int get_shortest_string (char **array,int n, int *len, int *index) - { - int a, l; - int min_len; - - if ( n==0|| array==NULL || read_size_char ( array,sizeof (char*))tms_stime*milli_sec_conv; - tms_utime=(long)time_buf->tms_utime*milli_sec_conv; - - - - - if ( ref==0) - { - ref=(tms_stime+tms_utime); - return 0; - } - else - { - time=(tms_utime+tms_stime)-ref; - return (int) ((time)/ticks); - } - } -int get_ctime () - { - static long time; - struct tms time_buf[1]; - long tms_cutime, tms_cstime; - - if ( ticks==0)ticks = sysconf(_SC_CLK_TCK); - times ( time_buf); - - - - tms_cstime=(long)time_buf->tms_cstime*milli_sec_conv; - tms_cutime=(long)time_buf->tms_cutime*milli_sec_conv; - - if ( ref==0) - { - child=1; - ref=tms_cstime+tms_cutime; - return 0; - } - else - { - time=(tms_cutime+tms_cstime)-ref; - return (int)((time)/ticks); - } - } -int reset_time() - { - ref=0; - return (int)get_time(); - } -int increase_ref_time(int increase) - { - if ( ref==0)get_time(); - - ref-=(long)ticks*(long)increase; - if (ref==0)ref++; - return (int)ref; - } - -/*********************************************************************/ -/* */ -/* SYSTEM CALLS */ -/* */ -/* */ -/*********************************************************************/ -int evaluate_sys_call_io ( char *out_file, char *com, char *fonc) - { - if ( check_file_exists (out_file))return 1; - else - { - fprintf ( stderr, "\nCommand\n%s\nFailed to produce File\n%s\n", com, out_file); - return 0; - } - } -void HERE (char *string, ...) -{ - va_list ap; - - va_start (ap, string); - fprintf ( stderr, "HERE: "); - vfprintf (stderr, string, ap); - fprintf ( stderr, "\n"); - va_end (ap); - -} -void printf_exit (int exit_code, FILE *fp, char *string, ...) -{ - - - va_list ap; - - va_start (ap, string); - vfprintf (fp, string, ap); - va_end (ap); - myexit (exit_code); -} - - -int fprintf_fork (FILE *fp, char *string, ...) -{ - va_list ap; - static char *openF; - static char *closeF; - - char *pid_file; - FILE *flag; - struct flock fl; - int fd,a; - - char buf[100000]; - - if (!openF) - { - openF=vcalloc (100, sizeof (char)); - sprintf (openF, "cedric1"); - closeF=vcalloc (100, sizeof (char)); - sprintf (closeF, "cedric2"); - - //openF =vtmpnam (NULL); - //closeF=vtmpnam (NULL); - vfclose(vfopen (openF,"w")); - } - while ((rename (openF,closeF))==-1); - - va_start (ap, string); - vsprintf (buf, string, ap); - va_end (ap); - fprintf ( fp, "%s", buf); - fflush (fp); - rename (closeF, openF); - - return 0; -} -int fprintf_fork2 (FILE *fp, char *string, ...) -{ - va_list ap; - static char *openF; - static struct flock fl; - char buf[100000]; - int fd; - va_start (ap, string); - vsprintf (buf, string, ap); - va_end (ap); - - fprintf ( fp, "%s", buf); - fflush (fp); - return 0; -} - -int printf_file (char *file,char *mode, char *string,...) -{ - FILE *fp; - va_list ap; - - if (!(fp=vfopen (file, mode)))return 0; - va_start (ap, string); - vfprintf (fp, string, ap); - va_end (ap); - vfclose (fp); - return 1; - } -int printf_system_direct (char *string, ...) -{ - char buf[10000]; - - va_list ap; - - va_start (ap, string); - vsprintf (buf, string, ap); - va_end (ap); - return safe_system (buf); -} - -int printf_system (char *string, ...) -{ - char buf[10000]; - - va_list ap; - - va_start (ap, string); - vsprintf (buf, string, ap); - va_end (ap); - return my_system (buf); -} - -int my_system_cl (int argc, char *argv[]) -{ - int a,l; - char *command; - - for ( a=0, l=0; a< argc; a++)l+=(strlen(argv[a])+2); - command=vcalloc (l+1, sizeof(char)); - for ( a=0; a< argc; a++) - { - command=strcat (command, argv[a]); - command=strcat (command, " "); - } - a=my_system ( command); - vfree (command); - return a; -} - -int my_system ( char *command0) -{ - static char ***unpacked_list; - static int n_unpacked; - static int proxy_set; - static int email_set; - int email=0, proxy=0, update_env=0; - - //Set the net and E-mail status - //if ( strstr (command0, "wget"))proxy=1; - //if ( strstr (command0, "curl"))proxy=1; - - - - if ( strstr (command0, "extract_from_pdb"))proxy=1; - if ( strstr (command0, "tc_generic_method"))proxy=1; - if ( strstr (command0, "install"))proxy=1; - if ( strstr (command0, "tc_generic_method"))email=1; - - - - if (!unpacked_list) - { - unpacked_list=declare_arrayN(3, sizeof (char), 3, 200,300); - } - - if ( getenv ("DEBUG_PERL"))return safe_system (command0); - else - { - char **list; - int is_command; - int a, c=0; - char *command1; - char *command2; - int return_val; - - command1=vcalloc ( 3*strlen (command0)+1, sizeof (char)); - command2=vcalloc ( 100000, sizeof (char)); - sprintf ( command1, "%s", command0); - - command1=substitute (command1, "|", " | "); - command1=substitute (command1, ";", " ; "); - - list=string2list (command1); - if ( !list) return EXIT_SUCCESS; - is_command=1; - - //Identify T-Coffee self threads and install threads - if ( strstr (list[1], "t_coffee"))update_env=1; - else if ( strstr (list[1], "install.pl"))check_internet_connection (IS_FATAL); - - - for ( a=1; a< atoi(list[0]); a++) - { - if ( is_command) - { - if ( strstr ( list[a], "unpack_")) - { - unpack_all_perl_script (list[a]+strlen ("unpack_")); - myexit (EXIT_SUCCESS); - } - else if ((c=name_is_in_list (list[a], unpacked_list[0], n_unpacked, 100))!=-1); - else - { - n_unpacked=unpack_perl_script (list[a], unpacked_list, n_unpacked);c=n_unpacked-1; - } - //if non unpacked script check pg is installed: - - if ( strm (unpacked_list[2][c], "shell")) - { - check_program_is_installed (list[a], NULL, NULL, NULL, INSTALL_OR_DIE); - } - strcat (command2, ((c!=-1)?unpacked_list[1][c]:list[a])); - strcat (command2, " "); - is_command=0; - - } - else - { - strcat (command2, list[a]); - strcat (command2, " "); - if ( strm (list[a], ",") ||strm (list[a], "|")) is_command=1; - } - } - - free_char (list,-1); - vfree ( command1); - command2=substitute ( command2, "//", "/"); - - return_val=safe_system (command2); - - //INTERCEPT POTENTIAL NETWORK CALLS - - if (return_val!=EXIT_SUCCESS && proxy==1 && proxy_set==0) - { - if (simple_check_internet_connection (NULL))proxy_set=1; - else - { - check_internet_connection (IS_NOT_FATAL);proxy_set=1; - return_val=safe_system (command2); - } - } - //Intercept potential Missing E-mails - if (return_val!=EXIT_SUCCESS && email==1 && email_set==0) - { - Email(INPUT, RESET);email_set=1; - return_val=safe_system (command2); - } - - //update the environement that may have been modified by a thread - - if (update_env)get_t_coffee_environement (NULL); - - vfree ( command2); - return return_val; - } -} -int safe_system (const char * com) -{ - pid_t pid; - int status; - if (com == NULL) - return (1); - - if ((pid = fork ()) < 0) - return (-1); - - if (pid == 0) { - - char * argv [4]; - - argv [0] = "sh"; - argv [1] = "-c"; - argv [2] =(char*) com; - argv [3] = 0; - execvp ("/bin/sh", argv); - } - else - { - set_pid(pid); - } - - - while (1) { - - if (vwaitpid (pid, &status, 0) == -1) - { - if (errno != EINTR) - return (EXIT_FAILURE); - } - else - { - return (status); - } - } -} -static int **pidtable; -static int pidtable_s; -pid_t **declare_pidtable () -{ - int a; - pidtable_s=MAX_N_PID; - - pidtable=vcalloc (pidtable_s, sizeof (pid_t*)); - for (a=0; a< pidtable_s; a++) - { - pidtable[a]=vcalloc (2, sizeof (pid_t)); - } - return pidtable; -} -pid_t set_pid (pid_t p) -{ - int cpid; - - if (!pidtable)declare_pidtable(); - if ( p<=0) return; - else if ( p>=MAX_N_PID)printf_exit (EXIT_FAILURE,stderr,"ERROR PID=%d superior to MAX_N_PID=%d [FATAL]",p, MAX_N_PID); - pidtable[(int)p][0]=getpid(); - pidtable[(int)p][1]=1; -} -pid_t vfork () -{ - pid_t p; - static int attempt; - - if ( attempt==1000) printf_exit (EXIT_FAILURE, stderr,"\nERROR: Could not fork processes. Run again with -multi_core=no\n"); - - - p=fork(); - if (p==-1) - { - attempt++; - wait(-1); - return vfork(); - } - else - { - attempt=0; - return p; - } -} -int vwait_npid (int sub, int max, int min) -{ - if (max==0) - { - while (sub>0) - { - vwait (NULL); - sub--; - } - } - else if ( sub>=max) - { - while (sub>=min) - { - vwait (NULL); - sub--; - } - } - else{;} - return sub; -} - -pid_t vwaitpid (pid_t p, int *status, int options) -{ - pid_t p2; - - p=waitpid (p, status, options); - - if (pidtable)pidtable[(int)p][0]=pidtable[(int)p][1]=0; - return p; -} -pid_t vwait (pid_t *p) -{ - pid_t p2; - - p2=wait (p); - - if (pidtable)pidtable[(int)p2][0]=pidtable[(int)p2][1]=0; - return p2; -} -int kill_child_pid() -{ - int n; - - if ( !pidtable)return 0; - else - { - int a; - pid_t cpid; - cpid=getpid(); - for (a=0; a%s", tmp); - sprintf ( string, "%s", file2string (tmp)); - chomp (string); - nproc=atoi (string); - } - else - nproc=1; - - return nproc; -} -char * get_os() -{ - static char os[100]; - char *file; - - if ( os[0])return os; - else - { - char *command; - char *s; - - command=vcalloc (100, sizeof (char)); - file=tmpnam (NULL); - sprintf ( command, "uname > %s", file); - safe_system (command); - s=file2string (file); - lower_string (s); - - if (strstr (s, "cygwin"))sprintf ( os, "windows"); - else if ( strstr (s, "linux"))sprintf ( os, "linux"); - else if ( strstr (s, "osx"))sprintf ( os, "macosx"); - else if ( strstr (s, "darwin"))sprintf ( os, "macosx"); - else sprintf (os, "%s", s); - vfree (s); - vfree (command); - vremove (file); - } - return os; -} - -char *file_putenv (char *file) -{ - if (!file) return NULL; - else if ( !check_file_exists (file))return NULL; - else - { - char ***list; - int n=0; - list=file2list (file, "\n"); - while (list[n]) - { - if ( list[n][1][0]!='#')cputenv ("%s",list[n][1]); - n++; - } - free_arrayN ((void ***)list, 3); - } - - return NULL; -} -int cputenv (char * string, ...) -{ - va_list ap; - char *file; - char *s; - FILE *fp; - if (!string)return 0; - file=vtmpnam (NULL); - va_start (ap, string); - fp=vfopen (file, "w"); - vfprintf (fp, string, ap); - vfclose (fp); - va_end (ap); - s=file2string (file); - if (!s) return 0; - putenv (s); - return 1; -} - -int check_dir_getenv ( char *string) -{ - char *p; - - - - p=getenv ( string); - if ( !p) return 0; - if ( !p || access (p, F_OK)==-1 || access (p, W_OK)==-1 || access(p, R_OK)==-1 || access (p, X_OK)==-1)return 0; - - return 1; -} - -char *get_dir_4_tcoffee() -{ - static char dir_4_tcoffee[1000]; - if (dir_4_tcoffee[0])return dir_4_tcoffee; - else - { - if ( getenv ("DIR_4_TCOFFEE"))sprintf (dir_4_tcoffee, "%s", getenv("DIR_4_TCOFFEE")); - else sprintf ( dir_4_tcoffee, "%s/.t_coffee",get_home_4_tcoffee()); - sprintf ( DIR_4_TCOFFEE, "%s", dir_4_tcoffee); - my_mkdir (dir_4_tcoffee); - } - return dir_4_tcoffee; -} -char *get_tmp_4_tcoffee () -{ - static char tmp_4_tcoffee [1000]; - - if ( tmp_4_tcoffee[0])return tmp_4_tcoffee; - else - { - - if ( getenv ("TMP_4_TCOFFEE"))sprintf (tmp_4_tcoffee, "%s", getenv("TMP_4_TCOFFEE")); - else - { - char command [1000]; - - if ( strm (get_os(), "windows")) - { - sprintf ( tmp_4_tcoffee, ".TCtmp"); - } - else - { - sprintf ( tmp_4_tcoffee, "%s/tmp", get_dir_4_tcoffee()); - } - } - - sprintf ( TMP_4_TCOFFEE, "%s", tmp_4_tcoffee); - my_mkdir (tmp_4_tcoffee); - } - - return tmp_4_tcoffee; -} -char *get_cache_4_tcoffee () -{ - - static char cache_4_tcoffee [1000]; - if ( cache_4_tcoffee[0])return cache_4_tcoffee; - else - { - if ( getenv ("CACHE_4_TCOFFEE"))sprintf (cache_4_tcoffee, "%s", getenv("CACHE_4_TCOFFEE")); - else sprintf ( cache_4_tcoffee, "%s/cache/", get_dir_4_tcoffee()); - sprintf ( CACHE_4_TCOFFEE, "%s", cache_4_tcoffee); - my_mkdir(cache_4_tcoffee); /*Do not use mkdir: not yet initialized*/ - } - return cache_4_tcoffee; -} -char *get_mcoffee_4_tcoffee () -{ - static char mcoffee_4_tcoffee [1000]; - if ( mcoffee_4_tcoffee[0])return mcoffee_4_tcoffee; - else - { - if ( getenv ("MCOFFEE_4_TCOFFEE"))sprintf (mcoffee_4_tcoffee, "%s", getenv("MCOFFEE_4_TCOFFEE")); - else sprintf ( mcoffee_4_tcoffee, "%s/mcoffee/", get_dir_4_tcoffee()); - my_mkdir (mcoffee_4_tcoffee); - } - return mcoffee_4_tcoffee; -} -char *get_methods_4_tcoffee () -{ - static char methods_4_tcoffee [1000]; - if ( methods_4_tcoffee[0])return methods_4_tcoffee; - else - { - if ( getenv ("METHODS_4_TCOFFEE"))sprintf (methods_4_tcoffee, "%s", getenv("METHODS_4_TCOFFEE")); - else sprintf ( methods_4_tcoffee, "%s/methods/", get_dir_4_tcoffee()); - my_mkdir(methods_4_tcoffee); - } - return methods_4_tcoffee; -} - -char ** standard_initialisation ( char **in_argv, int *in_argc) -{ - return standard_initialisation_end (standard_initialisation_start (in_argv, in_argc), in_argc); -} -int getpid_ref() -{ - static int pid; - if (!pid)pid=getpid(); - return pid; -} - -char ** standard_initialisation_start ( char **in_argv, int *in_argc) - { - static int standard_initialisation_done; - char **out_argv; - FILE *fp, *fp2; - int a,c, stdi; - //Break the command line, intercept the pipe, prepare the exit - - getpid_ref(); - if ( in_argv==NULL) - { - standard_initialisation_done=0; - return NULL; - } - - else if ( standard_initialisation_done==1) - { - return in_argv; - } - else standard_initialisation_done=1; - /*1 Check for the cache and tmp directories*/ - - get_dir_4_tcoffee(); - get_tmp_4_tcoffee(); - - for (c=0,a=0; a=max_n_warning) - { - max_n_warning+=100; - warning_list=vrealloc ( warning_list,sizeof (char*)*max_n_warning); - } - - va_start (ap, string); - if (fp) - { - fprintf ( fp, "\n"); - vfprintf (fp, string, ap); - } - va_end(ap); - - va_start (ap, string); - vsprintf (buf, string, ap); - va_end (ap); - - warning_list[n_warning]=vcalloc (strlen (buf)+1, sizeof (char)); - sprintf ( warning_list[n_warning], "%s", buf); - n_warning++; - - return fp; -} -void output_warning_list() -{ - int a; - if ( n_warning==0){;} - else - { - fprintf ( stderr, "\nWARNING RECAPITULATION: %d Warning%c [PROGRAM: %s]\n", n_warning, (n_warning>1)?'s':' ', PROGRAM); - for (a=0; a< n_warning; a++) - { - fprintf (stderr, "**WARNING: %3d** %s\n",a+1, warning_list[a]); - } - } -} - - -int count_n_res_in_array (char *array, int len) - { - return count_n_symbol_in_array(array, "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ", len); - } -int count_n_gap_in_array (char *array, int len) - { - int l; - if ( len<=0 ||len>strlen(array) )l=strlen(array); - else l=len; - - return l- count_n_res_in_array (array,len); - } -int count_n_symbol_in_array ( char *array, char *array_list, int len) - { - int a=0, t=0; - int l; - - if ( len<=0 ||len>strlen(array) )l=strlen(array); - else l=len; - - for ( a=0; a< l; a++)t+=is_in_set (array[a], array_list); - return t; - } - -char* count_strings_in_file ( char *in, char *out) -{ - FILE *fp; - int n,c; - char **list, **result; - - - if (!out) out=vtmpnam (NULL); - list=declare_char (count_n_line_in_file(in)+1, measure_longest_line_in_file (in)+1); - - n=0; - fp=vfopen (in, "r"); - while ((c=fgetc (fp))!=EOF) - { - ungetc (c, fp); - fscanf (fp, "%s\n",list[n]); - n++; - } - vfclose (fp); - - result=count_strings (list, n); - - - n=0; - fp=vfopen (out, "w"); - while (result[n])fprintf ( fp,"%s\n", result[n++]); - vfclose (fp); - - free_char (list, -1); - free_char (result, -1); - - return out; -} - -int ** count_int_strings (int **array, int len, int s) -{ - int **result; - int a,n; - - sort_list_int (array,s, s,0, len-1); - result=vcalloc (len, sizeof (int*)); - for (n=-1,a=0; a %s", def_env); - file_putenv (def_env); - file_putenv (file); - if (!getenv ("DIR_4_TCOFFEE")) cputenv("DIR_4_TCOFFEE=%s", get_dir_4_tcoffee()); - if (!getenv ("TMP_4_TCOFFEE")) cputenv("TMP_4_TCOFFEE=%s", get_tmp_4_tcoffee()); - if (!getenv ("CACHE_4_TCOFFEE")) cputenv("CACHE_4_TCOFFEE=%s", get_cache_4_tcoffee()); - - set_path_4_plugins (NULL); - get_email_from_env(); - get_proxy_from_env(); -} -char * set_path_4_plugins (char *plugins) -{ - if (plugins)get_plugins_4_tcoffee (plugins); - - //set various important variables - if (!getenv ("MAFFT_BINARIES"))cputenv ("MAFFT_BINARIES=%s", get_plugins_4_tcoffee(NULL)); - if (!getenv ("PLUGINS_4_TCOFFEE")) cputenv("PLUGINS_4_TCOFFEE=%s", get_plugins_4_tcoffee(NULL)); - - if (!getenv ("MCOFFEE_4_TCOFFEE")) cputenv("MCOFFEE_4_TCOFFEE=%s", get_mcoffee_4_tcoffee()); - if (!getenv ("METHODS_4_TCOFFEE")) cputenv("METHODS_4_TCOFFEE=%s", get_methods_4_tcoffee()); - cputenv ("PATH=%s:%s", get_plugins_4_tcoffee(NULL), getenv ("PATH")); - //set various packages - add_package2_tcoffee_env ("CLUSTALW_4_TCOFFEE"); - add_package2_tcoffee_env ("CLUSTALW2_4_TCOFFEE"); - add_package2_tcoffee_env ("FUGUE_4_TCOFFEE"); - add_package2_tcoffee_env ("CLUSTALW2_4_TCOFFEE"); - add_package2_tcoffee_env ("TMALIGN_4_TCOFFEE"); - add_package2_tcoffee_env ("SAP_4_TCOFFEE"); - add_package2_tcoffee_env ("DALILITEc_4_TCOFFEE"); - add_package2_tcoffee_env ("MUSTANG_4_TCOFFEE"); - add_package2_tcoffee_env ("NCBIBLAST_4_TCOFFEE"); - add_package2_tcoffee_env ("MAFFT_4_TCOFFEE"); - add_package2_tcoffee_env ("DIALIGNTX_4_TCOFFEE"); - add_package2_tcoffee_env ("POA_4_TCOFFEE"); - add_package2_tcoffee_env ("PROBCONS_4_TCOFFEE"); - add_package2_tcoffee_env ("PROBCONSRNA_4_TCOFFEE"); - add_package2_tcoffee_env ("TCOFFEE_4_TCOFFEE"); - add_package2_tcoffee_env ("PCMA_4_TCOFFEE"); - add_package2_tcoffee_env ("KALIGN_4_TCOFFEE"); - add_package2_tcoffee_env ("AMAP_4_TCOFFEE"); - add_package2_tcoffee_env ("PRODA_4_TCOFFEE"); - add_package2_tcoffee_env ("PRANK_4_TCOFFEE"); - add_package2_tcoffee_env ("CONSAN_4_TCOFFEE"); - add_package2_tcoffee_env ("RNAPlfold_4_TCOFFEE"); - add_package2_tcoffee_env ("HMMtop_4_TCOFFEE"); - add_package2_tcoffee_env ("GOR4_4_TCOFFEE"); - return NULL; -} - -int add_package2_tcoffee_env (char *package) -{ - char *v1, *v2; - - if ((v1=getenv (package))==NULL)return 0; - - v2=filename2path (v1); - if (!v2){vfree (v1);return 0;} - cputenv ("PATH=%s:%s",v2, getenv ("PATH")); - vfree (v1); vfree (v2); - return 1; -} - - -char * Proxy( int input_mode, int write_mode) -{ - static char *proxy; - static int set; - - if ( write_mode==RESET){set=0;vfree (proxy); proxy=NULL;return Proxy(input_mode, SET);} - - if (set); - else if ( input_mode ==INPUT){proxy=get_proxy();} - else if ( input_mode ==ENV ){proxy=get_proxy_from_env (); } - else printf_exit (EXIT_FAILURE,stderr, "Unknown mode for Proxy [FATAL:%s(%s)]", PROGRAM, VERSION); - - set=1; - if (proxy)set_proxy (proxy); - else proxy=vcalloc (1, sizeof (char)); - - return proxy; -} -char * get_proxy_from_env () -{ - char *proxy=NULL; - - if ((proxy=get_string_variable ("cl_proxy"))){;}//Command line proxy always wins - else if ((proxy=getenv ("http_proxy_4_TCOFFEE"))); - else if ((proxy=get_string_variable ("proxy")));//use default T-Coffee proxy - else if ( getenv ("HTTP_proxy") && getenv ("http_proxy")){return getenv ("HTTP_proxy");}//get environement proxy - else if ((proxy=getenv ("HTTP_proxy")));//id - else if ((proxy=getenv ("http_proxy")));//id - else if ((proxy=getenv ("HTTP_PROXY")));//id - else if ((proxy=getenv ("ALL_proxy")));//id - else if ((proxy=getenv ("all_proxy")));//id - else if ((proxy=getenv ("ALL_PROXY")));//id - - if (proxy)set_proxy(proxy); - return proxy; -} -char *get_proxy () -{ - - char *proxy=NULL; - - if ( (proxy=get_proxy_from_env()) && simple_check_internet_connection(NULL)); - else //read in the proxy - { - proxy_message(); - input_proxy(); - proxy=get_proxy(); - } - return proxy; -} -int set_proxy (char *proxy) -{ - char *http; - if (!proxy) return 0; - - cputenv ("HTTP_proxy_4_TCOFFEE=%s", proxy); - cputenv ("HTTP_proxy=%s", proxy); - cputenv ("http_proxy=%s", proxy); - cputenv ("HTTP_PROXY=%s", proxy); - cputenv ("ALL_proxy=%s", proxy); - cputenv ("ALL_PROXY=%s", proxy); - cputenv ("all_proxy=%s", proxy); - - return 1; -} -char *proxy_message () -{ - fprintf ( stderr, "\n\n"); - fprintf ( stderr, "*************************************************************************************************\n"); - fprintf ( stderr, "* IMPORTANT: Please Read Carefuly *\n"); - fprintf ( stderr, "* *\n"); - fprintf ( stderr, "* ------------------------------FIRST RUN CONFIGURATION---------------------------------------- *\n"); - fprintf ( stderr, "* *\n"); - fprintf ( stderr, "* *\n"); - fprintf ( stderr, "* If you are behind a firewall, you must enter your proxy address to use webservices *\n"); - fprintf ( stderr, "* This address is usualy something like: http://some.place.here:8080 *\n"); - fprintf ( stderr, "* *\n"); - fprintf ( stderr, "* The proxy you will provide can be changed anytime by editing the file: *\n"); - fprintf ( stderr, "* %s/t_coffee_env \n",get_dir_4_tcoffee()); - fprintf ( stderr, "*************************************************************************************************\n"); - return NULL; -} -char *input_proxy () -{ - char *proxy; - char *answer; - static int ntries; - - ntries++; - if ( ntries==MAX_N_TRIES) - printf_exit (EXIT_FAILURE,stderr, "\nERROR: Could not use provided proxy. Provide proxy via the -email flag [FATAL:%s]\n", PROGRAM); - - fprintf ( stderr, "\n ### Tip: If you work from home (ADSL) you probably do no need a proxy. "); - fprintf ( stderr, "\n ### Tip: The proxy is the address you may have had to enter in your navigator. "); - - fprintf ( stderr, "\n ### Proxy Input, Enter your proxy and type Return (Type Return if you do not need a proxy): "); - proxy=input_name(); - - if ( !proxy) - { - proxy=vcalloc ( 100, sizeof (char)); - sprintf ( proxy, ""); - } - else if (!strstr (proxy, "."))return input_proxy(); - else - { - int a, c; - a=0; - while ((c=proxy[a])!='\0') - { - if (!isalnum(c) && c!='.' && c!='_' )return input_proxy(); - a++; - } - } - fprintf ( stderr, "\n\tYou have entered:[%s]%s \nIs this correct ([Y] or N)?: ",proxy, (strm (proxy, "")?"(No Proxy Needed)":"")); - answer=input_name(); - - if ( !answer || answer[0]=='y' ||answer[0]=='Y') - { - vfree (answer); - } - else - { - vfree (answer); - return input_proxy (); - } - add2t_coffee_environement ("http_proxy_4_TCOFFEE=%s", proxy); - set_string_variable ("proxy", proxy); - return proxy; -} - - - -char *input_email () -{ - char *email; - char *answer; - static int ntries; - - ntries++; - - if ( ntries==MAX_N_TRIES) - printf_exit (EXIT_FAILURE, stderr,"\nERROR: Could not use provided Email. Provide Email via the -email flag [FATAL:%s]\n", PROGRAM); - - - - fprintf ( stderr, "\n ### EMAIL Input, Enter your email and type Return: "); - email=input_name(); - fprintf ( stderr, "\n\tYou have entered:%s \nIs this correct ([Y] or N)?: ", email); - answer=input_name(); - - if ( !answer || answer[0]=='y' ||answer[0]=='Y') - { - vfree (answer); - } - else - { - vfree (answer); - return input_email (); - } - add2t_coffee_environement("EMAIL_4_TCOFFEE=%s", email); - set_string_variable ("email", email); - return email; -} -static char *ebi_email_message (); -char *ebi_email_message () -{ - fprintf ( stderr, "\n\n"); - fprintf ( stderr, "*************************************************************************************************\n"); - fprintf ( stderr, "* IMPORTANT: Please Read Carefuly *\n"); - fprintf ( stderr, "* *\n"); - fprintf ( stderr, "* ------------------------------FIRST RUN CONFIGURATION---------------------------------------- *\n"); - fprintf ( stderr, "* *\n"); - fprintf ( stderr, "* Some commands of T-Coffee use the EBI BLAST webservices. The EBI requires a valid E-mail *\n"); - fprintf ( stderr, "* address for this service to be used (check: www.ebi.ac.uk/Tools/webservices/). *\n"); - fprintf ( stderr, "* T-Coffee will keep it for further run but only to be used with the EBI webservices. *\n"); - fprintf ( stderr, "* *\n"); - fprintf ( stderr, "* !!!!!!!!!!!!!!!!!! Your Email will not be sent to us, ONLY to the EBI !!!!!!!!!!!!!!!!!!!!!!!!*\n"); - fprintf ( stderr, "* *\n"); - fprintf ( stderr, "* -blast_server=EBI is the default mode of T-Coffee. If you do NOT want to provide your E-mail *\n"); - fprintf ( stderr, "* you can use: *\n"); - fprintf ( stderr, "* -blast_server=NCBI (NCBI netblast) *\n"); - fprintf ( stderr, "* -blast_server=LOCAL Local NCBI BLAST *\n"); - fprintf ( stderr, "* *\n"); - fprintf ( stderr, "*The address you provide can be changed anytime by editing the file: *\n"); - fprintf ( stderr, "* %s/t_coffee_env \n",get_dir_4_tcoffee()); - fprintf ( stderr, "*************************************************************************************************\n"); - return NULL; -} -char *Email( int input_mode, int write_mode) -{ - static char *email; - static int set; - - if ( write_mode==RESET){set=0;vfree (email); email=NULL;return Email(input_mode, SET);} - - if (set); - else if (input_mode ==INPUT){email=get_email();} - else if (input_mode ==ENV ){email=get_email_from_env (); } - else printf_exit (EXIT_FAILURE, stderr,"Unknown mode for Email [FATAL:%s]",PROGRAM); - - set=1; - - if ( email)set_email (email); - else email=vcalloc (1, sizeof (char)); - return email; -} -char *get_email_from_env () -{ - char *email; - - if ( (email=get_string_variable ("cl_email"))); - else if ( (email=get_string_variable ("email"))); - else if ( (email=getenv ("EMAIL_4_TCOFFEE"))); - else if ( (email=getenv ("EMAIL"))); - return email; -} -char *get_email () -{ - char file [1000]; - static char *email; - - if ( (email=get_email_from_env ())); - else - { - ebi_email_message(); - email=input_email(); - } - - if (!strstr (email, "@")) - { - ebi_email_message(); - email=input_email(); - } - set_email (email); - - return email; -} -int set_email (char *email) -{ - if (!email) return 0; - - cputenv ("EMAIL_4_TCOFFEE=%s", email); - cputenv ("EMAIL=%s",email); - return 1; -} -char *chomp (char *name) -{ - int a=0; - while ( name[a]!='\n' && name[a]!='\0')a++; - name[a]='\0'; - return name; -} -static Tmpname *tmpname; -static Tmpname *ntmpname; - -static int n_tmpname; -static int file2remove_flag; - -char *set_file2remove_extension (char *extension, int mode) -{ - static char ext[100]; - if (mode==SET)sprintf (ext, "%s", extension); - else if ( mode==UNSET) ext[0]='\0'; - else if ( mode==GET); - return ext; -} -int flag_file2remove_is_on () -{ - return file2remove_flag; -} -void set_file2remove_on() -{ - file2remove_flag=1; -} -void set_file2remove_off() -{ - file2remove_flag=0; -} - -char *add2file2remove_list (char *name) -{ - - - if ( !tmpname || !name)ntmpname=tmpname=vcalloc ( 1, sizeof (Tmpname)); - else if (!ntmpname->name); - else ntmpname=ntmpname->next=vcalloc ( 1, sizeof (Tmpname)); - - if (!name) return NULL; - - ntmpname->name=vcalloc(strlen(name)+1, sizeof (char)); - - sprintf (ntmpname->name, "%s", name); - return ntmpname->name; -} -//char *short_tmpnam_2(char *s);//used to generate very compact tmp names -void initiate_vtmpnam (char *file) -{ - add2file2remove_list (NULL); - tmpnam_2(NULL); -} -char *vtmpnam ( char *s1) -{ - char *s,*s2; - - n_tmpname++; - - standard_initialisation(NULL, NULL); - - s=vcalloc ( VERY_LONG_STRING, sizeof (char)); - s[0]='\0'; - - s=tmpnam_2 (s); - - s2=add2file2remove_list (s); - if (s!=s2)vfree (s); - if (s1){sprintf (s1, "%s",s2);return s1;} - else return s2; -} - - - -int get_vtmpnam2_root() -{ - int MAX_TMPNAM_ROOT=10000; - static int v; - - if (v) ; - else - { - vsrand(0); - v=rand()%MAX_TMPNAM_ROOT; - } - return v; -} -char *tmpnam_2 (char *s) - { - static int root; - static int file; - char buf[VERY_LONG_STRING]; - static char root2[VERY_LONG_STRING]; - static char *tmpdir; - static int name_size; - - if ( !root || !s) - { - char *vtmpnam_prefixe; - - name_size=MAX( 2*L_tmpnam, MAXNAMES*2)+1; - root=get_vtmpnam2_root(); - sprintf ( root2, "%d%d_", root, (int)getpid()); - - vtmpnam_prefixe=vcalloc (strlen (root2)+strlen (get_tmp_4_tcoffee())+2, sizeof (char)); - sprintf (vtmpnam_prefixe, "%s/%s", get_tmp_4_tcoffee(), root2); - set_string_variable ("vtmpnam_prefixe1", vtmpnam_prefixe); - set_string_variable ("vtmpnam_prefixe2", root2); - vfree (vtmpnam_prefixe); - } - - if (!s)return NULL; - tmpdir=get_tmp_4_tcoffee(); - - sprintf (buf, "%s/%s%d_TCtmp%s",tmpdir,root2, file++,set_file2remove_extension (NULL, GET)); - if ( strlen(buf)>=name_size)s=vrealloc (s,(strlen(buf)+1)*sizeof (char)); - sprintf (s, "%s", buf); - return s; - } -char *short_tmpnam_2(char *s) -{ - static int root; - static int file; - char buf[VERY_LONG_STRING]; - static char root2[VERY_LONG_STRING]; - static char *tmpdir; - static int name_size; - - if ( !root || !s) - { - char *vtmpnam_prefixe; - - name_size=MAX( 2*L_tmpnam, MAXNAMES*2)+1; - root=get_vtmpnam2_root(); - sprintf ( root2, "%d%d", root,getpid()); - - vtmpnam_prefixe=vcalloc (strlen (root2)+strlen (get_tmp_4_tcoffee())+2, sizeof (char)); - sprintf (vtmpnam_prefixe, "%s", root2); - set_string_variable ("vtmpnam_prefixe1", vtmpnam_prefixe); - set_string_variable ("vtmpnam_prefixe2", root2); - vfree (vtmpnam_prefixe); - } - if (!s) return NULL; - - sprintf (buf, "%s%d%s",root2, file++,set_file2remove_extension (NULL, GET)); - if ( strlen(buf)>=name_size)s=vrealloc (s,(strlen(buf)+1)*sizeof (char)); - sprintf (s, "%s", buf); - return s; -} - -char *vremove2 (char *s) -{ - char command[1000]; - char list_file[1000]; - char ***list; - int a; - - - //Remove filenames with a wildcard - - sprintf (list_file, "list_file_%d", (int)getpid()); - sprintf (command, "ls -1 %s>%s 2>/dev/null", s, list_file); - safe_system (command); - - list=file2list (list_file, " "); - - a=0; - while (list && list[a]) - { - if ( check_file_exists (list[a][1])) - { - vremove (list[a][1]); - } - a++; - } - vremove (list_file); - return NULL; -} -char *vremove (char *s) -{ - - - if ( s && strstr (s, "*"))return vremove2(s); - else if ( !s || !check_file_exists(s) ) return NULL; - else if ( isdir (s)) - { - rmdir (s); - return NULL; - } - else - { - remove (s); - return NULL; - } - return NULL; -} -int log_function ( char *fname) -{ - char command[1000]; - - if ( check_file_exists (error_file)) - { - - sprintf ( command, "cp %s %s", error_file, fname); - my_system ( command); - fprintf( stderr,"\n\t******************************************************************"); - fprintf( stderr, "\n\t* Full Log of [%s, %s] in File [%s]",PROGRAM, VERSION, fname); - fprintf( stderr, "\n\t******************************************************************\n"); - } - return 1; -} - -void clean_exit () -{ - - myexit (global_exit_signal); -} -void error_exit ( ) - { - char command[1000]; - char final_report[1000]; - - if ( no_error_report)return; - if ( check_file_exists (error_file)) - { - sprintf ( final_report, "error_report.%s",PROGRAM); - if ( !getenv ("NO_ERROR_REPORT_4_TCOFFEE"))sprintf ( command, "cp %s %s", error_file, final_report); - my_system ( command); - fprintf( stderr,"\n\t******************************************************************"); - fprintf( stderr, "\n\t* Job NOT Completed:[%s, %s]",PROGRAM, VERSION); - fprintf( stderr, "\n\t* Please CHECK: "); - fprintf( stderr, "\n\t* \t-1 The format of your Input Files "); - fprintf( stderr, "\n\t* \t-2 The parameters "); - fprintf( stderr, "\n\t* \t-3 The use of special characters in sequence names:"); - fprintf( stderr, "\n\t* \t\t (@, |, %%...)"); - - fprintf( stderr, "\n\t* \t-4 The Online Doc (%s) ", URL); - fprintf( stderr, "\n\t* \t-5 Send the file %s to:", final_report); - fprintf( stderr, "\n\t* \t\t%s",EMAIL); - - fprintf( stderr, "\n\t* If you run T-Coffee over the WEB:"); - fprintf( stderr, "\n\t* \tWindows Cut and Paste is sometimes erratic and"); - fprintf( stderr, "\n\t* \tit can loose carriage returns. If you suspect this,"); - fprintf( stderr, "\n\t* \ttry to cut and paste through an intermediate application"); - fprintf( stderr, "\n\t* \t(word pad) and inspect the results\n\n"); - fprintf( stderr, "\n\t* CONFIDENTIALITY:"); - fprintf( stderr, "\n\t* \tThe File %s may contain your personnal DATA", final_report); - fprintf( stderr, "\n\t* \tRemove ALL confidential DATA from this file BEFORE sending it"); - fprintf( stderr, "\n\t******************************************************************\n"); - print_command_line(stderr); - } - print_exit_failure_message (); - myexit (global_exit_signal=EXIT_FAILURE); - } -void main_exit () -{ - - clean_function (); - exit (global_exit_signal); -} - - -void clean_function () -{ - Tmpname *b; - char *tmp; - static int done; - Tmpname *start; - - start=tmpname; - - if ( done==1) return; - else done=1; - - kill_child_pid(); - add_method_output2method_log (NULL, NULL, NULL, NULL, decode_name (NULL, CODELIST)); - - //Kill all child processes (if any) - - - if (getenv ("DEBUG_TMP_FILE") && atoi (getenv("DEBUG_TMP_FILE"))==1) - { - fprintf ( stderr, "\n[DEBUG_TMP_FILE:%s] TEMPORARY FILES HAVE NOT Been Removed:", PROGRAM); - while (start) - { - if ( getenv("PRINT_TMPFILE_NAME") && atoi(getenv("PRINT_TMPFILE_NAME"))==1) - { - fprintf ( stderr, "\n\t%s [EXISTS:%s]", tmpname->name, (check_file_exists(tmpname->name))?"YES":"NO"); - } - - b=start; - start=start->next; - vfree(b->name); - vfree(b); - } - fprintf ( stderr, "\n"); - return; - } - else - { - char name[10000]; - char *x; - - //printf_system ("clean_cache.pl -dir=%s -size=0 -age=10 -force",get_tmp_4_tcoffee ()); - //Remove all the temporary files generated during THIS run - while (start) - { - - remove (start->name); - if (isdir(start->name))rrmdir (start->name); - - b=start; - start=start->next; - vfree(b->name);vfree(b); - } - - //Remove potential log - //remove ( TO_NULL_DEVICE); - //Remove all the tmp/tmp* files generated by this process [potentialy generated by secondary processes] - //if ((x=get_string_variable("vtmpnam_prefixe1"))){ sprintf ( name, "%s*", x); vremove(name);} - //Remove all the tmp* file that may have sneaked in (from clustalw for instance) and could be in the current dir - //if ((x=get_string_variable("vtmpnam_prefixe2"))){ sprintf ( name, "%s*", x); vremove(name);} - //remove (get_tmp_4_tcoffee ());//removes tmpdir if empty - } - return; - } -FILE *NFP;/*Null file pointer: should only be open once*/ - -/*********************************************************************/ -/* */ -/* CACHE_FUNCTION */ -/* */ -/* */ -/*********************************************************************/ -static char *cache; -char * prepare_cache ( const char *mode) -{ - char command[1000]; - - cache =vcalloc ( 10000, sizeof(char)); - - if (strm (mode, "use")) - { - sprintf (cache, "%s",get_cache_4_tcoffee()); - } - - else if ( strm (mode, "ignore") || strm (mode, "no")) - { - - cache=vtmpnam(cache); - strcat (cache, "/"); - sprintf ( command, "mkdir %s",cache); - my_system ( command); - } - else if ( strm (mode, "update")) - { - cache=vtmpnam(cache); - strcat (cache, "/"); - sprintf ( command, "mkdir %s",cache); - my_system ( command); - } - else if ( strm (mode, "local")) - { - cache[0]='\0'; - } - else - { - sprintf ( cache, "%s/",mode); - my_mkdir ( cache); - } - return cache; - -} - -char * get_cache_dir() -{ - if ( cache==NULL){cache=vcalloc (1, sizeof (char));cache[0]='\0';} - return cache; -} - -void update_cache () -{ - char command[1000]; - char old_cache[1000]; - - sprintf ( old_cache, "%s", get_cache_dir()); - prepare_cache( "use"); - sprintf ( command, "mv %s* %s",old_cache, get_cache_dir()); - my_system (command); - sprintf ( command, "rmdir %s",old_cache); - my_system (command); -} -void ignore_cache() -{ - char command[1000]; - - if (getenv4debug ("DEBUG_TMP_FILE")) - { - fprintf ( stderr, "\n[DEBUG_TMP_FILE:%s] TEMPORARY CACHE HAS NOT Been Removed:\n\t%s\n", PROGRAM,get_cache_dir()); - } - else - { - - sprintf ( command, "rm -r %s",get_cache_dir()); - my_system (command); - } - return; - -} - - -FILE * vfopen ( char *name_in, char *mode) - { - FILE *fp; - int get_new_name; - int tolerate_mistake; - int cache_used=0; - FILE *tmp_fp; - int c; - static char *name; - static char *name2; - static char *stdin_file; - - - if ( !name_in)return NULL; - if (!name){name=vcalloc (1000, sizeof (char));} - if (!name2){name2=vcalloc (1000, sizeof (char));} - - sprintf ( name, "%s", name_in); - tild_substitute (name, "~", get_home_4_tcoffee()); - - get_new_name=tolerate_mistake=0; - if ( mode[0]=='g'){get_new_name=1; mode++;} - else if ( mode[0]=='t'){tolerate_mistake=1;mode++;} -/*Use the cached version from CACHE_4_TCOFFEE*/ - else if ( mode[0]=='c'){cache_used=1;mode++;} - - if (name==NULL ||strm5 ( name, "no","NO","No","NULL","/dev/null") || strm2 (name, "no_file", "NO_FILE")) - { - if ( NFP==NULL)NFP=fopen (NULL_DEVICE, mode); - return NFP; - } - else if ( strm3 (name,"stderr","STDERR","Stderr"))return stderr; - else if ( strm3 (name,"stdout","STDOUT","Stdout"))return stdout; - else if ( strm3 ( name, "stdin","STDIN","Stdin")) - { - if (!stdin_file) - { - stdin_file=vtmpnam (NULL); - tmp_fp=vfopen ( stdin_file, "w"); - while ( (c=fgetc(stdin))!=EOF)fprintf (tmp_fp, "%c", c); - vfclose ( tmp_fp); - } - return vfopen (stdin_file, "r"); - } - - else if ( strm (name, "") && (strm (mode, "w") ||strm (mode, "a")) )return stdout; - else if ( strm (name, "") && strm (mode, "r"))return stdin; - else if ( (fp= fopen ( name, mode))==NULL) - { - if ( strcmp (mode, "r")==0 && cache_used==0) - { - sprintf ( name2, "%s%s",get_cache_dir(), name); - return vfopen ( name2, "cr"); - } - else if ( strcmp (mode, "r")==0 && cache_used==1) - { - fprintf (stderr, "\nCOULD NOT READ %s\n", name); - if ( get_new_name){fprintf ( stderr, "\nNew name: ");return vfopen (input_name(), mode-1);} - else if ( tolerate_mistake)return NULL; - else - { - fprintf (stderr, "\nFORCED EXIT (NON INTERACTIVE MODE)\n"); - if ( getenv ( "DEBUG_TCOFFEE"))crash ("DEBUG"); - else myexit (EXIT_FAILURE); - } - } - else if ( strcmp (mode, "a")==0 && cache_used==0) - { - sprintf ( name2, "%s%s",get_cache_dir(), name); - return vfopen ( name, "ca"); - } - else if ( strcmp (mode, "a")==0 && cache_used==1) - { - fprintf (stderr, "\nCOULD NOT Append anything to %s\n", name); - if ( get_new_name){fprintf ( stderr, "\nNew name: ");return vfopen (input_name(), mode-1);} - else if ( tolerate_mistake)return NULL; - else - { - fprintf (stderr, "\nFORCED EXIT (NON INTERACTIVE MODE)\n"); - myexit (EXIT_FAILURE); - } - } - else if ( strcmp (mode, "w")==0) - { - fprintf (stderr, "\nCANNOT WRITE %s\n", name); - if ( get_new_name==1){fprintf ( stderr, "\nNew name: ");return vfopen (input_name(), mode-1);} - else if ( tolerate_mistake)return NULL; - else - { - fprintf (stderr, "\nFORCED EXIT (NON INTERACTIVE MODE): %s %s\n", (strcmp ( mode, "r")==0)?"READ":"WRITE", name); - myexit(EXIT_FAILURE); - } - } - } - else - return fp; - - return NULL; - } - -FILE * vfclose ( FILE *fp) - { - if ( fp==NFP)return NULL; - if ( fp==stdout)return stdout; - if ( fp==stderr)return stderr; - if ( fp==stdin) return stdin; - if ( fp==NULL)return NULL; - else fclose (fp); - return NULL; - } - - -int echo ( char *string, char *fname) -{ -int a; -/* -description: -prints the content of string into file fname - -in: -string= string to print -fname =name of the file to create -*/ - -FILE *fp; - - fp=vfopen ( fname, "w"); - fprintf (fp, "%s", string); - a=fclose (fp); - return a; - -} - -int file_cat ( char *from, char *to) -{ - FILE *fp; - //appends the content of file1 to file 2 - if (!(fp=vfopen (to, "a")))return 0; - if (!display_file_content (fp, from)) return 0; - vfclose (fp); - return 1; -} - -FILE* display_file_content (FILE *output, char *name) -{ - FILE *fp; - int c; - if ( !name || !check_file_exists (name) || !(fp=vfopen (name, "r")))return NULL; - while ( (c=fgetc(fp))!=EOF)fprintf (output,"%c", c); - vfclose (fp); - return output; -} - -char ***file2list ( char *name, char *sep) -{ - /*Rturns an array where - list[0]: first line - list[0][0]: number of words - list[0][1]:first word; - list[n]=NULL - */ - char **lines, ***list; - int a, n; - - lines=file2lines (name); - if (!lines) return NULL; - else - { - n=atoi (lines[0]); - - list=vcalloc ( n+1, sizeof (char**)); - for ( a=1; anlines; - lines[1]->first_line - */ - char **lines; - char *string; - - - string=file2string (name); - if ( !string) return NULL; - else - { - lines=string2list2(string, "\n"); - vfree ( string); - return lines; - } -} - -char *string2file ( char *string, char *file, char *mode) -{ - FILE *fp; - if (!file) - file=vtmpnam (NULL); - fp=vfopen (file, mode); - fprintf (fp, "%s", string); - vfclose (fp); - return file; -} -char *file2string (char *name) -{ - FILE*fp; - char *string; - int a, c; - - if (!name || !check_file_exists (name))return NULL; - else - { - string=vcalloc ( count_n_char_in_file(name)+1, sizeof (char)); - fp=vfopen (name, "r");a=0; - while ( (c=fgetc(fp))!=EOF) - { - string[a++]=c; - } - string[a]='\0'; - vfclose (fp); - return string; - } -} - -int get_cl_param (int argc, char **argv, FILE **fp,char *para_name, int *set_flag, char *type, int optional, int max_n_val,char *usage, ...) - { - /* - usage: - argc: n_ arg - argv list * - para_name param - set_flag set to 1 if param set; - para_type F, I, S, R_FN (read_file, name), W_FN (written file, name), R_FP (pointer) - max_n_val maximum number of values; - optional 1 for yes, 0 for no - usage usage list with optional value; - val pointer to the varaible holding the value(s) - default1 default value (if value id not there) - default2 default value if the flag is there but no value set ("")indicates an error - range_left min value ( "any" for any); - range_right max_value ( "any" for any); - */ - int pos=0; - int a; - va_list ap; - - int *int_val=NULL; - float *float_val=NULL; - char **string_val=NULL; - - - char *range_right; - char *range_left; - - - char *default_value1; - char *default_value2; - int n_para=0; - double max, min; - - static char **parameter_list; - static int number_of_parameters; - - char **para_name_list; - int n_para_name; - - char **para_val; - int n_para_val; - - char **pv_l=NULL; - int n_pv_l; - char **pv_r=NULL; - int n_pv_r; - char value[STRING]; - - - -/*CHECK THAT ALL THE PARAM IN ARG EXIST*/ - if ( para_name==NULL) - { - for ( a=1; a< argc; a++) - { - if ( is_parameter ( argv[a])) - if ( name_is_in_list ( argv[a], parameter_list, number_of_parameters, STRING)==-1) - { - fprintf ( stderr, "\n%s IS NOT A PARAMETER OF %s [FATAL/%s %s]\n",argv[a], argv[0], argv[0], VERSION); - myexit(EXIT_FAILURE); - } - - - } - - free_char (parameter_list,-1); - return 0; - } - - if ( parameter_list==NULL)parameter_list=declare_char(MAX_N_PARAM,STRING); - para_name_list=get_list_of_tokens(para_name,NULL, &n_para_name); - for ( a=0; a< n_para_name; a++) - { - sprintf ( parameter_list[number_of_parameters++],"%s", para_name_list[a]); - } - free_char(para_name_list,-1); - - - - - - set_flag[0]=0; - va_start (ap, usage); - - if (strm3 (type, "S","R_F","W_F")) - string_val=va_arg(ap, char**); - else if (strm2 (type, "D","FL")) - int_val=va_arg(ap, int*); - else if (strm (type, "F")) - float_val=va_arg(ap, float*); - else - myexit (EXIT_FAILURE); - - - - default_value1=va_arg(ap, char*); - default_value2=va_arg(ap, char*); - range_left =va_arg(ap, char*); - range_right =va_arg(ap, char*); - va_end(ap); - - - para_name_list=get_list_of_tokens(para_name, NULL, &n_para_name); - for ( a=0; a=max_n_val) - { - n_para=max_n_val-1; - - } - if ( !(strm ( argv[a], "NULL"))) - { - if ( strm3(type, "S", "R_F", "W_F")) - { - sprintf ( string_val[n_para],"%s", argv[a]); - } - else if (strm (type, "D")) - { - int_val[n_para]=atoi(argv[a]); - } - else if (strm ( type,"F")) - { - float_val[n_para]=atof(argv[a]); - } - } - n_para++; - } - } - - if ( n_para==0 && !strm2(default_value2,"","NULL") && !strm(type, "FL")) - { - para_val=get_list_of_tokens(default_value2, NULL, &n_para_val); - for ( n_para=0; n_paramax) - { - fprintf ( stderr, "\n%s out of range [%d %d] [FATAL/%s]\n", para_name, (int)min, (int)max,argv[0]); - myexit (EXIT_FAILURE); - } - } - else - { - sprintf ( value, "%d", int_val[a]); - if ( name_is_in_list(value, pv_l, n_pv_l, STRING)==-1) - fprintf ( stderr, "\n%s out of range [%s: ", para_name, value); - print_array_char (stderr, pv_l, n_pv_l, " "); - fprintf ( stderr, "\n"); - myexit(EXIT_FAILURE); - } - } - else if ( strm (type, "F")) - { - if ( n_pv_l==1) - { - min=(double)atof(range_left); - max=(double)atof(range_right); - if ( float_val[a]max) - { - fprintf ( stderr, "\n%s out of range [%f %f] [FATAL/%s]\n", para_name, (float)min, (float)max,argv[0]); - myexit (EXIT_FAILURE); - } - } - else - { - sprintf ( value, "%f", float_val[a]); - if ( name_is_in_list(value, pv_l, n_pv_l, STRING)==-1) - fprintf ( stderr, "\n%s out of range [%s: ", para_name, value); - print_array_char (stderr, pv_l, n_pv_l, " "); - fprintf ( stderr, "\n"); - myexit(EXIT_FAILURE); - } - } - } - - - if ( fp[0]!=NULL) - { - fprintf (fp[0], "%-15s\t%s\t[%d] ", para_name, type, set_flag[0]); - for (a=0; a=buf_len) - {buf_len+=100;buf=vrealloc (buf, buf_len*sizeof (char));} - buf[l++]=c; - } - /*Add the cariage return*/ - if ( c=='\n') - { - if (l>=buf_len){buf_len+=100,buf=vrealloc (buf, buf_len*sizeof (char));} - buf[l++]='\n'; - } - /*add the terminator*/ - if (l>=buf_len){buf_len+=100,buf=vrealloc (buf, buf_len*sizeof (char));} - buf[l]='\0'; - - if ( bufin!=buf && bufin!=NULL && debug==1) - fprintf ( stderr, "\nPointer change in vfgets..."); - - return buf; -} - - -FILE * find_token_in_file ( char *fname, FILE * fp, char *token) - { - int c; - static char *name; - int token_len; - - int only_start; - - /*Note: Token: any string - If Token[0]=='\n' Then Token only from the beginning of the line - */ - - if (!fp && !check_file_exists(fname))return NULL; - - if ( token[0]=='\n'){token++;only_start=1;} - else only_start=0; - - token_len=strlen (token); - - - - - - if (!fp) - { - if (name)vfree (name); - name = vcalloc (((fname)?measure_longest_line_in_file (fname):10000)+1, sizeof (char)); - fp=vfopen ( fname, "r"); - } - - while ( (fscanf ( fp, "%s", name))!=EOF) - { - - if ( name[0]=='*')while ( ((c=fgetc (fp))!='\n')&& (c!=EOF)); - else if (strncmp ( name, token,token_len)==0){return fp;} - else if (only_start) while ( ((c=fgetc (fp))!='\n')&& (c!=EOF)); - } - - vfclose ( fp); - return NULL; - } -int **get_file_block_pattern (char *fname, int *n_blocks, int max_n_line) - { - int c; - FILE *fp; - char *line; - int lline; - int **l; - int in_block; - - int max_block_size; - int block_size; - int x; - int n_line; - - lline=measure_longest_line_in_file (fname)+1; - line=vcalloc ( sizeof (char),lline+1); - - fp=vfopen (fname, "r"); - max_block_size=block_size=0; - in_block=1; - n_blocks[0]=0; - n_line=0; - while ((c=fgetc(fp))!=EOF && (n_line max_block_size)fprintf ( stderr, "\nERROR %d", l[n_blocks[0]][0]); - - l[n_blocks[0]] [l[n_blocks[0]][0]]=x; - } - else - { - in_block=0; - } - } - n_blocks[0]++; - vfree(line); - vfclose (fp); - return l; - } - -char * strip_file_from_comments (char *com, char *in_file) -{ - /*Removes in file in_file every portion of line to the right of one of the symbols included in com - Writes the striped file into a vtmpnam file - */ - FILE *fp1; - FILE *fp2; - char *out_file; - int c; - - out_file=vtmpnam(NULL); - - - fp1=vfopen (in_file , "r"); - fp2=vfopen (out_file, "w"); - while ( (c=fgetc(fp1))!=EOF) - { - if (strchr(com, c)) - { - while ( (c=fgetc(fp1))!='\n' && c!=EOF); - } - else - { - fprintf (fp2, "%c", c); - while ( (c=fgetc(fp1))!='\n' && c!=EOF)fprintf (fp2, "%c", c); - if ( c!=EOF)fprintf (fp2, "%c", c); - } - } - vfclose (fp1); - vfclose (fp2); - - return out_file; -} -FILE * skip_commentary_line_in_file ( char com, FILE *fp) -{ - int c=0; - - if ( fp==NULL)return NULL; - while ((c=fgetc(fp))==com) - { - while ((c=fgetc(fp))!='\n' && c!=EOF); - } - if ( c!=EOF && c!='\n')ungetc(c, fp); - return fp; -} - -int check_for_update ( char *web_address) -{ - char command[1000]; - char *file; - float new_version, old_version; - FILE *fp; - - check_internet_connection (IS_NOT_FATAL); - file=vtmpnam(NULL); - - sprintf ( command, "%s/%s.version",DISTRIBUTION_ADDRESS, PROGRAM); - url2file ( command, file); - - fp=vfopen ( file, "r"); - fscanf ( fp, "Version_%f", &new_version); - vfclose ( fp); - sscanf ( VERSION, "Version_%f", &old_version); - - if ( old_version new_version) - { - fprintf ( stdout, "\nUpdate Status: beta-release"); - fprintf ( stdout, "\nYour are using a beta-release of %s(%s)\n", PROGRAM, VERSION); - } - else - { - fprintf (stdout, "\nUpdate Status: uptodate"); - fprintf (stdout, "\nProgram %s(%s) is up to date\n", PROGRAM, VERSION); - } - return EXIT_SUCCESS; -} - - - - - -int check_environement_variable_is_set ( char *variable, char *description, int fatal) -{ - if ( getenv (variable)==NULL) - { - fprintf ( stderr, "\nERROR: You must set %s\n%s %s", variable, description, description); - if ( fatal==IS_FATAL) - { - fprintf ( stderr, "\n[%s:FATAL]\n", PROGRAM); - exit (EXIT_FAILURE); - } - else - fprintf ( stderr, "\n[%s:WARNING]\n", PROGRAM); - } - return 1; -} - -int url2file (char *address, char *out) -{ - char command[1000]; - - - if (check_program_is_installed ("wget",NULL, NULL,WGET_ADDRESS, IS_NOT_FATAL))sprintf (command, "wget %s -O%s >/dev/null 2>/dev/null", address, out); - else if (check_program_is_installed ("curl",NULL, NULL,CURL_ADDRESS, IS_NOT_FATAL))sprintf (command, "curl %s -o%s >/dev/null 2>/dev/null", address, out); - else - { - printf_exit (EXIT_FAILURE, stderr, "ERROR: Impossible to fectch external file: Neither wget nor curl is installed on your system [FATAL:%s]\n", PROGRAM); - return EXIT_FAILURE; - } - return safe_system (command); -} - -int wget (char *address, char *out) -{ - return printf_system ( "curl %s -O%s >/dev/null 2>/dev/null", address, out); - } - -int curl (char *address, char *out) -{ - return printf_system ( "curl %s -o%s >/dev/null 2>/dev/null", address, out); -} - - -int simple_check_internet_connection (char *ref_site) -{ - char *test,command[1000]; - int n, internet=0; - - test=vtmpnam (NULL); - if (url2file((ref_site)?ref_site:TEST_WWWSITE_4_TCOFFEE,test)!=EXIT_SUCCESS)internet=0; - else if ((n=count_n_char_in_file(test))<10)internet=0; - else internet =1; - - return internet; -} -int check_internet_connection (int mode) -{ - int internet; - internet=simple_check_internet_connection (NULL); - if (internet)return 1; - else if ( mode == NON_INTERACTIVE)return internet; - else if ( mode == IS_FATAL) - { - add_warning ( stderr,"\nERROR: You do not seem to have an active Internet Connection. Check your Proxy Setting [proxy:%s][%s:SERIOUS]\n", getenv ("http_proxy"), PROGRAM); - return EXIT_FAILURE; - } - else if ( mode==IS_NOT_FATAL) - { - - fprintf ( stderr, "\n\n\n"); - fprintf ( stderr, "*!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!\n"); - fprintf ( stderr, "*\n"); - fprintf ( stderr, "* Your Internet Connection Does not seem to be available. You may need to reconfigure your Proxy\n"); - fprintf ( stderr, "* ------- Current Proxy Value: [%s]\n*\n", (getenv ("http_proxy")?getenv ("http_proxy"):"NO Proxy Set")); - Proxy(INPUT, RESET); - - return check_internet_connection (IS_FATAL); - } - else - { - return 0; - } -} -char *pg2path (char *pg) -{ - char *path; - char *p; - char *tmp; - FILE *fp; - - if ( !pg) return NULL; - tmp=vtmpnam(NULL); - - printf_system_direct("which %s>%s 2>/dev/null", pg, tmp); - path=file2string (tmp); - chomp (path); - if (!check_file_exists (path) && !strstr (pg, ".exe")) - { - char pg2[1000]; - sprintf ( pg2, "%s.exe", pg); - path=pg2path (pg2); - } - - return path; -} - - -int check_program_is_installed ( char *program_name, char *path_variable, char *path_variable_name, char *where2getit, int fatal) - { - char command[LONG_STRING]; - static char *path; - - if ( strm (where2getit, "built_in"))return 1; - - if (path)vfree (path); - - if ( check_file_exists (path_variable)) - { - return 1; - } - else - { - - path=pg2path (program_name); - if (path && path[0])return 1; - else - { - int install=EXIT_FAILURE; - - if (fatal==INSTALL || fatal==INSTALL_OR_DIE) - { - HERE ("************** %s is missing from your system. T-Coffee will make an attempt to install it.\n", program_name); - install=printf_system ("install.pl %s -plugins=%s -clean", program_name, get_plugins_4_tcoffee(NULL)); - } - if ( install==EXIT_SUCCESS)return 1; - else if ( fatal==INSTALL)return 0; - else if ( fatal==NO_REPORT)return 0; - - if (fatal==IS_FATAL || fatal==INSTALL_OR_DIE)check_configuration4program(); - - fprintf ( stderr, "\n#*****************************************************************"); - if (fatal) fprintf ( stderr, "\n#ERROR [FATAL:%s]", PROGRAM); - else fprintf ( stderr, "\n#WARNING [%s]", PROGRAM); - fprintf ( stderr, "\n# The Program %s Needed by %s Could not be found", program_name, PROGRAM); - fprintf ( stderr, "\n# If %s is installed on your system:", program_name); - fprintf ( stderr, "\n#\t -Make sure %s is in your $path:",program_name); - - fprintf ( stderr, "\n# If %s is NOT installed obtain a copy from:", program_name); - fprintf ( stderr, "\n#\t%s\n#\n#",where2getit); - fprintf ( stderr, "\n# and install it manualy"); - fprintf ( stderr, "\n******************************************************************\n"); - } - } - if ( fatal==IS_FATAL || fatal==INSTALL_OR_DIE) myexit (EXIT_FAILURE); - return 0; - } - -FILE * display_output_filename ( FILE *io, char *type, char *format, char *name, int check_output) -{ - static char ***buf; - static int nbuf; - - if ( strm ( name, "stdout") || strm (name, "stderr"))return io; - - if ( check_output==STORE) - { - int a; - if ( buf==NULL)buf=vcalloc ( 1000, sizeof (char**)); - - for (a=0; a%s 2>/dev/null", (path!=NULL)?path:"", (path!=NULL)?"/":"",file, tmpfile); - safe_system (command); - fp=vfopen (tmpfile, "r"); - if (!fscanf ( fp, "%s", state)) - { - vfclose(fp); return 0; - } - vfclose (fp); - return state; -} - -int my_mkdir ( char *dir_in) -{ - - int dir_sep='/'; - - int a, buf; - char *dir; - - dir=vcalloc ( strlen (dir_in)+strlen (get_home_4_tcoffee())+100, sizeof (char)); - sprintf ( dir, "%s", dir_in); - tild_substitute ( dir, "~",get_home_4_tcoffee()); - - - - a=0; - - while (dir[a]!='\0') - { - - if ( dir[a]==dir_sep || dir[a+1]=='\0') - { - buf= dir[a+1]; - dir[a+1]='\0'; - - if (access(dir, F_OK)==-1) - { - - printf_system_direct("mkdir %s", dir); - - if ( access (dir, F_OK)==-1) - { - fprintf ( stderr, "\nERROR: Could Not Create Directory %s [FATAL:%s]", dir, PROGRAM); - exit (EXIT_FAILURE); - } - } - dir[a+1]=buf; - } - a++; - } - - vfree (dir); - return 1; -} - -int filename_is_special (char *fname) -{ - if ( strm5 (fname, "default", "stdin", "stdout","stderr", "/dev/null"))return 1; - if ( strm3 (fname, "STDIN", "STDOUT", "STDERR"))return 1; - return 0; -} - -char* check_file_exists ( char *fname_in) - { - FILE *fp; - static char *fname1; - static char *fname2; - - - if (!fname_in)return NULL; - if (!fname_in[0])return NULL; - if (fname_in[0]=='-')return NULL; - - if (!fname1){fname1=vcalloc (1000, sizeof (char));} - if (!fname2){fname2=vcalloc (1000, sizeof (char));} - - sprintf ( fname1, "%s", fname_in);tild_substitute (fname1, "~", get_home_4_tcoffee()); - sprintf ( fname2, "%s%s", get_cache_dir(),fname1); - - if ( filename_is_special (fname1))return fname1; - if ( strm5 (fname1, "no", "NO", "No", "NO_FILE","no_file"))return NULL/*fname1*/; - if (!file_exists( NULL,fname1)) - { - if (!file_exists (NULL,fname2))return NULL; - else return fname2; - } - else return fname1; - return NULL; - } - - -void create_file ( char *name) - { - FILE *fp; - - fp=fopen (name, "w"); - fclose (fp); - } -void delete_file ( char *fname) - { - char command[1000]; - FILE * fp; - - fp=fopen ( fname, "w"); - fprintf ( fp, "x"); - fclose ( fp); - - sprintf ( command, "rm %s", fname); - my_system ( command); - - } - -int util_rename ( char *from, char *to) - { - FILE *fp_from; - FILE *fp_to; - int c; - - - if ( !check_file_exists (from))return 0; - else if ( check_file_exists (to) && !vremove (to) && !rename ( from, to)==0 ); - else - { - - fp_from=vfopen ( from, "r"); - fp_to=vfopen ( to, "w"); - - while ( (c=fgetc (fp_from))!=EOF)fprintf ( fp_to, "%c", c); - - fclose (fp_from); - fclose ( fp_to); - - vremove ( from); - return 1; - } - return 0; - } - - -int util_copy ( char *from, char *to) - { - FILE *fp_from; - FILE *fp_to; - int c; - - - if (!check_file_exists (from))return 0; - else - { - - fp_from=vfopen ( from, "r"); - fp_to=vfopen ( to, "w"); - - while ( (c=fgetc (fp_from))!=EOF)fprintf ( fp_to, "%c", c); - - fclose (fp_from); - fclose ( fp_to); - return 1; - } - return 0; - } -FILE * output_completion4halfmat ( FILE *fp,int n, int tot, int n_reports, char *s) - -{ - int max, left, achieved; - int up; - - if (n>=0)up=1; - else up=-1; - - - max=((tot*tot)-tot)/2; - left=((tot-n)*(tot-n)-(tot-n))/2; - - achieved=max-left; - if (up==1); - else - { - int b; - b=achieved; - achieved=left; - left=b; - } - return output_completion (fp,achieved, max, n_reports, s); -} - - -FILE * output_completion ( FILE *fp,int n, int tot, int n_reports, char *string) - { - - static int ref_val; - static int flag; - static int ref_time; - int t, elapsed; - n++; - - if ( n==1) - { - ref_val=flag=0; - ref_time=get_time()/1000; - } - t=get_time()/1000; - elapsed=t-ref_time; - - if ( !ref_val && !flag) - { - fprintf (fp, "\n\t\t[%s][TOT=%5d][%3d %%][ELAPSED TIME: %4d sec.]",(string)?string:"",tot,(tot==1)?100:0, elapsed); - flag=1; - } - else if ( n==tot)fprintf (fp, "\r\t\t[%s][TOT=%5d][%3d %%][ELAPSED TIME: %4d sec.]",(string)?string:"", tot,100, elapsed); - else if ( ((n*100)/tot)>ref_val) - { - ref_val=((n*100)/tot); - t=(ref_val==0)?0:elapsed/ref_val; - t=t*(100-ref_val); - t=0; - fprintf (fp, "\r\t\t[%s][TOT=%5d][%3d %%][ELAPSED TIME: %4d sec.]", (string)?string:"",tot,ref_val, elapsed); - flag=0; - } - return fp; - } -void * null_function (int a,...) -{ - fprintf ( stderr, "\n[ERROR] Attempt to use the Null Function [FATAL:%s]", PROGRAM); - crash (""); - return NULL; -} - -int btoi ( int nc,...) -{ - va_list ap; - int a, b; - va_start (ap, nc); - for ( a=0, b=0; a< nc; a++) - { - b+=pow(2,a)*va_arg (ap,int); - } - va_end(ap); - return b; -} - -/*********************************************************************/ -/* */ -/* Geometric FUNCTIONS */ -/* */ -/* */ -/*********************************************************************/ - -float get_geometric_distance ( float ** matrix, int ncoor, int d1, int d2, char *mode) -{ - float d; - float t=0; - int a; - - if ( strm (mode, "euclidian")) - { - for ( a=0; a< ncoor; a++) - { - d=(matrix[d1][a]-matrix[d2][a]); - t+=d*d; - } - return (float)sqrt((double)t); - } - return 0; -} - - - -/*********************************************************************/ -/* */ -/* MATHEMATICAL FUNCTIONS */ -/* */ -/* */ -/*********************************************************************/ -static double EXP_UNDERFLOW_THRESHOLD = -4.60f; -static double LOG_UNDERFLOW_THRESHOLD = 7.50f; -static double LOG_ZERO = -FLT_MAX; -static double LOG_ONE = 0.0f; -double log_addN (int N, double*L) - -{ - double v; - int a; - if (N==0)return 0; - if ( N==1)return L[0]; - - v=L[0]; - for ( a=1; a= LOG_UNDERFLOW_THRESHOLD)) ? y : log (exp (x-y) + 1) + x; - else - x = (y == LOG_ZERO || ((x - y) >= LOG_UNDERFLOW_THRESHOLD)) ? x : log (exp (x-y) + 1) + y; - return x; -} - - - - -float M_chooses_Nlog ( int m, int N) -{ - /*Choose M elemets in N*/ - float z1, z2,z=0; - if ( m==N) return 0; - else if ( m>N) - { - fprintf ( stderr, "\nERROR: M chosses N out of bounds ( M>N) [FATAL:%s]", PROGRAM); - myexit (EXIT_FAILURE); - } - else - { - z1=factorial_log (m+1, N); - z2=factorial_log (1, N-m); - z=z1-z2; - return z; - } - - return -1; -} - -float factorial_log ( int start, int end) -{ - if ( end==0)return 0; - else if ( end==start) return (float)my_int_log((double)start); - else if ( start>end) - { - fprintf ( stderr, "\nERROR: factorial log out of bounds (%d %d) [FATAL:%s]",start, end, PROGRAM); - myexit (EXIT_FAILURE); - } - else - { - int a=0; - float x=0; - for ( x=0,a=start; a<=end; a++) - { - x+=(float)my_int_log(a); - } - return x; - } - return 0; -} - -float my_int_log(int a) -{ - - if ( a>=100000)return log(a); - else - { - static float *lu; - if (!lu) lu=vcalloc ( 100000, sizeof (float)); - if ( !lu[a]){lu[a]=log(a);} - return lu[a]; - } - return 0; -} - -double factorial (int start, int end); -double M_chooses_N ( int m, int N) -{ - /*Choose M elemets in N*/ - if ( m==N) return 1; - else if ( m>N) - { - fprintf ( stderr, "\nERROR: M chosses N out of bounds ( M>N) [FATAL:%s]", PROGRAM); - myexit (EXIT_FAILURE); - } - else if ( N<50) - { - return factorial (m+1, N)/factorial (1, N-m); - } - else - { - fprintf ( stderr, "\nERROR: M chosses N out of bounds ( N>50). Use log space [FATAL:%s]", PROGRAM); - myexit (EXIT_FAILURE); - } - return -1; -} -double factorial (int start, int end) - { - - if ( start>end || start<0 || end<0) - { - fprintf ( stderr, "\nERROR: Negative Factorial [FATAL:%s]", PROGRAM); - myexit ( EXIT_FAILURE); - } - else if (end==0) return 1; - else if (end==start) return end; - else - { - static double **lu; - if ( !lu)lu=declare_double (100, 100); - - if ( lu[start][end])return lu[start][end]; - else - { - int a; - lu[start][end]=(double)start; - for ( a=start+1; a<=end; a++) - { - lu[start][end]*=(double)a; - } - return lu[start][end]; - } - } - return -1; - } -/*********************************************************************/ -/* */ -/* Fast Log Additions (adapted from Probcons)*/ -/* */ -/* */ -/*********************************************************************/ -double EXP (double x){ - //return exp(x); - if (x > -2){ - if (x > -0.5){ - if (x > 0) - return exp(x); - return (((0.03254409303190190000*x + 0.16280432765779600000)*x + 0.49929760485974900000)*x + 0.99995149601363700000)*x + 0.99999925508501600000; - } - if (x > -1) - return (((0.01973899026052090000*x + 0.13822379685007000000)*x + 0.48056651562365000000)*x + 0.99326940370383500000)*x + 0.99906756856399500000; - return (((0.00940528203591384000*x + 0.09414963667859410000)*x + 0.40825793595877300000)*x + 0.93933625499130400000)*x + 0.98369508190545300000; - } - if (x > -8){ - if (x > -4) - return (((0.00217245711583303000*x + 0.03484829428350620000)*x + 0.22118199801337800000)*x + 0.67049462206469500000)*x + 0.83556950223398500000; - return (((0.00012398771025456900*x + 0.00349155785951272000)*x + 0.03727721426017900000)*x + 0.17974997741536900000)*x + 0.33249299994217400000; - } - if (x > -16) - return (((0.00000051741713416603*x + 0.00002721456879608080)*x + 0.00053418601865636800)*x + 0.00464101989351936000)*x + 0.01507447981459420000; - return 0; -} - -float LOOKUP (float x){ - - if (x <= 1.00f) return ((-0.009350833524763f * x + 0.130659527668286f) * x + 0.498799810682272f) * x + 0.693203116424741f; - if (x <= 2.50f) return ((-0.014532321752540f * x + 0.139942324101744f) * x + 0.495635523139337f) * x + 0.692140569840976f; - if (x <= 4.50f) return ((-0.004605031767994f * x + 0.063427417320019f) * x + 0.695956496475118f) * x + 0.514272634594009f; - - return ((-0.000458661602210f * x + 0.009695946122598f) * x + 0.930734667215156f) * x + 0.168037164329057f; -} -void LOG_PLUS_EQUALS (float *x, float y){ - - if (x[0] < y) - x[0] = (x[0] == LOG_ZERO || y - x[0] >= LOG_UNDERFLOW_THRESHOLD) ? y : LOOKUP(y-x[0]) + x[0]; - else - x[0] = (y == LOG_ZERO || x[0] - y >= LOG_UNDERFLOW_THRESHOLD) ? x[0] : LOOKUP(x[0]-y) + y; -} - -float LOG_ADD (float x, float y){ - if (x < y) return (x == LOG_ZERO || y - x >= LOG_UNDERFLOW_THRESHOLD) ? y : LOOKUP(y-x) + x; - return (y == LOG_ZERO || x - y >= LOG_UNDERFLOW_THRESHOLD) ? x : LOOKUP(x-y) + y; -} - -float LOG_ADD3 (float x1, float x2, float x3){ - return LOG_ADD (x1, LOG_ADD (x2, x3)); -} -float LOG_ADD4 (float x1, float x2, float x3, float x4){ - return LOG_ADD (x1, LOG_ADD (x2, LOG_ADD (x3, x4))); -} -float LOG_ADD5 (float x1, float x2, float x3, float x4, float x5){ - return LOG_ADD (x1, LOG_ADD (x2, LOG_ADD (x3, LOG_ADD (x4, x5)))); -} -float LOG_ADD6 (float x1, float x2, float x3, float x4, float x5, float x6){ - return LOG_ADD (x1, LOG_ADD (x2, LOG_ADD (x3, LOG_ADD (x4, LOG_ADD (x5, x6))))); -} -float LOG_ADD7 (float x1, float x2, float x3, float x4, float x5, float x6, float x7){ - return LOG_ADD (x1, LOG_ADD (x2, LOG_ADD (x3, LOG_ADD (x4, LOG_ADD (x5, LOG_ADD (x6, x7)))))); -} - - -#define LONG_SIZE 2 -#define SHORT_SIZE 1 -#define SPACE_PAD 4 -#define STD_SIZE 0 -char *strscn(char *s, char *pattern); -long unsigned strtou(char *s, int base, char **scan_end); -long int strtoi(char *s, int base, char **scan_end); -int my_isnumber(char c, int base); -int tonumber(char c); - -int my_vsscanf(char *buf, char *fmt, va_list parms) - { - int scanned = 0, size = 0, suppress = 0; - int w = 0, flag = 0, l = 0; - char c, *c_ptr; - long int n1, *n1l; - int *n1b; - short int *n1s; - long unsigned n2, *n2l, parsing = 0; - unsigned *n2b; - short unsigned *n2s; - double n3, *n3l; - float *n3s; - char *base = buf; - while (*fmt != 0) { - if (*fmt != '%' && !parsing) { - /* No token detected */ - fmt++; - } else { - /* We need to make a conversion */ - if (*fmt == '%') { - fmt++; - parsing = 1; - size = STD_SIZE; - suppress = 0; - w = 0; - flag = 0; - l = 0; - } - /* Parse token */ - switch (*fmt) { - case '1': - case '2': - case '3': - case '4': - case '5': - case '6': - case '7': - case '8': - case '9': - case '0': - if (parsing == 1) { - w = strtou(fmt, 10, &base); - /* We use SPACE_PAD to parse %10s - * commands where the number is the - * maximum number of char to store! - */ - flag |= SPACE_PAD; - fmt = base - 1; - } - break; - case 'c': - c = *buf++; - c_ptr = va_arg(parms, char *); - *c_ptr = c; - scanned++; - parsing = 0; - break; - case 's': - c_ptr = va_arg(parms, char *); - while (*buf != 0 && isspace(*buf)) - buf++; - l = 0; - while (*buf != 0 && !isspace(*buf)) { - if (!(flag & SPACE_PAD)) - *c_ptr++ = *buf; - else if (l < w) { - *c_ptr++ = *buf; - l++; - } - buf++; - } - *c_ptr = 0; - scanned++; - parsing = 0; - break; - case 'i': - case 'd': - buf = strscn(buf, "1234567890-+"); - n1 = strtoi(buf, 10, &base); - buf = base; - if (!suppress) { - switch (size) { - case STD_SIZE: - n1b = va_arg(parms, int *); - *n1b = (int) n1; - break; - case LONG_SIZE: - n1l = va_arg(parms, - long int *); - *n1l = n1; - break; - case SHORT_SIZE: - n1s = va_arg(parms, - short int *); - *n1s = (short) (n1); - break; - } - scanned++; - } - parsing = 0; - break; - case 'u': - buf = strscn(buf, "1234567890"); - n2 = strtou(buf, 10, &base); - buf = base; - if (!suppress) { - switch (size) { - case STD_SIZE: - n2b = va_arg(parms, - unsigned *); - *n2b = (unsigned) n2; - break; - case LONG_SIZE: - n2l = va_arg(parms, - long unsigned *); - *n2l = n2; - break; - case SHORT_SIZE: - n2s = va_arg(parms, short unsigned - *); - *n2s = (short) (n2); - break; - } - scanned++; - } - parsing = 0; - break; - case 'x': - buf = strscn(buf, "1234567890xabcdefABCDEF"); - n2 = strtou(buf, 16, &base); - buf = base; - if (!suppress) { - switch (size) { - case STD_SIZE: - n2b = va_arg(parms, - unsigned *); - *n2b = (unsigned) n2; - break; - case LONG_SIZE: - n2l = va_arg(parms, - long unsigned *); - *n2l = n2; - break; - case SHORT_SIZE: - n2s = va_arg(parms, short unsigned - *); - *n2s = (short) (n2); - break; - } - scanned++; - } - parsing = 0; - break; - case 'f': - case 'g': - case 'e': - buf = strscn(buf, "1234567890.e+-"); - n3 = strtod(buf, &base); - buf = base; - if (!suppress) { - switch (size) { - case STD_SIZE: - n3l = va_arg(parms, double *); - *n3l = n3; - break; - case LONG_SIZE: - n3l = va_arg(parms, double *); - *n3l = n3; - break; - case SHORT_SIZE: - n3s = va_arg(parms, float *); - *n3s = (float) (n3); - break; - } - scanned++; - } - parsing = 0; - break; - case 'l': - size = LONG_SIZE; - break; - case 'h': - case 'n': - size = SHORT_SIZE; - break; - case '*': - suppress = 1; - break; - default: - parsing = 0; - break; - } - fmt++; - } - } - return (scanned); - } -char *strscn(char *s, char *pattern) - { - char *scan; - while (*s != 0) { - scan = pattern; - while (*scan != 0) { - if (*s == *scan) - return (s); - else - scan++; - } - s++; - } - return (NULL); - } - -long unsigned strtou(char *s, int base, char **scan_end) - { - int value, overflow = 0; - long unsigned result = 0, oldresult; - /* Skip trailing zeros */ - while (*s == '0') - s++; - if (*s == 'x' && base == 16) { - s++; - while (*s == '0') - s++; - } - /* Convert number */ - while (my_isnumber(*s, base)) { - value = tonumber(*s++); - if (value > base || value < 0) - return (0); - oldresult = result; - result *= base; - result += value; - /* Detect overflow */ - if (oldresult > result) - overflow = 1; - } - if (scan_end != 0L) - *scan_end = s; - if (overflow) - result = INT_MAX; - return (result); - } -long int strtoi(char *s, int base, char **scan_end) - { - int sign, value, overflow = 0; - long int result = 0, oldresult; - /* Evaluate sign */ - if (*s == '-') { - sign = -1; - s++; - } else if (*s == '+') { - sign = 1; - s++; - } else - sign = 1; - /* Skip trailing zeros */ - while (*s == '0') - s++; - /* Convert number */ - while (my_isnumber(*s, base)) { - value = tonumber(*s++); - if (value > base || value < 0) - return (0); - oldresult = result; - result *= base; - result += value; - /* Detect overflow */ - if (oldresult > result) - overflow = 1; - } - if (scan_end != 0L) - *scan_end = s; - if (overflow) - result = INT_MAX; - result *= sign; - return (result); - } - -int my_isnumber(char c, int base) - { - static char *digits = "0123456789ABCDEF"; - if ((c >= '0' && c <= digits[base - 1])) - return (1); - else - return (0); - } - - int tonumber(char c) - { - if (c >= '0' && c <= '9') - return (c - '0'); - else if (c >= 'A' && c <= 'F') - return (c - 'A' + 10); - else if (c >= 'a' && c <= 'f') - return (c - 'a' + 10); - else - return (c); - } - -/////////////////////////////////////////////////////////////////////////////////////////// -// Hash function -//////////////////////////////////////////////////////////////////////////////////////////// -unsigned long hash_file(char* file) //returns the hash value for key - { - // Calculate a hash value by the division method: - // Transform key into a natural number k = sum ( key[i]*128^(L-i) ) and calculate i= k % num_slots. - // Since calculating k would lead to an overflow, i is calculated iteratively - // and at each iteration the part divisible by num_slots is subtracted, i.e. (% num_slots is taken). - - unsigned long i=0; // Start of iteration: k is zero - unsigned long num_slots=999999999; - - - FILE *fp; - unsigned long c; - - - if (file==NULL || !check_file_exists (file) ) {printf("Warning from util.c:hasch_file: No File [FATAL:%s]\n", PROGRAM); myexit (EXIT_FAILURE);} - num_slots/=128; - fp=vfopen (file, "r"); - while ( (c=fgetc (fp))!=EOF) - { - i = ((i<<7) + c) % num_slots; - } - vfclose (fp); - - return i; - } -int ** r_generate_array_int_list ( int len, int min, int max,int step, int **array, int f, int *n,FILE *fp, int *c_array); -int **generate_array_int_list (int len, int min, int max, int step, int *n, char *file) - { - int **array, *c_array; - FILE *fp=NULL; - - if (n==NULL) - { - array=NULL; - fp=vfopen (file, "w"); - } - else - { - int a,s; - n[0]=0; - for (s=1, a=0; a -#include -#include -#include -#include -#include -#include "io_lib_header.h" -#include "util_lib_header.h" -#include "dp_lib_header.h" -#include "define_header.h" -/************************************************************************************/ -/* NEW ANALYZE 2 : SAR */ -/************************************************************************************/ -float display_prediction_old (int **prediction, int n, Alignment *A, Alignment *S, int field); - -float display_prediction (int ***count, Alignment *S, int c, int n); -Alignment * filter_aln4sar0 ( Alignment *A, Alignment *S, int c, int leave, char *mode); -Alignment * filter_aln4sar1 ( Alignment *A, Alignment *S, int c, int leave, char *mode); -Alignment * filter_aln4sar2 ( Alignment *A, Alignment *S, int c, int leave, char *mode); -Alignment * filter_aln4sar3 ( Alignment *A, Alignment *S, int c, int leave, char *mode); -Alignment * filter_aln4sar4 ( Alignment *A, Alignment *S, int c, int leave, char *mode); -Alignment * filter_aln4sar5 ( Alignment *A, Alignment *S, int c, int leave, char *mode); - -int **sar2profile ( Alignment *A, Alignment *S, int c, int leave); -int **sar2profile_sim ( Alignment *A, Alignment *S, int **sim, int comp, int leave); -int sar_profile2score ( char *seq, int **profile); -double sar_vs_iseq1( char *sar, int *seq, float gl, int **sim, char *best_aa); -double sar_vs_seq1 ( char *sar, char *seq, float gl, int **sim, char *best_aa); -double sar_vs_seq2 ( char *sar, char *seq, float ng, int **mat, char *a); -double sar_vs_seq3 ( char *sar, char *seq, float ng, int **mat, char *a); - -double sar_vs_iseq4 ( char *sar, int *seq, float ng, int **mat, char *a);//supports an extended alphabet -double sar_vs_seq4 ( char *sar, char *seq, float ng, int **mat, char *a); - -double sar_vs_seq5 ( char *sar, char *seq, float ng, int **mat, char *a); -int make_sim_pred ( Alignment *A,Alignment *S, int comp, int seq); - -int **sar2profile_sim ( Alignment *A, Alignment *S, int **sim, int comp, int leave) -{ - - int a, b, r, c, c1, c2, r1, r2, s, p; - int ***cache, **profile; - - - profile=declare_int (A->len_aln, 26); - cache=declare_arrayN (3,sizeof (int),2,A->len_aln, 26); - - for ( a=0; a< A->len_aln; a++) - for ( b=0; b< A->nseq; b++) - { - r=tolower(A->seq_al[b][a]); - c=( S->seq_al[comp][b]=='I')?1:0; - if (b==leave || is_gap(r)) continue; - cache [c][a][r-'a']++; - } - for (a=0; a< A->nseq; a++) - { - if ( a==leave) continue; - for ( b=0; b< A->nseq; b++) - { - c1=(S->seq_al[comp][a]=='I')?1:0; - c2=(S->seq_al[comp][b]=='I')?1:0; - if ( b==leave || b==a || c1!=1 || c1==c2) continue; - s=sim[a][b]; - - for (p=0; plen_aln; p++) - { - r1=tolower(A->seq_al[a][p]); - r2=tolower(A->seq_al[b][p]); - if ( is_gap(r1) || is_gap(r2) || r1==r2)continue; - r1-='a';r2-='a'; - if (cache[1][p][r2])continue; - if ( s<50)continue; - profile[p][r2]-=s; - } - } - } - - free_arrayN((void***)cache,3); - return profile; - -} -int **sar2profile ( Alignment *A, Alignment *S, int comp, int leave) -{ - - int a, b,c,r, n, v, npos=0; - int ***cache, **profile; - int ncat; - float n_gap, max_gap; - profile=declare_int (A->len_aln, 26); - cache=declare_arrayN (3,sizeof (int),2,A->len_aln, 26); - - - - for ( n=0, a=0; a< A->nseq; a++) - { - if ( a==leave) continue; - else n+=(S->seq_al[comp][a]=='I')?1:0; - } - - for ( a=0; a< A->len_aln; a++) - for ( b=0; b< A->nseq; b++) - { - r=tolower(A->seq_al[b][a]); - c=( S->seq_al[comp][b]=='I')?1:0; - if (b==leave) continue; - else if (is_gap(r))continue; - r-='a'; - cache [c][a][r]++; - } - - ncat=15; /*ncat: limit the analysis to columns containing less than ncat categories of aa*/ - max_gap=0.05; - for (a=0; a< A->len_aln; a++) - { - for (n_gap=0,b=0; b< A->nseq; b++) - n_gap+=(is_gap(A->seq_al[b][a])); - n_gap/=(float)A->nseq; - - if ( n_gap> max_gap)continue; - - for (v=0,r=0; r< 26; r++) - { - if (cache [0][a][r] || cache[1][a][r])v++; - } - - for (n=0,r=0; r< 26 && vnseq, 2); - list2=declare_int ( inA->len_aln, 2); - - cache=declare_arrayN (3,sizeof (int),inA->len_aln,2, 26); - F=copy_aln (inA, NULL); - - A=copy_aln (inA, NULL); - A->nseq=strlen (S->seq_al[comp]); - - strget_param (mode, "_T1_", "5", "%d", &T1); - for ( a=0; a< A->len_aln; a++) - { - n1=n0=g=0; - for (b=0; b< A->nseq; b++) - { - if ( b==leave) continue; - i=(S->seq_al[comp][b]=='I')?1:0; - r=tolower(A->seq_al[b][a]); - if ( r=='-')continue; - cache[a][i][r-'a']++; - } - } - - for (a=0; a< A->nseq; a++) - for ( score=0,b=0; blen_aln; b++) - { - r=tolower (A->seq_al[a][b]); - if ( is_gap(r))continue; - else if ( cache[b][0][r-'a'] && !cache[b][1][r-'a'])list1[a][0]++; - } - - for (a=0; a< A->len_aln; a++) - { - for ( score=0,b=0; b< A->nseq; b++) - { - r=tolower (A->seq_al[b][a]); - if ( r=='-')continue; - else r-='a'; - if ( cache[a][0][r] && !cache[a][1][r])score ++; - } - list2[a][0]=a; - list2[a][1]=score; - } - sort_int (list2, 2, 1, 0, F->len_aln-1); - - Delta=A->len_aln/(100/T1); - for ( a=0; a< F->len_aln-Delta; a++) - { - b=list2[a][0]; - for ( c=0; cnseq; c++) - { - F->seq_al[c][b]='-'; - } - } - - ungap_aln (F); - free_aln (A); - free_arrayN ( (void ***)cache, 3); - free_arrayN ((void**)list1, 2); - free_arrayN ((void**)list2, 2); - - return F; -} -Alignment * filter_aln4sar2 ( Alignment *inA, Alignment *S, int comp, int leave, char *mode) -{ - Alignment *F, *A; - int a,b,r,ncat; - int *cache; - int max_ncat=10; - - /*Keep Low entropy columns that contain less than ncat categories of different amino acids*/ - /*REmove columns containing 10% or more gaps*/ - - cache=vcalloc ( 500, sizeof (char)); - F=copy_aln (inA, NULL); - A=copy_aln (inA, NULL); - A->nseq=strlen (S->seq_al[comp]); - for ( a=0; a< A->len_aln; a++) - { - for (ncat=0,b=0; b< A->nseq; b++) - { - if ( b==leave) continue; - - r=tolower(A->seq_al[b][a]); - if ( !cache[r])ncat++; - cache[r]++; - } - - if ( ncat nseq)<10) - { - ; - } - else - { - for (b=0; bnseq; b++) - { - r=tolower(F->seq_al[b][a]); - F->seq_al[b][a]='-'; - cache[r]=0; - } - } - for (b=0; bnseq; b++) - { - r=tolower(A->seq_al[b][a]); - cache[r]=0; - } - } - - free_aln (A); - ungap_aln (F); - vfree (cache); - return F; -} - -Alignment * filter_aln4sar3 ( Alignment *inA, Alignment *S, int comp, int leave, char *mode) -{ - Alignment *F, *rA, *A; - int a, b,c; - int **list1; - char *bufS, *bufA; - int Delta; - int T3; - - /*Keep the 10% positions most correlated with the 0/1 pattern*/ - - A=copy_aln (inA, NULL); - A->nseq=strlen (S->seq_al[comp]); - F=copy_aln (inA, NULL); - rA=rotate_aln (A, NULL); - - strget_param (mode, "_T3_", "10", "%d", &T3); - - - list1=declare_int ( inA->len_aln, 2); - bufA=vcalloc ( A->nseq+1, sizeof (char)); - bufS=vcalloc ( A->nseq+1, sizeof (char)); - - sprintf ( bufS, "%s", S->seq_al[comp]); - splice_out_seg(bufS,leave, 1); - - - for (a=0; a< A->len_aln; a++) - { - char aa; - list1[a][0]=a; - sprintf (bufA, "%s", rA->seq_al[a]); - splice_out_seg (bufA,leave,1); - list1[a][1]=(int)sar_vs_seq3 ( bufS, bufA,0,NULL, &aa); - } - - sort_int (list1, 2, 1, 0, F->len_aln-1); - Delta=F->len_aln/(100/T3); - for ( a=0; a< F->len_aln-Delta; a++) - { - b=list1[a][0]; - - for ( c=0; cnseq; c++) - { - F->seq_al[c][b]='-'; - } - - } - F->score_aln=list1[F->len_aln-1][1]; - ungap_aln (F); - - free_aln (rA); - free_aln(A); - free_arrayN ((void**)list1, 2); - vfree (bufS);vfree (bufA); - return F; -} -Alignment * filter_aln4sar4 ( Alignment *inA, Alignment *S, int comp, int leave, char *mode) -{ - Alignment *F, *A; - int a, b,c, i,r, n0, n1,g,score; - int ***cache, **list1, **list2; - - /*Keep only the positions where there are residues ONLY associated with 0 sequences*/ - - list1=declare_int ( inA->nseq, 2); - list2=declare_int ( inA->len_aln, 2); - - cache=declare_arrayN (3,sizeof (int),inA->len_aln,2, 26); - F=copy_aln (inA, NULL); - A=copy_aln (inA, NULL); - A->nseq=strlen (S->seq_al[comp]); - - for ( a=0; a< A->len_aln; a++) - { - n1=n0=g=0; - for (b=0; b< A->nseq; b++) - { - if ( b==leave) continue; - i=(S->seq_al[comp][b]=='I')?1:0; - r=tolower(A->seq_al[b][a]); - if ( r=='-')continue; - cache[a][i][r-'a']++; - n1+=i; - } - } - - - for (a=0; a< A->len_aln; a++) - { - for ( score=0,b=0; b< A->nseq; b++) - { - r=tolower (F->seq_al[b][a]); - if ( r=='-')continue; - else r-='a'; - if (cache[a][1][r]>=n1/2)score=1; - } - list2[a][0]=a; - list2[a][1]=score; - } - - - for ( a=0; a< F->len_aln; a++) - { - if ( list2[a][1]==1); - else - { - b=list2[a][0]; - for ( c=0; cnseq; c++) - { - F->seq_al[c][b]='-'; - } - } - } - ungap_aln (F); - free_aln (A); - free_arrayN ( (void ***)cache, 3); - free_arrayN ((void**)list1, 2); - free_arrayN ((void**)list2, 2); - - return F; -} - -Alignment * filter_aln4sar5 ( Alignment *inA, Alignment *S, int comp, int leave, char *mode) -{ - Alignment *F, *rA, *A; - int a, b,c; - int **list1; - char *bufS, *bufA; - int max; - /*Look for the positions that show the best correlation between the sequence variation and the SAR*/ - - A=copy_aln (inA, NULL); - A->nseq=strlen (S->seq_al[comp]); - - rA=rotate_aln (inA, NULL); - F=copy_aln (inA, NULL); - - list1=declare_int ( A->len_aln, 2); - bufA=vcalloc ( A->nseq+1, sizeof (char)); - bufS=vcalloc ( A->nseq+1, sizeof (char)); - - - - sprintf ( bufS, "%s", S->seq_al[comp]); - splice_out_seg(bufS,leave, 1); - - - for (a=0; a< A->len_aln; a++) - { - char aa; - list1[a][0]=a; - sprintf (bufA, "%s", rA->seq_al[a]); - splice_out_seg (bufA,leave,1); - list1[a][1]=(int)sar_vs_seq4 ( bufS, bufA,0,NULL, &aa); - } - - sort_int (list1, 2, 1, 0, F->len_aln-1); - max=F->score=list1[F->len_aln-1][1]; - max-=(max/10); - - - for ( a=0; a< F->len_aln-10; a++) - { - - b=list1[a][0]; - - for ( c=0; cnseq; c++) - { - F->seq_al[c][b]='-'; - } - - } - F->score_aln=10; - ungap_aln (F); - free_aln (inA); - free_aln (rA); - free_arrayN ((void**)list1, 2); - vfree (bufS);vfree (bufA); - return F; -} - -int sar_profile2score ( char *seq, int **P) -{ - int a,r, l, score; - - l=strlen (seq); - for ( score=0,a=0; a< l; a++) - { - r=seq[a]; - if ( is_gap(r))continue; - score+=P[a][tolower(r)-'a']; - } - return score; -} -int make_sim_pred ( Alignment *A,Alignment *S, int comp, int seq) -{ - int a, b, i, r1, r2; - static float **cscore; - static float **tscore; - - if ( !cscore) - { - cscore=declare_float (2, 2); - tscore=declare_float (2, 2); - } - - for (a=0; a< 2; a++)for (b=0; b<2; b++)cscore[a][b]=tscore[a][b]=0; - - for ( a=0; alen_aln; a++) - { - r1=A->seq_al[seq][a]; - if ( r1=='-') continue; - else - { - for ( b=0; b< A->nseq; b++) - { - if (b==seq) continue; - else - { - r2=A->seq_al[b][a]; - if (r2=='-')continue; - else - { - - i=(S->seq_al[comp][b]=='I')?1:0; - cscore[i][0]+=(r1==r2)?1:0; - cscore[i][1]++; - } - } - } - - for (i=0; i<2; i++) - { - cscore[i][0]/=(cscore[i][1]==0)?1:cscore[i][1]; - tscore[i][0]+=cscore[i][0];tscore[i][1]++; - cscore[i][0]=cscore[i][1]=0; - } - } - } - - fprintf ( stdout, "\nn\t 1: %.2f 0: %.2f", tscore[1][0],tscore[0][0]); - return ( tscore[1][0]>=tscore[0][0])?1:0; -} - - -Alignment * sar_analyze (Alignment *inA, Alignment *inS, char *mode) -{ - int ***sim,***glob_results, ***comp_results; - int *count; - int a,b,c,m; - float *tot2; - Alignment *A=NULL,*S=NULL,*F, *SUBSET; - char *subset, *target; - int jack, T, filter; - filter_func *ff; - int n_methods=0; - char *prediction, *reliability; - int pred_start=0, pred_end, ref_start=0, ref_end; - int display, CSV=1, NONCSV=0; - char method[5]; - - strget_param (mode, "_METHOD_", "1111", "%s_", method); - ff=vcalloc (6,sizeof (filter_func)); - if (method[0]=='1')ff[n_methods++]=filter_aln4sar0; - if (method[1]=='1')ff[n_methods++]=filter_aln4sar1; - if (method[2]=='1')ff[n_methods++]=filter_aln4sar2; - if (method[3]=='1')ff[n_methods++]=filter_aln4sar3; - /* - ff[n_methods++]=filter_aln4sar4; - ff[n_methods++]=filter_aln4sar5; - */ - sim=vcalloc (n_methods, sizeof (int**)); - - - tot2=vcalloc ( 10, sizeof (float)); - subset=vcalloc ( 100, sizeof (char)); - target=vcalloc ( 100, sizeof (char)); - - strget_param (mode, "_TARGET_", "no", "%s_", target); - strget_param (mode, "_SUBSET_", "no", "%s_", subset); - strget_param (mode, "_JACK_", "0", "%d", &jack); - strget_param (mode, "_T_", "0", "%d", &T); - strget_param (mode, "_FILTER_", "11", "%d", &filter); - strget_param (mode, "_DISPLAY_", "0", "%d", &display); - - - - if ( !strm (target, "no")) - { - Alignment *T; - T=main_read_aln(target, NULL); - if ( T->len_aln !=inA->len_aln ) - { - printf_exit ( EXIT_FAILURE,stderr, "Error: %s is incompatible with the reference alignment [FATAL:%s]",target,PROGRAM); - } - - inA=stack_aln (inA, T); - - } - - if ( !strm(subset, "no")) - { - SUBSET=main_read_aln (subset, NULL); - sarset2subsarset ( inA, inS, &A, &S, SUBSET); - } - else - { - A=inA; - S=inS; - } - - - prediction=vcalloc ( n_methods+1, sizeof (char)); - reliability=vcalloc ( n_methods+1, sizeof (char)); - - glob_results=declare_arrayN(3, sizeof (int), n_methods*2, 2, 2); - - count=vcalloc (S->nseq, sizeof (int)); - for (a=0; anseq; a++) - { - int l; - l=strlen (S->seq_al[a]); - for ( b=0; bseq_al[a][b]=='I')?1:0; - } - if ( display==CSV) - {fprintf ( stdout, "\nCompound %s ; Ntargets %d", S->name[a],count[a]); - pred_start=(strlen (S->seq_al[0])==A->nseq)?0:strlen (S->seq_al[0]); - pred_end=A->nseq; - for (a=pred_start; a< pred_end; a++) - fprintf ( stdout, ";%s", A->name[a]); - fprintf ( stdout, ";npred;"); - } - - - for (a=0; anseq; a++) - { - int n_pred; - comp_results=declare_arrayN(3, sizeof (int), n_methods*2, 2, 2); - - pred_start=(strlen (S->seq_al[a])==A->nseq)?0:strlen (S->seq_al[a]); - pred_end=A->nseq; - if ( display==CSV)fprintf ( stdout, "\n%s;%d", S->name[a],count[a]); - - for (n_pred=0,b=pred_start; b%-15s %10s %c ", S->name[a], A->name[b], (pred_start==0)?S->seq_al[a][b]:'?'); - if (jack || b==pred_start) - { - for (m=0; mseq_al[m]); - - for (c=ref_start;cseq_al[a][c]=='O') - { - Nbsim=MAX(Nbsim,sim[m][b][c]); - } - else - { - Ybsim=MAX(Ybsim,sim[m][b][c]); - } - } - - bsim=(Ybsim>Nbsim)?Ybsim:-Nbsim; - pred=(bsim>0)?1:0; - real=(S->seq_al[a][b]=='O')?0:1; - comp_results[m][pred][real]++; - glob_results[m][pred][real]++; - score+=pred; - prediction[m]=pred+'0'; - reliability[m]=(FABS((Ybsim-Nbsim))-1)/10+'0'; - } - - if ( score>0)n_pred++; - prediction[m]=reliability[m]='\0'; - if (display==NONCSV)fprintf ( stdout, "Compound_Count:%d primary_predictions: %s Total: %d", count[a],prediction, score); - else if ( display==CSV)fprintf ( stdout, ";%d", score); - for (t=0; tt) - { - comp_results[t+n_methods][1][real]++; - glob_results[t+n_methods][1][real]++; - } - else - { - comp_results[t+n_methods][0][real]++; - glob_results[t+n_methods][0][real]++; - } - } - } - if ( display==NONCSV) - {if ( pred_start==0)display_prediction (comp_results, S,a, n_methods*2);} - else fprintf (stdout, ";%d;",n_pred); - } - if ( display==NONCSV)if (pred_start==0)display_prediction (glob_results, S,-1, n_methods*2); - - - exit (EXIT_SUCCESS); -} -float display_prediction (int ***count, Alignment *S, int c, int n) -{ - float tp,tn,fn,fp,sp,sn,sn2; - int a, nm; - - nm=n/2; - - for (a=0; a#Method %d Compound %15s sp=%.2f sn=%.2f sn2=%.2f",a, (c==-1)?"TOTAL":S->name[c],sp, sn, sn2 ); - else fprintf ( stdout, "\n>#Combined: T=%d Compound %15s sp=%.2f sn=%.2f sn2=%.2f",a-nm, (c==-1)?"TOTAL":S->name[c],sp, sn, sn2 ); - } - fprintf ( stdout, "\n"); - return 0; -} - -float display_prediction_2 (int **prediction, int n,Alignment *A, Alignment *S, int field) -{ - int a, t, T; - float max_sn, max_sp; - - if ( field==17 || field ==18) - { - printf_exit ( EXIT_FAILURE, stderr, "\nERROR: Do not use filed %d in display_prediction", field); - } - - sort_int_inv ( prediction, 10,field, 0, n-1); - for (t=0,a=0; a=0; a--) - { - prediction[a][18]=t; - t+=prediction[a][3]; - } - - max_sn=max_sp=T=0; - for (a=0; a0.8) - { - if (sn>max_sn) - { - max_sn=sn; - max_sp=sp; - - T=prediction[a][field]; - } - } - } - if (max_sn==0) - fprintf (stdout, "\n T =%d SN=%.2f SP= %.2f",T,max_sn,max_sp); - else - fprintf (stdout, "\n T =%d SN=%.2f SP= %.2f",T,max_sn,max_sp); - - return max_sn; -} - - -/************************************************************************************/ -/* NEW ANALYZE : SAR */ -/************************************************************************************/ -float** cache2pred1 (Alignment *A,int**cache, int *ns, int **ls, Alignment *S, char *compound, char *mode); -float** cache2pred2 (Alignment *A,int**cache, int *ns, int **ls, Alignment *S, char *compound, char *mode); -float** cache2pred3 (Alignment *A,int**cache, int *ns, int **ls, Alignment *S, char *compound, char *mode); -float** cache2pred4 (Alignment *A,int**cache, int *ns, int **ls, Alignment *S, char *compound, char *mode); -float** cache2pred5 (Alignment *A,int**cache, int *ns, int **ls, Alignment *S, char *compound, char *mode); -float** cache2pred_new (Alignment *A,int**cache, int *ns, int **ls, Alignment *S, char *compound, char *mode); - -int **sar2cache_adriana ( Alignment *A, int *ns,int **ls, Alignment *S, char *compound, char *mode); -int **sar2cache_proba_old ( Alignment *A, int *ns,int **ls, Alignment *S, char *compound, char *mode); -int **sar2cache_count1 ( Alignment *A, int *ns,int **ls, Alignment *S, char *compound, char *mode); -int **sar2cache_count2 ( Alignment *A, int *ns,int **ls, Alignment *S, char *compound, char *mode); -int **sar2cache_count3 ( Alignment *A, int *ns,int **ls, Alignment *S, char *compound, char *mode); - -int **sar2cache_proba_new ( Alignment *A, int *ns,int **ls, Alignment *S, char *compound, char *mode); -int **sar2cache_proba2 ( Alignment *A, int *ns,int **ls, Alignment *S, char *compound, char *mode); -int **analyze_sar_compound1 ( char *name, char *seq, Alignment *A, char *mode); -int **analyze_sar_compound2 ( char *name, char *seq, Alignment *A, char *mode); - -int aln2n_comp_col ( Alignment *A, Alignment *S, int ci); - - - - -int ***simple_sar_analyze_vot ( Alignment *inA, Alignment *SAR, char *mode); -int ***simple_sar_analyze_col ( Alignment *inA, Alignment *SAR, char *mode); - - - -int sarset2subsarset ( Alignment *A, Alignment *S, Alignment **subA, Alignment **subS, Alignment *SUB); -int benchmark_sar (int v); -int aln2jack_group1 (Alignment *A, int seq, int **l1, int *nl1, int **l2, int *nl2); -int aln2jack_group2 (Alignment *A, int seq, int **l1, int *nl1, int **l2, int *nl2); -int aln2jack_group3 (Alignment *A, char *sar_seq, int **l1, int *nl1, int **l2, int *nl2); -float** jacknife5 (Alignment*A,int **cache, int *ns, int **ls, Alignment *S, char *compound, char *mode); -float** jacknife6 (Alignment*A,int **cache, int *ns, int **ls, Alignment *S, char *compound, char *mode); - -int process_cache ( Alignment *A,Alignment *S, int ***Cache, char *mode); -Alignment *analyze_compounds (Alignment *A, Alignment *S, char *mode); - -Alignment *analyze_compounds (Alignment *A, Alignment *S, char *mode) -{ - int a, b, c, tot, n; - int **sim; - int sar1, sar2; - - sim=aln2sim_mat (A, "idmat"); - for (a=0; a< S->nseq; a++) - { - for (n=0, tot=0, b=0; b< A->nseq-1; b++) - { - sar1=(S->seq_al[a][b]=='I')?1:0; - for ( c=b+1; cnseq; c++) - { - sar2=(S->seq_al[a][c]=='I')?1:0; - - if (sar1 && sar2) - { - tot+=sim[b][c]; - n++; - } - } - } - fprintf ( stdout, ">%-10s CMPSIM: %.2f\n", S->name[a],(float)tot/(float)n); - } - free_int (sim, -1); - return A; -} - -int print_seq_pos ( int pos, Alignment *A, char *seq); -int abl1_evaluation (int p); -int print_seq_pos ( int pos, Alignment *A, char *seq) -{ - int a, b, s; - - s=name_is_in_list (seq, A->name, A->nseq, MAXNAMES); - fprintf ( stdout, "S=%d", s); - - for (b=0,a=0; aseq_al[s][a]))b++; - } - fprintf ( stdout, "Pos %d SEQ %s: %d ", pos+1, seq, b+246); - if ( strm ( seq, "ABL1")) fprintf ( stdout , "PT: %d", abl1_evaluation (b+246)); - return 0; -} - -int process_cache ( Alignment *A,Alignment *S, int ***Cache, char *mode) -{ - int a, b; - int **pos, **pos2; - int **C; - int ab1, *ab1_pos; - int weight_mode; - - strget_param ( mode, "_WEIGHT_", "1", "%d", &weight_mode); - pos=declare_int(A->len_aln+1,2); - pos2=declare_int (A->len_aln+1,S->nseq); - for (a=0; anseq; a++) - { - C=Cache[a]; - for (b=0; b< A->len_aln; b++) - { - pos[b][0]+=C[26][b]; - if ( C[26][b]>0) - { - pos[b][1]++; - pos2[b][a]=1; - } - } - } - - C=Cache[0]; - ab1=name_is_in_list ("ABL1", A->name, A->nseq,100); - ab1_pos=vcalloc (A->len_aln+1, sizeof (int)); - - for ( b=0,a=0; a< A->len_aln; a++) - { - if ( A->seq_al[ab1][a]=='-')ab1_pos[a]=-1; - else ab1_pos[a]=++b; - } - - for ( a=0; a< A->len_aln; a++) - { - fprintf ( stdout, "\n%4d %5d %5d %5d [%c] [%2d] ALN", a+1, pos[a][0], pos[a][1], ab1_pos[a]+246,A->seq_al[ab1][a],abl1_evaluation (ab1_pos[a]+246)); - for ( b=0; b< S->nseq; b++)fprintf ( stdout, "%d", pos2[a][b]); - } - return 1; -} -int abl1_evaluation (int p) -{ - if ( p==248) return 10; - if ( p==250) return 10; - if ( p==253) return 10; - if ( p==254) return 10; - if ( p==255) return 9; - if ( p==256) return 10; - if ( p==257) return 5; - if ( p==258) return 8; - if ( p==269) return 8; - if ( p==291) return 4; - if ( p==294) return 8; - if ( p==299) return 10; - if ( p==306) return 0; - if ( p==314) return 9; - if ( p==315) return 10; - if ( p==318) return 10; - - if ( p==319) return 10; - if ( p==321) return 10; - if ( p==323) return 0; - if ( p==324) return 0; - if ( p==339) return 0; - if ( p==340) return 0; - if ( p==355) return 5; - if ( p==364) return 10; - - if ( p==366) return 0; - if ( p==368) return 10; - if ( p==370) return 10; - if ( p==372) return 0; - if ( p==378) return 8; - if ( p==382) return 10; - - if ( p==384) return 10; - if ( p==387) return 10; - if ( p==395) return 8; - - if ( p==398) return 8; - if ( p==399) return 8; - if ( p==400) return 8; - if ( p==403) return 0; - if ( p==416) return 8; - if ( p==419) return 5; - if ( p>400) return 0; - return -1; -} -float** cache2pred1 (Alignment*A,int **cache, int *ns, int **ls, Alignment *S, char *compound, char *mode) -{ - int s1, s2, seq1, seq2, r1, r2,col, pred, real, ci; - double score, max, id, m; - float **R, T; - - - int used_col, used_res,is_used_col, n_res=0; - int weight_mode; - /*Predict on ns[1] what was trained on ns[0]*/ - - strget_param ( mode, "_THR_", "0.09", "%f", &T); - strget_param ( mode, "_WEIGHT_", "0", "%d", &weight_mode); - - R=declare_float (2, 2); - ci=name_is_in_list ( compound, S->name, S->nseq, -1); - - - - for (s1=0; s1len_aln; col++) - { - int max1; - r1=tolower (A->seq_al[seq1][col]); - for (max1=0,id=0, m=0,s2=0; s2seq_al[ci][seq2]=='O')continue; - if ( cache[seq2][col]==0 && !is_gap( A->seq_al[seq2][col]))continue; - - r2=tolower ( A->seq_al[seq2][col]); - if ( is_gap(r2))continue; - - v=(cache[seq2][col]>0 && weight_mode==1)?cache[seq2][col]:1; - - max+=v; - if ( r2==r1) - { - score+=v; - } - - } - - } - pred=(( score/max) >T)?1:0; - real=(S->seq_al[ci][seq1]=='I')?1:0; - R[pred][real]++; - - fprintf ( stdout, "\n>%s %d%d SCORE %.2f C %s [SEQ]\n", A->name[seq1],real, pred, (float)score/(float)max, compound); - } - - for (used_col=0,used_res=0,col=0; collen_aln; col++) - { - for (is_used_col=0,s2=0; s2seq_al[seq2][col]))n_res++; - else if (is_gap(A->seq_al[seq2][col])); - else - { - is_used_col=1; - used_res++; - } - } - used_col+=is_used_col; - } - fprintf ( stdout, "\n>%s USED_POSITIONS: COL: %.2f RES: %.2f COMP\n", S->name[ci], (float)used_col/(float)A->len_aln, (float)used_res/(float) n_res); - - return R; -} - -float** cache2pred2 (Alignment*A,int **cache, int *ns, int **ls, Alignment *S, char *compound, char *mode) -{ - int s1, s2, seq1, seq2, r1, r2,col, pred, real, ci; - double score, max; - float **R, T; - - - int used_col, used_res,is_used_col, n_res=0; - /*Predict on ns[1] what was trained on ns[0]*/ - - strget_param ( mode, "_THR_", "0.5", "%f", &T); - - - R=declare_float (2, 2); - ci=name_is_in_list ( compound, S->name, S->nseq, -1); - - for (s1=0; s1len_aln; col++) - { - int used; - - r1=tolower (A->seq_al[seq1][col]); - for (used=0,s2=0; s2seq_al[ci][seq2]=='O')continue; - if ( cache[seq2][col]==0 && !is_gap( A->seq_al[seq2][col]))continue; - - - r2=tolower ( A->seq_al[seq2][col]); - if ( is_gap(r2))continue; - - v=cache[seq2][col]; - if ( r2==r1){score+=v;} - used=1; - max+=v; - } - if (used) fprintf ( stdout, "%c", r1); - } - - pred=(( score/max) >T)?1:0; - real=(S->seq_al[ci][seq1]=='I')?1:0; - R[pred][real]++; - - fprintf ( stdout, "PSEQ: %-10s SC: %4d MAX: %4d S: %.2f R: %4d", A->name[seq1],(int)score, (int)max, (float)score/max,real); - - } - - for (used_col=0,used_res=0,col=0; collen_aln; col++) - { - for (is_used_col=0,s2=0; s2seq_al[seq2][col]))n_res++; - else if (is_gap(A->seq_al[seq2][col])); - else - { - is_used_col=1; - used_res++; - } - } - used_col+=is_used_col; - } - fprintf ( stdout, "\n>%s USED_POSITIONS: COL: %.2f RES: %.2f COMP\n", S->name[ci], (float)used_col/(float)A->len_aln, (float)used_res/(float) n_res); - - return R; -} - -float** cache2pred3 (Alignment*A,int **cache, int *ns, int **ls, Alignment *S, char *compound, char *mode) -{ - int s1, s2, seq1, seq2, r1, r2,col, pred, real, ci, a, n; - double score, max; - float **R, T; - - - - int tp, tn, fn, fp; - int best_tp, best_fp; - int delta, best_delta; - int **list; - - /*Predict on ns[1] what was trained on ns[0]*/ - - strget_param ( mode, "_THR_", "0.5", "%f", &T); - - - R=declare_float (2, 2); - ci=name_is_in_list ( compound, S->name, S->nseq, -1); - list=declare_int ( ns[1],3); - - for (s1=0; s1len_aln; col++) - { - int used; - - r1=tolower (A->seq_al[seq1][col]); - for (used=0,s2=0; s2seq_al[ci][seq2]=='O')continue; - if ( cache[seq2][col]==0 && !is_gap( A->seq_al[seq2][col]))continue; - - - r2=tolower ( A->seq_al[seq2][col]); - if ( is_gap(r2))continue; - - v=cache[seq2][col]; - if ( r2==r1){score+=v;} - used=1; - max+=v; - } - } - - - - pred=(( score/max) >T)?1:0; - real=(S->seq_al[ci][seq1]=='I')?1:0; - - list[s1][0]=real; - list[s1][1]=(int)((score/max)*(float)1000); - list[s1][2]=seq1; - - - - } - sort_int_inv (list, 3, 1, 0, ns[1]-1); - - for ( a=0; aname[seq1],list[a][0], list[a][1]); - } - - for (n=0, a=0; alen_aln, sizeof (int)); - for (a=0; a< A->len_aln; a++) - for ( b=0; b< A->nseq; b++) - if ( cache[b][a])ul[nused++]=a; - - /*compute the similarity on the used columns*/ - - R=declare_float (2, 2); - sim=declare_int (A->nseq, A->nseq); - for (a=0; a< A->nseq; a++) - for ( b=0; b< A->nseq; b++) - { - for (c=0; c< nused; c++) - { - if ( A->seq_al[a][ul[c]]==A->seq_al[b][ul[c]])sim[a][b]++; - } - sim[a][b]=(sim[a][b]*100)/nused; - } - vfree (ul); - - - - - ci=name_is_in_list ( compound, S->name, S->nseq, -1); - list=declare_int ( ns[1],2); - - for (s1=0; s1seq_al[ci][seq2]=='I')score=MAX(score, sim[seq1][seq2]); - } - list[s1][0]=(S->seq_al[ci][seq1]=='I')?1:0; - list[s1][1]=(int)score; - - } - sort_int_inv (list, 2, 1, 0, ns[1]-1); - - for (n=0, a=0; aname, S->nseq, -1); - list=declare_int ( ns[1],2); - - for (s1=0; s1seq_al[ci][seq2]=='I')score=MAX(score, sim[seq1][seq2]); - } - list[s1][0]=(S->seq_al[ci][seq1]=='I')?1:0; - list[s1][1]=(int)score; - - } - sort_int_inv (list, 2, 1, 0, ns[1]-1); - - for (n=0, a=0; aname, S->nseq, -1); - list=declare_int (A->nseq,2); - R=declare_float (2, 2); - - - for ( a=0; anseq; a++) - { - int real, res; - - ns[0]=A->nseq-1; - ns[1]=1; - for (c=0,b=0; bnseq; b++) - if (a!=b)ls[0][c++]=b; - ls[1][0]=a; - - - cache=sar2cache_count1 (A, ns, ls,S, compound, mode); - for (b=0; b<=26; b++) - for ( c=0; c< A->len_aln; c++) - cacheIN[b][c]+=cache[b][c]; - - seq1=a; - real=(S->seq_al[ci][seq1]=='I')?1:0; - fprintf ( stdout, ">%-10s %d ", A->name[seq1], real); - - - - for (max_score=0,b=0; blen_aln; b++) - max_score+=cache[26][b]; - - for (score=0,b=0; blen_aln; b++) - { - res=tolower (A->seq_al[seq1][b]); - if ( cache[26][b]==0) continue; - if ( !is_gap(res)) - { - score+=cache[res-'a'][b]; - } - /*fprintf ( stdout, "%c[%3d]", res,b);*/ - } - fprintf ( stdout, " SCORE: %5d SPRED %d RATIO: %.2f \n", (int)score, a, (score*100)/max_score); - list[a][0]=real; - - if ( strstr (mode, "SIMTEST"))list[a][1]=(score*100)/max_score; - else list[a][1]=(score*100)/max_score; - free_int (cache, -1); - } - - - sort_int_inv (list, 2, 1, 0, A->nseq-1); - for (n=0, a=0; anseq; a++) - { - n+=list[a][0]; - } - - for (best_delta=100000,best_tp=0,tp=0,fp=0,best_fp=0,a=0; anseq; a++) - { - - tp+=list[a][0]; - fp+=1-list[a][0]; - delta=(n-(tp+fp)); - if (FABS(delta)nseq-(tp+fp+fn); - R[1][1]=tp; - R[1][0]=fp; - R[0][1]=fn; - R[0][0]=tn; - free_int (list, -1); - - return R; -} -float** jacknife6 (Alignment*A,int **cache, int *ns, int **ls, Alignment *S, char *compound, char *mode) -{ - int seq1, ci, a,b, c,d,e,f, n; - double score; - float **R; - - - int tp, tn, fn, fp; - int best_tp, best_fp; - int delta, best_delta; - int **list; - - ci=name_is_in_list ( compound, S->name, S->nseq, -1); - list=declare_int (A->len_aln,2); - R=declare_float (2, 2); - - - for ( a=0; anseq; a++) - { - int sar, res; - int **new_cache; - - ns[0]=A->nseq-1; - ns[1]=1; - for (c=0,b=0; bnseq; b++) - if (a!=b)ls[0][c++]=b; - ls[1][0]=a; - - cache=sar2cache_proba_new (A, ns, ls,S, compound, mode); - - - new_cache=declare_int (27,A->len_aln); - - for (d=0; d< A->len_aln; d++) - { - int **analyze; - if ( cache[26][d]==0)continue; - analyze=declare_int (26, 2); - - for ( e=0; e< ns[0]; e++) - { - f=ls[0][e]; - sar=(S->seq_al[ci][f]=='I')?1:0; - res=tolower (A->seq_al[f][d]); - - if ( res=='-') continue; - analyze[res-'a'][sar]++; - } - for (e=0;e<26; e++) - { - if ( analyze[e][1]){new_cache[26][d]=1;new_cache[e][d]+=cache[e][d];} - /* - if ( analyze[e][0] && analyze[e][1]){new_cache[26][d]=1;new_cache[e][d]+=analyze[e][1];} - else if ( analyze[e][0]){new_cache[26][d]=1;new_cache[e][d]-=analyze[e][0]*10;} - else if ( analyze[e][1]){new_cache[26][d]=1;new_cache[e][d]+=analyze[e][1];} - else if ( !analyze[e][0] &&!analyze[e][1]); - */ - } - free_int (analyze, -1); - } - - seq1=a; - sar=(S->seq_al[ci][seq1]=='I')?1:0; - fprintf ( stdout, ">%-10s %d ", A->name[seq1], sar); - - for (score=0,b=0; blen_aln; b++) - { - res=tolower (A->seq_al[seq1][b]); - if ( cache[26][b]==0) continue; - if ( !is_gap(res)) - { - score+=new_cache[res-'a'][b]; - } - } - fprintf ( stdout, " SCORE: %5d SPRED\n", (int)score); - list[seq1][0]=sar; - list[seq1][1]=(int)score; - - free_int (new_cache, -1); - free_int (cache, -1); - } - sort_int_inv (list, 2, 1, 0, A->nseq-1); - for (n=0, a=0; anseq; a++)n+=list[a][0]; - for (best_delta=100000,best_tp=0,tp=0,fp=0,best_fp=0,a=0; anseq; a++) - { - tp+=list[a][0]; - fp+=1-list[a][0]; - delta=(n-(tp+fp)); - if (FABS(delta)nseq-(tp+fp+fn); - R[1][1]=tp; - R[1][0]=fp; - R[0][1]=fn; - R[0][0]=tn; - free_int (list, -1); - - - return R; -} -float** cache2pred_new (Alignment*A,int **cache, int *ns, int **ls, Alignment *S, char *compound, char *mode) -{ - int s1, seq1, ci, a,b, n; - double score; - float **R; - - - int tp, tn, fn, fp; - int best_tp, best_fp; - int delta, best_delta; - int **list; - - ci=name_is_in_list ( compound, S->name, S->nseq, -1); - list=declare_int ( ns[1],2); - R=declare_float (2, 2); - - for (s1=0; s1seq_al[ci][seq1]=='I')?1:0; - fprintf ( stdout, ">%-10s %d ", A->name[seq1], real); - for (score=0,b=0; blen_aln; b++) - { - res=tolower (A->seq_al[seq1][b]); - if ( cache[26][b]==0) continue; - if ( !is_gap(res)) - { - score+=cache[res-'a'][b]; - } - fprintf ( stdout, "%c", res); - } - fprintf ( stdout, " SCORE: %5d SPRED\n", (int)score); - list[s1][0]=real; - list[s1][1]=(int)score; - } - - sort_int_inv (list, 2, 1, 0, ns[1]-1); - - for (n=0, a=0; aname, S->nseq, -1); - list=declare_int ( ns[1],2); - R=declare_float (2, 2); - - for (s1=0; s1seq_al[ci][seq1]=='I')?1:0; - fprintf ( stdout, ">%-10s %d ", A->name[seq1], real); - for (score=0,b=0; blen_aln; b++) - { - res=tolower (A->seq_al[seq1][b]); - if ( cache[26][b]==0) continue; - if ( !is_gap(res)) - { - score+=cache[res-'a'][b]; - } - fprintf ( stdout, "%c", res); - } - fprintf ( stdout, " SCORE: %5d SPRED\n", (int)score); - list[s1][0]=real; - list[s1][1]=(int)score; - } - new_cache=declare_int (27,A->len_aln); - for (a=0; a< A->len_aln; a++) - { - int **analyze, real, res, d; - int *res_type; - int **sub; - int *keep; - keep=vcalloc ( 26, sizeof (int)); - res_type=vcalloc ( 26, sizeof (int)); - sub=declare_int (256, 2); - - if ( cache[26][a]==0)continue; - analyze=declare_int (26, 2); - for ( b=0; b< ns[0]; b++) - { - seq1=ls[0][b]; - real=(S->seq_al[ci][seq1]=='I')?1:0; - res=tolower (A->seq_al[seq1][a]); - - if ( res=='-') continue; - analyze[res-'a'][real]++; - } - fprintf ( stdout, "RSPRED: "); - for (c=0;c<26; c++)fprintf ( stdout, "%c", c+'a'); - fprintf ( stdout, "\nRSPRED: "); - for (c=0;c<26; c++) - { - if ( analyze[c][0] && analyze[c][1]){fprintf ( stdout, "1");res_type[c]='1';} - else if ( analyze[c][0]){new_cache[26][a]=1;new_cache[c][a]-=analyze[c][0];fprintf ( stdout, "0");res_type[c]='0';} - else if ( analyze[c][1]){new_cache[26][a]=1;new_cache[c][a]+=analyze[c][1];fprintf ( stdout, "1");res_type[c]='1';} - else if ( !analyze[c][0] &&!analyze[c][1]){fprintf ( stdout, "-");res_type[c]='-';} - } - - - for ( c=0; c<26; c++) - { - for ( d=0; d<26; d++) - { - - if ( res_type[c]==res_type[d]) - { - sub[res_type[c]][0]+=mat[c][d]; - sub[res_type[c]][1]++; - } - if ( res_type[c]!='-' && res_type[d]!='-') - { - sub['m'][0]+=mat[c][d]; - sub['m'][1]++; - } - } - } - for ( c=0; c< 256; c++) - { - if ( sub[c][1])fprintf ( stdout, " %c: %5.2f ", c, (float)sub[c][0]/(float)sub[c][1]); - } - fprintf ( stdout, " SC: %d\nRSPRED ", cache[26][a]); - - for ( c=0; c<26; c++) - if ( res_type[c]=='1') - { - for (d=0; d<26; d++) - if (mat[c][d]>0)keep[d]++; - keep[c]=9; - } - - for (c=0; c<26; c++) - { - if ( keep[c]>10)fprintf ( stdout, "9"); - else fprintf ( stdout, "%d", keep[c]); - } - for ( c=0; c<26; c++) - { - if ( keep[c]>8)new_cache[c][a]=10; - else new_cache[c][a]=-10; - } - fprintf ( stdout, "\n"); - free_int (analyze, -1); - free_int (sub, -1); - vfree (res_type); - vfree (keep); - - } - for ( a=0; a<25; a++) - for (b=a+1; b<26; b++) - { - int r1, r2; - r1=a+'a';r2=b+'a'; - if ( strchr("bjoxz", r1))continue; - if ( strchr("bjoxz",r2))continue; - - if ( mat[a][b]>0 && a!=b)fprintf ( stdout, "\nMATANALYZE %c %c %d", a+'a', b+'a', mat[a][b]); - } - - for (s1=0; s1seq_al[ci][seq1]=='I')?1:0; - fprintf ( stdout, ">%-10s %d ", A->name[seq1], real); - for (score=0,b=0; blen_aln; b++) - { - res=tolower (A->seq_al[seq1][b]); - if ( cache[26][b]==0) continue; - if ( !is_gap(res)) - { - score+=new_cache[res-'a'][b]; - } - fprintf ( stdout, "%c", res); - } - fprintf ( stdout, " SCORE: %5d SPRED\n", (int)score); - list[s1][0]=real; - list[s1][1]=(int)score; - } - free_int (new_cache, -1); - sort_int_inv (list, 2, 1, 0, ns[1]-1); - - - for (n=0, a=0; aname, S->nseq, -1); - cache=declare_int (A->nseq, A->len_aln); - - strget_param ( mode, "_FILTER1_", "0" , "%f", &T1); - strget_param ( mode, "_FILTER2_", "1000000", "%f", &T2); - strget_param ( mode, "_FILTER3_", "0" , "%f", &T3); - strget_param ( mode, "_FILTER4_", "1000000", "%f", &T4); - strget_param ( mode, "_SIMWEIGHT_", "1", "%d", &sim_weight); - strget_param ( mode, "_SWTHR_", "30", "%d", &sw_thr); - strget_param (mode, "_TRAIN_","1", "%d", &train_mode); - strget_param (mode, "_ZSCORE_","0", "%f", &zscore); - - - - - - if (sim_weight==1 && !sim) sim=aln2sim_mat(A, "idmat"); - for ( ms=0; msseq_al[ci][mseq]!='I')continue; - - list=declare_int (A->len_aln+1, nfield); - for (t=0,n=0, col=0; col< A->len_aln; col++) - { - int same_res; - - mres=tolower(A->seq_al[mseq][col]); - list[col][RES]=mres; - list[col][COL_INDEX]=col; - - if ( is_gap(mres))continue; - for ( s=0; sseq_al[seq][col]); - if (is_gap(res))continue; - - - if (sim_weight==1) - { - w=sim[seq][mseq];w=(mres==res)?100-w:w; - if (wseq_al[ci][seq]=='I')same_res=1; - else same_res=(res==mres)?1:0; - } - else - same_res=(res==mres)?1:0; - - list[col][N]+=w; - - if (S->seq_al[ci][seq]=='I' && same_res)list[col][N11]+=w; - else if (S->seq_al[ci][seq]=='I' && same_res)list[col][N10]+=w; - else if (S->seq_al[ci][seq]=='O' && same_res)list[col][N01]+=w; - else if (S->seq_al[ci][seq]=='O' && same_res)list[col][N00]+=w; - - if ( S->seq_al[ci][seq]=='I')list[col][N1sar]+=w; - if ( same_res)list[col][N1msa]+=w; - - } - - list[col][SCORE]=(int)evaluate_sar_score1 (list[col][N], list[col][N11], list[col][N1msa], list[col][N1sar]); - - } - - strget_param ( mode, "_MAXN1_", "5", "%d", &maxn1); - strget_param ( mode, "_WEIGHT_", "1", "%d", &weight_mode); - strget_param ( mode, "_QUANT_", "0.0", "%f", &quant); - - sort_int_inv (list,nfield,SCORE,0,A->len_aln-1); - if ( quant !=0) - { - - n=quantile_rank ( list,SCORE, A->len_aln,quant); - sort_int (list,nfield,N1msa, 0, n-1); - maxn1=MIN(n,maxn1); - } - - for (a=0; aT1 && valuelen_aln+1, nfield); - for ( col=0; col< A->len_aln; col++) - { - list[col][COL_INDEX]=col; - for ( s=0; slen_aln; col++) - if (list[col][SCORE]T4) - { - list[col][SCORE]=0; - for (s=0; s< A->nseq; s++) - if (!is_gap(A->seq_al[s][col]))cache[s][col]=0; - } - - /*Keep The N Best Columns*/ - if ( zscore!=0) - { - double sum=0, sum2=0, z; - int n=0; - for (a=0; a< A->len_aln; a++) - { - if ( list[a][SCORE]>0) - { - sum+=list[a][SCORE]; - sum2+=list[a][SCORE]*list[a][SCORE]; - n++; - } - } - for (a=0; alen_aln; a++) - { - if ( list[a][SCORE]>0) - { - z=return_z_score (list[a][SCORE], sum, sum2,n); - if ((float)znseq; s++) - cache [s][col]=0; - } - else - { - fprintf ( stdout, "\nZSCORE: KEEP COL %d SCORE: %f SCORE: %d\n", list[a][COL_INDEX], (float)z, list[a][SCORE]); - } - } - } - } - else - { - sort_int_inv (list,nfield,SCORE,0,A->len_aln-1); - strget_param ( mode, "_MAXN2_", "100000", "%d", &maxn2); - - for (a=maxn2;alen_aln; a++) - { - col=list[a][COL_INDEX]; - for (s=0; snseq; s++) - cache [s][col]=0; - } - } - - /*Get Rid of the N best Columns*/; - strget_param ( mode, "_MAXN3_", "0", "%d", &maxn3); - - for (a=0; anseq; s++) - cache [s][col]=0; - } - - return cache; -} -int aln2n_comp_col ( Alignment *A, Alignment *S, int ci) -{ - int res, seq,sar, col, r; - int **analyze; - - int tot=0; - - analyze=declare_int (27, 2); - for ( col=0; col< A->len_aln; col++) - { - int n1, n0; - - - for ( n1=0, n0=0,seq=0; seqnseq; seq++) - { - res=tolower(A->seq_al[seq][col]); - sar=(S->seq_al[ci][seq]=='I')?1:0; - n1+=(sar==1)?1:0; - n0+=(sar==0)?1:0; - if ( res=='-')continue; - res-='a'; - analyze[res][sar]++; - } - - for (r=0; r<26; r++) - { - int a0,a1; - a0=analyze[r][0]; - a1=analyze[r][1]; - - - if ( a1==n1 && a0len_aln, 2); - cache=declare_int ( 27, A->len_aln); - analyze=declare_int (27, 2); - - ci=name_is_in_list ( compound, S->name, S->nseq, -1); - - for ( col=0; col< A->len_aln; col++) - { - int n1, n0; - - - for ( n1=0, n0=0,s=0; sseq_al[seq][col]); - sar=(S->seq_al[ci][seq]=='I')?1:0; - n1+=(sar==1)?1:0; - n0+=(sar==0)?1:0; - if ( res=='-')continue; - res-='a'; - - analyze[res][sar]++; - } - - for (r=0; r<26; r++) - { - - a0=analyze[r][0]; - a1=analyze[r][1]; - - if ( strstr (mode, "SIMTEST")) - { - w=a1; - } - else if (a1 ) - { - w=n0-a0; - } - else w=0; - - cache[r][col]+=w; - cache[26][col]=MAX(w, cache[26][col]); - } - - for ( r=0; r<26; r++)analyze[r][0]=analyze[r][1]=0; - list[col][0]=col; - list[col][1]=cache[26][col]; - } - - free_int (analyze, -1); - - sort_int_inv (list, 2, 1, 0, A->len_aln-1); - - strget_param ( mode, "_MAXN2_", "100000", "%d", &maxn2); - - for ( col=maxn2; collen_aln; col++) - for ( r=0; r<=26; r++)cache[r][list[col][0]]=0; - - free_int (list, -1); - return cache; -} - - -int **sar2cache_count2 ( Alignment *A, int *ns,int **ls, Alignment *S, char *compound, char *mode) -{ - int maxn2, res, seq,sar, ci, col,s, r; - int **analyze, **list, **cache, **conseq; - static int **mat; - int w=0; - if (!mat) mat=read_matrice ("blosum62mt"); - - - list=declare_int ( A->len_aln, 2); - cache=declare_int ( 27, A->len_aln); - conseq=declare_int ( A->len_aln,3); - - analyze=declare_int (27, 2); - - ci=name_is_in_list ( compound, S->name, S->nseq, -1); - for ( col=0; col< A->len_aln; col++) - { - int n1, n0; - - for ( n1=0, n0=0,s=0; sseq_al[seq][col]); - sar=(S->seq_al[ci][seq]=='I')?1:0; - n1+=(sar==1)?1:0; - n0+=(sar==0)?1:0; - if ( res=='-')continue; - res-='a'; - analyze[res][sar]++; - } - for (r=0; r<26; r++) - { - int a0,a1; - a0=analyze[r][0]; - a1=analyze[r][1]; - if ( a1==n1 && a0len_aln; col++) - { - - res=tolower(A->seq_al[seq][col]); - if ( is_gap(res))continue; - else res-='a'; - - if ( conseq[col][1] && res!=conseq[col][0])w1++; - if ( conseq[col][1])w2++; - } - for (col=0; collen_aln; col++) - { - res=tolower(A->seq_al[seq][col]); - if ( is_gap(res))continue; - else res-='a'; - - if ( conseq[col][1] && res!=conseq[col][0])conseq[col][2]+=(w2-w1); - } - } - - for (col=0; collen_aln; col++) - { - r=conseq[col][0]; - w=conseq[col][2]; - - - cache[r][col]=cache[26][col]=list[col][1]=w; - list[col][0]=col; - } - sort_int_inv (list, 2, 1, 0, A->len_aln-1); - strget_param ( mode, "_MAXN2_", "100000", "%d", &maxn2); - - for ( col=maxn2; collen_aln; col++) - for ( r=0; r<=26; r++)cache[r][list[col][0]]=0; - - - free_int (list, -1); - return cache; -} - -int **sar2cache_count3 ( Alignment *A, int *ns,int **ls, Alignment *S, char *compound, char *mode) -{ - int maxn2, res, seq,sar, ci, col,s, r, a1, a0, n1, n0; - int **analyze, **list, **cache; - static int **mat; - - if (!mat) mat=read_matrice ("blosum62mt"); - - - list=declare_int ( A->len_aln, 2); - cache=declare_int ( 27, A->len_aln); - analyze=declare_int (27, 2); - - ci=name_is_in_list ( compound, S->name, S->nseq, -1); - - for ( col=0; col< A->len_aln; col++) - { - double e, g; - for ( n1=0, n0=0,s=0; sseq_al[seq][col]); - sar=(S->seq_al[ci][seq]=='I')?1:0; - n1+=(sar==1)?1:0; - n0+=(sar==0)?1:0; - if ( res=='-')continue; - res-='a'; - - analyze[res][sar]++; - } - - /*Gap*/ - for (g=0,r=0; rnseq; r++) - g+=is_gap(A->seq_al[r][col]); - g=(100*g)/A->nseq; - - /*enthropy - for (e=0, r=0; r<26; r++) - { - a0=analyze[r][0]; - a1=analyze[r][1]; - t=a0+a1; - - if (t>0) - e+= t/(double)A->nseq*log(t/(double)A->nseq); - } - e*=-1; - */ - e=0; - if (g>10) continue; - if (e>10) continue; - - if ( strstr ( mode, "SIMTEST")) - { - for (r=0; r<26; r++) - { - - a0=analyze[r][0]; - a1=analyze[r][1]; - - if (a1) - { - cache[r][col]=a1; - cache[26][col]=MAX(cache[26][col],a1); - } - } - } - else - { - - - - for (r=0; r<26; r++) - { - - a0=analyze[r][0]; - a1=analyze[r][1]; - - if (!a1 && a0) - { - cache[r][col]=a0; - cache[26][col]=MAX(cache[26][col],a0); - } - } - } - - for ( r=0; r<26; r++)analyze[r][0]=analyze[r][1]=0; - list[col][0]=col; - list[col][1]=cache[26][col]; - } - - free_int (analyze, -1); - - sort_int_inv (list, 2, 1, 0, A->len_aln-1); - - strget_param ( mode, "_MAXN2_", "100000", "%d", &maxn2); - - for ( col=maxn2; collen_aln; col++) - for ( r=0; r<=26; r++)cache[r][list[col][0]]=0; - - free_int (list, -1); - return cache; -} - - -int **sar2cache_proba_new ( Alignment *A, int *ns,int **ls, Alignment *S, char *compound, char *mode) -{ - int col, s, seq,ms,mseq, res, mres, res1, n,maxn1, maxn2,maxn3, t, ci, a,w; - - int **list; - - int N1msa,N1sar, N, N11, N10, N01,N00, SCORE, COL_INDEX, RES; - int nfield=0; - int value; - - - int **cache; - static int **sim; - int sw_thr; - float zscore; - - RES=nfield++;COL_INDEX=nfield++;N1msa=nfield++;N1sar=nfield++;N=nfield++;N11=nfield++;N10=nfield++;N01=nfield++;N00=nfield++;SCORE=nfield++; - ci=name_is_in_list ( compound, S->name, S->nseq, -1); - cache=declare_int (27, A->len_aln); - - strget_param ( mode, "_SWTHR_", "30", "%d", &sw_thr); - strget_param (mode, "_ZSCORE_","0", "%f", &zscore); - - - if (!sim)sim=aln2sim_mat(A, "idmat"); - for ( ms=0; msseq_al[ci][mseq]!='I')continue; - - list=declare_int (A->len_aln+1, nfield); - for (t=0,n=0, col=0; col< A->len_aln; col++) - { - int same_res; - - mres=tolower(A->seq_al[mseq][col]); - if ( is_gap(mres))continue; - - list[col][RES]=mres; - list[col][COL_INDEX]=col; - - for ( s=0; sseq_al[seq][col]); - if (is_gap(res))continue; - w=sim[seq][mseq];w=(mres==res)?100-w:w; - if (wseq_al[ci][seq]=='I' && same_res)list[col][N11]+=w; - else if (S->seq_al[ci][seq]=='I' && same_res)list[col][N10]+=w; - else if (S->seq_al[ci][seq]=='O' && same_res)list[col][N01]+=w; - else if (S->seq_al[ci][seq]=='O' && same_res)list[col][N00]+=w; - - if ( S->seq_al[ci][seq]=='I')list[col][N1sar]+=w; - if ( same_res)list[col][N1msa]+=w; - - } - - list[col][SCORE]=(int)evaluate_sar_score1 (list[col][N], list[col][N11], list[col][N1msa], list[col][N1sar]); - - } - strget_param ( mode, "_MAXN1_", "5", "%d", &maxn1); - sort_int_inv (list,nfield,SCORE,0,A->len_aln-1); - for (a=0; alen_aln+1, nfield); - for ( col=0; col< A->len_aln; col++) - { - list[col][COL_INDEX]=col; - list[col][SCORE]=cache[26][col]; - } - /*Keep The N Best Columns*/ - if ( zscore!=0) - { - double sum=0, sum2=0, z; - int n=0; - for (a=0; a< A->len_aln; a++) - { - if ( list[a][SCORE]>0) - { - sum+=list[a][SCORE]; - sum2+=list[a][SCORE]*list[a][SCORE]; - n++; - } - } - for (a=0; alen_aln; a++) - { - if ( list[a][SCORE]>0) - { - z=return_z_score (list[a][SCORE], sum, sum2,n); - if ((float)zlen_aln-1); - strget_param ( mode, "_MAXN2_", "100000", "%d", &maxn2); - - for (a=maxn2;alen_aln; a++) - { - col=list[a][COL_INDEX]; - for (s=0; s<27; s++) - cache [s][col]=0; - } - } - - /*Get Rid of the N best Columns*/; - strget_param ( mode, "_MAXN3_", "0", "%d", &maxn3); - - for (a=0; aname, S->nseq, -1); - cache=declare_int (A->nseq, A->len_aln); - - - for ( ms=0; msseq_al[ci][mseq]!='I')continue; - - list=declare_int (A->len_aln+1, 5); - for (t=0,n=0, col=0; col< A->len_aln; col++) - { - mres=tolower(A->seq_al[mseq][col]); - list[col][0]=mres; - list[col][1]=col; - - if ( is_gap(mres))continue; - for ( s=0; sseq_al[seq][col]); - if (is_gap(res))continue; - - if (S->seq_al[ci][seq]=='I' && res==mres)list[col][3]++; - if (res==mres)list[col][2]++; - } - } - - sort_int_inv (list,5,3,0,A->len_aln-1); - - strget_param ( mode, "_MAXN1_", "5", "%d", &maxn1); - strget_param ( mode, "_QUANT_", "0.95", "%f", &quant); - - n=quantile_rank ( list, 3, A->len_aln,quant); - sort_int (list, 5, 2, 0, n-1); - - for (a=0; anseq, A->len_aln); - ci=name_is_in_list ( compound, S->name, S->nseq, -1); - - strget_param ( mode, "_FILTER1_", "0" , "%f", &T1); - strget_param ( mode, "_FILTER2_", "1000000", "%f", &T2); - strget_param ( mode, "_FILTER3_", "0" , "%f", &T3); - strget_param ( mode, "_FILTER4_", "1000000", "%f", &T4); - - list=declare_int (A->len_aln+1,A->nseq+2); - SCORE=A->nseq; - COL=A->nseq+1; - - for ( ms=0; msseq_al[ci][mseq]!='I')continue; - - for (t=0,n=0, col=0; col< A->len_aln; col++) - { - int N11=0,N10=0,N01=0,N00=0,N1sar=0,N1msa=0,N=0; - - mres=tolower(A->seq_al[mseq][col]); - if ( is_gap(mres))continue; - for ( s=0; sseq_al[seq][col]); - if (is_gap(res))continue; - - N++; - if (S->seq_al[ci][seq]=='I' && res==mres)N11++; - else if (S->seq_al[ci][seq]=='I' && res!=mres)N10++; - else if (S->seq_al[ci][seq]=='O' && res==mres)N01++; - else if (S->seq_al[ci][seq]=='O' && res!=mres)N00++; - - if ( S->seq_al[ci][seq]=='I')N1sar++; - if ( res==mres)N1msa++; - } - list[col][mseq]=(int)evaluate_sar_score1 (N,N11,N1msa,N1sar); - list[col][SCORE]+=list[col][mseq]; - list[col][COL]=col; - } - } - - strget_param ( mode, "_MAXN1_", "5", "%d", &maxn1); - strget_param ( mode, "_QUANT_", "0.95", "%f", &quant); - sort_int_inv (list,A->nseq+2,SCORE, 0, A->len_aln-1); - n=quantile_rank ( list,A->nseq, A->len_aln,quant); - n=5; - - - for (a=0; anseq; b++) - { - value=list[col][b]; - if ( value>T1 && valuenseq, 2); - seq_list=declare_int (A->nseq, 2); - for (a=0; a< A->nseq; a++) - { - if (comp[a]=='I') - { - sar_list[nsar][0]=a; - sar_list[nsar][1]=rand()%100000; - nsar++; - } - else - { - seq_list[nseq][0]=a; - seq_list[nseq][1]=rand()%100000; - nseq++; - } - } - - - l1[0]=vcalloc (A->nseq, sizeof (int)); - l2[0]=vcalloc (A->nseq, sizeof (int)); - nl1[0]=nl2[0]=0; - - sort_int (seq_list, 2, 1, 0,nseq-1); - sort_int (sar_list, 2, 1, 0,nsar-1); - mid=nsar/2; - for (a=0; anseq, 2); - l1[0]=vcalloc (A->nseq, sizeof (int)); - l2[0]=vcalloc (A->nseq, sizeof (int)); - nl1[0]=nl2[0]; - - vsrand (0); - for ( a=0; a< A->nseq; a++) - { - list[a][0]=a; - list[a][1]=rand()%100000; - } - sort_int (list, 2, 1, 0,A->nseq-1); - mid=A->nseq/2; - for (a=0; anseq; b++, a++) - { - l2[0][nl2[0]++]=list[b][0]; - } - - free_int (list, -1); - return 1; -} -int aln2jack_group1 (Alignment *A, int seq, int **l1, int *nl1, int **l2, int *nl2) -{ - int **sim; - int **list; - int a, mid; - - list=declare_int ( A->nseq, 3); - l1[0]=vcalloc (A->nseq, sizeof (int)); - l2[0]=vcalloc (A->nseq, sizeof (int)); - nl1[0]=nl2[0]; - - sim=aln2sim_mat (A, "idmat"); - for ( a=0; a< A->nseq; a++) - { - list[a][0]=seq; - list[a][1]=a; - list[a][2]=(a==seq)?100:sim[seq][a]; - } - sort_int_inv (list, 3, 2, 0, A->nseq-1); - fprintf ( stderr, "\nJacknife fromsequence %s [%d]\n", A->name[seq], seq); - mid=A->nseq/2; - for (a=0; a< mid; a++) - l1[0][nl1[0]++]=list[a][1]; - for (a=mid; anseq; a++) - l2[0][nl2[0]++]=list[a][1]; - return 1; -} - - -int sarset2subsarset ( Alignment *A, Alignment *S, Alignment **subA, Alignment **subS, Alignment *SUB) -{ - Alignment *rotS, *intS; - int a,b, *list, nl; - - list=vcalloc ( SUB->nseq, sizeof (int)); - for (nl=0,a=0; anseq; a++) - { - b=name_is_in_list(SUB->name[a], A->name, A->nseq, 100); - if ( b!=-1)list[nl++]=b; - } - - subA[0]=extract_sub_aln (A, nl, list); - rotS=rotate_aln (S, NULL); - intS=extract_sub_aln (rotS, nl, list); - - subS[0]=rotate_aln (intS, NULL); - - for ( a=0; anseq; a++) sprintf ( (subS[0])->name[a], "%s", S->name[a]); - - - return 0; -} - -int ***simple_sar_analyze_vot ( Alignment *A, Alignment *SAR, char *mode) -{ - int a, b, c, d; - int res1, res2, sar1, sar2; - float s; - int **sim; - static float ***result; - static int ***iresult; - if (!result) - { - result=declare_arrayN (3,sizeof (float),SAR->nseq, A->len_aln,3); - iresult=declare_arrayN (3,sizeof (int),SAR->nseq, A->len_aln,3); - } - - sim=aln2sim_mat (A, "idmat"); - - - for (a=0; anseq; a++) - for (b=0; blen_aln; b++) - result[a][b][0]=1; - - for ( a=0; a< SAR->nseq; a++) - for ( b=0; bnseq-1; b++) - for ( c=b+1; c< A->nseq; c++) - for ( d=0; dlen_aln; d++) - { - res1=A->seq_al[b][d]; - res2=A->seq_al[c][d]; - - sar1=(SAR->seq_al[a][b]=='I')?1:0; - sar2=(SAR->seq_al[a][c]=='I')?1:0; - - s=sim[b][c]; - - - - - if ( sar1!=sar2 && res1!=res2) - result[a][d][0]*=(1/(100-s)); - - else if ( sar1==sar2 && sar1==1 && res1==res2) - result[a][d][0]*=1/s; - - - - - /* - else if ( sar1==sar2 && res1==res2)result[a][d][0]+=(100-s)*(100-s); - else if ( sar1==sar2 && res1!=res2)result[a][d][0]-=s*s; - else if ( sar1!=sar2 && res1==res2)result[a][d][0]-=(100-s)*(100-s); - */ - - result[a][d][1]='a'; - } - for ( a=0; anseq; a++) - for ( b=0; blen_aln; b++) - { - fprintf ( stderr, "\n%f", result[a][b][0]); - iresult[a][b][0]=100*log(1-result[a][b][0]); - } - return iresult; -} -int display_simple_sar_analyze_pair_col (Alignment *A, Alignment *SAR, char *mode) -{ - int **r; - int a, b, n, do_tm; - - - Alignment *rA; - int *nI; - - - strget_param (mode, "_TM_", "0", "%d", &do_tm); - r=simple_sar_analyze_pair_col (A, SAR,mode); - rA=rotate_aln (A, NULL); - n=0; - - nI=vcalloc ( SAR->nseq, sizeof (int)); - for (a=0; a< SAR->nseq; a++) - for (b=0; blen_aln; b++) nI[a]+=(SAR->seq_al[a][b]=='I')?1:0; - - - - while ( r[n][0]!=-1) - { - if (r[n][3]>0) - {fprintf ( stdout, "COMP S: %3d %3d %s %20s %2d #\n", r[n][3],0,SAR->seq_al[r[n][0]], SAR->name[r[n][0]], nI[r[n][0]]); - fprintf ( stdout, "SEQ1 S: %3d %3d %s %20s %2d #\n", r[n][3],r[n][1],rA->seq_al[r[n][1]], SAR->name[r[n][0]],nI[r[n][0]]); - fprintf ( stdout, "SEQ2 S: %3d %3d %s %20s %2d #\n\n", r[n][3],r[n][2],rA->seq_al[r[n][2]], SAR->name[r[n][0]],nI[r[n][0]]); - } - n++; - } - return 0; -} -int display_simple_sar_analyze_col (Alignment *A, Alignment *SAR, char *mode) -{ - int ***result, **r2, **r3, **r4, **aa; - int a, b, c, n; - char *cons; - int threshold=20; - int do_tm; - strget_param (mode, "_TM_", "0", "%d", &do_tm); - result=simple_sar_analyze_col (A, SAR,mode); - r2=declare_int (A->len_aln*SAR->nseq, 5); - r3=declare_int (A->len_aln+1, 5); - r4=declare_int (A->len_aln+1, SAR->nseq+1); - aa=declare_int (2, 256); - cons=vcalloc (A->len_aln+1, sizeof (char)); - for (a=0; alen_aln; a++){r3[a][0]=a;cons[a]='A';} - - - - for (n=0,a=0; a< SAR->nseq; a++) - { - double sum, sum2; - for (sum=0, sum2=0,b=0; blen_aln; b++) - { - sum+=result[a][b][0]; - sum2+=result[a][b][0]*result[a][b][0]; - } - - for (b=0; blen_aln; b++, n++) - { - r2[n][0]=a;//compound - r2[n][1]=b;//pos - r2[n][2]=result[a][b][1]; //AA - r2[n][3]=result[a][b][0]; //Score - r2[n][4]=result[a][b][2]; //(int)10*return_z_score ((double)result[a][b][0], sum, sum2, A->len_aln); //ZScore - } - } - sort_int (r2,5, 3, 0, n-1);//sort on Score (3rd field) - for ( a=0; a< n; a++) - { - int comp, pos, bad; - - comp=r2[a][0]; - pos=r2[a][1]; - fprintf ( stdout, "SEQ %5d %5d %5d %s ",r2[a][1]+1,r2[a][3], r2[a][4], (do_tm)?alnpos2hmmtop_pred (A, NULL, r2[a][1], SHORT):"x"); - for (c=0; cnseq; c++)fprintf (stdout, "%c", A->seq_al[c][r2[a][1]]); - - - - bad=0; - for (c=0; c< A->nseq; c++) - { - int activity, res; - - activity=SAR->seq_al[comp][c]; - res=A->seq_al[c][pos]; - - if (activity=='O')aa[0][res]++; - if (activity=='I')aa[1][res]++; - } - - for (c=0; c< A->nseq; c++) - { - int activity, res; - activity=SAR->seq_al[comp][c]; - res=A->seq_al[c][pos]; - bad+=(aa[0][res] && aa[1][res])?1:0; - aa[0][res]=aa[1][res]=0; - } - fprintf ( stdout, " %20s %d |\nCOM %5d %5d %5d %s %s %20s %d |\n\n", SAR->name[r2[a][0]],bad,r2[a][1]+1,r2[a][3],r2[a][4], (do_tm)?alnpos2hmmtop_pred (A, NULL, r2[a][1], SHORT):"x",SAR->seq_al[r2[a][0]], SAR->name[r2[a][0]], bad); - - - if (r2[a][4]>threshold) - { - cons[r2[a][1]]++; - r3[r2[a][1]][1]++; - r3[r2[a][1]][2]+=r2[a][3]; - r4[r2[a][1]][r2[a][0]]=1; - } - } - sort_int (r3, 3,1,0, A->len_aln-1); - - for (a=0; alen_aln; a++) - { - if (r3[a][1]>0) - { - fprintf ( stdout, "\nPOS %4d %4d %4d %c ", r3[a][0]+1, r3[a][1], r3[a][2], cons[r3[a][0]]); - for (b=0; bnseq; b++)fprintf ( stdout, "%d", r4[r3[a][0]][b]); - if (do_tm)fprintf ( stdout, " %s",alnpos2hmmtop_pred (A, NULL, r3[a][0], VERBOSE)); - } - } - for (a=0; a< A->nseq; a++)fprintf ( stdout, "\n#MSA >%s\n#MSA %s",A->name[a], A->seq_al[a]); - fprintf ( stdout, "\n#MSA >cons\n#MSA %s", cons); - - return 0; -} -int *** simple_sar_predict (Alignment *A, Alignment *SAR, char *mode) -{ - //This function estimates the z score of every poition with every compound - //The best Z-score position is then used for the prediction - - int a, b, c, nts, pos, Rscore,Zscore; - int ***r; - int ***pred; - int **aa; - - - aa=declare_int (2,256); - pred=declare_arrayN (3, sizeof (int),SAR->nseq, A->nseq, 5); - - - r=simple_sar_analyze_col (A, SAR, mode); - nts=SAR->len_aln; //number_trainning_sequences; - - for (a=0; anseq; a++) - { - sort_int (r[a],4, 2, 0, A->len_aln-1); - - pos=r[a][A->len_aln-1][3]; //Best Position - Zscore=r[a][A->len_aln-1][2]; //Best Z-Score - Rscore=r[a][A->len_aln-1][0]; //Best Z-Score - - - for (c=0; cseq_al[a][c]=='I')aa[1][(int)A->seq_al[c][pos]]++;//Build Positive Alphabet for Compound a - if (SAR->seq_al[a][c]=='O')aa[0][(int)A->seq_al[c][pos]]++;//Build Positive Alphabet for Compound a - } - for (c=nts; cnseq; c++) - { - pred[a][c][0]=pos; - pred[a][c][1]=Zscore; - pred[a][c][2]=Rscore; - if (aa[1][(int)A->seq_al[c][pos]]>0) - { - - pred[a][c][3]=aa[1][(int)A->seq_al[c][pos]]; - pred[a][c][4]=aa[0][(int)A->seq_al[c][pos]]; - } - } - for (c=0; cseq_al[c][pos]]=aa[1][(int)A->seq_al[c][pos]]=0; - } - - for ( a=nts; a< A->nseq; a++) - { - for ( b=0; bnseq; b++) - { - fprintf ( stdout, ">%-25s %-25s Pos %3d ZScore %3d Rscore %3d Activity +: %d -: %d ", A->name [a], SAR->name[b], pred[b][a][0],pred[b][a][1], pred[b][a][2], pred[b][a][3], pred[b][a][4]); - if (pred[b][a][4]==0)for (c=0; cnseq; c++)fprintf ( stdout, "%c", A->seq_al[c][ pred[b][a][0]]); - fprintf ( stdout, " %s\n", SAR->name[b]); - for (c=0; cnseq-1; c++)fprintf ( stdout, "%c", SAR->seq_al[b][c]); - fprintf ( stdout, " %s\n", SAR->name[b]); fprintf ( stdout, "\n"); - - } - } - return pred; -} -int *pair_seq2seq (int *iseq, char *seq1, char *seq2); -int **simple_sar_analyze_pair_col ( Alignment *inA, Alignment *SAR, char *mode) -{ - - int a, b, c, n, n2; - int *iseq=NULL; - static int **result, **fresult; - int sar_mode=1; - int maxgapratio=0; - int nresults=10; - double sum, sum2, score; - Alignment *A; - char aa; - - if (!result) - { - result=declare_int (inA->len_aln*inA->len_aln,5); - - fresult=declare_int (inA->len_aln*nresults*SAR->nseq, 5); - - } - - A=rotate_aln (inA, NULL); - - - for (n2=0,a=0; anseq; a++) - { - - for (n=0, sum=0, sum2=0,b=0; bnseq-1; b++) - { - for ( c=b+1; cnseq; c++, n++) - { - - iseq=pair_seq2seq (iseq,A->seq_al[b], A->seq_al[c]); - if ( sar_mode==1) - score=sar_vs_iseq1(SAR->seq_al[a],iseq,maxgapratio,NULL,&aa); - else if (sar_mode==4) - score=sar_vs_iseq4(SAR->seq_al[a],iseq,maxgapratio,NULL,&aa); - //HERE ("%d", (int)score); - result[n][0]=a;//compound; - result[n][1]=b; //pos1 - result[n][2]=c; //pos2 - result[n][3]=(int)score; - - sum+=score; - sum2+=score*score; - } - } - for (b=0; bnseq, inA->len_aln,4); - - - sim=aln2sim_mat (inA, "idmat"); - A=rotate_aln (inA, NULL); - - - for ( a=0; anseq; a++) - { - best_pos=best_score=0; - for ( sum=0, sum2=0,b=0; bnseq; b++) - { - - if ( sar_mode==1) - score=sar_vs_seq1(SAR->seq_al[a], A->seq_al[b],maxgapratio, sim, &aa); - else if ( sar_mode==2) - score=sar_vs_seq2(SAR->seq_al[a], A->seq_al[b],maxgapratio, sim, &aa); - else if (sar_mode ==3) - score=sar_vs_seq3(SAR->seq_al[a], A->seq_al[b],maxgapratio, sim, &aa); - else if (sar_mode ==4) - score=sar_vs_seq4(SAR->seq_al[a], A->seq_al[b],maxgapratio, sim, &aa); - else if (sar_mode ==5) - score=sar_vs_seq5(SAR->seq_al[a], A->seq_al[b],maxgapratio, sim, &aa); - - - result[a][b][0]+=score*10; - result[a][b][1]=aa; - result[a][b][3]=b; - sum+=result[a][b][0]; - sum2+=result[a][b][0]*result[a][b][0]; - - } - for ( b=0; b< A->nseq; b++)result[a][b][2]=10*return_z_score ((double)result[a][b][0], sum, sum2, A->nseq); //Score - } - - return result; - - } -int *seq2iseq ( char *seq); -double sar_vs_seq4 ( char *sar, char *seq, float gl, int **sim, char *best_aa) -{ - - return sar_vs_iseq4 (sar, seq2iseq(seq), gl, sim, best_aa); -} -double sar_vs_seq1 ( char *sar, char *seq, float gl, int **sim, char *best_aa) -{ - - return sar_vs_iseq1 (sar, seq2iseq(seq), gl, sim, best_aa); -} - -int *seq2iseq ( char *seq) -{ - static int *iseq, clen; - int a; - - if (!iseq || clengl) return 0; - - if (!aa) - { - aa=vcalloc (256*256, sizeof(int)); - aal=vcalloc (N, sizeof (int)); - } - naa=0; - for ( a=0; areturn_score) - { - best_aa[0]=res; - return_score=score; - } - } - } - for ( a=0; agl) return 0; - - //Identify all the AA associated with a I (Positive alphabet) - aa=vcalloc ( 256, sizeof (int)); - for (b=0; b=1 || N01>=1) return 0; - if (N11) - { - score=evaluate_sar_score1 ( N, N11, Nmsa, Nsar); - } - else score=0; - - vfree (aa); - return score; - -} - - - -double sar_vs_iseq4 ( char *sar, int *seq, float gl, int **sim, char *best_aa) -{ - int N, Ni, No; - int a, b,c, r, s; - double Ng=0; - static int **aa; - - /*Correlation between AA conservation and Activity*/ - - N=strlen (sar); - for (a=0; agl) return 0; - - - if (!aa)aa=declare_int(2,257*257); - for (No=Ni=b=0; bgl) return 0; - - //Identify all the AA associated with a I (Positive alphabet) - aa=vcalloc ( 256, sizeof (int)); - for (b=0; bgl) return 0; - for (a=0; a<26; a++) - { - - N=Nmsa=Nsar=N11=N10=N01=0; - res='a'+a; - for (d=0,b=0; bmax_depth) - { - printf_exit (EXIT_FAILURE, stderr,"maximum depth: %d", max_depth); - } - if ( depth==0) depth=2; - A=declare_aln2 (strlen (S1->seq[0]),depth); - a0=A->seq_al[0]; - a1=A->seq_al[1]; - A->len_aln=strlen (S1->seq[0]); - - for (a=0; a< S1->nseq; a++) - for ( b=0; bnseq; b++) - { - A->nseq=2; - sprintf (a0, "%s", S1->seq[a]); - sprintf (a1, "%s", S2->seq[b]); - - if ( strlen (a0)!=strlen (a1)) - { - add_warning (stderr, "WARNING %s (%d) and %s (%d) do not have the same length", S1->name[a], strlen (S1->seq[a]), S2->name[b], strlen (S2->seq[b])); - exit (0); - } - - fprintf ( stdout, ">2 %15s %15s CORR: %.3f EVAL: %5d\n",S1->name[a], S2->name[b], sar_aln2cor (A), sar_aln2ev (A)); - sarseq2anti_sarseq (S1->seq[a],a0); - fprintf ( stdout, ">2 %15s %15s ANTI: %.3f EVAL: %5d\n", S1->name[a], S2->name[b], sar_aln2cor (A), sar_aln2ev (A)); - if ( depth >=3) - { - A->nseq=3; - a2=A->seq_al[2]; - for (c=b+1; cnseq; c++) - { - sprintf (a0, "%s", S1->seq[a]); - sprintf (a1, "%s", S2->seq[b]); - sprintf (a2, "%s", S2->seq[c]); - fprintf ( stdout, ">2 %15s %15s %15s CORR: %.3f EVAL: %5d\n",S1->name[a], S2->name[b],S2->name[c], sar_aln2cor (A), sar_aln2ev (A)); - sarseq2anti_sarseq (S1->seq[a],a0); - fprintf ( stdout, ">2 %15s %15s ANTI: %.3f EVAL: %5d\n", S1->name[a], S2->name[b], S2->name[c],sar_aln2cor (A), sar_aln2ev (A)); - } - if ( depth>=4) - { - A->nseq=4; - a3=A->seq_al[2]; - for (d=c+1; dnseq; d++) - { - sprintf (a0, "%s", S1->seq[a]); - sprintf (a1, "%s", S2->seq[b]); - sprintf (a2, "%s", S2->seq[c]); - sprintf (a3, "%s", S2->seq[d]); - - fprintf ( stdout, ">2 %15s %15s %15s %15s CORR: %.3f EVAL: %5d\n",S1->name[a], S2->name[b],S2->name[c],S2->name[d], sar_aln2cor (A), sar_aln2ev (A)); - sarseq2anti_sarseq (S1->seq[a],a0); - fprintf ( stdout, ">2 %15s %15s %15s %15s ANTI: %.3f EVAL: %5d\n", S1->name[a], S2->name[b], S2->name[c],S2->name[d],sar_aln2cor (A), sar_aln2ev (A)); - } - if (depth>=5) - { - A->nseq=5; - a4=A->seq_al[3]; - for (e=d+1; enseq; e++) - { - sprintf (a0, "%s", S1->seq[a]); - sprintf (a1, "%s", S2->seq[b]); - sprintf (a2, "%s", S2->seq[c]); - sprintf (a3, "%s", S2->seq[d]); - sprintf (a4, "%s", S2->seq[d]); - } - } - } - } - } - - return S1; -} -char* sarseq2anti_sarseq (char *seq_in, char *seq_out) -{ - int a; - if (!seq_out)seq_out=vcalloc (strlen (seq_in)+1, sizeof (char)); - for (a=0; anseq-1; a++) - for (b=a+1; bnseq; b++) - { - for (n11=n1=0,c=0; clen_aln; c++) - { - n11+=(A->seq_al[a][c]=='I' && A->seq_al[b][c]=='I'); - n1+= (A->seq_al[a][c]=='I' || A->seq_al[b][c]=='I'); - } - tot_cor+=(n1==0)?0:n11/n1; - n++; - } - tot_cor/=n; - return tot_cor; -} -int sarseq_pair2ev ( char *s1, char *s2,int mode); -int sar_aln2ev (Alignment *A) -{ - float n1, n11; - int a, b, c, tot=0, n=0; - - tot=0; - for (a=0; anseq-1; a++) - for (b=a+1; bnseq; b++) - { - tot+=sarseq_pair2ev (A->seq_al[a], A->seq_al[b], 1); - n++; - } - return tot; -} -int sarseq_pair2ev ( char *s1, char *s2,int mode) -{ - int l, t1, t2, t11,a, n1, n2, s; - if ( (l=strlen (s1))!=strlen (s2)) - { - return -1; - } - if (mode==2) - { - t1=l/2; - t2=l/2; - t11=l/2; - } - else - { - for (t1=t2=t11=0,a=0; an11 || n01>n11)return 0; - - p1= M_chooses_Nlog (n1msa, N) + M_chooses_Nlog (n1sar-n11, N-n1msa) + M_chooses_Nlog (n11, n1msa); - p2=(M_chooses_Nlog (n1msa, N)+ M_chooses_Nlog (n1sar, N)); - p=(p1-p2); - - return -p; - -} -double evaluate_sar_score2 ( int N, int n11, int n1msa, int n1sar) -{ - - - return n11-((n1msa-n11)+(n1sar-n11)); - - if ( n11nseq, sizeof (int**)); - - - list=file2list (weight_file, " "); - - a=b=0; - for (a=0; a< SAR->nseq; a++) - { - b=c=0; - while (list[b]) - { - if ( strm (list[b][1], SAR->name[a]) && atoi (list[b][3])>0)c++; - b++; - } - - weight[a]=declare_int (c+1, 3); - fprintf ( stderr, "\n%s %d", SAR->name[a], c); - b=c=0; - while (list[b]) - { - if ( strm (list[b][1], SAR->name[a]) && atoi (list[b][3])>0) - { - weight[a][c][0]=atoi(list[b][2])-1; - weight[a][c][1]=list[b][5][0]; - weight[a][c][2]=atoi (list[b][3]); - c++; - } - b++; - } - weight[a][c][0]=-1; - } - - for (a=0; anseq; a++) - { - fprintf ( stdout, ">%s\n", A->name[a]); - for ( b=0; b< SAR->nseq; b++) - { - score=seq2weighted_sar_score(A->seq_al[a], weight[b]); - fprintf ( stdout, "%c", (score>limit)?'I':'O'); - } - fprintf (stdout, "\n"); - } - myexit (EXIT_SUCCESS); - return A; -} - -Alignment *display_sar ( Alignment *A, Alignment *SAR, char *compound) -{ - int a,c; - char name[100]; - - c=name_is_in_list ( compound, SAR->name, SAR->nseq, 100); - if ( c==-1)return A; - - for ( a=0; a< A->nseq; a++) - { - sprintf (name, "%s", A->name[a]); - sprintf ( A->name[a], "%c_%s_%s", SAR->seq_al[c][a], name,compound); - } - return A; -} -Alignment *aln2weighted_sar_score ( Alignment *A,Alignment *SAR, char *weight_file, char *compound) -{ - - int a, b, c=0; - int **weight; - - int score; - char reactivity; - char ***list; - - - if ( SAR) - { - c=name_is_in_list (compound, SAR->name, SAR->nseq, 100); - } - - list=file2list (weight_file, " "); - a=b=0; - while (list[a]) - { - if (strm (list[a][1], compound))b++; - a++; - } - weight=declare_int ( b+1, 3); - - - a=b=0; - while (list[a]) - { - if ( !strm (list[a][1], compound) || strm ("TOTPOS", list[a][1])); - else - { - weight[b][0]=atoi(list[a][2])-1; - weight[b][1]=list[a][5][0]; - weight[b][2]=atoi(list[a][3]); - b++; - } - a++; - } - weight[b][0]=-1; - for ( a=0; a< A->nseq; a++) - { - score=seq2weighted_sar_score (A->seq_al[a], weight); - reactivity=(!SAR || c==-1)?'U':SAR->seq_al[c][a]; - - sprintf (A->seq_comment[a], "Compound %-15s Reactivity %c SAR_SCORE %5d", compound,reactivity, (int) score); - - } - return A; -} - -float seq2weighted_sar_score ( char *seq, int **weight) -{ - int a, p, r, w; - float score=0; - - a=0; - while (weight[a][0]!=-1) - { - p=weight[a][0]; - r=weight[a][1]; - w=weight[a][2]; - - if ( is_gap(seq[p])); - else if ( tolower(seq[p])==r)score+=w; - a++; - } - return score; - } - -Alignment * sar2simpred (Alignment *A, Alignment *SAR, char *posfile, char *compound, int L1,int L2 ) -{ - int a, b, c, c1, c2; - int **sim, **sim_ref, npred=0; - float n11, n10, n01, n00; - float sn, sp; - - int tot_sim=0; - int N11=1, N01=2, N10=3, NXX=4, SIM=5; - float ***tot; - int i1, i2; - - - n11=n10=n01=n00=0; - tot=declare_arrayN(3,sizeof (float), 10, 6, 2); - - sim_ref=aln2sim_mat (A, "idmat"); - if (strm (posfile, "all")) - sim=sim_ref; - else - { - Alignment *B; - B=copy_aln ( A,NULL); - B=extract_aln3(B,posfile); - - /*if (B->len_aln==0)L1=100; - else - L1=((B->len_aln-1)*100)/B->len_aln; - - if (L1<=0)L1=100; - */ - sim=aln2sim_mat (B, "idmat"); - } - - for (a=0; a< A->nseq-1; a++) - { - for ( b=a+1; b< A->nseq; b++) - { - for ( c=0; cnseq; c++) - { - if ( (strm (compound, SAR->name[c]) || strm ( compound, "all"))) - { - /*if ( sim_ref[a][b]<30 || sim_ref[a][b]>60)continue;*/ - i1=0; /*sim_ref[a][b]/10;if (i1==10)i1--;*/ - - i2=sim[a][b]; - - - c1=(SAR->seq_al[c][a]=='I')?1:0; - c2=(SAR->seq_al[c][b]=='I')?1:0; - - n11=(c1 && c2)?1:0; - n01=(!c1 && c2)?1:0; - n10=(c1 && !c2)?1:0; - n00=(!c1 && !c2)?1:0; - - tot[i1][N11][0]+=n11; - tot[i1][N01][0]+=n01; - tot[i1][N10][0]+=n10; - /*tot[i1][N00][0]+=n00;*/ - tot[i1][NXX][0]++; - tot[i1][SIM][0]+=sim_ref[a][b]; - - if ( i2>=L1) - { - tot[i1][N11][1]+=n11; - tot[i1][N01][1]+=n01; - tot[i1][N10][1]+=n10; - /*tot[i1][N00][1]+=n00;*/ - tot[i1][NXX][1]++; - tot[i1][SIM][1]+=sim_ref[a][b]; - } - } - } - } - } - - for (a=0; a<1; a++) - { - sp=(tot[a][N11][0])/(tot[a][N11][0]+tot[a][N10][0]); - fprintf ( stdout, "\n%15s N11 %5d SP %.2f ",compound, (int)tot[a][N11][0],sp); - sp=((tot[a][N11][1]+tot[a][N10][1])==0)?1:(tot[a][N11][1])/(tot[a][N11][1]+tot[a][N10][1]); - sn=(tot[a][N11][0]==0)?1:(tot[a][N11][1]/tot[a][N11][0]); - fprintf ( stdout, " N11 %5d SP %.2f SN %.2f SIM %.2f", (int)tot[a][N11][1], sp,sn, (tot[a][SIM][1]/tot[a][NXX][1])); - } - - myexit (0); - sp=((n11+n01)==0)?1:n11/(n11+n01); - sn=((n11+n01)==0)?1:n11/(n11+n10); - - fprintf ( stdout, "\nLimit: %d NPRED %d AVGSIM %d SN %.2f SP %.2f TP %d FP %d FN %d",L1, npred, tot_sim, sn, sp, (int)n11, (int)n01, (int)n10); - myexit (EXIT_SUCCESS); - return A; -} - -Alignment * sar2simpred2 (Alignment *A, Alignment *SAR, char *seqlist, char *posfile, char *compound, int L ) -{ - int a,b, c,c1, c2, p, s; - float n11, n10, n01, n00, n, sn2, prediction,sp, n1, n0, t, entropy, Delta; - int *rlist, *tlist, *pred, *npred, tsim, psim; - int **sim, **sim_ref; - int nr=0; - int nrs; - char *out; - int delta_max; - Alignment *B; - int printall=1; - - out=vcalloc (A->nseq+1, sizeof (char)); - rlist=vcalloc ( A->nseq, sizeof (int)); - tlist=vcalloc ( A->nseq, sizeof (int)); - pred=vcalloc(2, sizeof (int)); - npred=vcalloc(2, sizeof (int)); - - nrs=0; - if ( strm (seqlist, "first")) - { - for ( a=0; anseq; a++) - { - if ( strm ( compound, SAR->name[a])) - { - for ( b=0; bnseq; b++) - { - if ( SAR->seq_al[a][b]=='I') - { - fprintf ( stderr, "COMP: %s REF SEQ: %s\n", A->name[b], compound); - rlist[nrs]=b; - tlist[rlist[nrs]]=1; - nrs++; - break; - } - } - } - } - } - else if (strm (seqlist, "all")) - { - for ( a=0; a< A->nseq; a++) - { - rlist[nrs]=a; - tlist[rlist[a]]=1; - nrs++; - } - } - else if ((a=name_is_in_list ( seqlist, A->name, A->nseq, 100))!=-1) - { - rlist[nrs]=a; - tlist[rlist[nrs]]=1; - nrs++; - } - else - { - Alignment *R; - R=main_read_aln (seqlist, NULL); - for (a=0; anseq; a++) - { - rlist[a]=name_is_in_list( R->name[a], A->name, A->nseq, 100); - tlist[rlist[a]]=1; - } - free_aln (R); - } - - c=name_is_in_list ( compound, SAR->name, SAR->nseq, 100); - - sim_ref=aln2sim_mat (A, "idmat"); - if (strm (posfile, "all")) - { - sim=sim_ref; - B=A; - } - else - { - B=copy_aln ( A,NULL); - B=extract_aln3(B,posfile); - sim=aln2sim_mat (B, "idmat"); - } - - n11=n10=n01=n00=n=n1=n0=0; - delta_max=0; - for (a=0; anseq; a++) - { - if ( tlist[a] && !strm (seqlist, "all")) - out[a]=(SAR->seq_al[c][a]=='I')?'Z':'z';/*SAR->seq_al[c][a];*/ - else - { - - pred[0]=pred[1]=0; - npred[0]=npred[1]=1; - c1=(SAR->seq_al[c][a]=='I')?1:0; - for (nr=0,tsim=0,psim=0,b=0; bseq_al[c][rlist[b]]=='o'); - else - { - c2=(SAR->seq_al[c][rlist[b]]=='I')?1:0; - nr+=c2; - s=sim[a][rlist[b]]; - tsim+=sim_ref[a][rlist[b]]; - psim+=sim[a][rlist[b]]; - if (s>=L) - { - pred[c2]+=s; - npred[c2]++; - } - } - } - - if (c1==0)n0++; - else n1++; - t++; - - - Delta=pred[1]-pred[0]; - - if (Delta<-delta_max){p=0;out[a]= (c1==0)?'O':'o';} - else if (Delta>delta_max){p=1;out[a]=(c1==1)?'I':'i';} - else {p=-1; out[a]=(c1==1)?'U':'u';} - - if ( p==-1); - else if ( p && c1)n11++; - else if ( p && !c1)n10++; - else if ( !p && !c1)n00++; - else if ( !p && c1)n01++; - - if (p!=-1)n++; - if (printall)fprintf ( stdout, ">%-15s %d %c OVERALL_SIM:%d POSITION_SIM %d\n%s\n", B->name[a], c1, out[a],tsim/nrs,psim/nrs,B->seq_al[a]); - } - } - sp=((n11+n10)==0)?1:n11/(n11+n10); - sn2=((n1)==0)?1:n11/n1; - prediction=(n11+n00)/(n1+n0); - entropy=(float)(M_chooses_Nlog (nr, nrs)/M_chooses_Nlog(nrs/2, nrs)); - - fprintf ( stdout, ">%-15s Sp %.2f Sn %.2f Pred %.2f E %.2f\n", compound,sp, sn2,prediction,entropy ); - fprintf ( stdout, "%s\n", out); - - myexit (EXIT_SUCCESS); - return A; -} -/************************************************************************************/ -/* ALIGNMENT ANALYZE : SAR FOR OR */ -/************************************************************************************/ - -void display_or_help(); -void display_or_help() -{ - fprintf ( stdout, "\nor_sar options:"); - fprintf ( stdout, "\n_ORCL_: Command_line in a file"); - - fprintf ( stdout, "\n_ROTATE_ : rotate the sar matrix (if each entry is a compound rather than a sequence)"); - fprintf ( stdout, "\n_JNIT_ : number cycles of Jacknife"); - fprintf ( stdout, "\n_JNSEQ_ : Number of sequences picked up in alignment [0 to keep them all, 10 by default]"); - fprintf ( stdout, "\n_JSAR_ : Number of compounds picked up in the SAR matrix [0 to keep them all => default"); - fprintf ( stdout, "\n_JRSAR_ : Randomization of the SAR file between each JNIT iteration: S, C, R, SC, SR... (S: seq, C, column, R: residue"); - fprintf ( stdout, "\n_JRALN_ : Randomization of the ALN file between each JNIT iteration: S, C, R, SC, SR... (S: seq, C, column, R: residue"); - - - fprintf ( stdout, "\n_NPOS_ : Number of positions used to make the prediction [4 by default]"); - fprintf ( stdout, "\n_DEPTH_ : Depth of the motif degenerated alphabet [Default=2]"); - fprintf ( stdout, "\n_POSFILE_ : Predefined list of positions in a filename, A->nseq); - S=aln2jacknife (S, 0, sarlen); -} -Alignment *or_scan (Alignment *A,Alignment *S, char *pmode) -{ - int l, a,ax,cx, b; - char mode[100]; - int start, offset,w; - int nl, *poslist; - - - fprintf ( stdout, "\nPARAMETERS: %s\n", pmode); - fprintf ( stderr, "\nPARAMETERS: %s\n", pmode); - - strget_param (pmode, "_SW_", "15", "%d",&w); - strget_param (pmode, "_SMINW_", "5", "%d",&start); - strget_param (pmode, "_SSTART_", "5", "%d",&offset); - strget_param (pmode, "_SMODE_", "single", "%s",&mode); - - - l=intlen (A->len_aln); - poslist=vcalloc ( A->len_aln, sizeof (int)); - nl=0; - - for ( a=0; a< A->len_aln; a++)poslist[nl++]=a+1; - - if ( strm (mode, "single")) - { - for ( a=0; alen_aln-2; a++) - { - int c, gap, score; - for (gap=0,c=0; cnseq; c++)gap+=(A->seq_al[c][a]=='-'); - if ( !gap) - { - Alignment *B; - B=extract_aln (A, a+1, a+2); - B=or_sar (B, S, pmode, NO_PRINT); - score=B->score_aln; - free_aln (B); - } - else - { - score=0; - } - fprintf ( stdout, "P: %*d S: %4d\n",l,a+1, score); - //fprintf ( stderr, "P: %*d S: %4d\n",l,a+1, score); - - } - } - else if strm (mode, "scan") - { - - for ( ax=0; axA->len_aln)continue; - if (pend<1 || pend>A->len_aln)continue; - B=extract_aln (A, a-w, a+w); - - B=or_sar (B, S,pmode, NO_PRINT); - - - if (B->score_aln>=best_score){best_score=B->score_aln; best_pos=a;best_w=b;best_start=pstart; best_end=pend;} - free_aln (B); - } - fprintf ( stdout, "P: %*d I: %*d %*d Score: %5d L: %2d\n", l,best_pos, l,best_start+offset, l,best_end, best_score,(best_w*2)+1 ); - } - } - else if ( strm ( mode, "scan_comp")) - { - Alignment *NS=NULL; - int *tresults; - int s, n=0, p1; - int nbest; - int *poscache; - float sc, eval; - int **index; - int **poslist; - int len=1; - float best_sc, best_eval; - int best_pos; - char *best_word; - int sth; - int sev; - - index=aln2pos_simple (A, A->nseq); - strget_param (pmode, "_NPOS_", "1", "%d", &len); - strget_param (pmode, "_STH_", "0", "%d", &sth); - strget_param (pmode, "_SEV_", "0", "%d", &sev); - - if (sev==0)for (a=0, sev=1; alen_aln; - - tresults=vcalloc ( A->len_aln, sizeof (int)); - poscache=vcalloc ( A->len_aln, sizeof (int)); - poslist=generate_array_int_list (len, 0, A->len_aln-1,1, &n, NULL); - for (s=0; slen_aln; s++) - { - - char *word; - NS=aln2block (S, s+1, s+2, NS); - fprintf ( stderr, "\nProcess: %s ...\n", get_compound_name(s, mode)); - - for (best_sc=0,best_pos=0,best_word=NULL,a=0; abest_sc) - { - best_sc=sc; - best_eval=eval; - best_word=word; - best_pos=a; - nbest=1; - } - else if ( word && sc>=best_sc) - { - nbest++; - } - else - vfree (word); - } - nbest/=len; - for (a=0; a<=A->nseq; a++) - { - - - - fprintf (stdout, "\n>%s PPPP: S: %s SC: %d EV: %d NBest: %d W: %s", get_compound_name(s, mode),(a==A->nseq)?"cons":A->name[a],(int)best_sc, (int) best_eval, nbest,best_word); - for ( b=0; bnseq) - { - p1=poslist[best_pos][b]; - if (best_sc>sth && nbest0)p1++; - } - fprintf ( stdout, " %d ", p1); - - } - } - - } - for ( p1=0; p1len_aln;p1++) - fprintf ( stdout, "\n>TOT_P: %d %d", p1+1, tresults[p1]); - } - - else if ( strm ( mode, "scan_comp_old")) - { - Alignment *NS=NULL, *BLOCK=NULL; - int **results, *tresults; - int s, n, p1, p2; - int npos=1; - int *poscache; - - results=declare_int (A->len_aln*A->len_aln, 3); - tresults=vcalloc ( A->len_aln, sizeof (int)); - poscache=vcalloc ( A->len_aln, sizeof (int)); - for (s=0; slen_aln; s++) - { - int count; - NS=aln2block (S, s+1, s+2, NS); - fprintf ( stderr, "\nProcess: %s ...", get_compound_name(s, mode)); - for (n=0,p1=0; p1len_aln-w; p1++, count ++) - { - if ( count == 50){fprintf ( stderr, "*");count=0;} - for ( p2=p1; p2len_aln-w; p2++, n++) - { - poscache[p1]=1; - poscache[p2]=1; - - BLOCK=alnpos2block (A,poscache,BLOCK); - - if ( aln2ngap(BLOCK)<=0)BLOCK=or_sar (BLOCK,NS, pmode, NO_PRINT); - else BLOCK->score_aln=0; - - //if ( BLOCK->score_aln>0)HERE ("P: %d %d %d", p1, p2,BLOCK->score_aln); - results[n][0]=p1; - results[n][1]=p2; - results[n][2]=BLOCK->score_aln; - poscache[p1]=poscache[p2]=0; - - } - } - sort_int_inv (results, 3, 2, 0, n-1); - for (p1=0; p1%s PPPP: %d %d SC: %d", get_compound_name(s, mode), results[p1][0]+1,results[p1][1]+1, results[p1][2]); - fprintf ( stderr, "\n>%s PPPP: %d %d SC: %d", get_compound_name(s, mode), results[p1][0]+1,results[p1][1]+1, results[p1][2]); - tresults[results[p1][0]]++; - tresults[results[p1][1]]++; - } - } - for ( p1=0; p1len_aln;p1++) - if ( tresults[p1])fprintf ( stdout, "\n>TOT_P: %d %d", p1+1, tresults[p1]); - } - - exit (EXIT_SUCCESS); -} - - -ORP * or_sar_compound(Alignment *A, Alignment *S, char *mode, int print); -ORP* set_orp_name ( ORP* P, char *name); -ORP* set_orp_offset ( ORP* P,int offset); -Alignment * or_sar(Alignment *inA, Alignment *inS, char *mode, int print) -{ - char rsar[4],raln[4], sarmode[100]; - int rotate, a, b, start, end; - ORP **R, *ORS; - Alignment *A, *S; - - if ( mode && (strstr (mode, "help") || strstr (mode, "HELP"))) - display_or_help(); - - strget_param (mode, "_SARMODE_", "single", "%s", &sarmode);//single | all - strget_param (mode, "_RSAR_", "NO", "%s", rsar); - strget_param (mode, "_RALN_", "NO", "%s", raln); - strget_param (mode, "_ROTATE_", "0", "%d", &rotate); - - strget_param (mode, "_START_", "1", "%d", &start);start--; - strget_param (mode, "_END_", "0", "%d", &end); - - A=copy_aln (inA, NULL); - S=copy_aln (inS, NULL); - - if ( end==0)end=A->len_aln; - if ( start!=0 || end!=A->len_aln) - { - int c; - if ( start>=A->len_aln || end<=start || end >A->len_aln) - printf_exit (EXIT_FAILURE, stderr, "ERROR: _START_%d and _END_%d are incompatible with the aln [len=%d][FATAL:%s]", start, end, A->len_aln, PROGRAM); - for (b=0; bnseq; b++) - for (c=0,a=start; aseq_al[b][c]=A->seq_al[b][a]; - A->len_aln=c; - for (b=0; bnseq; b++)A->seq_al[b][c]='\0'; - } - - - S=aln2random_aln (S, rsar); - A=aln2random_aln (A, raln); - - - - if (rotate) - { - Alignment *rS; - - if (S->len_aln!=A->nseq) - printf_exit ( EXIT_FAILURE,stderr, "ERROR: Alignment and SAR matrix are incompatible [FATAL:%s]", PROGRAM); - - rS=rotate_aln (S, NULL); - - for (a=0; a< A->nseq; a++)sprintf (rS->name[a], "%s", A->name[a]); - free_aln (S); - S=rS; - } - - R=vcalloc ( S->len_aln+2, sizeof (ORP*)); - if (strm (sarmode, "all"))R[0]=or_sar_compound (A, S, mode, print); - else if ( strm (sarmode, "single")) - { - for ( a=0; alen_aln; a++) - { - Alignment *NS=NULL; - - NS=aln2block (S, a+1, a+2, NS); - - R[a]=or_sar_compound (A, NS, mode, NO_PRINT); - set_orp_name ((R[a]), get_compound_name (a, mode)); - set_orp_offset(R[a], start); - display_or_summary (R[a], mode, stdout, print); - - } - } - - - - ORS=combine_n_predictions (R, A, S); - - display_or_summary (ORS, mode, stdout, print); - inA->score_aln=(int)(ORS->best*(float)1000); - - free_orp_list (R); - free_orp(ORS); - - return inA; - } -ORP* set_orp_offset ( ORP* P,int offset) -{ - if (!P) return NULL; - else - { - P->offset=offset; - return set_orp_offset(P->PR, offset); - } -} - -ORP* set_orp_name ( ORP* P, char *name) -{ - if (!P) return NULL; - else - { - sprintf ( P->name, "%s", name); - return set_orp_name(P->PR, name); - } -} - -ORP * combine_n_predictions (ORP**R, Alignment *A, Alignment *S) -{ - int a=0; - ORP*N=NULL; - while (R[a]) - { - N=combine_2_predictions (R[a++], N, A, S); - } - sprintf ( N->name, "ALL"); - sprintf ( N->mode, "COMBINED"); - - return N; -} - -ORP *combine_2_predictions ( ORP*IN, ORP *TO,Alignment *A, Alignment *S) -{ - int a; - - if ( !TO) - { - TO=declare_or_prediction (IN->ncomp, IN->nseq, IN->len); - TO->A=A; - TO->S=S; - TO->P=copy_aln(S, NULL); - TO->offset=IN->offset; - TO->ncomp=0; - } - - for (a=0; a< IN->len; a++) - { - TO->pos[a]+=IN->pos[a]; - } - - TO->fp+=IN->fp; - TO->fn+=IN->fn; - TO->tp+=IN->tp; - TO->tn+=IN->tn; - rates2sensitivity (TO->tp, TO->tn, TO->fp, TO->fn, &(TO->sp), &(TO->sn), &(TO->sen2), &(TO->best)); - - - for (a=0; a<(TO->A)->nseq; a++) - { - (TO->P)->seq_al[a][TO->ncomp]=(IN->P)->seq_al[a][0]; - //(TO->S)->seq_al[a][TO->ncomp]=(IN->S)->seq_al[a][0]; - } - TO->ncomp++; - (TO->P)->len_aln=TO->ncomp; - (TO->A)->score_aln=TO->best; - - return TO; -} - - - -ORP * display_or_summary (ORP *CP, char *mode, FILE *fp, int print) -{ - int a; - char *pred; - char *exp; - char *motif; - Alignment *A, *P, *S; - - - - A=CP->A; - P=CP->P; - S=CP->S; - - - - pred=vcalloc ( P->nseq*P->len_aln*2, sizeof (char)); - exp=vcalloc ( P->nseq*P->len_aln*2, sizeof (char)); - motif=vcalloc (CP->len+1, sizeof (char)); - - - - if (P && S) - { - for ( a=0; anseq; a++) - { - strcat (pred,P->seq_al[a]); - strcat (exp, S->seq_al[a]); - } - CP->evalue=profile2evalue(pred, exp); - } - a=0; - while ( CP->motif && CP->motif[a] && CP->motif[0][a][0])strcat (motif, CP->motif[0][a++]); - - if ( print==PRINT) - { - fprintf (fp, "\n>%-10s Mode: %s Accuracy: %6.2f E-value: %6.2f Motif: ",CP->name,CP->mode, CP->best, CP->evalue); - - if (motif[0]) - { - fprintf (fp, " %s",motif); - } - for ( a=0; alen; a++) - { - if ( CP->pos[a]) fprintf ( fp, "\n>%-10s Mode: %s P: cons %3d SC: %4d", CP->name, CP->mode, a+1+CP->offset, CP->pos[a]); - } - fprintf ( fp, "\n"); - } - vfree (pred); vfree(motif); vfree(exp); - if (CP->PR)display_or_summary (CP->PR, mode, fp, print); - return CP; -} - - -ORP * or_sar_compound(Alignment *A, Alignment *S, char *mode, int print) -{ - char rmode[100]; - Alignment *P=NULL; - ORP *PR=NULL; - - strget_param (mode, "_MODE_", "predict", "%s", rmode); - - - - if (strm (rmode, "jack"))P=or_jack (A, S, mode); - else if (strm (rmode, "loo")) PR=or_loo (A, S, mode,NULL, print); - else if (strm (rmode, "comploo")) P=or_comp_loo (A, S, mode,NULL, print); - else if ( strm (rmode, "comppos")){or_comp_pos ( A, S, mode,print);exit(0); return NULL;} - else if ( strm (rmode, "pos"))P=or_aln2pos_aln ( A, S, mode); - else if ( strm (rmode, "predict"))P=or_predict ( A, S, mode); - else if ( strm (rmode, "self"))PR=or_self_predict ( A, S, mode, NULL, print); - else if ( strm (rmode, "sim"))P=or_sim ( A, S, mode); - - else if ( strm (rmode, "test"))P=or_test ( A, S, mode); - else - { - printf_exit (EXIT_FAILURE, stderr, "ERROR: %s is an unknown mode of or_sar [FATAL:%s]\n", rmode,PROGRAM); - return NULL; - } - - if (!PR) - { - PR=vcalloc (1, sizeof (ORP)); - PR->P=P; - } - return PR; -} -Alignment * or_test ( Alignment *inA, Alignment *inS, char *mode) -{ - return inA; -} - - - -float or_id_evaluate ( Alignment *A, Alignment *S, char *mode, int *pos, int print) -{ - char *w; - float score; - - w=or_id_evaluate2(A,S, mode, pos,print, &score); - - vfree (w); - return score; -} -char* or_id_evaluate2 ( Alignment *A, Alignment *S, char *mode, int *pos, int print, float *rscore) -{ - static char **words; - static int *plist; - char *bword; - - int res, p,nl, w, c, s, exp, pred; - int tp, tn, fp, fn; - float sn, sp, sen2, best, best_score; - - if (!A) - {free_char (words, -1); - vfree (plist); - return NULL; - } - rscore[0]=0; - - plist=pos2list (pos, A->len_aln, &nl); - words=declare_char (A->nseq, nl+1); - bword=vcalloc (nl+1, sizeof (char)); - for (p=0; pnseq; s++) - { - res=A->seq_al[s][plist[p]]; - if (res=='-'){or_id_evaluate2 (NULL, NULL, NULL, NULL, 0, NULL);vfree (bword);return 0;} - words[s][p]=res; - } - } - - for (best_score=0,w=0; wnseq; w++) - { - tp=fp=fn=tn=0; - - for (c=0; clen_aln; c++) - { - for (s=0; snseq; s++) - { - exp=(S->seq_al[s][c]=='I')?1:0; - pred=strm (words[w], words[s]); - if ( exp && pred)tp++; - else if ( exp && !pred)fn++; - else if (!exp && !pred)tn++; - else if (!exp && pred)fp++; - } - } - rates2sensitivity (tp, tn, fp,fn,&sp, &sn, &sen2, &best); - if ( best>best_score) - { - best_score=best; - sprintf (bword, "%s", words[w]); - } - } - rscore[0]=(float)1000*best_score; - or_id_evaluate2 (NULL, NULL, NULL, NULL, 0, NULL); - return bword; -} - -float or_loo_evaluate2 ( Alignment *A, Alignment *S, char *mode, int *pos, int print) -{ - int c, s, p, res, sar; - - int *plist, nl; - int tp, tn, fp, fn; - float sn, sp, sen2, best; - char **words, **positive, **negative; - - tp=tn=fp=fn=0; - plist=pos2list (pos, A->len_aln, &nl); - - words=declare_char (A->nseq, nl+1); - - for (p=0; pnseq; s++) - { - res=A->seq_al[s][plist[p]]; - if (res=='-'){vfree (plist);free_char (words, -1); return 0;} - words[s][p]=res; - } - } - positive=vcalloc ( A->nseq, sizeof (char*)); - negative=vcalloc ( A->nseq, sizeof (char*)); - for (c=0; clen_aln; c++) - { - //Fill the match matrix - for (p=0; pnseq; s++) - { - sar=S->seq_al[s][c]; - if (sar=='I')positive[s]=words[s]; - else if ( sar=='O')negative[s]=words[s]; - } - } - - //Evaluate the scores - for (s=0; s< A->nseq; s++) - { - int pos=0, neg=0, pred; - sar=S->seq_al[s][c]; - positive[s]=negative[s]=NULL; - - if ( name_is_in_list (words[s], positive, A->nseq, nl+1)!=-1) - pos=1; - if ( name_is_in_list (words[s], negative, A->nseq, nl+1)!=-1) - neg=1; - - if (pos & !neg) pred=1; - else pred=0; - - if ( pred && sar=='I')tp++; - else if (!pred && sar=='I')fn++; - else if (!pred && sar=='O')tn++; - else if ( pred && sar=='O')fp++; - - if ( sar=='I')positive[s]=words [s]; - else negative[s]=words[s]; - } - } - - vfree (negative); vfree (positive); - vfree (plist); free_char (words, -1); - rates2sensitivity (tp, tn, fp,fn,&sp, &sn, &sen2, &best); - - return (float)1000*best; -} -float or_loo_evaluate ( Alignment *A, Alignment *S, char *mode, int *pos, int print) -{ - int c, s, p, res, sar; - int **matP,**matN; - int *plist, nl; - int tp, tn, fp, fn; - float sn, sp, sen2, best; - - tp=tn=fp=fn=0; - plist=pos2list (pos, A->len_aln, &nl); - matP=declare_int (nl, 256); - matN=declare_int (nl, 256); - - for (c=0; clen_aln; c++) - { - //Fill the match matrix - for (p=0; pnseq; s++) - { - res=A->seq_al[s][plist[p]]; - sar=S->seq_al[s][c]; - if (res=='-'){vfree (plist); free_int (matP, -1);free_int (matN, -1); return 0;} - if (sar=='I')matP[p][res]++; - if (sar=='O')matN[p][res]++; - } - } - - //Evaluate the scores - for (s=0; s< A->nseq; s++) - { - int scoreP, scoreN; - int pred, valP, valN; - - sar=S->seq_al[s][c]; - for (scoreN=0,scoreP=0,p=0; pseq_al[s][plist[p]]; - - valP=matP[p][res]-(sar=='I')?1:0; - scoreP+=(valP>0)?1:0; - - valN=matN[p][res]-(sar=='O')?1:0; - scoreN+=(valN>0)?1:0; - } - - if ( scoreP==nl && scoreNnseq; s++) - { - res=A->seq_al[s][plist[p]]; - sar=S->seq_al[s][c]; - if (sar=='I')matP[p][res]=0; - else matN[p][res]=0; - } - } - } - - vfree (plist); free_int (matP, -1);free_int (matN, -1); - rates2sensitivity (tp, tn, fp,fn,&sp, &sn, &sen2, &best); - - return (float)1000*best; -} -int* or_comp_pos ( Alignment *inA, Alignment *inS, char *mode,int print) -{ - Alignment *A=NULL, *S=NULL, *inS2=NULL; - int a, b, c; - int *main_pos, *pos=NULL; - - set_sar (inA, inS, mode); - main_pos=vcalloc ( inA->len_aln, sizeof (int)); - - inS2=copy_aln (inS, NULL); - inS2->len_aln=1; - - - - //Run every SAR, one at a time - for ( c=0; c< inS->len_aln; c++) - { - int max, p; - - fprintf ( stdout, ">%d\n", c); - for (a=0; a< inS->nseq; a++) - { - inS2->seq_al[a][0]=inS->seq_al[a][c]; - inS2->seq_al[a][1]='\0'; - } - - vfree (pos); - free_aln (S); - free_aln (A); - - pos=vcalloc (inA->len_aln, sizeof (int)); - A=copy_aln (inA, NULL); - S=copy_aln (inS2, NULL); - set_sar (A,S, mode); - pos=aln2predictive_positions (A, S, mode,PRINT); - - for (max=0,b=0; blen_aln; b++) - { - main_pos[b]+=pos[b]; - if (main_pos[b]>max) - { - max=main_pos[b]; - p=b+1; - } - } - - - for (a=0; anseq; a++) - { - fprintf ( stdout, "\t"); - for ( b=0; blen_aln; b++) - if ( pos[b]) fprintf ( stdout, "%c", A->seq_al[a][b]); - fprintf ( stdout, " %c\n", inS2->seq_al[a][0]); - } - fprintf ( stdout, "\n\tBest: %d %d\n", p, max); - - } - - if (print==PRINT) - { - for ( a=0; alen_aln; a++)fprintf ( stdout, "\nP2: cons %4d %4d [FINAL]", a+1, main_pos[a]); - } - return main_pos; -} -Alignment * or_comp_loo ( Alignment *inA, Alignment *inS, char *mode, int *pos,int print) -{ - int a, b,c, n; - char **keep, **remove; - Alignment *A, *S, *P, *P1, *SEQ, *inS2; - int **main_pos, *compound_pos, **comp_list; - int pos_exists=0; - char *comp_pred, *comp_exp; - int sar_threshold; - - strget_param (mode, "_SARTHRES_", "3", "%d", &sar_threshold); - - if (pos)pos_exists=1; - set_sar (inA, inS, mode); - P=copy_aln (inS, NULL); - keep=declare_char (inA->nseq, MAXNAMES); - remove=declare_char (inA->nseq, MAXNAMES); - - main_pos=declare_int ( inA->len_aln,4); - comp_list=declare_int (inA->len_aln, sizeof (int*)); - inS2=copy_aln (inS, NULL); - inS2->len_aln=1; - - - comp_pred=vcalloc ( inA->nseq+1, sizeof (int)); - comp_exp=vcalloc ( inA->nseq+1, sizeof (int)); - compound_pos=NULL; - //Run every SAR, one at a time - for ( c=0; c< inS->len_aln; c++) - { - for (a=0; a< inS->nseq; a++) - { - inS2->seq_al[a][0]=inS->seq_al[a][c]; - inS2->seq_al[a][1]='\0'; - } - vfree (compound_pos); - compound_pos=vcalloc (inA->len_aln, sizeof (int)); - for (a=0; anseq; a++) - { - char ***motifs; - - A=copy_aln (inA, NULL); - S=copy_aln (inS2, NULL); - - for (n=0,b=0; bnseq; b++) - { - if (b!=a)sprintf (keep[n++ ], "%s", A->name [b]); - } - sprintf ( remove[0], "%s", A->name[a]); - reorder_aln (A,keep, A->nseq-1); - - set_sar (A,S, mode); - if (!pos_exists) - { - pos=aln2predictive_positions (A, S, mode,NO_PRINT); - } - for (b=0; blen_aln; b++) - { - compound_pos[b]+=pos[b]; - } - - motifs=compounds2motifs (A, S, pos,0, mode, NO_PRINT); - - SEQ=copy_aln (inA, NULL); - SEQ=reorder_aln (SEQ, remove, 1); - - P1=aln2prediction (SEQ, motifs, pos); - comp_pred[a]=P1->seq_al[0][0]; - comp_exp[a]=inS2->seq_al[a][0]; - P->seq_al[a][c]=P1->seq_al[0][0]; - - - free_aln (SEQ); - free_aln (S); - free_aln (A); - free_aln (P1); - free_arrayN( (void *)motifs, 3); - if (!pos_exists)vfree (pos); - } - if (print==PRINT) - fprintf ( stdout, ">%-15s SC: %.2f E; %.2f\n%s\n%s\n", get_compound_name(c, mode),profile2sensitivity (comp_pred, comp_exp, NULL, NULL, NULL, NULL),profile2evalue(comp_pred, comp_exp),comp_pred, comp_exp); - for (b=0; blen_aln; b++) - { - main_pos[b][2]+=compound_pos[b]; - if (compound_pos[b])main_pos[b][3]++; - if (profile2evalue(comp_pred, comp_exp)>sar_threshold) - { - main_pos[b][0]+=compound_pos[b]; - if (compound_pos[b]) - { - main_pos[b][1]++; - comp_list[b][0]++; - comp_list[b]=vrealloc (comp_list[b], sizeof (int)*(comp_list[b][0]+1)); - comp_list[b][comp_list[b][0]]=c; - } - } - } - - } - - P->score_aln=(int)((float)1000*evaluate_prediction (P, inS, mode,print)); - - if (print==PRINT) - { - for ( a=0; alen_aln; a++)fprintf ( stdout, "\nP: cons %4d RS: %4d RC: %5d FC: %4d %4d[FINAL]", a+1, main_pos[a][2], main_pos[a][3], main_pos[a][0], main_pos[a][1]); - - for ( a=0; alen_aln; a++) - { - fprintf ( stdout, "\nP: cons %4d RS: %4d RC: %5d FC: %4d %4d CLIST: ", a+1, main_pos[a][2], main_pos[a][3], main_pos[a][0], main_pos[a][1]); - for ( c=1; c<=comp_list[a][0]; c++) - { - fprintf ( stdout, "%s ", get_compound_name(comp_list[a][c], mode)); - } - fprintf ( stdout, " [COMP_LIST]"); - } - } - free_int (main_pos, -1); - free_int (comp_list, -1); - return P; -} - -ORP* or_loo ( Alignment *inA, Alignment *inS, char *mode, int *pos,int print) -{ - int a, b, n; - char **keep, **remove; - Alignment *A, *S, *P, *P1, *SEQ; - - int pos_exists=0; - ORP *PR; - - - - if (pos)pos_exists=1; - set_sar (inA, inS, mode); - PR=declare_or_prediction (inS->nseq, inA->nseq, inA->len_aln); - sprintf (PR->mode, "loo "); - P=copy_aln (inS, NULL); - - - keep=declare_char (inA->nseq, MAXNAMES); - remove=declare_char (inA->nseq, MAXNAMES); - - - PR->A=inA; - PR->P=P; - PR->S=inS; - - for (a=0; anseq; a++) - { - char ***motifs; - - A=copy_aln (inA, NULL); - S=copy_aln (inS, NULL); - - for (n=0,b=0; bnseq; b++) - { - if (b!=a)sprintf (keep[n++ ], "%s", A->name [b]); - } - sprintf ( remove[0], "%s", A->name[a]); - reorder_aln (A,keep, A->nseq-1); - - set_sar (A,S, mode); - - if (!pos_exists) - { - pos=aln2predictive_positions (A, S, mode,print); - - } - - for (b=0; blen_aln; b++) - { - PR->pos[b]+=pos[b]; - } - - motifs=compounds2motifs (A, S, pos,0, mode, print); - - SEQ=copy_aln (inA, NULL); - SEQ=reorder_aln (SEQ, remove, 1); - SEQ->nseq=1; - - P1=aln2prediction (SEQ, motifs, pos); - - - if (print==PRINT) - { - fprintf ( stdout, "\n%s\nPred: %s\nReal: %s\n", P1->name[0], P1->seq_al[0], inS->seq_al[a]); - } - sprintf ( P->seq_al[a], "%s", P1->seq_al[0]); - free_aln (P1); - - - free_aln (SEQ); - free_aln (S); - free_aln (A); - - free_arrayN( (void *)motifs, 3); - if (!pos_exists)vfree (pos); - - } - free_char (keep, -1); - free_char (remove, -1); - - - PR=new_evaluate_prediction (PR, mode,print); - - - PR->PR=or_self_predict(inA, inS, mode,NULL, print); - - if (print==PRINT)for ( a=0; alen_aln; a++)fprintf ( stdout, "\nP: cons %d %d [FINAL]", a+1, PR->pos[a]); - - - - return PR; -} - - - -Alignment * or_jack(Alignment *inA, Alignment *inS, char *mode) -{ - int a,b; - int n_cycles=100; - int subnseq=10; - int subsar=0; - Alignment *A, *S; - int *main_pos,*pos; - char jrsar[10], jraln[10]; - - strget_param (mode, "_JNIT_", "100", "%d", &n_cycles); - strget_param (mode, "_JNSEQ_", "10", "%d", &subnseq); - strget_param (mode, "_JNAR_", "0", "%d", &subsar); - - strget_param (mode, "_JRSAR_", "NO", "%s", jrsar); - strget_param (mode, "_JRALN_", "NO", "%s", jraln); - - - - main_pos=vcalloc ( inA->len_aln, sizeof (int)); - for (a=0; alen_aln; b++)main_pos[b]+=pos[b]; - vfree (pos); - - } - display_pos (A, S, main_pos, mode); - - - return inA; -} - -Alignment * display_pos (Alignment *A, Alignment *S, int *pos,char *mode) -{ - Alignment *B; - int a, b; - int **index; - - int intl; - - intl=intlen (A->len_aln); - index=aln2pos_simple (A, A->nseq); - B=copy_aln (A,NULL); - B->len_aln=0; - for ( a=0; alen_aln; a++) - fprintf ( stdout, "\nP: cons %*d %*d S: %4d [DISPLAY_FULL_POS]", intl,a+1,intl, a+2, pos[a]); - fprintf ( stdout, "\n\n"); - for (a=0; alen_aln; a++) - { - if (pos[a]) - { - for ( b=0; bnseq; b++) - { - B->seq_al[b][B->len_aln]=A->seq_al[b][a]; - if (index[b][a]>0)fprintf ( stdout, "\nP: %s %d %d S: %d [DISPLAY_POS]",A->name[b], index[b][a], index[b][a]+1, pos[a]); - } - B->len_aln++; - fprintf ( stdout, "\nP: cons %d %d S: %d [DISPLAY_POS]", a+1, a+2, pos[a]); - } - } - fprintf ( stdout, "\n"); - for (a=0; anseq; a++)B->seq_al[a][B->len_aln]='\0'; - return B; -} -Alignment * or_aln2pos_aln (Alignment *A, Alignment *S, char *mode) -{ - Alignment *B; - - int *pos; - char outaln[100], outtree[100]; - - - strget_param (mode, "_OUTALN_", "NO", "%s", outaln); - strget_param (mode, "_OUTTREE_", "NO", "%s", outtree); - - set_sar (A, S, mode); - pos=aln2predictive_positions (A, S,mode, PRINT); - - B=display_pos (A, S, pos, mode); - - - if (!strm(outaln, "NO")) vfclose (output_aln (B, vfopen (outaln, "w"))); - if (!strm(outtree, "NO"))vfclose (print_tree (aln2tree(B), "newick", vfopen (outtree, "w"))); - - return B; -} -Alignment * or_sim(Alignment *A, Alignment *S, char *mode) -{ - //Predict all the sequences that are not both in inS and inA - int *pos; - - set_sar (A, S, mode); - pos=aln2predictive_positions (A, S,mode, PRINT); - fprintf ( stdout, "R: %.3f", pos2sim (A,S, pos)); - - exit (EXIT_SUCCESS); - return A; -} -ORP* or_self_predict(Alignment *A, Alignment *S, char *mode,int *pos, int print) -{ - //Predict all the sequences that are not both in inS and inA - Alignment *P; - char ***motifs; - - - int a; - - int pre_set_pos=0; - ORP *PR; - - - set_sar (A, S, mode); - PR=declare_or_prediction (S->nseq, A->nseq, A->len_aln); - sprintf (PR->mode, "self"); - PR->A=A; - PR->S=S; - - if (!pos) - { - pos=aln2predictive_positions (A, S,mode,print); - pre_set_pos=0; - } - else - pre_set_pos=1; - - for (a=0; a< A->len_aln; a++) - PR->pos[a]=pos[a]; - - - PR->motif=motifs=compounds2motifs (A, S, pos,0, mode, print); - P=PR->P=aln2prediction (A, motifs, pos); - - if (!pre_set_pos)vfree (pos); - - PR=new_evaluate_prediction (PR, mode,print); - return PR; -} - - -Alignment * or_predict(Alignment *inA, Alignment *inS, char *mode) -{ - //Predict all the sequences that are not both in inS and inA - Alignment *P, *A, *S, *T; - char ***motifs; - int *pos; - - int a, b; - - - - - - - A=copy_aln (inA, NULL); - S=copy_aln (inS, NULL); - set_sar (A, S, mode); - - pos=aln2predictive_positions (A, S,mode,PRINT); - motifs=compounds2motifs (A, S, pos,0, mode, PRINT); - T=get_prediction_target (inA, inS, mode); - - - P=aln2prediction (T, motifs, pos); - //recall=evaluate_prediction (S, P, mode); - for ( a=0; alen_aln; a++) - { - for (b=0; bnseq; b++) - { - if (tolower(P->seq_al[b][a])=='i')fprintf (stdout, "\n>%20s %20s %c", T->name [0],get_compound_name (a, mode), P->seq_al[b][a]); - } - } - fprintf ( stdout, "\n"); - return P; -} - -Alignment *get_prediction_target (Alignment *A, Alignment *S, char *param) -{ - char **name; - int n, a; - Alignment *T; - - T=copy_aln (A, NULL); - name=declare_char (A->nseq, 100); - for (n=0,a=0; a< A->nseq; a++) - { - if ( name_is_in_list (A->name[a], S->name, S->nseq, 100)==-1) - { - sprintf (name[n++], "%s", A->name[a]); - } - } - T=reorder_aln (T,name, n); - return T; -} - -Alignment *set_sar (Alignment *A, Alignment *S, char *param) -{ - char **name; - int n, a; - - name=declare_char (A->nseq, 100); - for (n=0,a=0; a< A->nseq; a++) - { - if ( name_is_in_list (A->name[a], S->name, S->nseq, 100)!=-1) - { - sprintf (name[n++], "%s", A->name[a]); - } - } - A=reorder_aln (A,name, n); - S=reorder_aln (S,name, n); - free_char (name, -1); - return S; -} - -ORP* new_evaluate_prediction (ORP *PR, char *mode, int print) -{ - int a,b, i, r, p; - int tp, tn, fp, fn; - float sn, sp, sen2, best; - float tot_best_seq=0; - float tot_best_comp=0; - Alignment *P, *R; - - int ns=0; - float *recall; - - - P=PR->P; - R=PR->S; - - recall=vcalloc (P->len_aln, sizeof (float)); - if (P->len_aln!=R->len_aln) - { - HERE ("Mismatch between number of compounds in prediction and reference"); - exit (0); - } - if (print==PRINT)fprintf ( stdout, "\n"); - - for (a=0; anseq; a++) - { - tp=tn=fp=fn=0; - if ((i=name_is_in_list (P->name[a], R->name, R->nseq, 100))!=-1) - { - - for (b=0;blen_aln; b++) - { - r=R->seq_al[i][b]; - p=P->seq_al[a][b]; - - if ( p=='I' && r=='I')tp++; - else if ( p=='I' && r=='O')fp++; - else if ( p=='O' && r=='I')fn++; - else if ( p=='O' && r=='O')tn++; - } - rates2sensitivity (tp, tn, fp, fn, &sp, &sn, &sen2, &best); - if (print==PRINT)fprintf (stdout, ">%-s sp: %.2f sn: %.2f sn2: %.2f best: %.2f [SEQ]\n",P->name[a], sp, sn, sen2, best); - if ( best>0) - { - ns++; - tot_best_seq+=best; - } - } - } - if (ns) - { - tot_best_seq/=ns; - } - if (print==PRINT)fprintf ( stdout, ">TotSeq sp: %.2f N: %d[SEQ]\n",tot_best_seq, ns); - - tot_best_comp=0; - for (ns=0,b=0; blen_aln; b++) - { - tp=tn=fp=fn=0; - for (a=0; anseq;a++) - { - if ((i=name_is_in_list (P->name[a], R->name, R->nseq, 100))!=-1) - { - r=R->seq_al[i][b]; - p=P->seq_al[a][b]; - - if ( p=='I' && r=='I'){PR->tp++;tp++;} - else if ( p=='I' && r=='O'){PR->fp++;fp++;} - else if ( p=='O' && r=='I'){PR->fn++;fn++;} - else if ( p=='O' && r=='O'){PR->tn++;tn++;} - } - } - rates2sensitivity (tp, tn, fp, fn, &sp, &sn, &sen2, &best); - - if (print==PRINT) fprintf (stdout, ">%-25s sp: %.2f sn: %.2f sen2: %.2f best: %.2f [COMP]\n",get_compound_name (b, mode), PR->sp, PR->sn, PR->sen2,PR->best); - if ( best>0) - { - ns++; - tot_best_comp+=best; - } - } - - if (ns) - { - tot_best_comp/=ns; - } - rates2sensitivity (PR->tp, PR->tn, PR->fp,PR->fn,&(PR->sp), &(PR->sn), &(PR->sen2), &(PR->best)); - if (print==PRINT)fprintf ( stdout, ">FullTot sp: %.2f sn: %.2f sen2: %.2f best: %.2f N: %d[COMP]\n", PR->sp, PR->sn, PR->sen2,PR->best, ns); - P->score_aln=(int)((float)1000*(PR->best)); - return PR; -} -float evaluate_prediction (Alignment *R, Alignment *P, char *mode, int print) -{ - int a,b, i, r, p; - int tp, tn, fp, fn; - int tot_tp, tot_tn, tot_fp, tot_fn; - float sn, sp, sen2, best; - float tot_sp=0; - float tot_sn=0; - float tot_sen2=0; - float tot_best_seq=0; - float tot_best_comp=0; - float tot_best=0; - - int ns=0; - float *recall; - - - - - recall=vcalloc (P->len_aln, sizeof (float)); - if (P->len_aln!=R->len_aln) - { - HERE ("Mismatch between number of compounds in prediction and reference"); - exit (0); - } - if (print==PRINT)fprintf ( stdout, "\n"); - for (a=0; anseq; a++) - { - tp=tn=fp=fn=0; - if ((i=name_is_in_list (P->name[a], R->name, R->nseq, 100))!=-1) - { - - for (b=0;blen_aln; b++) - { - r=R->seq_al[i][b]; - p=P->seq_al[a][b]; - - if ( p=='I' && r=='I')tp++; - else if ( p=='I' && r=='O')fp++; - else if ( p=='O' && r=='I')fn++; - else if ( p=='O' && r=='O')tn++; - } - rates2sensitivity (tp, tn, fp, fn, &sp, &sn, &sen2, &best); - if (print==PRINT)fprintf (stdout, ">%-s sp: %.2f sn: %.2f sn2: %.2f best: %.2f [SEQ]\n",P->name[a], sp, sn, sen2, best); - if ( best>0) - { - ns++; - tot_best_seq+=best; - tot_sn+=sn; - tot_sp+=sp; - tot_sen2+=sen2; - } - } - } - if (ns) - { - tot_best_seq/=ns; - tot_sn/=ns; - tot_sp/=ns; - tot_sen2/=ns; - } - if (print==PRINT)fprintf ( stdout, ">Tot sp: %.2f sn: %.2f sen2: %.2f best: %.2f N: %d[SEQ]\n", tot_sp, tot_sn, tot_sen2,tot_best_seq, ns); - - tot_fp=tot_fn=tot_tp=tot_tn=0; - tot_sp=tot_sn=tot_sen2=tot_best_comp=0; - for (ns=0,b=0; blen_aln; b++) - { - tp=tn=fp=fn=0; - for (a=0; anseq;a++) - { - if ((i=name_is_in_list (P->name[a], R->name, R->nseq, 100))!=-1) - { - r=R->seq_al[i][b]; - p=P->seq_al[a][b]; - - if ( p=='I' && r=='I'){tot_tp++;tp++;} - else if ( p=='I' && r=='O'){tot_fp++;fp++;} - else if ( p=='O' && r=='I'){tot_fn++;fn++;} - else if ( p=='O' && r=='O'){tot_tn++;tn++;} - } - } - rates2sensitivity (tp, tn, fp, fn, &sp, &sn, &sen2, &best); - - if (print==PRINT) fprintf (stdout, ">%-25s sp: %.2f sn: %.2f sen2: %.2f best: %.2f [COMP]\n",get_compound_name (b, mode), sp, sn, sen2,best); - recall[b]=sen2; - if ( best>0) - { - ns++; - tot_best_comp+=best; - tot_sn+=sn; - tot_sp+=sp; - tot_sen2+=sen2; - } - } - - if (ns) - { - tot_best_comp/=ns; - tot_sn/=ns; - tot_sp/=ns; - tot_sen2/=ns; - } - rates2sensitivity (tot_tp, tot_tn, tot_fp,tot_fn,&tot_sp, &tot_sn, &tot_sen2, &tot_best); - if (print==PRINT)fprintf ( stdout, ">FullTot sp: %.2f sn: %.2f sen2: %.2f best: %.2f N: %d[COMP]\n", tot_sp, tot_sn, tot_sen2,tot_best, ns); - return tot_best; -} - - - -Alignment * aln2prediction (Alignment *A,char ***motif, int *pos) -{ - int a, b,nc, nl; - int *list; - char **array, **sar; - Alignment *R; - Sequence *S; - nc=read_array_size ((void *)motif, sizeof (char***)); - - - list=pos2list (pos, A->len_aln, &nl); - - - array=declare_char (A->nseq, nl+1); - sar=declare_char(A->nseq, nc+1); - for (a=0; anseq; a++) - { - for (b=0; bseq_al[a][list[b]]; - } - - for (a=0; anseq; b++) - { - - sar[b][a]=(match_motif (array[b], motif[a]))?'I':'O'; - } - } - - - S=fill_sequence_struc (A->nseq,sar,A->name); - R=seq2aln (S, NULL, KEEP_GAP); - free_sequence (S, S->nseq); - free_char (sar, -1); - vfree (list); - free_char (array, -1); - return R; -} - -int * file2pos_list (Alignment *A, char *posfile) -{ - char ***file; - int **index; - int *pos; - int i, n, p; - - //pos_file: - // 1 2 3 4 - - - if ( !check_file_exists (posfile)) - { - printf_exit ( EXIT_FAILURE, stderr, "ERROR: Could not read posfile %s\n", posfile); - } - - file=file2list (posfile, " "); - - index=aln2inv_pos (A); - pos=vcalloc ( A->len_aln, sizeof (int)); - - n=0; - while (file[n]) - { - - if ( !strm (file[n][1], "P:")); - else - { - if ( (strm (file[n][2], "cons"))) - p=atoi(file[n][3])-1; - else - { - i=name_is_in_list ( file[n][2], A->name, A->nseq, MAXNAMES+1); - if (i!=-1) - p=index[i][atoi(file[n][3])]-1; - else p=-1; - } - if (p!=-1)pos[p]+=atoi(file[n][4]); - } - n++; - } - - - free_int (index, -1); - free_arrayN ( (char **)file, 3); - return pos; -} -int * aln2predictive_positions (Alignment *A, Alignment *B, char *mode, int print) -{ - char posmode[100]; - - if (!mode) return NULL; - HERE ("%s", mode); - strget_param (mode, "_POSMODE_", "scan", "%s", posmode); - if ( strm (posmode, "mat"))return aln2predictive_positions_mat (A, B, mode, print); - else if ( strm (posmode, "scan")) return aln2predictive_positions_scan (A, B, mode, print); - else - { - printf_exit (EXIT_FAILURE,stderr, "ERROR: %s is an unknown _POSMODE_ mode",posmode); - return NULL; - } -} - -int * aln2predictive_positions_mat (Alignment *A, Alignment *B, char *mode, int print) - { - int a, b, c,gap, res1, res2, sar1, sar2, npos, s, idscore; - float id1,id2,id3,nid1,nid2,nid3; - int **pos, *fpos; - pos=declare_int (A->len_aln,2); - fpos=vcalloc ( A->len_aln, sizeof (int)); - - strget_param (mode, "_NPOS_", "2", "%d", &npos); - for ( a=0; a< A->len_aln; a++) - { - pos[a][0]=a; - id1=id2=id3=nid1=nid2=nid3=0; - for ( gap=0,b=0; bnseq; b++)gap+=(A->seq_al[b][a]=='-'); - if ( gap>0){pos[a][1]=0;continue;} - - for (s=0; slen_aln; s++) - { - for ( gap=0,b=0; bnseq-1; b++) - { - sar1=B->seq_al[b][s]; - res1=A->seq_al[b][a]; - - for ( c=b+1; cnseq; c++) - { - sar2=B->seq_al[c][s]; - res2=A->seq_al[c][a]; - - idscore=(res1==res2)?1:0; - if ( sar1 == 'I' && sar2=='I'){id1+=idscore;nid1++;} - else if ( sar1 =='0' && sar2=='0'){id2+=idscore;nid2++;} - else {id3+=idscore; nid3++;} - - } - } - id1=(nid1==0)?1:id1/nid1; - id2=(nid1==0)?1:id2/nid2; - id3=(nid3==0)?1:id3/nid3; - pos[a][1]=(int)((float)1000*id1*(1-id3)); - - } - } - - sort_int (pos, 2,1, 0, A->len_aln-1); - for ( a=MAX(0,(A->len_aln-npos));alen_aln; a++) - { - fpos[pos[a][0]]=1; - } - - free_int (pos, -1); - return fpos; - } -int * aln2predictive_positions_scan (Alignment *A, Alignment *B, char *mode, int print) -{ - int a, b, c, best_pos,nl, nplist=0, max, posw; - float best_score, score; - static int *list, *tpos,**plist,*array; - int *pos; - - - char posfile[100]; - char predmode[100]; - char target_posfile[100]; - - - - if (!A) - { - vfree (list); - vfree (tpos); - - free_int (plist, -1); - vfree (array); - return NULL; - } - - strget_param (mode, "_PREDMODE_", "ID", "%s", predmode); - strget_param (mode, "_POSW_", "1", "%d", &posw); - strget_param (mode, "_NPOS_", "2", "%d", &max); - strget_param (mode, "_POSFILE_", "NO", "%s", posfile); - strget_param (mode, "_TPOSFILE_", "NO", "%s", target_posfile); - - if ( !strm(posfile, "NO"))return file2pos_list (A,posfile); - if ( !strm(target_posfile, "NO"))tpos=file2pos_list (A,target_posfile); - else - { - tpos=vcalloc (A->len_aln, sizeof (int)); - for (a=0; alen_aln; a++)tpos[a]=1; - } - - //Declare the positions that are going to be scanned - - - if (posw==1) - { - plist=declare_int (A->len_aln, 2); - nplist=0; - for (a=0; alen_aln; a++) - { - if(tpos[a]) - { - plist[nplist][0]=1; - plist[nplist][1]=a; - nplist++; - } - } - } - else if ( posw==2) - { - nplist=0; - plist=declare_int (A->len_aln*A->len_aln, 3); - for (a=0; alen_aln; a++) - for (b=0; blen_aln; b++) - { - plist[nplist][1]=a; - plist[nplist][2]=b; - plist[nplist][0]=2; - nplist++; - } - } - else if ( posw==3) - { - nplist=0; - plist=declare_int (A->len_aln*A->len_aln*A->len_aln, 3); - for (a=0; alen_aln; a++) - for (b=0; blen_aln; b++) - { - plist[nplist][1]=a; - plist[nplist][2]=b; - plist[nplist][3]=c; - - - plist[nplist][0]=3; - nplist++; - } - } - - - pos=vcalloc ( A->len_aln, sizeof (int)); - if (max==0)max=A->len_aln; - else if ( max==-1) - { - for (a=0; alen_aln; a++)if (tpos[a]){pos[a]=1;} - aln2predictive_positions_scan (NULL, NULL, NULL, 0); - return pos; - } - - - - pos=vcalloc ( A->len_aln, sizeof (int)); - list=vcalloc (A->len_aln, sizeof (int)); - nl=0; - - - - for (a=0; a< max; a++) - { - int previous_best_pos=-1; - for (best_score=-9999,best_pos=0,b=0; bbest_score) - { - best_score=score; - best_pos=b; - } - for (c=1; c<=plist[b][0]; c++)pos[plist[b][c]]=0; - - } - if (best_pos==previous_best_pos)break; - else previous_best_pos=best_pos; - - //update the best_pos_list - for (b=1; b<=plist[best_pos][0]; b++) - list[nl++]=plist[best_pos][b]; - - - if ( print==PRINT) - { - for (b=0; blen_aln, sizeof (char**)); - for (a=0; alen_aln; a++) - { - - motifs[a]=compound2motif (A, B, pos, depth, a, mode, print); - } - - return motifs; -} -char ** compound2regexp_motif (Alignment *A, Alignment *B, int *pos, int depth, int c, char *mode, int print); -char ** compound2word_motif (Alignment *A, Alignment *B, int *pos, int depth, int c, char *mode, int print); - -char ** compound2motif (Alignment *A, Alignment *B, int *pos, int depth, int c, char *mode, int print) -{ - char mmode[100]; - - strget_param (mode, "_MOTIFMODE_", "word", "%s", mmode); //words, regexp - if ( strm (mmode, "regexp"))return compound2regexp_motif (A,B,pos, depth, c, mode, print); - else if ( strm (mmode, "word"))return compound2word_motif (A,B,pos, depth, c, mode, print); - else return NULL;} -char ** compound2word_motif (Alignment *A, Alignment *B, int *pos, int depth, int c, char *mode, int print) -{ - int a,l; - char *word, **motif; - float score; - - - word=or_id_evaluate2 (A, B, mode, pos,print, &score); - if ( !word) return NULL; - l=strlen (word); - - motif=declare_char (l+1, 2); - for (a=0; anseq=O->nseq=I->len_aln=O->len_aln=0; - for (a=0; alen_aln; a++) - { - if (pos[a]) - { - for (i=o=0,b=0; bnseq; b++) - { - if ( is_gap(A->seq_al[b][a]))return 0; - if (B->seq_al[b][c]=='I')I->seq_al[i++][I->len_aln]=A->seq_al[b][a]; - else O->seq_al[o++][O->len_aln]=A->seq_al[b][a]; - } - I->len_aln++; - O->len_aln++; - } - } - - if (O->len_aln==0 || I->len_aln==0) return 0; - O->nseq=o; - I->nseq=i; - for (a=0; aseq_al[a][O->len_aln]='\0'; - for (a=0; aseq_al[a][I->len_aln]='\0'; - - if (!I->nseq) return NULL; - - - - best_pred=best_motif=best_sn=best_sp=best_sen2=0; - - motif_file=vtmpnam (NULL); - - n=0; - if (depth>0) - { - alp=vcalloc ( sizeof (char**), I->len_aln); - alp_size= vcalloc ( I->len_aln, sizeof (int)); - for (a=0; alen_aln; a++) - { - char *col; - alp[a]=string2alphabet ( (col=aln_column2string (I,a)),depth, &alp_size[a]); - vfree (col); - } - generate_array_string_list (I->len_aln, alp, alp_size, &n, motif_file, OVERLAP); - } - else - { - int *used; - char r; - - used=vcalloc (256, sizeof (int)); - fpp=vfopen (motif_file,"w"); - for (a=0;alen_aln; a++) - { - for (b=0; bnseq; b++) - { - r=I->seq_al[b][a]; - if (!used[(int)r]){fprintf (fpp, "%c", r);used[(int)r]=1;} - } - for (b=0; bnseq; b++) - { - r=I->seq_al[b][a]; - used[(int)r]=0; - } - fprintf (fpp, " "); - } - fprintf (fpp, "\n"); - vfree (used); - vfclose (fpp); - - n=1; - depth=I->nseq; - } - - buf=vcalloc (2*(I->len_aln*depth)+1, sizeof (char)); - best_buf=vcalloc (2*(I->len_aln*depth)+1, sizeof (char)); - fpp=vfopen (motif_file, "r"); - - for (a=0; anseq; b++) - { - if (match_motif (I->seq_al[b], m2))tp++; - else fn++; - } - for (b=0; bnseq; b++) - { - if (match_motif (O->seq_al[b], m2))fp++; - else tn++; - } - rates2sensitivity (tp, tn, fp, fn, &sp, &sn, &sen2, &best); - - if (best>= best_pred) - { - best_pred=best; - best_sp=sp; - best_sen2=sen2; - best_sn=sn; - sprintf (best_buf, "%s", buf); - } - m2--; - free_char (m2, -1); - } - vfclose (fpp); - if (print==PRINT)fprintf ( stdout, "\nMotifCompound %25s sp: %.2f sn: %.2f sen2: %.2f best: %.2f motif: ", get_compound_name(c, mode), best_sp, best_sn, best_sen2, best_pred); - m2=string2list (best_buf); - m=declare_char (I->len_aln+1, depth+1); - - for (a=0; alen_aln; a++) - { - sprintf (m[a], "%s", m2[a+1]); - if (print==PRINT) fprintf ( stdout, "[%2s]",m[a]); - } - if (print==PRINT)fprintf ( stdout, " N-motifs %d", n); - free_char (m2, -1); - - if (alp)free_arrayN((void ***) alp, 3); - if (alp_size)vfree (alp_size); - vfree (buf); vfree(best_buf); - - return m; -} - -double pos2sim (Alignment *A, Alignment *B, int *pos) -{ - return sar_aln2r (A, B,pos, PRINT); -} -double sar_aln2r (Alignment *A, Alignment *B, int *pos, int print) -{ - int a, b, c, d,r1, r2, n, score, sim; - double *r, result; - static double **slist; - int declare=0; - static int **M; - - - - if (!M)M=read_matrice ("blosum62mt"); - if (!slist) - { - int maxslist; - maxslist=A->nseq*A->nseq*10; - slist=declare_double (maxslist, 2); - } - - if (pos==NULL) - { - - declare=1; - pos=vcalloc ( A->len_aln+1, sizeof (int)); - for (a=0; alen_aln; a++)pos[a]=1; - pos[a]=-1; - - } - - for (n=0,a=0; a< A->nseq-1; a++) - { - - for (b=a+1; bnseq; b++) - { - - - for (sim=d=0,c=0; clen_aln; c++) - { - - if (pos[c]==0)continue; - - r1=A->seq_al[a][c]; - r2=A->seq_al[b][c]; - if (is_gap(r1) || is_gap(r2))return 0; - - sim+=M[r1-'A'][r2-'A']*pos[c]; - d+=MAX((M[r1-'A'][r1-'A']),(M[r2-'A'][r2-'A'])); - } - sim=(d==0)?0:(100*sim)/d; - score=(int)get_sar_sim(B->seq_al[a], B->seq_al[b]); - slist[n][0]=(double)sim; - slist[n][1]=(double)score; - if (print==PRINT)fprintf ( stdout, "SIM: %d %d [%s %s]\n", sim, score, A->name[a], A->name[b]); - n++; - } - } - - r=return_r(slist, n); - for (a=0; alen_aln; c++) - { - - I->nseq=O->nseq=I->len_aln=O->len_aln=0; - for (a=0; alen_aln; a++) - { - if (pos[a]) - { - for (i=o=0,b=0; bnseq; b++) - { - if ( is_gap(A->seq_al[b][a]))return 0; - if (B->seq_al[b][c]=='I')I->seq_al[i++][I->len_aln]=A->seq_al[b][a]; - else O->seq_al[o++][O->len_aln]=A->seq_al[b][a]; - } - I->len_aln++; - O->len_aln++; - } - } - if (O->len_aln==0 || I->len_aln==0) return 0; - O->nseq=o; - I->nseq=i; - for (a=0; aseq_al[a][O->len_aln]='\0'; - for (a=0; aseq_al[a][I->len_aln]='\0'; - - delta+=aln2sim(I,"blosum62mt")-aln2sim(O, "blosum62mt"); - - } - - return delta; -} - -char * get_compound_name (int c, char *mode) -{ - static int isset; - static Alignment *S; - static char *lname; - - if (!isset) - { - char *comp_list; - isset=1; - lname=vcalloc (100, sizeof (char)); - - if (!mode); - else - { - strget_param (mode, "_COMPLIST_", "NO", "%s", comp_list=vcalloc (100, sizeof (char))); - if (strm(comp_list, "NO")); - else - { - S=main_read_aln (comp_list, NULL); - vfree (comp_list); - } - } - } - if (!S || c>=S->nseq)sprintf (lname, "%d", c); - else - { - sprintf (lname, "%s", S->name [c]); - } - return lname; -} -ORP * declare_or_prediction ( int ncomp, int nseq, int len) -{ - ORP *P; - P=vcalloc ( 1, sizeof (ORP)); - P->ncomp=ncomp; - P->nseq=nseq; - P->len=len; - P->PR=NULL; - - P->pos=vcalloc (len+1, sizeof (int)); - - return P; -} - -void free_orp_list ( ORP**P) -{ - int a=0; - while (P[a]) - { - free_orp(P[a++]); - } -} -void free_orp ( ORP*P) -{ - if (!P) return; - free_aln (P->A); - free_aln (P->S); - free_aln (P->P); - vfree (P->pos); - free_arrayN((void **)P->motif, 3); - if (P->PR)free_orp(P->PR); - vfree (P); -} - - - - - - - - - - - - - - - -/*********************************COPYRIGHT NOTICE**********************************/ -/*© Centro de Regulacio Genomica */ -/*and */ -/*Cedric Notredame */ -/*Tue Oct 27 10:12:26 WEST 2009. */ -/*All rights reserved.*/ -/*This file is part of T-COFFEE.*/ -/**/ -/* T-COFFEE is free software; you can redistribute it and/or modify*/ -/* it under the terms of the GNU General Public License as published by*/ -/* the Free Software Foundation; either version 2 of the License, or*/ -/* (at your option) any later version.*/ -/**/ -/* T-COFFEE is distributed in the hope that it will be useful,*/ -/* but WITHOUT ANY WARRANTY; without even the implied warranty of*/ -/* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the*/ -/* GNU General Public License for more details.*/ -/**/ -/* You should have received a copy of the GNU General Public License*/ -/* along with Foobar; if not, write to the Free Software*/ -/* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA*/ -/*............................................... |*/ -/* If you need some more information*/ -/* cedric.notredame@europe.com*/ -/*............................................... |*/ -/**/ -/**/ -/* */ -/*********************************COPYRIGHT NOTICE**********************************/ diff --git a/binaries/src/tcoffee/t_coffee_source/util_analyse_constraints_list.c b/binaries/src/tcoffee/t_coffee_source/util_analyse_constraints_list.c deleted file mode 100644 index f6e1b96..0000000 --- a/binaries/src/tcoffee/t_coffee_source/util_analyse_constraints_list.c +++ /dev/null @@ -1,115 +0,0 @@ -#include -#include -#include -#include -#include - -#include "io_lib_header.h" -#include "util_lib_header.h" -#include "define_header.h" - - -void print_list(Constraint_list *CL) - { - - fprintf ( stderr, "\nPRINT LIST"); - output_list ( CL, stderr); - } -void save_full_list (Constraint_list *CL, char*fname) - { - FILE *fp; - fp=vfopen ( fname, "w"); - fp=output_list ( CL, fp); - fclose ( fp); - } -FILE * output_list ( Constraint_list *CL, FILE *fp) - { - int a; - - fprintf ( fp, "\nPRINT LIST: %d Elements\n", CL->ne); - for ( a=0; ane; a++)fp=output_pair(CL, a, fp); - fprintf (fp, "\n"); - return fp; - } -FILE * output_pair (Constraint_list *CL,int p, FILE *fp) - { - int a; - fprintf (fp, "\n"); - for ( a=0; aentry_len; a++) - { - fprintf (fp, "%4d ", vread_clist(CL,p,a)); - } - return fp; - } -void print_pair (Constraint_list *CL,int p) - { - int a; - fprintf ( stderr, "\n"); - for ( a=0; aentry_len; a++) - { - fprintf ( stderr, "%d ", vread_clist(CL,p,a)); - } - fprintf ( stderr, "\n"); - } - -int** bin_list (Constraint_list *CL,int field, int Threshold) - { - int a, c; - int max; - int **bin_list; - CLIST_TYPE x; - - max=return_max_constraint_list (CL, CONS); - - bin_list=declare_int (max+1, 5); - for (c=0,a=0; a<(CL->ne); a++) - if ( vread_clist(CL,a,field)!=UNDEFINED && vread_clist(CL,a,field)>Threshold ) - { - x=vread_clist(CL,a,CONS); - bin_list[x][0]=x; - bin_list[x][1]++; - bin_list[x][2]+=vread_clist(CL, a, field); - c++; - } - - for ( a=0; a<= max; a++) - { - if (bin_list[a][0]>0) - { - bin_list[a][3]=bin_list[a][2]/bin_list[a][1]; - bin_list[a][4]=(a==0)?0:(bin_list[a][3]/a); - } - } - - return bin_list; - } - -/*********************************COPYRIGHT NOTICE**********************************/ -/*© Centro de Regulacio Genomica */ -/*and */ -/*Cedric Notredame */ -/*Tue Oct 27 10:12:26 WEST 2009. */ -/*All rights reserved.*/ -/*This file is part of T-COFFEE.*/ -/**/ -/* T-COFFEE is free software; you can redistribute it and/or modify*/ -/* it under the terms of the GNU General Public License as published by*/ -/* the Free Software Foundation; either version 2 of the License, or*/ -/* (at your option) any later version.*/ -/**/ -/* T-COFFEE is distributed in the hope that it will be useful,*/ -/* but WITHOUT ANY WARRANTY; without even the implied warranty of*/ -/* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the*/ -/* GNU General Public License for more details.*/ -/**/ -/* You should have received a copy of the GNU General Public License*/ -/* along with Foobar; if not, write to the Free Software*/ -/* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA*/ -/*............................................... |*/ -/* If you need some more information*/ -/* cedric.notredame@europe.com*/ -/*............................................... |*/ -/**/ -/**/ -/* */ -/*********************************COPYRIGHT NOTICE**********************************/ diff --git a/binaries/src/tcoffee/t_coffee_source/util_constraints_list.c b/binaries/src/tcoffee/t_coffee_source/util_constraints_list.c deleted file mode 100644 index 0d24c42..0000000 --- a/binaries/src/tcoffee/t_coffee_source/util_constraints_list.c +++ /dev/null @@ -1,6739 +0,0 @@ -#include - -#include -#include -#include -#include -#include -#include - - -#include "io_lib_header.h" -#include "util_lib_header.h" -#include "define_header.h" -#include "dp_lib_header.h" -static int entry_len; -int compare_constraint_list_entry ( const void*vx, const void*vy); -int compare_constraint_list_entry4bsearch ( const void*vx, const void*vy); -/*********************************************************************/ -/* */ -/* PRODUCE IN LIST */ -/* */ -/* */ -/*********************************************************************/ -Constraint_list * make_test_lib(Constraint_list *CL); -Constraint_list *fork_line_produce_list ( Constraint_list *CL, Sequence *S, char * method,char *weight,char *mem_mode,Job_TC *job, FILE *loca_stderr); -Constraint_list *fork_cell_produce_list ( Constraint_list *CL, Sequence *S, char * method,char *weight,char *mem_mode,Job_TC *job, FILE *local_stderr); -Constraint_list *nfork_produce_list ( Constraint_list *CL, Sequence *S, char * method,char *weight,char *mem_mode,Job_TC *job, FILE *local_stderr); -Constraint_list *fork_subset_produce_list ( Constraint_list *CL, Sequence *S, char * method,char *weight,char *mem_mode, Job_TC *job, FILE *local_stderr); -int job2first_seq(Job_TC *job); -Constraint_list *produce_list ( Constraint_list *CL, Sequence *S, char * method,char *weight,char *mem_mode) -{ - Job_TC *job=NULL; - FILE *local_stderr; - int njob; - - store_seq_type (S->type); - if ( CL==NULL)CL=declare_constraint_list ( S,NULL, NULL, 0,(strm(mem_mode, "disk"))?vtmpfile():NULL, NULL); - local_stderr=(CL->local_stderr!=NULL)?CL->local_stderr:stderr; - - CL->local_stderr=vfopen("/dev/null", "w"); - job=queue2heap(method2job_list ( method,S,weight, CL->lib_list,CL->DM, CL)); - njob=queue2n(job)+1; - - if ( get_nproc()==1 || njob==1)return nfork_produce_list (CL, S, method, weight, mem_mode,job, local_stderr); - else if (strstr ( CL->multi_thread, "jobcells"))return fork_cell_produce_list (CL, S, method, weight, mem_mode,job,local_stderr); - else if (strstr ( CL->multi_thread, "joblines"))return fork_line_produce_list (CL, S, method, weight, mem_mode,job, local_stderr); - else if (strstr ( CL->multi_thread, "jobs"))return fork_subset_produce_list (CL, S, method, weight, mem_mode,job, local_stderr); //Recommended default - else return nfork_produce_list (CL, S, method, weight, mem_mode,job, local_stderr); -} -int job2first_seq(Job_TC *job) -{ - int *seqlist; - int r; - - if (!job) return -1; - else if ( !job->param)return -1; - else if ( !(job->param)->seq_c) return -1; - seqlist=string2num_list ((job->param)->seq_c); - if (seqlist[0]<2)r=-1; - else r=seqlist[2]; - vfree (seqlist); - return r; -} - -Constraint_list *fork_subset_produce_list ( Constraint_list *CL, Sequence *S, char * method,char *weight,char *mem_mode, Job_TC *job, FILE *local_stderr) -{ - //forks lines of the matrix - int a,b; - Job_TC *heap,*end,*start, ***jl; - TC_method *M; - int n_elements_in, n_new_elements; - char **pid_tmpfile; - int *pid_list; - int pid,npid, njob; - int nproc, max_nproc, submited; - int cseq, seq, nlines; - int n_aln; - - max_nproc=nproc=get_nproc(); - max_nproc*=2; - /*OUT_MODE: - A-> alignment provided in a file - L-> list provided in a file - aln-> alignment computed and provided in a file - list->list computed and provided in a file - */ - - - - - if ( job->jobid==-1) - { - M=(job->param)->TCM; - fprintf (local_stderr, "\n\tMethod %s: No Suitable Sequences [Type: %s]\n", method,M->seq_type); - return CL; - } - - job=queue2heap (job_list2multi_thread_job_list (job, CL->multi_thread, CL)); - heap=job; - njob=queue2n (job); - n_elements_in=CL->ne; - M=(job->param)->TCM; - if (M)M->PW_CL=method2pw_cl ( M, CL); - pid_tmpfile=vcalloc (MAX(njob,get_nproc())+1, sizeof (char*)); - pid_list =vcalloc (MAX_N_PID, sizeof (int *)); - - fprintf ( local_stderr, "\n\tMulti Core Mode: %d processors [subset]\n", get_nproc()); - - jl=split_job_list(job,get_nproc()); - a=npid=0; - - a=npid=0; - while (jl[a]) - { - - start=job=jl[a][0]; - end=jl[a][1]; - pid_tmpfile[a]=vtmpnam(NULL); - pid=vfork(); - - if (pid==0)//child process - { - FILE *fp; - int done, todo, t; - - initiate_vtmpnam(NULL); - fp=vfopen (pid_tmpfile[a],"a"); - todo=0; - while (job!=end){todo++;job=job->c;} - job=start; - - done=0; - while (job!=end) - { - if (a==0)output_completion ( local_stderr,done,todo,1, "Submit Job"); - job=print_lib_job (job, "io->CL=%p control->submitF=%p control->retrieveF=%p control->mode=%s",duplicate_constraint_list4lib_computation (CL),submit_lib_job, retrieve_lib_job, CL->multi_thread ); - job=submit_job (job); - retrieve_job (job); - t=((job->io)->CL)->ne*((job->io)->CL)->entry_len; - for (b=0; bio)->CL)->L[b]); - job=job->c; - done++; - } - vfclose (fp); - exit (EXIT_SUCCESS); - } - else - { - pid_list[pid]=npid; - set_pid(pid); - npid++; - a++; - } - } - - //wait for all processes to finish - for (a=0; ane - n_elements_in; - compact_list (CL, n_elements_in,n_new_elements, "best"); - compact_list (CL, 0, CL->ne, "default"); - CL->local_stderr=local_stderr; - free_queue (heap); - return CL; -} - - -Constraint_list *fork_line_produce_list ( Constraint_list *CL, Sequence *S, char * method,char *weight,char *mem_mode, Job_TC *job, FILE *local_stderr) -{ - //forks lines of the matrix - int a,b; - Job_TC *heap; - TC_method *M; - int n_elements_in, n_new_elements; - char **pid_tmpfile; - int *pid_list; - int pid,npid, njob; - int nproc, max_nproc, submited; - int cseq, seq, nlines; - - max_nproc=nproc=get_nproc(); - max_nproc*=2; - /*OUT_MODE: - A-> alignment provided in a file - L-> list provided in a file - aln-> alignment computed and provided in a file - list->list computed and provided in a file - */ - - - - - if ( job->jobid==-1) - { - M=(job->param)->TCM; - fprintf (local_stderr, "\n\tMethod %s: No Suitable Sequences [Type: %s]\n", method,M->seq_type); - return CL; - } - - - job=queue2heap (job_list2multi_thread_job_list (job, CL->multi_thread, CL)); - - heap=job; - M=(job->param)->TCM; - if (M)M->PW_CL=method2pw_cl ( M, CL); - n_elements_in=CL->ne; - - /* Cf. parse method for possible out_mode flags*/ - - njob=queue2n(job)+1; - pid_tmpfile=vcalloc (njob, sizeof (char*)); - - pid_list =vcalloc (MAX_N_PID, sizeof (int *)); - fprintf ( local_stderr, "\n\tMulti Core Mode: %d processors [jobline]\n", get_nproc()); - - - //count the number of lines - cseq=-1; - nlines=0; - while (job) - { - nlines++; - seq=job2first_seq(job); - if ( seq!=cseq) - { - cseq=seq; - while (job && cseq==job2first_seq(job))job=job->c; - } - } - job=heap; - - npid=submited=0; - cseq=-1; - while (job) - { - seq=job2first_seq(job); - if ( seq!=cseq) - { - cseq=seq; - pid_tmpfile[npid]=vtmpnam(NULL); - - pid=vfork(); - - if (pid==0)//Child Process - { - initiate_vtmpnam(NULL); - while (job && cseq==job2first_seq(job)) - { - job=print_lib_job (job, "io->CL=%p control->submitF=%p control->retrieveF=%p control->mode=%s",duplicate_constraint_list4lib_computation (CL),submit_lib_job, retrieve_lib_job, CL->multi_thread ); - job=submit_job (job); - retrieve_job (job); - constraint_list2raw_file ((job->io)->CL,pid_tmpfile[npid], "a"); - job=job->c; - } - exit (EXIT_SUCCESS); - } - else //parent process - { - - pid_list[pid]=npid; - set_pid (pid); - npid++; - submited++; - if (submited>max_nproc) - { - //wait for nproc - for (a=0; ac; - } - } - - for (a=0; ane - n_elements_in; - compact_list (CL, n_elements_in,n_new_elements, "best"); - compact_list (CL, 0, CL->ne, "default"); - CL->local_stderr=local_stderr; - - free_queue (heap); - - return CL; -} - -Constraint_list *fork_cell_produce_list ( Constraint_list *CL, Sequence *S, char * method,char *weight,char *mem_mode, Job_TC *job, FILE *local_stderr) - { - //forks cells of the matrix - int a,b, n; - Job_TC *heap; - TC_method *M; - int n_elements_in, n_new_elements; - int *pid_list; - char **pid_tmpfile; - int pid,npid, njob; - int nproc, max_nproc; - int submited; - - max_nproc=nproc=get_nproc(); - - /*OUT_MODE: - A-> alignment provided in a file - L-> list provided in a file - aln-> alignment computed and provided in a file - list->list computed and provided in a file - */ - - - if ( job->jobid==-1) - { - M=(job->param)->TCM; - fprintf (local_stderr, "\n\tMethod %s: No Suitable Sequences [Type: %s]\n", method,M->seq_type); - return CL; - } - - - job=queue2heap (job_list2multi_thread_job_list (job, CL->multi_thread, CL)); - - heap=job; - M=(job->param)->TCM; - if (M)M->PW_CL=method2pw_cl ( M, CL); - n_elements_in=CL->ne; - - /* Cf. parse method for possible out_mode flags*/ - - njob=queue2n(job)+1; - pid_tmpfile=vcalloc (njob, sizeof (char*)); - pid_list =vcalloc (MAX_N_PID, sizeof (int *)); - - fprintf ( local_stderr, "\n\tMulti Core Mode: %d processors:\n", get_nproc()); - npid=0; - submited=0; - while (job) - { - job=print_lib_job (job, "io->CL=%p control->submitF=%p control->retrieveF=%p control->mode=%s",duplicate_constraint_list4lib_computation (CL),submit_lib_job, retrieve_lib_job, CL->multi_thread ); - pid_tmpfile[npid]=vtmpnam(NULL); - pid=vfork (); - if ( pid==0) - { - initiate_vtmpnam (NULL); - job=submit_job (job); - retrieve_job (job); - constraint_list2raw_file ((job->io)->CL,pid_tmpfile[npid], "w"); - exit (EXIT_SUCCESS); - } - else - { - job=job->c; - pid_list[pid]=npid; - set_pid(pid); - npid++; - submited++; - - if (submited>max_nproc) - { - for (a=0; ane - n_elements_in; - compact_list (CL, n_elements_in,n_new_elements, "best"); - compact_list (CL, 0, CL->ne, "default"); - CL->local_stderr=local_stderr; - - free_queue (heap); - - return CL; - } -Constraint_list *nfork_produce_list ( Constraint_list *CL, Sequence *S, char * method,char *weight,char *mem_mode, Job_TC *job, FILE *local_stderr) - { - int b; - int n_aln; - Job_TC *heap; - TC_method *M; - int n_elements_in, n_new_elements; - - - /*OUT_MODE: - A-> alignment provided in a file - L-> list provided in a file - aln-> alignment computed and provided in a file - list->list computed and provided in a file - */ - - - if ( job->jobid==-1) - { - M=(job->param)->TCM; - fprintf (local_stderr, "\n\tMethod %s: No Suitable Sequences [Type: %s]\n", method,M->seq_type); - return CL; - } - - - job=queue2heap (job_list2multi_thread_job_list (job, CL->multi_thread, CL)); - - heap=job; - n_aln=queue2n (job); - M=(job->param)->TCM; - - if (M)M->PW_CL=method2pw_cl ( M, CL); - - - n_elements_in=CL->ne; - - - /* Cf. parse method for possible out_mode flags*/ - - - - b=0; - - while (job) - { - local_stderr=output_completion ( local_stderr, b, n_aln+1,1, "Submit Job"); - job=print_lib_job (job, "io->CL=%p control->submitF=%p control->retrieveF=%p control->mode=%s",duplicate_constraint_list4lib_computation (CL),submit_lib_job, retrieve_lib_job, CL->multi_thread ); - job=submit_job (job); - if (retrieve_job (job)) - { - CL=merge_constraint_list ((job->io)->CL, CL, "default"); - free_constraint_list4lib_computation ( (job->io)->CL); - } - job=job->c; - b++; - } - job=heap; - fprintf ( local_stderr, "\n"); - - while (job) job=delete_job (job); - - n_new_elements=CL->ne - n_elements_in; - compact_list (CL, n_elements_in,n_new_elements, "best"); - compact_list (CL, 0, CL->ne, "default"); - CL->local_stderr=local_stderr; - - free_queue (heap); - - return CL; - } - - -Job_TC *job_list2multi_thread_job_list (Job_TC* ojob, char *mt, Constraint_list *CL) -{ - FILE *fp=NULL; - int mtv, n, nj; - char *met_file, *seq_file, *lib_file, *lib_list; - char common[1000], command[1000], T_file[100]; - Job_TC *njob, *heap; - TC_method *TCM; - - - mtv=(mt==NULL)?0:atoi (mt); - if ( !mtv || mtv==1)return ojob; - - HERE ("***"); exit (0); - heap=ojob; - nj=(queue2n(ojob)/mtv)+1; - nj=(nj==0)?1:nj; - - met_file=vtmpnam (NULL); - TC_method2method_file ((ojob->param)->TCM,met_file=vtmpnam (NULL)); - - TCM=method_file2TC_method (method_name2method_file ("tc2")); - sprintf (T_file,"%s",(CL->S)->template_file); - - HERE ("2"); - - seq_file=vtmpnam (NULL); - sprintf ( common, "%s -in M%s S%s -lib_only ", TCM->executable, met_file, seq_file); - if ( T_file[0] && check_file_exists (T_file)) - { - strcat (common, " -template_file ");strcat (common, T_file); strcat ( common , " "); - } - - njob=vcalloc ( 1, sizeof (Job_TC)); - njob->jobid=-1; - n=0; - - HERE ("3"); - - while ( ojob && nc=print_lib_job (NULL, "param->TCM=%p param->method=%s param->aln_c=%s io->in=%s io->out=%s",TCM, "tc2", command,seq_file, lib_file); - - njob=queue_cat (njob, njob->c); - } - - fprintf ( fp, "%s\n", (ojob->param)->seq_c); - ojob=ojob->c; - if ( (++n)==nj){vfclose (fp);n=0;} - } - HERE ("4"); - if (fp && n)vfclose (fp); - free_queue (heap); - return njob; -} - - - - - - - -Job_TC *retrieve_lib_job ( Job_TC *job) -{ - Job_param_TC *p; - Job_io_TC *io; - TC_method *M; - - p=job->param; - io=job->io; - M=(job->param)->TCM; - - - if ( job->status==EXIT_SUCCESS) - { - static char *log_output; - static int log; - - if ( !M) return job; - else if (strm2(M->out_mode, "aln", "A")) - { - io->CL=read_constraint_list (io->CL,io->out,"aln","disk",M->weight); - } - else if (strm2(M->out_mode, "lib","L")) - { - io->CL=read_constraint_list (io->CL,io->out,"lib","disk",M->weight); - } - return job; - } - else - return NULL; -} - -int add_method_output2method_log (char *l,char *command,Alignment *A, Constraint_list *CL, char *io_file) -{ - static int header; - static int log; - static char *file, *log_file; - static int set; - - if ( set && log_file==NULL && l==NULL) return 0; - if (!set ){log_file=get_string_variable ("method_log");if (log_file && strm (log_file, "no"))log_file=NULL; set=1;} - if (!file)file=vtmpnam (NULL); - - if ( l); - else if (!l && log_file) l=log_file; - else return 0; - - - if (!header){printf_file ( l, "w", "# CONC_MSF_FORMAT_01\n");header=1;} - if (command)printf_file (l, "a", "%s\n#----------------------------------------------\n#%s\n", TC_REC_SEPARATOR,command); - - - if ( A) - { - - io_file=file; - output_fasta_aln (io_file, A); - } - else if (CL) - { - io_file=file; - vfclose (save_constraint_list ( CL, 0, CL->ne,io_file, NULL, "ascii",CL->S)); - } - else - file_cat (io_file,l); - - - return 1; -} - - - -Job_TC *submit_lib_job ( Job_TC *job) -{ - Job_param_TC *p; - Job_io_TC *io; - TC_method *M; - static char *l; - static int log; - - p=job->param; - io=job->io; - M=(job->param)->TCM; - add_method_output2method_log (NULL, p->aln_c, NULL, NULL, NULL); - if ( getenv4debug ("DEBUG_LIBRARY"))fprintf ( stderr, "\n[DEBUG_LIBRARY:produce_list] Instruction: %s\n", p->aln_c); - - if ( !M) - { - return job; - } - else if (strm4 (M->out_mode,"A", "L", "aln", "lib")) - { - - seq_list2in_file ( M, (io->CL)->S, p->seq_c, io->in); - - my_system (p->aln_c);/*Multi Threaded jobs must be sent here*/ - - add_method_output2method_log (NULL,NULL, NULL, NULL, io->out); - - if (!evaluate_sys_call_io (io->out,p->aln_c, "") || (strm (M->out_mode, "aln") && !(is_aln (io->out) || is_seq(io->out))) ) - { - add_warning (stderr, "\nFAILED TO EXECUTE:%s [SERIOUS:%s]", p->aln_c, PROGRAM); - job->status=EXIT_FAILURE; - //myexit (EXIT_FAILURE); - return job; - } - } - else if ( strm2 (M->out_mode, "fA", "fL")) - { - io->CL= seq2list(job); - if (!io->CL) - { - add_warning (stderr, "\nFAILED TO EXECUTE:%s [SERIOUS:%s]", p->aln_c, PROGRAM); - job->status=EXIT_FAILURE; - } - } - else - { - fprintf ( stderr, "\nERROR: Unknown out_mode=%s for method[FATAL:%s]\n", M->out_mode, M->executable); - myexit (EXIT_FAILURE); - } - -return job; -} - - - -Job_TC* method2job_list ( char *method_name,Sequence *S, char *weight, char *lib_list, Distance_matrix *DM, Constraint_list *CL) - { - int preset_method; - static char *fname, *bufS, *bufA; - char *in,*out; - TC_method *method; - char aln_mode[100]; - char out_mode[100]; - Job_TC *job; - int hijack_P_jobs=1; - - /*A method can be: - 1- a pre computed alignment out_mode=A - 2- a precomputed Library out_mode=L - 3- a method producing an alignment out_mode=aln - 4- a method producing an alignment out_mode=list - 5- a function producing an alignment out_mode=faln - 6- a function producing a library out_mode=flist - */ - - if ( !fname) - { - fname=vcalloc ( 1000, sizeof (char)); - bufS=vcalloc ( S->nseq*10, sizeof (char)); - } - - /*Make sure that fname is a method*/ - - - sprintf(fname, "%s", method_name); - - if ( fname[0]=='A' || fname[0]=='L') - { - method=method_file2TC_method("no_method"); - sprintf ( method->out_mode, "%c", fname[0]); - - if (!strm (weight, "default"))sprintf ( method->weight, "%s", weight); - - return print_lib_job(NULL,"param->out=%s param->TCM=%p",fname+1, method); - } - else if ( fname[0]=='M' && is_in_pre_set_method_list (fname+1)) - { - preset_method=1; - fname++; - } - else if ( is_in_pre_set_method_list (fname)) - { - preset_method=1; - } - else - { - char buf[1000]; - if ( check_file_exists ( fname)); - else if (fname[0]=='M' && check_file_exists(fname+1)); - else - { - sprintf ( buf, "%s/%s", get_methods_4_tcoffee(), fname); - if( check_file_exists(buf)){sprintf ( fname, "%s", buf);} - else - { - fprintf ( stderr, "\n%s (%s) is not a valid method [FATAL:%s]\n", buf,fname, PROGRAM); - myexit (EXIT_FAILURE); - } - } - } - - - method=method_file2TC_method(fname); - job=print_lib_job (NULL, "param->TCM=%p", method); - job->jobid=-1; - - if (!strm (weight, "default"))sprintf ( method->weight, "%s", weight); - - sprintf ( aln_mode, "%s", method->aln_mode); - sprintf ( out_mode, "%s", method->out_mode); - - - if (lib_list && lib_list[0]) - { - static char **lines, **list=NULL; - int a,i, x, n, nl; - - - - if ( lines) free_char (lines, -1); - - - if ( strstr (lib_list, "prune")) - { - lines=file2lines (list2prune_list (S,DM->similarity_matrix)); - } - else - { - lines=file2lines (lib_list); - } - - nl=atoi (lines[0]); - for (a=1; a 2 && strm (aln_mode, "pairwise"))continue; - if ( n==2 && strm (aln_mode, "multiple"))continue; - - for (i=2; iname, S->nseq, 100))!=-1)sprintf(list[i], "%d", x); - else - { - add_warning ( stderr, "\nWARNING: %s is not part of the sequence dataset \n", list[i]); - continue; - } - } - sprintf ( bufS, "%s", list[1]); - for ( i=2; i')==NULL)strcat (bufA,TO_NULL_DEVICE); - if ( check_seq_type ( method, bufS, S)) - { - job->c=print_lib_job (NULL, "param->TCM=%p param->method=%s param->aln_c=%s param->seq_c=%s io->in=%s io->out=%s ", method, fname, bufA, bufS, in, out); - job=queue_cat (job, job->c); - } - vfree (bufA); - - } - } - else if ( strcmp (aln_mode, "multiple")==0) - { - int d; - char buf[1000]; - - sprintf (bufS, "%d",S->nseq); - for (d=0; d< S->nseq; d++) - { - sprintf ( buf," %d",d); - strcat ( bufS, buf); - } - - bufA=make_aln_command (method, in=vtmpnam(NULL),out=vtmpnam(NULL)); - - if (strrchr(bufA, '>')==NULL)strcat (bufA,TO_NULL_DEVICE); - - if ( check_seq_type ( method, bufS, S)) - { - - job->c=print_lib_job (NULL, "param->TCM=%p param->method=%s param->aln_c=%s param->seq_c=%s io->in=%s io->out=%s ", method, fname, bufA, bufS, in, out, S->template_file); - - job=queue_cat (job, job->c); - - } - vfree (bufA); - - } - else if ( strstr(aln_mode, "pairwise")) - { - - int do_mirror, do_self, x, y, id; - do_mirror=(strstr(aln_mode, "m_"))?1:0; - do_self=(strstr(aln_mode, "s_"))?1:0; - - - for (x=0; x< S->nseq; x++) - for ( y=(do_mirror)?0:x; y< S->nseq; y++) - { - - id=DM->similarity_matrix[x][y]; - - if ( x==y && !do_self); - else if ( !is_in_range(id,method->minid, method->maxid)); - else - { - sprintf (bufS, "2 %d %d",x,y); - bufA=make_aln_command (method,in=vtmpnam(NULL),out=vtmpnam (NULL)); - - if (strrchr(bufA, '>')==NULL)strcat (bufA, TO_NULL_DEVICE); - if (check_seq_type (method, bufS, S)) - { - job->c=print_lib_job (job->c, "param->TCM=%p param->method=%s param->aln_c=%s param->seq_c=%s io->in=%s io->out=%s ",method,fname,bufA, bufS, in, out); - job=queue_cat (job, job->c); - } - else if ( method->seq_type[0]=='P' && hijack_P_jobs) - { - //Hijack _P_ jobs without enough templates - static TC_method *proba_pairM; - - fprintf (stderr, "\n\t Information: Method %s cannot be applied to [%s vs %s]. Use proba_pair instead", method, (CL->S)->name[x], (CL->S)->name [y]); - if (!proba_pairM) - { - proba_pairM=method_file2TC_method(method_name2method_file ("proba_pair")); - proba_pairM->PW_CL=method2pw_cl(proba_pairM, CL); - } - job->c=print_lib_job (job->c, "param->TCM=%p param->method=%s param->aln_c=%s param->seq_c=%s io->in=%s io->out=%s ",proba_pairM,fname,bufA, bufS, in, out); - job=queue_cat (job, job->c); - } - - vfree (bufA); - } - } - } - - return job; - } - -int check_seq_type (TC_method *M, char *list,Sequence *S) -{ - char t1, t2; - int s1, s2, n1, nseq, ntype, i; - int *slist; - Template *T1, *T2; - - - slist=string2num_list (list); - - nseq=slist[1]; - ntype=strlen (M->seq_type); - t1=M->seq_type[0]; - t2=M->seq_type[1]; - n1=0; - - /*Profiles and Sequences MUST NOT be distinguished so that sequences and profiles can easily be aligned*/ - if ( tolower(t1)=='r')t1='S'; - if ( tolower(t2)=='r')t2='S'; - - - if ( strm ( M->aln_mode, "pairwise") && nseq>2)n1=0; - else if (strm ( M->aln_mode, "multiple" ) && ntype>1)n1=0; - else if (ntype==1) - { - - for (n1=0, i=0; iT[s1]; - - n1+=(strchr (T1->seq_type,t1) || check_profile_seq_type (S, s1, t1))?1:0; - } - n1=(n1==nseq)?1:0; - } - else if (ntype==2) - { - int s1_has_t1; - int s1_has_t2; - int s2_has_t1; - int s2_has_t2; - - s1=slist[2]; - s2=slist[3]; - T1=(S->T[s1]); - T2=(S->T[s2]); - - s1_has_t1=(strchr ( T1->seq_type, t1) || check_profile_seq_type (S, s1, t1))?1:0; - s1_has_t2=(strchr ( T1->seq_type, t2) || check_profile_seq_type (S, s1, t2))?1:0; - s2_has_t1=(strchr ( T2->seq_type, t1) || check_profile_seq_type (S, s2, t1))?1:0; - s2_has_t2=(strchr ( T2->seq_type, t2) || check_profile_seq_type (S, s2, t2))?1:0; - n1=((s1_has_t1 && s2_has_t2) || (s1_has_t2 && s2_has_t1))?1:0; - } - - vfree (slist); - return n1; -} - -int check_profile_seq_type (Sequence *S, int i, char t) -{ - Alignment *A; - Template *T; - int a; - - /*returns 1 if the sequence S is associated with a profile containing the right sequence*/ - A=seq2R_template_profile (S, i); - if (A==NULL || A->S==NULL) return 0; - for ( a=0; a< A->nseq; a++) - { - - T=(A->S)->T[a]; - if ( T && strchr( T->seq_type,t))return 1; - - } - return 0; -} - - -char **method_list2method4dna_list ( char **list, int n) -{ - int a; - static char *buf; - - if ( !buf)buf=vcalloc ( 1000, sizeof (char)); - - if ( !list || n==0)return list; - buf=vcalloc ( 1000, sizeof (char)); - - for ( a=0; a< n; a++) - { - - sprintf ( buf,"%s",list[a]); - - if ( strm ( list[a], "4dna")); - else - { - - char **para ; - int b; - - para=string2list2 (list[a], "@"); - sprintf ( buf, "%s4dna", para[1]); - if (method_name2method_file (buf) || method_name2method_file(buf+1)) - { - sprintf ( list[a],"%s", buf); - for (b=2; b< atoi (para[0]); b++) - { - strcat (list[a], "@"); - strcat (list[a], para[b]); - } - } - - free_char (para, -1); - } - } - return list; -} - -int is_in_pre_set_method_list ( char *method) -{ - char *new_name; - - - - - new_name=method_name2method_file (method); - - if ( !new_name) return 0; - else - { - - sprintf ( method, "%s", new_name); - return 1; - } -} -char *** display_method_names (char *mode, FILE *fp) -{ - char ***list, ***l2; - int n=0, a, ml1=0, ml2=0, ml3=0; - int status; - - - - - - - list=produce_method_file (NULL); - l2=declare_arrayN(3,sizeof (char), 1000, 10, 100); - - fprintf ( fp, "\n####### Compiling the list of available methods ... (will take a few seconds)\n"); - a=0; - while (list[a]) - { - - - sprintf (l2[a][0], "%s", method_file_tag2value (list[a][1],"ADDRESS")); - sprintf (l2[a][1], "%s", method_file_tag2value (list[a][1],"PROGRAM")); - sprintf (l2[a][2], "%s", method_file_tag2value (list[a][1],"ALN_MODE")); - sprintf (l2[a][3], "%s", method_file_tag2value (list[a][1],"SEQ_TYPE")); - ml1=MAX((strlen (list[a][0])),ml1); - ml2=MAX((strlen (l2 [a][0])),ml2); - ml3=MAX((strlen (l2 [a][1])),ml3); - l2[a][4][0]= check_program_is_installed (l2[a][1],NULL,NULL,l2[a][0],NO_REPORT); - if (l2[a][4][0])l2[a][5]=pg2path (l2[a][1]); - a++; - } - - fprintf ( fp, "\n####### Methods For which an Interface is available in T-Coffee\n"); - fprintf ( fp, "You must install the packages yourself when required (use the provided address)\n"); - fprintf ( fp, "Contact us if you need an extra method to be added [%s]\n", EMAIL); - - - fprintf ( fp, "\n****** Pairwise Sequence Alignment Methods:\n"); - fprintf ( fp, "--------------------------------------------\n"); - a=0; - while (list[a]) - { - if ( strm (l2[a][2], "pairwise") && !strstr (l2[a][3], "P")) - fprintf ( fp, "%-*s %-*s [pg: %*s is %s Installed][%s]\n", ml1,list[a][0],ml2, l2[a][0], ml3,l2[a][1], (l2[a][4][0]==0)?"NOT":"",(l2[a][5])?l2[a][5]:""); - a++; - } - - fprintf ( fp, "\n****** Pairwise Structural Alignment Methods:\n"); - fprintf ( fp, "--------------------------------------------\n"); - a=0; - while (list[a]) - { - if ( strm (l2[a][2], "pairwise") && strstr (l2[a][3], "P")) - fprintf ( fp, "%-*s %-*s [pg: %*s is %s Installed][%s]\n", ml1,list[a][0],ml2, l2[a][0], ml3,l2[a][1], (l2[a][4][0]==0)?"NOT":"",(l2[a][5])?l2[a][5]:""); - a++; - } - fprintf ( fp, "\n****** Multiple Sequence Alignment Methods:\n"); - fprintf ( fp, "--------------------------------------------\n"); - a=0; - while (list[a]) - { - if ( strm (l2[a][2], "multiple")) - fprintf ( fp, "%-*s %-*s [pg: %*s is %s Installed][%s]\n", ml1,list[a][0],ml2, l2[a][0], ml3,l2[a][1], (l2[a][4][0]==0)?"NOT":"",(l2[a][5])?l2[a][5]:""); - a++; - } - fprintf ( fp, "\n####### Prediction Methods available to generate Templates\n"); - fprintf ( fp, "-------------------------------------------------------------\n"); - a=0; - while (list[a]) - { - if ( strm (l2[a][2], "predict")) - fprintf ( fp, "%-*s %-*s [pg: %*s is %s Installed][%s]\n", ml1,list[a][0],ml2, l2[a][0], ml3,l2[a][1], (l2[a][4][0]==0)?"NOT":"",(l2[a][5])?l2[a][5]:""); - a++; - } - fprintf ( fp, "\n\n\nAll these Methods are supported by T-Coffee, but you HAVE to install them yourself [use the provided address]\n\n"); - fprintf ( fp, "\nThese methods were selected because they are freeware opensource, easy to install and well supported"); - fprintf ( fp, "\nContact us if you need an extra method to be added [%s]\n", EMAIL); - - return l2; -} - -char* method_name2method_file (char *method) - { - char *fname=NULL; - char ***mlist, *p; - char address[100]; - char program[100]; - int a; - - if ( check_file_exists (method) || (toupper(method[0])==method[0] && check_file_exists (method+1)))return NULL; - - if ( (p=strstr (method, "@"))!=NULL && !strstr (method, "em@"))p[0]='\0'; - mlist=produce_method_file (method); - - - - a=0; - while (mlist[a]) - { - if ( lstrstr (method,mlist[a][0])){fname=mlist[a][1];break;} - else {a++;} - } - if (p)p[0]='@'; - if ( fname==NULL) return NULL; - else - { - sprintf (address, "%s", method_file_tag2value (fname,"ADDRESS")); - sprintf (program, "%s", method_file_tag2value (fname,"PROGRAM")); - check_program_is_installed (program,NULL,NULL,address,INSTALL_OR_DIE); - if ( (method=strstr (method, "EP@"))!=NULL) - { - int a; - char **list; - FILE *fp; - list=string2list2 ( method, "@"); - fp=vfopen (fname, "a"); - for ( a=2; a - if (!line) - { - line=vcalloc (LONG_STRING+1, sizeof ( char)); - subcommand=vcalloc ( LONG_STRING, sizeof (char)); - } - - m=vcalloc ( 1, sizeof (TC_method)); - - /*set default parameter values*/ - m->gop=m->gep=UNDEFINED; - sprintf (m->seq_type, "S"); - sprintf (m->weight, "sim"); - m->minid=0; - m->maxid=100; - - fp=vfopen (method, "r"); - while ( (c=fgetc (fp))!=EOF) - { - ungetc ( c, fp); - fgets ( line,LONG_STRING, fp); - - - line=substitute (line, "\n", " "); - line=substitute (line, "%s", " "); - line=substitute (line, "%e", "="); - line=substitute (line, "%m", "-"); - - if ( (line && (line[0]=='*' || line[0]=='#' || line[0] == '$'|| line[0]==' ' || line[0]=='\0' )))subcommand[0]='\0'; - //Parse PARAM, PARM1 and PARAM2 first because they may contain keywords - else if ( (p=strstr (line, "PARAM1" ))) - { - sprintf (subcommand, " %s ", p+6); - strcat ( m->param1, subcommand); - } - else if ( (p=strstr (line, "PARAM2" ))) - { - sprintf (subcommand, " %s ", p+6); - strcat ( m->param2, subcommand); - } - else if ( (p=strstr (line, "PARAM" ))) - { - sprintf (subcommand, " %s ", p+5); - strcat ( m->param, subcommand); - }else if ( (p=strstr (line, "EXECUTABLE2" ))) - { - sscanf (p, "EXECUTABLE2 %s", m->executable2); - } - else if ( (p=strstr (line, "EXECUTABLE" ))) - { - sscanf (p, "EXECUTABLE %s", m->executable); - } - else if ( (p=strstr (line, "IN_FLAG2" ))) sscanf (p, "IN_FLAG2 %s" , m->in_flag2); - else if ( (p=strstr (line, "IN_FLAG" ))) sscanf (p, "IN_FLAG %s" , m->in_flag); - else if ( (p=strstr (line, "OUT_FLAG" ))) sscanf (p, "OUT_FLAG %s" , m->out_flag); - else if ( (p=strstr (line, "OUT_MODE" ))) sscanf (p, "OUT_MODE %s" , m->out_mode); - else if ( (p=strstr (line, "ALN_MODE" ))) sscanf (p, "ALN_MODE %s" , m->aln_mode); - else if ( (p=strstr (line, "SEQ_TYPE" ))) sscanf (p, "SEQ_TYPE %s" , m->seq_type); - else if ( (p=strstr (line, "WEIGHT" ))) sscanf (p, "WEIGHT %s" , m->weight); - else if ( (p=strstr (line, "MATRIX" ))){ sscanf (p, "MATRIX %s" , m->matrix);} - else if ( (p=strstr (line, "GOP" ))) sscanf (p, "GOP %d" , &m->gop); - else if ( (p=strstr (line, "GEP" ))) sscanf (p, "GEP %d" , &m->gep); - else if ( (p=strstr (line, "MAXID" ))) sscanf (p, "MAXID %d" , &m->maxid); - else if ( (p=strstr (line, "MINID" ))) sscanf (p, "MINID %d" , &m->minid); - - } - vfclose ( fp); - - - - return m; -} - -int TC_method2method_file( struct TC_method*m,char *fname ) -{ - FILE *fp; - if ( !m) return 0; - fp=vfopen ( fname, "w"); - if ( m->executable[0])fprintf (fp, "EXECUTABLE %s\n", m->executable); - if (m->in_flag[0])fprintf (fp, "IN_FLAG %s\n", m->in_flag); - if (m->out_flag[0])fprintf (fp, "OUT_FLAG %s\n", m->out_flag); - if (m->out_mode[0])fprintf (fp, "OUT_MODE %s\n", m->out_mode); - if (m->aln_mode[0])fprintf (fp, "ALN_MODE %s\n", m->aln_mode); - if (m->seq_type)fprintf (fp, "SEQ_TYPE %s\n", m->seq_type); - if (m->weight[0])fprintf (fp, "WEIGHT %s\n", m->weight); - if (m->matrix[0])fprintf (fp, "MATRIX %s\n", m->matrix); - if (m->gop!=UNDEFINED)fprintf (fp, "GOP %d\n", m->gop); - if (m->gep!=UNDEFINED)fprintf (fp, "GEP %d\n", m->gep); - if (m->minid!=0 )fprintf (fp, "MINID %d\n", m->minid); - if (m->maxid!=100)fprintf (fp, "MAXID %d\n", m->maxid); - if (m->param[0])fprintf (fp, "PARAM %s\n", m->param); - if (m->param1[0])fprintf (fp, "PARAM1 %s\n", m->param1); - if (m->param2[0])fprintf (fp, "PARAM1 %s\n", m->param2); - if (m->in_flag2[0])fprintf (fp, "IN_FLAG2 %s\n", m->in_flag2); - - vfclose ( fp); - return 1; -} - -char *make_aln_command(TC_method *m, char *seq, char *aln) - { - char *command; - char buf[1000]; - - // sprintf ( buf, "%s %s %s%s %s%s %s", m->executable, m->param1, m->in_flag, seq,m->param2, m->out_flag,aln, m->param); - sprintf ( buf, "%s %s %s%s %s %s%s %s", m->executable, m->param1, m->in_flag, seq,m->param2, m->out_flag,aln, m->param); - command=vcalloc ( strlen (buf)+100, sizeof (char)); - sprintf ( command, "%s", buf); - - - command=substitute (command, "&bnsp", " "); - command=substitute (command, "no_name", ""); - - return command; - } - - - - - -/*********************************************************************/ -/* */ -/* WRITE IN LIST */ -/* */ -/* */ -/*********************************************************************/ - - -int dump_constraint_list (Constraint_list *CL) -{ - int *L; - int a, b; - if (!CL->L) return 0; - L=CL->L; - CL->L=NULL; - CL->fp=tmpfile (); - - for ( a=0; ane; a++) - for (b=0; bentry_len; b++) - vwrite_clist (CL, a, b, L[a*CL->entry_len+b]); - - vfree (CL->L); CL->L=NULL; - return 1; -} - -int vread_clist ( Constraint_list *CL, int a, int b ) - { - int x; - - if ( a>= CL->ne) - { - return UNDEFINED_2; - - } - else if (CL->fp) - { - - fseek (CL->fp, a*CL->el_size*CL->entry_len+b*CL->el_size, SEEK_SET); - fread (&x, CL->el_size, 1, CL->fp); - - return x; - } - else if ( CL->L) - { - return CL->L[a*CL->entry_len+b]; - } - else if (CL->M) - { - return (CL->M)[a][b]; - } - else - { - return UNDEFINED_2; - - } - return UNDEFINED_2; - } -int vwrite_clist ( Constraint_list *CL, int a, int b, CLIST_TYPE x) - { - - CL->seq_indexed=0; - CL->residue_indexed=0; - - - if (CL->fp) - { - fseek (CL->fp, a*CL->el_size*CL->entry_len+b*CL->el_size, SEEK_SET); - fwrite(&x, CL->el_size, 1, CL->fp); - } - else if (!CL->M) - { - int i,l; - - - i=a*CL->entry_len+b; - if (CL->L) - { - Memcontrol *p; - p=(Memcontrol *)CL->L; - p-=2; - l=(int)p[0].size/p[0].size_element; - //read_size_int (CL->L,sizeof (int)); - } - else - { - l=CL->chunk; - (CL->L)=vcalloc (l,sizeof (int)); - } - - if (l<=i) - { - l+=CL->chunk; - (CL->L)=vrealloc ( (CL->L),l*sizeof (int)); - } - (CL->L)[i]=x; -#ifdef gagaga - if (((a*CL->entry_len)+b)>=(CL->entry_len*CL->max_L_len)) - { - - if (!(CL->L)) - { - (CL->L)=vcalloc ((CL->chunk+a)*CL->entry_len+b, sizeof (int)); - } - else - { - CL->max_L_len+=CL->chunk; - (CL->L)=vrealloc ( (CL->L),CL->entry_len*sizeof (int)*CL->max_L_len+b); - } - } - - (CL->L)[i]=x; -#endif - } - return x; - } - - -/*********************************************************************/ -/* */ -/* INDEXING FUNCTIONS */ -/* */ -/* */ -/*********************************************************************/ - -Constraint_list *index_res_constraint_list ( Constraint_list *CL, int field) - { - /* - function documentation: start - Constraint_list *index_res_constraint_list ( Constraint_list *CL, int field) - This function reorganises the content of the CL->L matrix, so that a single look up gives - the constraints associated with any residue - - 1-if CL->residue_indexed=1 return - 2-CL->residue_index[Seq X][Res Y of Seq X][0]=Z - Z=Number of residues matching X(Y)*3+1 - CL->residue_index[Seq X][Res Y of Seq X][0]=Z - CL->residue_index[Seq X][Res Y of Seq X][c+0]=seq W - CL->residue_index[Seq X][Res Y of Seq X][c+1]=res V - CL->residue_index[Seq X][Res Y of Seq X][c+2]=weight W(V) Vs X(Y) - - NOTE: Works All right with a sequence against itself - NOTE: Any modification of CL->L should result in residue_indexed to be reset - function documentation: end - */ - int a, b, s1, s2, r1, r2, w; - - - - - - - - if ( CL->residue_indexed && CL->residue_field==field); - else - { - - if ( CL->residue_index) - { - for ( a=0; a< (CL->S)->nseq; a++) - for ( b=0; b<= (CL->S)->len[a]; b++) - { - vfree(CL->residue_index[a][b]); - CL->residue_index[a][b]=vcalloc_nomemset (1, sizeof (int)); - CL->residue_index[a][b][0]=1; - } - } - else if ( !CL->residue_index) - { - - - CL->residue_index=vcalloc_nomemset ( (CL->S)->nseq, sizeof (int**)); - for ( a=0; a< (CL->S)->nseq; a++) - { - - CL->residue_index[a]=vcalloc_nomemset ( ((CL->S)->len[a]+1), sizeof (int*)); - for ( b=0; b<= (CL->S)->len[a]; b++) - { - CL->residue_index[a][b]=vcalloc_nomemset (1,sizeof (int)); - CL->residue_index[a][b][0]=1; - } - } - } - for (a=0;ane; a++) - { - - s1=vread_clist (CL, a, SEQ1); - s2=vread_clist (CL, a, SEQ2); - r1=vread_clist (CL, a, R1); - r2=vread_clist (CL, a, R2); - w=vread_clist (CL, a, field); - - - CL->residue_index[s1][r1][0]+=3; - CL->residue_index[s1][r1]=vrealloc ( CL->residue_index[s1][r1], CL->residue_index[s1][r1][0]*sizeof (int)); - CL->residue_index[s1][r1][CL->residue_index[s1][r1][0]-3]=s2; - CL->residue_index[s1][r1][CL->residue_index[s1][r1][0]-2]=r2; - CL->residue_index[s1][r1][CL->residue_index[s1][r1][0]-1]=w; - - CL->residue_index[s2][r2][0]+=3; - CL->residue_index[s2][r2]=vrealloc ( CL->residue_index[s2][r2], CL->residue_index[s2][r2][0]*sizeof (int)); - CL->residue_index[s2][r2][CL->residue_index[s2][r2][0]-3]=s1; - CL->residue_index[s2][r2][CL->residue_index[s2][r2][0]-2]=r1; - CL->residue_index[s2][r2][CL->residue_index[s2][r2][0]-1]=w; - - } - CL->residue_indexed=1; - CL->residue_field=field; - } - return CL; - } - -Constraint_list *index_constraint_list ( Constraint_list *CL) - { - /* - Function Documentation: start - Constraint_list *index_constraint_list ( Constraint_list *CL); - Indexes a constraint list - 1-Checks the flag seq_indexed - 2-if flag set to 0 - CL->start_index[seq1][seq2] indicatse the first position for seq1 Vs seq2 - CL->end_index[seq1][seq3] indicatse the last position for seq1 Vs seq2 - Any modif to CL->L should cause the flag 1 to be set to 0; - Function Documentation: end - */ - int a, csA, csB, sA, sB; - - - if ( CL->seq_indexed); - else - { - if ( CL->start_index!=NULL)free_int ( CL->start_index,-1); - CL->start_index=declare_int ( (CL->S)->nseq, (CL->S)->nseq); - - if ( CL->end_index!=NULL)free_int ( CL->end_index,-1); - CL->end_index=declare_int ( (CL->S)->nseq, (CL->S)->nseq); - - csA=vread_clist (CL, 0, SEQ1); - csB=vread_clist (CL, 0, SEQ2); - - CL->start_index[csA][csB]=0; - CL->start_index[csB][csA]=0; - for ( a=1; ane; a++) - { - sA=vread_clist (CL, a, SEQ1); - sB=vread_clist (CL, a, SEQ2); - if (sA!=csA || sB!=csB) - { - CL->end_index[csA][csB]=a; - CL->end_index[csB][csA]=a; - csA=sA; - csB=sB; - CL->start_index[csA][csB]=a; - CL->start_index[csB][csA]=a; - } - } - CL->end_index[csB][csA]=CL->ne; - CL->end_index[csA][csB]=CL->ne; - CL->seq_indexed=1; - } - return CL; - } - - -char ** reindex_constraint_list (char **profile, int np,char **list, int *inL, Sequence *S) -{ - int a, nl=0, l; - int ***cache, **index, *entry; - char **nlist, **cons, *nlib_name; - Sequence *NS; - Alignment **P, *A; - Constraint_list *CL, *NCL; - - - l=inL[0]; - /*1: Pre-Process the profiles*/ - cache=vcalloc (np, sizeof (int**)); - cons=vcalloc (np, sizeof (char*)); - P=vcalloc (np, sizeof (Alignment *)); - - for ( a=0; a< np; a++) - { - int e,b,c, **ca; - A=P[a]=main_read_aln (profile[a], NULL); - - ca=cache[a]=declare_int (A->nseq, A->len_aln+1); - cons[a]=aln2cons_seq_mat ( A, "blosum62mt"); - for (b=0;bnseq; b++) - { - for (e=0,c=0; clen_aln; c++) - { - if (is_gap(A->seq_al[b][c])); - else ca[b][++e]=c+1; - } - } - } - - /*2: Index The Sequences*/ - index=declare_int (S->nseq,2); - for (a=0; anseq; a++)index[a][0]=index[a][1]=-1; - for (a=0; a< S->nseq; a++) - { - char *name; - int p, b; - name=S->name[a]; - for (b=0; b< np; b++) - { - if ((p=name_is_in_list(name, (P[b])->name, (P[b])->nseq, MAXNAMES))!=-1) - { - index[a][0]=b; - index[a][1]=p; - } - } - } - - /*3: Read the primary Library*/ - - nlist=declare_char (read_array_size_new ((void *)list), read_array_size_new ((void *)list[0])); - for (nl=0,a=0; a< l; a++) - if ( list[a][0]=='L' || list[a][0]=='A') - nlist[nl++]=list[a]; - - CL=declare_constraint_list ( S,NULL, NULL, 0,NULL, NULL); - CL=read_n_constraint_list (nlist,nl,NULL,NULL,"sim",S->type,stdout, CL, "S"); - vfree (nlist); - - NS=fill_sequence_struc (np, cons, profile); - NCL=declare_constraint_list ( NS,NULL, NULL, 0,NULL, NULL); - entry=vcalloc ( CL->entry_len, CL->el_size); - for (a=0; ane; a++) - { - int s1, s2, r1, r2, ps1, ps2, pps1, pps2; - - entry=extract_entry (entry, a, CL); - s1=entry[SEQ1]; s2=entry[SEQ2]; - r1=entry[R1]; r2=entry[R2]; - ps1=index[s1][0];ps2=index[s2][0]; - pps1=index[s1][1];pps2=index[s2][1]; - - if (ps1==ps2 || ps1==-1 || ps2==-1)continue; - entry[SEQ1]=ps1; - entry[SEQ2]=ps2; - - entry[R1]=cache[ps1][pps1][r1]; - entry[R2]=cache[ps2][pps2][r2]; - - add_entry2list (entry, NCL); - - } - - compact_list (NCL, 0, NCL->ne, "default"); - nlib_name=vtmpnam(NULL); - vfclose(save_constraint_list ( NCL, 0, NCL->ne,nlib_name, NULL, "lib",NCL->S)); - - - nlist=declare_char (read_array_size_new ((void *)list), read_array_size_new ((void *)list[0])); - for (nl=0,a=0; a< l; a++) - if ( list[a][0]!='L' && list[a][0]!='A') - sprintf (nlist[nl++], "%s",list[a]); - sprintf (nlist[nl++], "L%s", nlib_name); - - free_arrayN ((void *)cache, 3); - free_arrayN ((void *)cons, 2); - free_arrayN ((void *)index, 2); - vfree (entry); - for (a=0;aS; - NS=NCL->S; - - L=CL->L; - NL=NCL->L; - - - - if (NCL->residue_index) - { - free_arrayN((void*)NCL->residue_index, 3); - NCL->residue_index=NULL; - } - - for ( a=0; a< S->nseq; a++) - { - sprintf ( NS->seq[a], "%s", S->seq[a]); - NS->len[a]=strlen (S->seq[a]); - } - - - for ( a=0; a<2; a++) - { - s=A->order[ls[a][0]][0]; - vfree (NS->seq[s]); - NS->seq[s]=sub_aln2cons_seq_mat (A, ns[a], ls[a], "blosum62mt"); - NS->len[s]=strlen (NS->seq[s]); - } - for ( max_len=0, a=0; anseq; a++)max_len=MAX(max_len, S->len[a]); - index_seq=declare_int ( NS->nseq, max_len+1); - index_res=declare_int ( NS->nseq, max_len+1); - - - for ( a=0; a< 2; a++) - { - for (b=0; b< ns[a]; b++) - { - s=A->order[ls[a][b]][0]; - seq=A->seq_al[ls[a][b]]; - l=strlen (seq); - - for ( d=0, c=0; corder[ls[a][0]][0]; - } - } - } - } - - for ( a=0; a< CL->ne; a++) - { - s1=L[a*CL->entry_len+SEQ1]; - s2=L[a*CL->entry_len+SEQ2]; - r1=L[a*CL->entry_len+R1]; - r2=L[a*CL->entry_len+R2]; - - if ( index_res[s1][r1]) - { - NL[a*CL->entry_len+SEQ1]=index_seq[s1][r1]; - NL[a*CL->entry_len+R1]=index_res[s1][r1]; - } - else - { - NL[a*CL->entry_len+SEQ1]=s1; - NL[a*CL->entry_len+R1]=r1; - } - - if ( index_res[s2][r2]) - { - NL[a*CL->entry_len+SEQ2]=index_seq[s2][r2]; - NL[a*CL->entry_len+R2]=index_res[s2][r2]; - } - else - { - NL[a*CL->entry_len+SEQ2]=s2; - NL[a*CL->entry_len+R2]=r2; - } - } - - NCL->ne=CL->ne; - NCL->residue_indexed=0; - - NCL=compact_list (NCL, 0, NCL->ne, "best"); - - NCL=index_res_constraint_list (NCL, WE); - - free_int ( index_res, -1); free_int (index_seq, -1); - - return NCL; -} -/*********************************************************************/ -/* */ -/* LIST EXTENTION */ -/* */ -/* */ -/*********************************************************************/ -Constraint_list *extend_list_pair (Constraint_list *CL,char *store_mode, int s1, int s2) - { - static Sequence *S; - Constraint_list *CLout; - /* - function documentation: start - Constraint_list *extend_list_pair (Constraint_list *CL,char *store_mode, int s1, int s2) - This function takes a pair of sequences s1, s2 and perrforms the extention - It returns the incoming list CL, with CL->L[s1][s2] now extended - See main documentation for store_mode - function documentation: end - */ - - if ( S==NULL)S=declare_sequence ((CL->S)->min_len, (CL->S)->max_len,(CL->S)->nseq); - sprintf ( S->name[0], "%s",(CL->S)->name[s1]); - sprintf ( S->name[1],"%s",(CL->S)->name[s2]); - S->nseq=2; - - CLout=extend_list (CL, store_mode, CL->extend_clean_mode, CL->extend_compact_mode,CL->do_self, S); - return CLout; - } -Constraint_list *extend_list (Constraint_list *CLin, char *store_mode,char *clean_mode, char *compact_mode,int do_self, Sequence *DO_LIST) - { - int a, b, c, d, e, f; - int wA, wC,w, rA, rC, miscA, miscC, misc; - static int **posA; - static int **posC; - int start_ba, end_ba, start_bc, end_bc, start_ac, end_ac; - int len; - int lenA=0; - int lenC=0; - int *translation; - Constraint_list *CLout=NULL; - - - /*Do not extend if the List is a Matrix*/ - if ( !CLin->L && CLin->M) - { - CLin->extend_jit=0; - return CLin; - } - - translation=vcalloc ( (CLin->S)->nseq, sizeof (int)); - for ( a=0; a<(CLin->S)->nseq; a++) - { - translation[a]=name_is_in_list ((CLin->S)->name[a],DO_LIST->name, DO_LIST->nseq, 100); - translation[a]++;/* set translation to -1+1=0 if seq not in list*/ - } - - CLout=declare_constraint_list (CLin->S, NULL,NULL,0, strm("disk", store_mode)?tmpfile():NULL, NULL); - - for ( a=0; a<(CLin->S)->nseq-(1-do_self); a++) - { - fprintf (CLin->local_stderr, "\nSeq %3d: %5d", a+1,CLout->ne); - for ( c=a+(1-do_self); c<(CLin->S)->nseq; c++) - { - if ( translation[a] && translation[c]) - { - get_bounds (CLin,a, c, &start_ac, &end_ac); - for ( d=start_ac; dentry_len; e++) - vwrite_clist(CLout,CLout->ne, e, vread_clist(CLin,d, e)); - CLout->ne++; - } - - for ( b=0; b<(CLin->S)->nseq; b++) - { - len=strlen ( (CLin->S)->seq[b]); - - get_bounds (CLin,b, a, &start_ba, &end_ba); - posA=fill_pos_matrix (CLin,start_ba, end_ba, len, posA, &lenA,(b>a)); - - if ((c!=b && a!=b) ||(do_self==1)) - { - - get_bounds (CLin, b, c, &start_bc, &end_bc); - posC=fill_pos_matrix (CLin, start_bc, end_bc, len, posC, &lenC, (b>c)); - - for (d=1; d<=len; d++) - { - if ( posA[d][1]==0 || posC[d][1]==0); - else - { - for (e=2; e<=posA[d][1]+1; e+=(CLin->entry_len-4)) - for ( f=2; f<=posC[d][1]+1; f+=(CLin->entry_len-4)) - { - wA =posA[d][e+1]; - miscA=posA[d][e+2]; - - wC =posC[d][f+1]; - miscC=posC[d][f+2]; - - rA=posA[d][e]; - rC=posC[d][f]; - - w =MIN(wA,wC); - - misc=MAX(miscA, miscC); - - vwrite_clist( CLout, CLout->ne, SEQ1, a); - vwrite_clist( CLout, CLout->ne, SEQ2, c); - vwrite_clist( CLout, CLout->ne, R1 ,rA); - vwrite_clist( CLout, CLout->ne, R2 ,rC); - vwrite_clist( CLout, CLout->ne, WE , w); - vwrite_clist( CLout, CLout->ne, CONS, 1); - vwrite_clist( CLout, CLout->ne, MISC,misc); - CLout->ne++; - } - } - } - } - } - - CLout=compact_list (CLout,0,CLout->ne,"mirror_sum"); - CLout=clean ( clean_mode,CLout, 0, CLout->ne); - } - } - } - - - vfree (translation); - return CLout; - } -void get_bounds (Constraint_list *CL, int s1, int s2, int *start, int *end) - { - - CL=index_constraint_list (CL); - - if ( s1>s2)SWAP(s1, s2); - - start[0]=CL->start_index[s1][s2]; - end [0]=CL->end_index [s1][s2]; - } - - -int ** fill_pos_matrix (Constraint_list *CL, int beg, int end, int slen, int **pos, int *len, int mirrored) - { - int small_chunck; - int a, r1,r2; - - - small_chunck=2*CL->entry_len; - - if ( pos==NULL) - { - pos=declare_int (slen+1, small_chunck); - for ( a=0; a<=slen; a++)pos[a][0]=small_chunck; - len[0]=slen+1; - } - else if ( len[0]<=slen) - { - free_int ( pos, len[0]); - pos=declare_int (slen+1, small_chunck); - for ( a=0; a<=slen; a++)pos[a][0]=small_chunck; - len[0]=slen+1; - } - else - { - for ( a=0; a<=slen; a++)pos[a][1]=0; - } - - - - - for ( a=beg; aentry_len))>pos[r1][0]) - { - pos[r1]=vrealloc (pos[r1], (pos[r1][0]+small_chunck)*sizeof (int)); - pos[r1][0]+=small_chunck; - } - pos[r1][pos[r1][1]+2]=r2; - pos[r1][pos[r1][1]+3]=vread_clist(CL,a,WE); - pos[r1][pos[r1][1]+4]=vread_clist(CL,a,MISC); - pos[r1][1]+=(CL->entry_len-4); - } - return pos; - } -Constraint_list * evaluate_constraint_list_reference ( Constraint_list *CL) - { - static CLIST_TYPE *entry; - int a, b, c, s1, s2, r1, r2, w; - int ***max_res; - - - - if ( CL->M) - { - CL->max_value=CL->max_ext_value=20; - - } - else - { - - CL->max_value=CL->max_ext_value=0; - max_res=vcalloc ( (CL->S)->nseq, sizeof (int**)); - - for ( a=0; a< (CL->S)->nseq; a++) - { - max_res[a]=vcalloc ( strlen ((CL->S)->seq[a])+1, sizeof (int*)); - for ( b=0; b<=(CL->S)->len[a]; b++) - { - max_res[a][b]=vcalloc ( (CL->S)->nseq+1, sizeof (int)); - } - } - - for ( a=0; a< CL->ne; a++) - { - entry=extract_entry ( entry, a, CL); - s1=entry[SEQ1]; - s2=entry[SEQ2]; - r1=entry[R1]; - r2=entry[R2]; - w= entry[WE]; - if ( w==UNDEFINED || ( (CL->moca) && (CL->moca)->forbiden_residues && ((CL->moca)->forbiden_residues[s1][r1]==UNDEFINED || (CL->moca)->forbiden_residues[s2][r2]==UNDEFINED))); - else - { - - max_res[s1][r1][s2]+=w; - max_res[s2][r2][s1]+=w; - CL->max_value=MAX(w, CL->max_value); - } - } - - for ( a=0; a< (CL->S)->nseq; a++) - for ( b=1; b<=(CL->S)->len[a]; b++) - { - for ( c=0; c< (CL->S)->nseq; c++) - { - max_res[a][b][(CL->S)->nseq]+= max_res[a][b][c]; - } - CL->max_ext_value=MAX(max_res[a][b][c],CL->max_ext_value); - } - - for ( a=0; a< (CL->S)->nseq; a++) - { - for ( b=0; b<=(CL->S)->len[a]; b++) - vfree ( max_res[a][b]); - vfree (max_res[a]); - } - CL->max_ext_value=MAX(1,CL->max_ext_value); - vfree ( max_res); - } - - if (CL->normalise) - { - - CL->nomatch=(CL->nomatch*CL->normalise)/CL->max_ext_value; - } - - return CL; - } - -/*********************************************************************/ -/* */ -/* ENTRY MANIPULATION */ -/* */ -/* */ -/*********************************************************************/ -Constraint_list * add_list_entry2list (Constraint_list *CL, int n_para, ...) - { - int a; - int *entry; - int field, val; - va_list ap; - - if (n_para>LIST_N_FIELDS) - { - crash ("Too Many Fields in List [FATAL/add_list_entry2list]"); - } - - va_start (ap,n_para); - entry=vcalloc (CL->entry_len, sizeof (int)); - - for ( a=0; ane++, CL); - } -Constraint_list* insert_entry2list(CLIST_TYPE * entry, int pos, Constraint_list *CL) - { - int a; - for ( a=0; a< CL->entry_len; a++) - vwrite_clist ( CL,pos, a,entry[a]); - return CL; - } -CLIST_TYPE* extract_entry(CLIST_TYPE * entry, int pos, Constraint_list *CL) - { - int a; - - if ( entry==NULL)entry=vcalloc ( CL->entry_len, CL->el_size); - - for (a=0; a< CL->entry_len; a++)entry[a]=vread_clist(CL, pos, a); - return entry; - } - - -/*********************************************************************/ -/* */ -/* SEARCH IN LIST (ARRAY AND FILE) */ -/* */ -/* */ -/*********************************************************************/ -FILE * compare_list (FILE *OUT, Constraint_list *CL1,Constraint_list *CL2) - { - int a; - float nw_score=0; - float w_score=0; - int *l; - - CLIST_TYPE *entry=NULL; - int p; - - entry_len=CL1->entry_len; - qsort ( (void *)CL1->L, CL1->ne, CL1->entry_len*sizeof (int), compare_constraint_list_entry); - qsort ( (void *)CL2->L, CL2->ne, CL2->entry_len*sizeof (int), compare_constraint_list_entry); - - entry=vcalloc ( CL1->entry_len, CL1->el_size); - for ( a=0; ane; a++) - { - entry=extract_entry (entry,a,CL1); - if ((l=main_search_in_list_constraint (entry,&p,4,CL2))!=NULL) - { - vwrite_clist ( CL2, p,MISC, 1); - vwrite_clist ( CL1, a,MISC, 1); - nw_score++; - w_score+=l[WE]; - } - } - fprintf ( OUT, "%-15s:%d pairs (Evaluated matrix), %d pairs in the other (%s)\n", CL2->list_name, CL2->ne, CL1->ne, CL1->list_name); - fprintf ( OUT, "%-15s:%d pairs\n", CL1->list_name, CL1->ne); - fprintf ( OUT, "Acurracy=%.2f%%\n", (nw_score/CL1->ne)*MAXID); - fprintf ( OUT, "Sensitiv=%.2f%%\n\n", (nw_score/CL2->ne)*MAXID); - return OUT; - } - - -CLIST_TYPE *main_search_in_list_constraint ( int *key,int *p,int k_len,Constraint_list *CL) - { - - - CLIST_TYPE *l=NULL; - int start, end; - - CL=index_constraint_list (CL); - - start=CL->start_index[key[SEQ1]][key[SEQ2]]; - end =CL->end_index [key[SEQ1]][key[SEQ2]]; - - - - entry_len=CL->entry_len; - l=bsearch (key, (CL->L)+(start*CL->entry_len), (end-start), sizeof (int)*CL->entry_len, compare_constraint_list_entry4bsearch); - p[0]=CL->L-l; - return l; - } -CLIST_TYPE return_max_constraint_list ( Constraint_list *CL, int field) - { - CLIST_TYPE max=0; - int a; - for ( a=0; a< CL->ne; a++)max=MAX( vread_clist(CL,a,field), max); - return max; - } - - /*********************************************************************/ -/* */ -/* */ -/* LIST SORTING */ -/* */ -/* */ -/* */ -/*********************************************************************/ -Constraint_list *sort_constraint_list_inv (Constraint_list *CL, int start, int len) - { - CL=sort_constraint_list (CL, start,len); - - - CL=invert_constraint_list (CL, start,len); - if ( start+len==CL->ne) - { - while (vread_clist(CL,CL->ne-1, SEQ1)==-1)CL->ne--; - } - - - return CL; - } - -Constraint_list *invert_constraint_list (Constraint_list *CL, int start,int len) - { - int a, b, c; - CLIST_TYPE tp; - - - for ( a=start, b=start+len-1; a<=b; a++, b--) - { - for (c=0; c< CL->entry_len; c++) - { - tp=vread_clist(CL, a, c); - vwrite_clist(CL,a, c, vread_clist(CL, b, c)); - vwrite_clist(CL,b, c, tp); - } - } - return CL; - } - -Constraint_list * sort_constraint_list(Constraint_list *CL, int start, int len) - { - - CL=sort_constraint_list_on_n_fields (CL, start, len, 0, CL->entry_len); - - return CL; - } - -Constraint_list * sort_constraint_list_on_n_fields (Constraint_list *CL, int start, int len, int first_field, int n_fields) - { - entry_len=CL->entry_len; - - if (CL->fp) - { - rewind( CL->fp); - fseek ( CL->fp, start*CL->el_size*CL->entry_len , SEEK_SET); - hsort_list_file ( CL->fp, len, CL->el_size, CL->entry_len,first_field,n_fields); - } - else if ( CL->L) - { - qsort ( (void *)CL->L, CL->ne, CL->entry_len*sizeof (int), compare_constraint_list_entry); - //hsort_list_array ((void**)(CL->L)+start, len, CL->el_size, CL->entry_len,first_field,n_fields); - } - return CL; - } -int compare_constraint_list_entry4bsearch ( const void*vx, const void*vy) -{ - int a; - const int *x=vx, *y=vy; - for (a=SEQ1; a<=R2; a++) - { - if (x[a]y[a]) return 1; - } - return 0; -} -int compare_constraint_list_entry ( const void*vx, const void*vy) -{ - int a; - const int *x=vx, *y=vy; - - for (a=SEQ1; a<=WE; a++) - { - if (x[a]y[a]) return 1; - } - return 0; -} -/*********************************************************************/ -/* */ -/* LIST PARSING */ -/* */ -/* */ -/*********************************************************************/ -Constraint_list* fork_read_n_constraint_list(char **fname,int n_list, char *in_mode,char *mem_mode,char *weight_mode, char *type,FILE *local_stderr, Constraint_list *CL, char *seq_source); -Constraint_list* nfork_read_n_constraint_list(char **fname,int n_list, char *in_mode,char *mem_mode,char *weight_mode, char *type,FILE *local_stderr, Constraint_list *CL, char *seq_source); - -Constraint_list* read_n_constraint_list(char **fname,int n_list, char *in_mode,char *mem_mode,char *weight_mode, char *type,FILE *local_stderr, Constraint_list *CL, char *seq_source) -{ - - if ( get_nproc()==1 || n_list<=2)return nfork_read_n_constraint_list(fname,n_list, in_mode,mem_mode,weight_mode,type,local_stderr, CL, seq_source); - else if ( strstr (CL->multi_thread, "methods")) - return fork_read_n_constraint_list(fname,n_list, in_mode,mem_mode,weight_mode,type,local_stderr, CL, seq_source); - else - return nfork_read_n_constraint_list(fname,n_list, in_mode,mem_mode,weight_mode,type,local_stderr, CL, seq_source); -} -Constraint_list* fork_read_n_constraint_list(char **fname,int n_list, char *in_mode,char *mem_mode,char *weight_mode, char *type,FILE *local_stderr, Constraint_list *CL, char *seq_source) - { - int a, b; - Sequence *S; - char **tmp_list; - int*proclist; - int nproc, ns; - - nproc=get_nproc(); - - proclist=vcalloc (65536, sizeof (int)); - tmp_list=vcalloc (n_list+1, sizeof (char*)); - for (a=0; aS) && (S=read_seq_in_n_list (fname, n_list,type,seq_source))==NULL) - { - fprintf ( stderr, "\nNO SEQUENCE WAS SPECIFIED[FATAL]\n"); - myexit(EXIT_FAILURE); - } - else if (CL->S==NULL) - { - CL->S=S; - } - - /*CHECK IF THERE IS A MATRIX AND GET RID OF OTHER METHODS*/ - for (b=0, a=0; a< n_list; a++)if (is_matrix(fname[a]) ||is_matrix(fname[a]+1) )b=a+1; - - if ( b) - { - if ( b==1); - else sprintf ( fname[0], "%s", fname[b-1]); - n_list=1; - return nfork_read_n_constraint_list(fname,n_list, in_mode,mem_mode,weight_mode, type,local_stderr, CL, seq_source); - } - - if (!CL)CL=declare_constraint_list ( S,NULL, NULL, 0,(strm(mem_mode, "disk"))?tmpfile():NULL, NULL); - - if (CL->ne) - { - constraint_list2raw_file(CL,tmp_list[n_list], "w"); - CL->ne=0; - } - - CL->local_stderr=local_stderr; - fprintf ( local_stderr, "\n\tMulti Core Mode: %d processors:\n", nproc); - for (ns=0,a=0; a< n_list; a++) - { - int pid; - ns++; - pid=vfork (); - if ( pid==0) - { - int in; - initiate_vtmpnam (NULL); - CL->local_stderr=vfopen("/dev/null", "w"); - in=CL->ne; - CL=read_constraint_list (CL, fname[a], in_mode, mem_mode,weight_mode); - if (CL->ne>in)constraint_list2raw_file (CL,tmp_list[a], "w"); - exit (EXIT_SUCCESS); - } - else - { - - set_pid (pid); - fprintf ( local_stderr, "\n\t--- Process Method/Library/Aln %s", fname[a], ns); - proclist[pid]=a; - if (ns>=nproc) - { - b=proclist[vwait(NULL)]; - fprintf (local_stderr, "\n\txxx Retrieved %s",fname[a]); - if (tmp_list[b] && check_file_exists (tmp_list[b])) - { - CL=read_constraint_list_raw_file (CL,tmp_list[b]); - compact_list (CL, 0, CL->ne, (strm (weight_mode, "cons")?"cons":"default")); - } - ns--; - } - } - } - - while (ns) - { - int pid2; - pid2=vwait(NULL); - a=proclist[pid2]; - fprintf (local_stderr, "\n\txxx Retrieved %s",fname[a]); - if (tmp_list[a] && check_file_exists (tmp_list[a])) - { - CL=read_constraint_list_raw_file (CL,tmp_list[a]); - compact_list (CL, 0, CL->ne, (strm (weight_mode, "cons")?"cons":"default")); - } - ns--; - } - fprintf ( local_stderr, "\n\n\tAll Methods Retrieved\n"); - - if (tmp_list[n_list] && check_file_exists (tmp_list[n_list])) - { - CL=read_constraint_list_raw_file (CL,tmp_list[n_list]); - compact_list (CL, 0, CL->ne, (strm (weight_mode, "cons")?"cons":"default")); - } - - CL->local_stderr=local_stderr; - CL=evaluate_constraint_list_reference (CL); - vfree (proclist); - vfree (tmp_list); - return CL; - } -Constraint_list* nfork_read_n_constraint_list(char **fname,int n_list, char *in_mode,char *mem_mode,char *weight_mode, char *type,FILE *local_stderr, Constraint_list *CL, char *seq_source) - { - int a, b; - Sequence *S; - - - if (!(CL->S) && (S=read_seq_in_n_list (fname, n_list,type,seq_source))==NULL) - { - fprintf ( stderr, "\nNO SEQUENCE WAS SPECIFIED[FATAL]\n"); - myexit(EXIT_FAILURE); - } - else if (CL->S==NULL) - { - CL->S=S; - } - - /*CHECK IF THERE IS A MATRIX AND GET RID OF OTHER METHODS*/ - for (b=0, a=0; a< n_list; a++)if (is_matrix(fname[a]) ||is_matrix(fname[a]+1) )b=a+1; - - if ( b) - { - if ( b==1); - else sprintf ( fname[0], "%s", fname[b-1]); - n_list=1; - - } - - if (!CL)CL=declare_constraint_list ( S,NULL, NULL, 0,(strm(mem_mode, "disk"))?tmpfile():NULL, NULL); - CL->local_stderr=local_stderr; - fprintf ( CL->local_stderr,"\nREAD/MAKE LIBRARIES:[%d]\n",n_list ); - - CL=read_constraint_list (CL, fname[0], in_mode, mem_mode,weight_mode); - compact_list (CL, 0, CL->ne, "default"); - for ( a=1; a< n_list; a++) - { - CL=read_constraint_list (CL, fname[a], in_mode, mem_mode,weight_mode); - if (strm (weight_mode, "cons"))compact_list (CL, 0, CL->ne, "cons"); - else - compact_list (CL, 0, CL->ne, "default"); - } - CL->local_stderr=local_stderr; - - CL=evaluate_constraint_list_reference (CL); - - return CL; - } -Constraint_list* read_constraint_list(Constraint_list *CL,char *in_fname,char *in_mode, char *mem_mode,char *weight_mode) - { - Sequence *SL=NULL, *TS=NULL; - int a; - Constraint_list *SUBCL=NULL; - static char *read_mode; - char *fname; - - fname=in_fname; - if ( !read_mode)read_mode=vcalloc ( STRING, sizeof (char)); - - if ( is_lib_list (in_fname))sprintf ( read_mode, "lib_list"); - else if ( in_mode)sprintf (read_mode, "%s", in_mode); - else if ( fname[0]=='A'){sprintf ( read_mode, "aln");fname++;} - else if ( fname[0]=='L'){sprintf ( read_mode, "lib");fname++;} - else if ( fname[0]=='M'){sprintf ( read_mode, "method");fname++;} - else if ( fname[0]=='S'){sprintf ( read_mode, "sequence");return CL;} - else if ( fname[0]=='P'){sprintf ( read_mode, "pdb") ;return CL;} - else if ( fname[0]=='R'){sprintf ( read_mode, "profile") ;return CL;} - else if ( fname[0]=='X'){sprintf ( read_mode, "matrix");++fname;} - else if ( fname[0]=='W'){sprintf ( read_mode, "structure");fname++;} - - else - { - fprintf ( stderr, "\nERROR: The descriptor %s could not be identified as a file or a method.[FATAL]\nIf it is a method file please indicate it with M%s\n", fname, fname); - myexit (EXIT_SUCCESS); - } - - fprintf (CL->local_stderr, "\n\t%s [%s]\n", fname, read_mode); - - - if ( strm (read_mode, "lib_list")) - { - int n, a; - char **l; - l=read_lib_list (fname, &n); - for ( a=0; aS, fname,weight_mode,mem_mode); - } - else if (strm(read_mode, "matrix")) - { - CL->L=NULL; - CL->extend_jit=0; - CL->M=read_matrice ( fname); - } - else if ( strm ( read_mode, "structure")) - { - if ( CL->ne>0) - { - fprintf ( stderr, "\nERROR: Wstructure must come before Mmethod or Aaln [FATAL:%s]",PROGRAM); - myexit (EXIT_FAILURE); - } - - if ( !(CL->STRUC_LIST)) - { - CL->STRUC_LIST=declare_sequence (1,1,10000); - (CL->STRUC_LIST)->nseq=0; - } - SL=CL->STRUC_LIST; - - if ( check_file_exists(fname)) - { - TS=main_read_seq ( fname); - for (a=0; anseq; a++)sprintf (SL->name[SL->nseq++], "%s", TS->name[a]); - free_sequence (TS, TS->nseq); - } - else - { - sprintf (SL->name[SL->nseq++], "%s", fname); - } - } - else if (strm (read_mode, "aln")) - { - SUBCL=aln_file2constraint_list ( fname,SUBCL,weight_mode); - } - else - { - SUBCL=read_constraint_list_file(SUBCL, fname); - } - - if (SUBCL) - { - CL=merge_constraint_list (SUBCL, CL, "default"); - free_constraint_list_full (SUBCL); - } - - return CL; - } - -#define is_seq_source(Symbol,Mode,SeqMode) (Symbol==Mode && (SeqMode==NULL || strm (SeqMode, "ANY") || (SeqMode[0]!='_' && strchr (SeqMode,Symbol)) || (SeqMode[0]=='_' && !strchr (SeqMode,Symbol)))) -Sequence * read_seq_in_n_list(char **fname, int n, char *type, char *SeqMode) - { - int nseq=0; - int a, b; - Alignment *A; - char **sequences=NULL; - char **seq_name=NULL; - Sequence *S=NULL; - Sequence *S1; - char mode; - - - - /*THE TYPE OF EACH FILE MUST BE INDICATED*/ - /*SeqMode indicates the type of file that can be used as sequence sources*/ - /* - ANY: any mode - SL: only sequences from Libraries and Sequences - _A: anything BUT sequences from A(lignments) - */ - - if ( n==0) - { - fprintf ( stderr, "\nERROR: NO IN FILE [FATAL:%s]\n", PROGRAM); - myexit (EXIT_FAILURE); - } - else - { - for ( a=0; a< n ; a++) - { - static char *buf; - char *lname; - if (buf)vfree (buf); - - buf=name2type_name(fname[a]);mode=buf[0];lname=buf+1; - - if (is_seq_source ('A', mode, SeqMode)) - { - - - A=main_read_aln (lname,NULL); - S1=aln2seq(A); - S1=seq2unique_name_seq (S1); - if ((S=merge_seq ( S1, S))==NULL){fprintf ( stderr, "\nERROR: Sequence Error in %s [FATAL:%s]\n",lname, PROGRAM); myexit(EXIT_FAILURE);} - free_aln (A); - free_sequence (S1, S1->nseq); - } - else if ( is_seq_source ('R', mode, SeqMode)) - { - S=add_prf2seq (lname, S); - - } - else if (is_seq_source ('P', mode, SeqMode)) - { - int i; - - S1=get_pdb_sequence (lname); - if (S1==NULL) - { - add_warning ( stderr, "\nWarning: Could not use PDB: %s", lname); - } - else - { - if ((S=merge_seq ( S1, S))==NULL){fprintf ( stderr, "\nERROR: Sequence Error in %s [FATAL:%s]\n",lname, PROGRAM); myexit(EXIT_FAILURE);} - i=name_is_in_list (S1->name[0], S->name, S->nseq, 100); - (S->T[i])->P=fill_P_template (S->name[i], lname, S); - } - free_sequence (S1, S1->nseq); - } - else if ( mode=='M'); - else if ( mode=='X'); - else if ( mode=='W'); - - else if (is_seq_source ('S', mode, SeqMode)) - { - /*1 Try with my routines (read t_coffee and MSF)*/ - if ( (A=main_read_aln ( lname, NULL))!=NULL) - { - - S1=aln2seq(A); - free_aln(A); - } - else - { - S1=main_read_seq (lname); - } - - for ( b=0; b< S1->nseq; b++)ungap(S1->seq[b]); - S1=seq2unique_name_seq (S1); - - - if ((S=merge_seq ( S1, S))==NULL){fprintf ( stderr, "\nSequence Error in %s [FATAL:%s]\n",lname,PROGRAM); myexit(EXIT_FAILURE);} - - free_sequence (S1,S1->nseq); - - } - else if (is_seq_source ('L', mode, SeqMode)) - { - - read_seq_in_list (lname,&nseq,&sequences,&seq_name); - - S1=fill_sequence_struc ( nseq, sequences, seq_name); - - for ( b=0; b< S1->nseq; b++)sprintf ( S1->file[b], "%s", lname); - nseq=0;free_char (sequences, -1); free_char ( seq_name, -1); - sequences=NULL; - seq_name=NULL; - S1=seq2unique_name_seq (S1); - - if ((S=merge_seq( S1, S))==NULL){fprintf ( stderr, "\nSequence Error in %s [FATAL:%s]\n",lname,PROGRAM); myexit(EXIT_FAILURE);} - free_sequence(S1, S1->nseq); - } - - else if ( !strchr ( "ALSMXPRWG", mode)) - { - fprintf ( stderr, "\nERROR: %s is neither a file nor a method [FATAL:%s]\n", lname, PROGRAM); - crash (""); - - } - } - - S=remove_empty_sequence (S); - - - if ( type && type[0] )sprintf ( S->type, "%s", type); - else S=get_sequence_type (S); - - if ( strm (S->type, "PROTEIN_DNA")) - { - for ( a=0; a< S->nseq; a++) - { - if (strm ( get_string_type ( S->seq[a]), "DNA") ||strm ( get_string_type ( S->seq[a]), "RNA") ); - else if ( strm ( get_string_type ( S->seq[a]), "PROTEIN")) - { - S->seq[a]=thread_aa_seq_on_dna_seq (S->seq[a]); - S->len[a]=strlen (S->seq[a]); - S->max_len=MAX(S->max_len, S->len[a]); - } - } - } - - - - return S; - } - - - - return NULL; - } - -int read_cpu_in_list ( char *fname) - { - FILE *fp; - int c; - int cpu=0; - - fp=vfopen ( fname, "r"); - while ( (c=fgetc(fp))!='#'); - while ( (c=fgetc(fp))!='C' && c!=EOF); - if ( c=='C')fscanf( fp, "PU %d\n", &cpu); - vfclose ( fp); - return cpu; - } -char * expand_constraint_list_file ( char *file) -{ - char *new_file; - FILE *IN, *OUT; - int a, b, c, n; - char **list; - static char *buf; - - if ( !grep_function ( "'+BLOCK+'", file))return file; - - new_file=vtmpnam (NULL); - IN=vfopen ( file,"r"); - OUT=vfopen (new_file, "w"); - - while ( (c=fgetc (IN))!=EOF) - { - ungetc (c, IN); - buf=vfgets (buf, IN); - if ( !strstr (buf, "+BLOCK+")) - fprintf (OUT, "%s", buf); - else - { - list=string2list (buf); - n=atoi (list[2]); - - for (a=0; a< n; a++) - { - fprintf ( OUT, "%5d %5d ",atoi(list[3])+a, atoi(list[4])+a); - for (b=5; bS; - - fprintf ( stderr, "\n"); - for (a=0; a< S->nseq-1; a++) - { - fprintf ( stderr, "[%d-%d]", a, CL->ne); - for ( b=a+1; b< S->nseq; b++) - { - l1=MIN(S->len[a], S->len[b]); - l1=MIN(10, l1); - for (c=0; cne,0,a); - vwrite_clist( CL,CL->ne,1,b); - vwrite_clist( CL,CL->ne,2,c+1); - vwrite_clist( CL,CL->ne,3,c+1); - vwrite_clist( CL,CL->ne,4,100); - CL->ne++; - } - } - } - return CL; -} -Constraint_list * read_constraint_list_file(Constraint_list *CL, char *fname) - { - int a, c,e,n,z; - int seq_len, sn; - int s1, s2; - FILE *fp; - static char *name; - char *sequence; - static char *mat; - static char *dp_mode; - int max_nseq=0; - static int *sn_list; - static int line=2; - int list_nseq; - static CLIST_TYPE *entry; - Sequence *S; - Sequence *small_S; - int seq_1_to_n=0; - Alignment *B=NULL; - char *buf; - int lline; - char *stripped_file1; - char *stripped_file; - - - - - - stripped_file1=strip_file_from_comments ("!", fname); - stripped_file=expand_constraint_list_file (stripped_file1); - small_S=read_seq_in_n_list (&fname, 1,NULL, NULL); - - if ( !CL) - { - CL=declare_constraint_list ( small_S,NULL, NULL, 0,NULL, NULL); - CL->S=small_S; - } - - small_S=read_seq_in_n_list (&fname, 1, (CL->S)->type, NULL); - - B=seq2aln (small_S, NULL, 1); - B=fix_aln_seq ( B, (CL->S)); - - if ( CL->S!=small_S)free_sequence (small_S, B->nseq); - - lline=measure_longest_line_in_file (fname)+1; - - if ( !mat) mat=vcalloc (STRING, sizeof (char)); - if ( !dp_mode) dp_mode=vcalloc (STRING, sizeof (char)); - fp=vfopen (fname, "r"); - while((c=fgetc(fp))!='#')if ( c=='\n')max_nseq++; - vfclose (fp); - - buf=vcalloc (lline, sizeof (char)); - sequence=vcalloc (lline, sizeof (char)); - if ( !name)name=vcalloc ( 100, sizeof (char)); - if ( !entry)entry=vcalloc ( CL->entry_len, CL->el_size); - if ( !sn_list)sn_list=vcalloc (max_nseq, sizeof (int)); - else - { - sn_list=vrealloc (sn_list, max_nseq*sizeof (int)); - } - S=CL->S; - - seq_1_to_n=((fp=find_token_in_file (fname, NULL, "SEQ_1_TO_N"))!=NULL); - vfclose (fp); - if ( sn_list==NULL)sn_list=vcalloc (max_nseq, sizeof (int)); - - /*Read Constraint list*/ - fp=vfopen(stripped_file,"r"); - fscanf ( fp, "%d\n", &list_nseq); - for ( a=0; aname, S->nseq, 100))==-1){continue;} - else - { - sn_list[a]=sn; - } - } - - while ((c=fgetc(fp))!=EOF) - { - ungetc(c, fp); - if ( c=='#') - { - fscanf ( fp, "#%d %d\n", &s1, &s2);line++; - /*Check If the sequence numbering is legal*/ - if ( seq_1_to_n){s1--; s2--;} - - if (s1<0 || s2 <0) - { - fprintf (stderr, "ERROR: Wrong Sequence Numbering in %s [FATAL:%s]\n",fname, PROGRAM); - myexit (EXIT_FAILURE); - } - - - - s1=sn_list[s1]; - s2=sn_list[s2]; - - while (isdigit((c=fgetc(fp)))) - { - - for ( z=0; z< CL->entry_len; z++)entry[z]=0; - ungetc(c, fp); - n=0; - entry[n++]=s1; - entry[n++]=s2; - while ( (c=fgetc(fp))!='\n') - { - - if ( isspace (c)); - else - { - ungetc(c, fp); - fscanf ( fp, "%d", &entry[n]); - n++; - } - - if ( n>CL->entry_len) - { - add_warning ( stderr, "\nWARNING:PARSING ERROR #1 (Too many Fields) IN %s AT LINE %d: C=%c n=%d\n", fname,line, c,n); - for ( e=2; e0 && n<3) - { - add_warning ( stderr, "\nWARNING:PARSING ERROR #2 IN %s (Not enough Fields) AT LINE %d: C=%c\n", fname,line-1, c); - for ( e=2; eseq_cache)?B->seq_cache[entry[SEQ1]][entry[R1]]:entry[R1]; - entry[R2]=(B->seq_cache)?B->seq_cache[entry[SEQ2]][entry[R2]]:entry[R2]; - - if ( entry[R1] && entry[R2]) - { - if ( entry[R1]<=0 || entry[R1]>(CL->S)->len[s1]) - { - fprintf ( stderr, "\nERROR: Seq1=%d (len=%d, name=%s), Seq2=%d (len=%d, name=%s), Res1 %d, Res2 %d\n", entry[SEQ1]+1,(CL->S)->len[s1],(CL->S)->name[s1], entry[SEQ2]+1,(CL->S)->len[s2],(CL->S)->name[s2],entry[R1], entry[R2]); - fprintf ( stderr, "\nERROR: Library %s, line %d, Field 1: Bad residue numbering (%d)[FATAL:%s]\n", fname, line-1,entry[R1], PROGRAM); - myexit (EXIT_FAILURE); - } - else if (entry[R2]<=0 || entry[R2]>(CL->S)->len[s2]) - { - fprintf ( stderr, "\nERROR: Seq1=%d (len=%d, name=%s), Seq2=%d (len=%d, name=%s), Res1 %d, Res2 %d\n", entry[SEQ1]+1,(CL->S)->len[s1],(CL->S)->name[s1], entry[SEQ2]+1,(CL->S)->len[s2],(CL->S)->name[s2],entry[R1], entry[R2]); - - fprintf ( stderr, "\nERROR: Seq1: %d, Seq2 %d, Res1 %d, Res2 %d\n", entry[SEQ1], entry[SEQ2], entry[R1], entry[R2]); - fprintf ( stderr, "\nERROR: Library %s, line %d, Field 2: Bad residue numbering (%d)[FATAL:%s]\n", fname, line-1, entry[R2],PROGRAM); - myexit (EXIT_FAILURE); - } - fscanf ( fp, "\n"); - if ( (entry[SEQ1]>entry[SEQ2])|| (entry[SEQ1]==entry[SEQ2] && entry[R1]>entry[R2])) - { - SWAP(entry[SEQ1],entry[SEQ2]); - SWAP(entry[R1], entry[R2]); - } - - for ( z=0; z< CL->entry_len; z++)vwrite_clist( CL,CL->ne, z, entry[z]); - - CL->ne++; - } - } - ungetc ( c, fp); - - } - else if ( c=='!' || c=='C' || c=='\n' || c=='\r'){while ((c=fgetc(fp))!='\n' && c!=EOF && c!='\r');} - else - { - fprintf ( stderr, "\n\n PARSING ERROR 3 IN %s AT LINE %d: [%c] \n[read_constraint_list_file]", fname,line,c); - while ((c=fgetc(fp))!='\n' && c!=EOF)fprintf ( stderr, "%c", c); - fprintf ( stderr, "\n"); - printf_system ( "cp %s faulty_library.tc_lib", fname); - myexit (EXIT_FAILURE); - } - if ( c==EOF)ungetc(c, fp); - } - - free_aln (B); - vfree(buf); - vfree(sequence); - vfclose (fp); - remove(stripped_file); - return CL; - } - -Constraint_list * read_constraint_list_raw_file(Constraint_list *CL, char *fname) -{ - FILE *fp; - int n=0, v; - static int *entry; - - if ( !entry)entry=vcalloc (CL->entry_len, sizeof (int)); - fp=vfopen (fname, "r"); - - while (fscanf (fp, "%d ", &v)==1) - { - if (n==CL->entry_len) - { - add_entry2list (entry, CL); - n=0; - } - entry[n++]=v; - } - if (n==CL->entry_len) - { - add_entry2list (entry, CL); - n=0; - } - vfclose (fp); - return CL; -} - -Constraint_list * fast_read_constraint_list_file(Constraint_list *CL, char *in_fname) - { - Sequence *NS; - int **index; - int *list=NULL; - int c; - FILE *fp; - CLIST_TYPE *entry=NULL; - char *buf=NULL, *buf2; - char *fname; - int i; - - - - fname=expand_constraint_list_file (in_fname); - - if (!CL) - { - return read_constraint_list_file (CL,fname); - } - - entry=vcalloc (sizeof (int), CL->entry_len); - NS=read_seq_in_n_list (&fname, 1,NULL, NULL); - index=index_seq_name(NS,CL->S); - - - /*Read Constraint list*/ - fp=vfopen(fname,"r"); - i=0; - while (i<=NS->nseq) - { - buf=vfgets ( buf, fp); - if (buf[0]!='!')i++; - } - - while ( (c=fgetc (fp))!='#' && c!=EOF); - if (c==EOF) - { - vfclose (fp); - vfree (list); - free_sequence (NS,-1); - vfree (entry); - vfree (buf); - add_warning (stderr, "Warning: incomplete library [%s]",PROGRAM); - return CL; - } - ungetc (c, fp); - - - - while ((buf2=vfgets ( buf, fp))!=NULL) - { - if (buf2[0]=='!')continue; - buf=buf2; - - list=string2num_list2 (buf, " #\n"); - - if (buf[0]=='#') - { - sscanf ( buf, "#%d %d", &entry[SEQ1], &entry[SEQ2]); - entry[SEQ1]=index[entry[SEQ1]-1][0]; - entry[SEQ2]=index[entry[SEQ2]-1][0]; - - } - else - { - sscanf (buf, "%d %d %d %d %d", &entry[R1], &entry[R2], &entry[WE], &entry[CONS], &entry[MISC]); - - - - - - - if ( (entry[SEQ1]>entry[SEQ2])|| (entry[SEQ1]==entry[SEQ2] && entry[R1]>entry[R2])) - { - SWAP(entry[SEQ1],entry[SEQ2]);SWAP(entry[R1], entry[R2]); - } - add_entry2list (entry, CL); - } - } - vfclose (fp); - - - - vfree (list); - free_sequence (NS,-1); - vfree (entry); - vfree (buf); - - return CL; - } - -int read_seq_in_list ( char *fname, int *nseq, char ***sequences, char ***seq_name) - { - int a; - int seq_len, sn; - - FILE *fp; - char name[1000]; - char *sequence; - static int max_nseq; - static int *sn_list; - int list_nseq; - int lline; - - - - fp=vfopen (fname, "r"); - fp=skip_commentary_line_in_file ('!', fp); - fscanf (fp, "%d\n", &max_nseq); - for ( lline=0,a=0; aS)->nseq; a++) - { - for ( b=a; b<(CL->S)->nseq; b++) - { - - if ( a==b && !CL->do_self)continue; - fp=save_extended_constraint_list_pair(CL, mode, (CL->S)->name[a], (CL->S)->name[b], fp); - } - } - fprintf (fp, "! SEQ_1_TO_N\n"); - return fp; -} - - -FILE * save_extended_constraint_list_pair ( Constraint_list *CL, char *mode, char* seq1, char * seq2,FILE *fp) - { - int a, b, t; - int s1, s2, score; - char *p; - - - if ((p=strstr (mode, "THR"))!=NULL)t=atoi(p+3); - else t=0; - - s1=name_is_in_list (seq1,(CL->S)->name, (CL->S)->nseq, 100); - s2=name_is_in_list (seq2,(CL->S)->name, (CL->S)->nseq, 100); - - if ( s1==-1) - { - fprintf ( stderr, "Output Error: %s is not a sequence [FATAL:%s]\n", seq1, PROGRAM); - crash (""); - } - if ( s2==-1) - { - fprintf ( stderr, "Output Error: %s is not a sequence [FATAL:%s]\n", seq2, PROGRAM); - crash (""); - } - - if ( strstr (mode, "pair"))fprintf (fp, "# 1 2\n"); - else if ( strstr (mode, "lib"))fprintf (fp, "# %d %d\n", s1+1, s2+1); - - for ( a=0; a<(CL->S)->len[s1]; a++) - { - for ( b=0; b<(CL->S)->len[s2]; b++) - { - if ( a>=b && s1==s2)continue; - if ( strstr (mode, "pc"))score=residue_pair_extended_list_pc (CL, s1,a+1, s2, b+1); - else if ( strstr (mode, "raw"))score=residue_pair_extended_list_raw (CL, s1,a+1, s2, b+1); - else - score=CL->evaluate_residue_pair (CL, s1,a+1, s2, b+1); - - if (score<=t) continue; - fprintf (fp, "%5d %5d %5d \n", a+1, b+1, score); - - } - } - return fp; - } - - -/*********************************************************************/ -/* */ -/* LIST OUTPUT */ -/* */ -/* */ -/*********************************************************************/ -#ifdef MMMMMMMM -FILE *save_extended_constraint_list ( Constraint_list *CL,Sequence *S, char *fname) -{ - int a, b, c, d; - int *tr, *ns; - int **pos0, **l_s; - int epsilon=0; - Alignment *A; - FILE *fp; - - fp=vfopen (fname, "w"); - fp=save_sub_list_header(fp, S->nseq, S->name, CL); - - tr=vcalloc (S->nseq+1, sizeof (int)); - for ( b=0,a=0; a< S->nseq; a++) - { - int i; - if ( (i=name_is_in_list(S->name[a],(CL->S)->name,(CL->S)->nseq, 100))==-1) - { - printf_exit (EXIT_FAILURE, stderr, "\nERROR: Sequence %s is not part of the sequence dataset [FATAL:%s]", S->name[a], PROGRAM); - - } - else - { - tr[a]=i; - } - } - - A=declare_aln (S); - pos0=vcalloc ( S->nseq, sizeof (int*)); - for (a=0; anseq; a++) - { - int l; - l=strlen (S->seq[a]); - A->seq_al[a]=S->seq[a]; - pos0[a]=vcalloc (l+1, sizeof (int)); - for (b=0; bnseq-1; a++) - for ( b=a+1; bnseq; b++) - { - int pos_i, pos_j, s; - l_s[0]=tr[a];l_s[1]=tr[b]; - for ( pos_i=0; pos_i< S->len[a]; pos_i++) - for (pos_j=0; pos_jlen[b]; pos_j++) - { - s=(CL->get_dp_cost) ( A, pos0, ns[0], l_s[0], i-1, pos0, ns[1], l_s[1],pos_j-1, CL); - if (s>epsilon)fprintf (fp, "%d %d %d", i, j, s); - } - } - return fp; -} -#endif - -FILE * save_constraint_list ( Constraint_list *CL,int start, int len, char *fname, FILE *fp,char *mode, Sequence *S) - { - int a, b; - static int* translation; - - - - - if ( fp==NULL) - { - if ( translation!=NULL)vfree(translation); - translation=vcalloc ( (CL->S)->nseq+1, sizeof (int)); - for ( b=0,a=0; a< (CL->S)->nseq; a++) - { - if ( name_is_in_list((CL->S)->name[a],S->name,S->nseq, 100)==-1) - { - (CL->S)->len[a]=-1; - translation [a]=-1; - } - else - { - translation[a]=b++; - } - } - - } - if (strm2(mode, "lib","ascii")) - { - if ( fp==NULL)fp=vfopen ( fname, "w"); - fp=save_list_header (fp,CL); - fp=save_constraint_list_ascii(fp, CL, 0, CL->ne, translation); - } - else if (strm(mode, "binary")) - { - if ( fp==NULL)fp=vfopen ( fname, "wb"); - fp=save_constraint_list_bin (fp, CL, 0, CL->ne, translation); - } - else - { - fprintf (stderr,"\nUNKOWN MODE FOR OUTPUT: %s [FATAL]\n",mode); - crash (""); - } - return fp; - } - -FILE * save_sub_list_header ( FILE *OUT, int n, char **name, Constraint_list *CL) - { - int a,b; - int nseq=0; - - - - for ( a=0; a<(CL->S)->nseq; a++) - for ( b=0; bS)->name[a])) - nseq+=((CL->S)->len[a]!=-1); - - fprintf ( OUT, "! TC_LIB_FORMAT_01\n%d\n",nseq); - for ( a=0; aS)->nseq; b++) - if (strm (name[a] , (CL->S)->name[b])) - if ((CL->S)->len[b]!=-1) fprintf ( OUT, "%s %d %s\n", (CL->S)->name[b], (CL->S)->len[b],(CL->S)->seq[b]); - - return OUT; - } -FILE * save_list_header ( FILE *OUT,Constraint_list *CL) - { - int a; - int nseq=0; - - for ( a=0; a<(CL->S)->nseq; a++)nseq+=((CL->S)->len[a]!=-1); - - - fprintf ( OUT, "! TC_LIB_FORMAT_01\n%d\n",nseq); - for ( a=0; a<(CL->S)->nseq; a++) - if ((CL->S)->len[a]!=-1) - { - fprintf ( OUT, "%s %d %s\n", (CL->S)->name[a], (CL->S)->len[a],(CL->S)->seq[a]); - - } - return OUT; - } - -FILE *save_list_footer (FILE *OUT,Constraint_list *CL) - { - if ( CL->cpu)fprintf (OUT, "! CPU %d\n",get_time()); - fprintf (OUT, "! SEQ_1_TO_N\n"); - return OUT; - } -int constraint_list2raw_file ( Constraint_list *CL, char *fname, char *mode) -{ - FILE *fp; - if ( !CL || !CL->ne || !fname){return 0;} - - fp=vfopen (fname,mode); - fp=save_raw_constraint_list (fp, CL, 0, CL->ne, NULL); - vfclose (fp); - return CL->ne; - } -FILE * save_raw_constraint_list ( FILE *fp,Constraint_list *CL, int start,int len, int *translation) -{ - int a, b; - for ( b=0; bentry_len*CL->ne; b++) - { - fprintf ( fp, "%d ", CL->L[b]); - } - - fprintf (fp, "\n"); - return fp; -} -FILE * save_constraint_list_ascii ( FILE *OUT,Constraint_list *CL, int start,int len, int *translation) - { - int a, b, s1, s2; - CLIST_TYPE x1, x2; - - if (len==start && CL->cpu!=-1) - { - fprintf (OUT, "! CPU %d\n",get_time()); - return OUT; - } - else - { - - s1=translation[vread_clist(CL,start,SEQ1)]; - s2=translation[vread_clist(CL,start,SEQ2)]; - - - if ( s1!=-1 && s2!=-1)fprintf ( OUT, "#%d %d\n", s1+1, s2+1); - for ( a=start; a<(len+start); a++) - { - x1=translation[vread_clist(CL,a,SEQ1)]; - x2=translation[vread_clist(CL,a,SEQ2)]; - if ( x1==-1 || x2==-1); - else - { - if ( x1!=s1 || x2!=s2) - { - s1=x1; - s2=x2; - fprintf ( OUT, "#%d %d\n", s1+1, s2+1); - } - for ( b=2; bentry_len; b++) fprintf ( OUT, "%5d ", vread_clist(CL, a, b)); - fprintf (OUT, "\n"); - } - } - } - return save_list_footer (OUT, CL); - - } -FILE * save_constraint_list_bin ( FILE *OUT,Constraint_list *CL, int start,int len, int *translation) - { - int a, b; - - CLIST_TYPE x1, x2; - - - if (len==start && CL->cpu!=-1) - { - - return OUT; - } - else - { - for ( a=start; a<(len+start); a++) - { - x1=translation[vread_clist(CL,a,SEQ1)]; - x2=translation[vread_clist(CL,a,SEQ2)]; - if ( x1==-1 || x2==-1); - else - { - for ( b=2; bentry_len; b++) - { - x1=vread_clist(CL,a,b); - fwrite (&x1, CL->el_size, 1, OUT); - } - } - } - } - return OUT; - } - -/*********************************************************************/ -/* */ -/* LIST CONVERTION */ -/* */ -/* */ -/*********************************************************************/ -Constraint_list * filter_constraint_list (Constraint_list *CL, int field, int T) -{ - int a,b,c; - if (!CL || !CL->L)return CL; - for (a=0, b=0; ane; a++) - { - if (CL->L[a*CL->entry_len+field]entry_len; c++)CL->L[b*CL->entry_len+c]=CL->L[a*CL->entry_len+c]; - b++; - } - } - CL->ne=b; - CL->residue_indexed=0; - return CL; -} -int constraint_list_is_connected ( Constraint_list *CL) -{ - int a, b, c, s1, s2; - int *connexions; - int rv =1; - Sequence*S; - - if (!CL->ne) return 1; - - S=CL->S; - - connexions=vcalloc ((CL->S)->nseq+1, sizeof (int)); - for ( a=0; a< CL->ne; a++) - { - s1=CL->L[a*CL->entry_len+SEQ1]; - s2=CL->L[a*CL->entry_len+SEQ2]; - connexions[s1]++; - connexions[s2]++; - } - for (a=0; anseq; a++) - { - if (!connexions[a]) - { - add_warning ( stderr, "ERROR: Sequence %s is not connected\n", (CL->S)->name[a]); - rv=0; - } - } - return rv; -} -Constraint_list * nfork_relax_constraint_list (Constraint_list *CL); -Constraint_list * fork_relax_constraint_list (Constraint_list *CL); -Constraint_list * relax_constraint_list (Constraint_list *CL) -{ - if ( get_nproc()==1)return nfork_relax_constraint_list (CL); - else if (strstr ( CL->multi_thread, "relax"))return fork_relax_constraint_list (CL); - else return nfork_relax_constraint_list (CL); -} - -Constraint_list * fork_relax_constraint_list (Constraint_list *CL) -{ - int a, s1, s2, r1, r2,n; - int score; - int thr; - int chunk, npid, job,pid; - FILE *fp; - char **pid_tmpfile; - int * pid_list; - int in; - - in=CL->ne; - if (!CL || !CL->L)return CL; - - fprintf ( CL->local_stderr, "\nLibrary Relaxation: Multi_proc [%d] ", get_nproc()); - - if ((chunk=CL->ne/get_nproc())==0)chunk=get_nproc(); - - - pid_tmpfile=vcalloc ((CL->ne/chunk)+1, sizeof (char*)); - pid_list =vcalloc (MAX_N_PID, sizeof (int *)); - - for (npid=0,job=0; jobne; job+=chunk) - { - pid_tmpfile[npid]=vtmpnam(NULL); - pid=vfork (); - if (pid==0) - { - int s,e; - - initiate_vtmpnam (NULL); - s=job; - e=MIN((s+chunk),CL->ne); - fp=vfopen (pid_tmpfile[npid], "w"); - for (a=s; alocal_stderr,a,chunk,1, "Submit Job"); - s1=CL->L[a*CL->entry_len+SEQ1]; - s2=CL->L[a*CL->entry_len+SEQ2]; - - r1=CL->L[a*CL->entry_len+R1]; - r2=CL->L[a*CL->entry_len+R2]; - score=residue_pair_extended_list_pc (CL,s1, r1,s2, r2); - CL->L[a*CL->entry_len+WE]=score; - fprintf (fp, "%d %d ", a, score); - } - vfclose (fp); - exit (EXIT_SUCCESS); - } - else - { - pid_list[pid]=npid; - set_pid (pid); - npid++; - } - } - - for (a=0; aL[i*CL->entry_len+WE]=score;j++;} - vfclose (fp); - remove(pid_tmpfile[pid_list[pid]]); - } - - vfree (pid_list); - vfree (pid_tmpfile); - - thr=10; - - for (n=0,a=0; a< CL->ne; a++) - { - score=CL->L[a*CL->entry_len+WE]; - - if (score<=thr); - else - { - CL->L[n*CL->entry_len+SEQ1]=CL->L[a*CL->entry_len+SEQ1]; - CL->L[n*CL->entry_len+SEQ2]=CL->L[a*CL->entry_len+SEQ2]; - CL->L[n*CL->entry_len+R1]=CL->L[a*CL->entry_len+R1]; - CL->L[n*CL->entry_len+R2]=CL->L[a*CL->entry_len+R2]; - CL->L[n*CL->entry_len+WE]=score; - n++; - } - - } - - CL->L=vrealloc (CL->L, n*CL->entry_len*sizeof (int)); - CL->ne=n; - - fprintf ( CL->local_stderr, "\nTotal Relaxation: [%d]--->[%d] Entries\n",in, CL->ne); - CL->residue_indexed=0; - return CL; - -} - -Constraint_list * nfork_relax_constraint_list (Constraint_list *CL) -{ - int a, s1, s2, r1, r2; - int max, score, n; - int thr; - - if (!CL || !CL->L)return CL; - - fprintf ( CL->local_stderr, "\nLibrary Relaxation:[%d] ", CL->ne); - for (max=0,a=0; ane; a++) - { - - s1=CL->L[a*CL->entry_len+SEQ1]; - s2=CL->L[a*CL->entry_len+SEQ2]; - - r1=CL->L[a*CL->entry_len+R1]; - r2=CL->L[a*CL->entry_len+R2]; - - score=residue_pair_extended_list_pc (CL,s1, r1,s2, r2); - //HERE ("%d %d", CL->L[a*CL->entry_len+WE],score); - CL->L[a*CL->entry_len+WE]=score; - - } - - thr=10; - - for (n=0,a=0; a< CL->ne; a++) - { - score=CL->L[a*CL->entry_len+WE]; - - if (score<=thr); - else - { - CL->L[n*CL->entry_len+SEQ1]=CL->L[a*CL->entry_len+SEQ1]; - CL->L[n*CL->entry_len+SEQ2]=CL->L[a*CL->entry_len+SEQ2]; - CL->L[n*CL->entry_len+R1]=CL->L[a*CL->entry_len+R1]; - CL->L[n*CL->entry_len+R2]=CL->L[a*CL->entry_len+R2]; - CL->L[n*CL->entry_len+WE]=score; - n++; - } - - } - CL->L=vrealloc (CL->L, n*CL->entry_len*sizeof (int)); - CL->ne=n; - - fprintf ( CL->local_stderr, "--->[%d]\n", CL->ne); - CL->residue_indexed=0; - return CL; - -} - -Constraint_list * shrink_constraint_list (Constraint_list *CL) -{ - int a, b, n, tot; - Constraint_list *CL2; - Alignment *A, *B; - int *ns, **ls; - - ns=vcalloc (2, sizeof (int)); - ls=declare_int ((CL->S)->nseq, 2); - - A=seq2aln (CL->S,NULL, RM_GAP); - B=seq2aln (CL->S,NULL, RM_GAP); - CL2=declare_constraint_list (CL->S,NULL, NULL, 0,NULL, NULL); - n=(CL->S)->nseq; - tot=((n*n)-n)/2; - fprintf ( CL->local_stderr, "\n\n\tSHRINK Constraint List [%d element(s)]", CL->ne); - for (n=0,a=0; a<(CL->S)->nseq-1; a++) - for (b=a+1; b<(CL->S)->nseq; b++, n++) - { - output_completion (CL->local_stderr,n, tot, 100, "slow_pair"); - ns[0]=ns[1]=1; - ls[0][0]=a; - ls[1][0]=b; - ungap (A->seq_al[a]); - ungap (A->seq_al[b]); - linked_pair_wise (A, ns, ls, CL); - B->seq_al[0]=A->seq_al[a]; - B->seq_al[1]=A->seq_al[b]; - sprintf (B->name[0], "%s", A->name[a]); - sprintf (B->name[1], "%s", A->name[b]); - B->nseq=2; - B->len_aln=strlen (B->seq_al[0]); - CL2=aln2constraint_list (B, CL2, "sim"); - } - vfree(CL->L); - CL->L=CL2->L; - CL->ne=CL2->ne; - return CL; -} - - -Constraint_list *aln_file2constraint_list (char *alname, Constraint_list *CL,char *weight_mode) - { - Alignment *A; - A=main_read_aln ( alname, NULL); - - CL=aln2constraint_list (A, CL, weight_mode); - free_aln (A); - return CL; - } - -int *seqpair2weight (int s1, int s2, Alignment *A,Constraint_list *CL, char *weight_mode, int *weight) -{ - int *col; - int a,c, ref_weight; - - - if ( !weight)weight=vcalloc (MAX(2,A->len_aln), sizeof (int)); - - weight[0]=FORBIDEN; - if ( weight_mode==NULL || strcmp (weight_mode, "no")==0 || is_number (weight_mode)) - { - - if (is_number (weight_mode))ref_weight=atoi(weight_mode); - else ref_weight=1; - weight[1]=ref_weight; - - } - else if ( strstr ( weight_mode, "cons")) - { - ref_weight=weight[1]=1000; - } - else if ( strstr ( weight_mode, "OW")) - { - int ow; - sscanf ( weight_mode, "OW%d", &ow); - weight[1]=ow*get_seq_sim ( A->seq_al[s1], A->seq_al[s2], "-", NULL); - - } - else if ( strncmp ( weight_mode, "len",3)==0) - { - weight[1]=A->len_aln; - } - else if ( strnm ( weight_mode, "sim", 3) || strm (weight_mode, "default")) - { - - ref_weight=get_seq_sim ( A->seq_al[s1], A->seq_al[s2], "-", (strm (weight_mode, "default"))?NULL:(weight_mode+3)); - weight[1]=ref_weight; - - } - else if ( strnm ( weight_mode, "subset", 6)) - { - ref_weight=get_seq_sim ( A->seq_al[s1], A->seq_al[s2], "-",NULL); - weight[1]=ref_weight; - } - - else if ( strncmp (weight_mode, "winsim", 6)==0) - { - weight=get_seq_winsim ( A->seq_al[s1], A->seq_al[s2], "-", weight_mode+6, weight); - } - else if ( strncmp ( weight_mode, "cdna", 4)==0) - { - ref_weight=get_seq_sim ( A->seq_al[s1], A->seq_al[s2], "-", weight_mode+4); - col=vcalloc ( A->len_aln+1, sizeof (int)); - if (A->cdna_cache) - for ( a=0; a<=A->len_aln; a++)col[a]=A->cdna_cache[0][a]; - else - for ( a=0; a<=A->len_aln; a++)col[a]=1; - for ( c=0; c< A->len_aln; c++)weight[c]=ref_weight*col[c]; - vfree (col); - } - else if ( strm ( weight_mode, "pdb")) - { - if ( !(A->CL) || !(A->CL)->T) - { - fprintf ( stderr, "\nCould not find the PDB structure: [FATAL:%s]\n", PROGRAM); - crash (""); - } - } - else if ( strm (weight_mode, "overaln")) - { - ref_weight=get_seq_sim ( A->seq_al[s1], A->seq_al[s2], "-","idmat"); - //weight=pw_aln2clean_aln_weight (A->seq_al[s1], A->seq_al[s2], ref_weight,0, 0, 0, 0, NULL); - printf_exit (EXIT_FAILURE, stderr,"ERROR: mode overaln not currently supported [FATAL:%s]", PROGRAM); - } - else - { - fprintf ( stderr, "\nERROR: Weight Mode %s is unknown [FATAL:%s]", weight_mode, PROGRAM); - crash (""); - } - return weight; -} -Constraint_list *aln2constraint_list (Alignment *A, Constraint_list *CL,char *in_weight_mode) - { - Constraint_list *CLB=NULL; - int a, b, c,nres1, nres2; - int *weight=NULL; - int s1, s2; - int fixed_nres1, fixed_nres2; - int do_pdb=0; - int pdb_weight=0; - int set_misc; - char*alp=NULL; - char *p, *s; - char weight_mode [100]; - - - sprintf ( weight_mode , "%s", (!in_weight_mode || strm (in_weight_mode, "default"))?"sim":in_weight_mode); - - if ( !A) return CL; - - if ( !CL) - { - Sequence *S; - S=aln2seq (A); - CL=declare_constraint_list (S,NULL, NULL, 0,NULL, NULL); - CL->S=S; - } - CLB=(Constraint_list *)A->CL; - - - do_pdb=(strstr ( weight_mode, "pdb"))?1:0; - if ( (p=strstr (weight_mode, "_subset_"))) - { - alp=strchr (weight_mode, '_')+1; - p[0]='\0'; - } - - - for ( a=0; anseq-1; a++) - - { - for (set_misc=0,b=a+1; b< A->nseq; b++) - { - s1=name_is_in_list (A->name[a], (CL->S)->name, (CL->S)->nseq, 100); - s2=name_is_in_list (A->name[b], (CL->S)->name, (CL->S)->nseq, 100); - - if ( s1==-1 || s2==-1) - { - if ( getenv4debug ("DEBUG_LIBRARY"))fprintf ( stderr, "\n[DEBUG_LIBRARY:aln2constraint_list]Could use a pair of constraints"); - } - else if ( s1!=-1 && s2!=-1) - { - int use_pair; - - weight=seqpair2weight (a, b, A, CL, weight_mode, weight); - - for (nres1=A->order[a][1], nres2=A->order[b][1], c=0; c< A->len_aln; c++) - { - int isgop1, isgop2; - - isgop1=is_gop(c, A->seq_al[a]); - isgop2=is_gop(c, A->seq_al[b]); - nres1+=!is_gap(A->seq_al[a][c]); - nres2+=!is_gap(A->seq_al[b][c]); - - if ( strm ( weight_mode, "pdb") && CLB) - { - - pdb_weight=MAX(0,(CLB->evaluate_residue_pair)(CLB,0, nres1,1,nres2)); - } - - use_pair=1; - use_pair=use_pair && !is_gap(A->seq_al[a][c]); - use_pair=use_pair && !is_gap(A->seq_al[b][c]); - use_pair=use_pair && A->seq_al[b][c]!=UNDEFINED_RESIDUE; - use_pair=use_pair && A->seq_al[a][c]!=UNDEFINED_RESIDUE; - use_pair=use_pair && !(do_pdb && pdb_weight==0); - use_pair=use_pair && ((weight[0]==FORBIDEN)?weight[1]:weight[c]); - - if (alp)use_pair=use_pair && is_in_set (A->seq_al[b][c], alp) && is_in_set (A->seq_al[a][c], alp); - - /*if ( !is_gap(A->seq_al[a][c]) && !is_gap(A->seq_al[b][c]) && A->seq_al[b][c]!=UNDEFINED_RESIDUE && A->seq_al[a][c]!=UNDEFINED_RESIDUE && !(do_pdb && pdb_weight==0))*/ - if (use_pair) - { - - fixed_nres1=(!A->seq_cache)?nres1:A->seq_cache[s1][nres1]; - fixed_nres2=(!A->seq_cache)?nres2:A->seq_cache[s2][nres2]; - - - if ( fixed_nres1==-1 || fixed_nres2==-1) - { - fprintf ( stderr, "\nPB: Sequence %s, Residue %d : Cache=%d",A->name[a], nres1,fixed_nres1 ); - fprintf ( stderr, "\nPB: Sequence %s, Residue %d : Cache=%d",A->name[b], nres2,fixed_nres2 ); - - myexit(EXIT_FAILURE); - } - - if ( fixed_nres1 && fixed_nres2) - { - - - - /* - This code was uncommented to make profile2seq simpler - Must check how this affects other functions - - vwrite_clist (CL,CL->ne, SEQ1, (s1ne, SEQ2, (s1ne, R1, (s1ne, R2, (s1ne, SEQ1, s1); - vwrite_clist (CL,CL->ne, SEQ2, s2); - vwrite_clist (CL,CL->ne, R1,fixed_nres1); - vwrite_clist (CL,CL->ne, R2,fixed_nres2); - - if (do_pdb) - { - - vwrite_clist (CL,CL->ne, WE,(NORM_F/MAXID)*pdb_weight ); - } - else - { - - vwrite_clist (CL,CL->ne, WE,(NORM_F/MAXID)*((weight[0]==FORBIDEN)?weight[1]:weight[c]) ); - - } - vwrite_clist (CL,CL->ne, CONS,1); - if (!set_misc) - { - vwrite_clist (CL,CL->ne, MISC,A->len_aln); - set_misc=1; - } - else - { - vwrite_clist (CL,CL->ne, MISC,0); - } - CL->ne++; - /* - if (isgop1) - { - - vwrite_clist (CL,CL->ne, SEQ1, s1); - vwrite_clist (CL,CL->ne, SEQ2, s1); - vwrite_clist (CL,CL->ne, R1,fixed_nres1); - vwrite_clist (CL,CL->ne, R2,fixed_nres1); - vwrite_clist (CL,CL->ne, WE, (weight[0]==FORBIDEN)?weight[1]:weight[c] ); - vwrite_clist (CL,CL->ne, CONS,1); - CL->ne++; - } - if (isgop2) - { - - vwrite_clist (CL,CL->ne, SEQ1, s2); - vwrite_clist (CL,CL->ne, SEQ2, s2); - vwrite_clist (CL,CL->ne, R1,fixed_nres2); - vwrite_clist (CL,CL->ne, R2,fixed_nres2); - vwrite_clist (CL,CL->ne, WE, (weight[0]==FORBIDEN)?weight[1]:weight[c] ); - vwrite_clist (CL,CL->ne, CONS,1); - CL->ne++; - } - */ - } - - } - } - } - } - } - - vfree (weight); - if (A->A) - { - return aln2constraint_list (A->A, CL, weight_mode); - } - else - return CL; - } -double **list2mat (Constraint_list *CLin,int s1,int s2, double *min, double *max) - { - double ** mat; - int a, r1, r2; - int min_def=0; - Constraint_list *CL; - static Sequence *S; - - - int row, column; - if ( S==NULL)S=declare_sequence ((CLin->S)->min_len, (CLin->S)->max_len,(CLin->S)->nseq); - sprintf ( S->name[0], "%s",(CLin->S)->name[s1]); - sprintf ( S->name[1],"%s",(CLin->S)->name[s2]); - S->nseq=2; - - row =(CLin->S)->len[s1]; - column=(CLin->S)->len[s2]; - - if ( CLin->extend_jit) - CL=extend_list(CLin,"mem",CLin->extend_clean_mode, CLin->extend_compact_mode, CLin->do_self, S); - else - CL=CLin; - - - min[0]=max[0]; - mat=declare_double ( row, column); - - for ( a=0; ane; a++) - { - r1=vread_clist(CL,a,R1)-1; - r2=vread_clist(CL,a,R2)-1; - if ( vread_clist(CL,a,SEQ1)==s1 &&vread_clist(CL,a,SEQ2)==s2) - { - mat[r1][r2]=(double)vread_clist(CL,a,WE); - if (min_def==0) - { - min_def=1; - min[0]=mat[r1][r2]; - max[0]=mat[r1][r2]; - } - else - { - min[0]=(min[0]mat[r1][r2])?max[0]:mat[r1][r2]; - } - } - else if (vread_clist(CL,a,SEQ2)==s1 &&vread_clist(CL,a,SEQ1)==s2) - { - mat[r2][r1]=(double)vread_clist(CL,a,WE); - if (min_def==0) - { - min_def=1; - min[0]=mat[r2][r1]; - max[0]=mat[r2][r1]; - } - else - { - min[0]=(min[0]mat[r2][r1])?max[0]:mat[r2][r1]; - } - } - } - return mat; - } - -Constraint_list * constraint_list2bin_file(Constraint_list *clist) - { - int a,b; - - clist->fp=tmpfile(); - for ( a=0; a< clist->ne; a++) - for ( b=0; bentry_len; b++) - { - fwrite (&clist->L[a*clist->entry_len+b],clist->el_size, 1,clist->fp); - } - return clist; - } - -FILE * bin_file2constraint_list ( Constraint_list *CL, FILE *fp, char *name) - { - int a, b, s1, s2; - CLIST_TYPE *entry; - - if ( fp==NULL)fp=vfopen ( name, "w"); - entry=vcalloc ( CL->entry_len, CL->el_size); - fprintf ( fp, "%d\n", (CL->S)->nseq); - for ( a=0; a< (CL->S)->nseq; a++)fprintf (fp, "%s %d %s\n", (CL->S)->name[a], (CL->S)->len[a], (CL->S)->seq[a]); - - - rewind ( CL->fp); - fread(entry, CL->el_size, CL->entry_len, CL->fp); - s1=entry[SEQ1]; - s2=entry[SEQ2]; - fprintf (fp, "#%d %d\n", s1, s2); - for ( b=2; b< CL->entry_len; b++)fprintf (fp, "%5d ",entry[b]); - fprintf (fp, "\n"); - for ( a=1; a< (CL->ne); a++) - { - fread(entry, CL->el_size, CL->entry_len, CL->fp); - if ( entry[SEQ1]!=s1 || entry[SEQ2]!=s2) - { - s1=entry[SEQ1]; - s2=entry[SEQ2]; - fprintf (fp, "#%d %d\n", s1, s2); - } - for ( b=2; b< CL->entry_len; b++)fprintf (fp, "%5d ",entry[b]); - fprintf (fp, "\n"); - } - fprintf (fp, "! CPU %d\n",get_time()); - - return fp; - } -int **list2residue_total_weight ( Constraint_list *CL) - { - /*Returns - tot_weight[nseq][maxlen] - where each residue is associated with the total of its weights in CL - ####IMPORTANT - - -the numbering of the residues goes from 1 to L: - -the numbering of the sequences goes from 0 to N-1: - */ - - int **tot_weight; - int s1, s2, r1, r2, w, a; - - - tot_weight=declare_int ( (CL->S)->nseq, (CL->S)->max_len+1); - for ( a=0; ane; a++) - { - r1=vread_clist(CL,a,R1)-1; - r2=vread_clist(CL,a,R2)-1; - s1=vread_clist(CL,a,SEQ1); - s2=vread_clist(CL,a,SEQ2); - w=vread_clist(CL,a,WE); - - tot_weight[s1][r1]+=w; - tot_weight[s2][r2]+=w; - } - return tot_weight; - } - -int **list2residue_total_extended_weight ( Constraint_list *CL) - { - /*Returns - tot_extended_weight[nseq][maxlen] - where each residue is associated with the total of its weights in CL - ####IMPORTANT - - -the numbering of the residues goes from 1 to L: - -the numbering of the sequences goes from 0 to N-1: - */ - - static int **tot_extended_weight; - int s1, s2, r1, r2, w; - - if (CL->residue_indexed && tot_extended_weight); - else - { - if (tot_extended_weight) free_int (tot_extended_weight, -1); - if (CL->residue_indexed==0)index_res_constraint_list (CL,WE); - - - tot_extended_weight=declare_int ( (CL->S)->nseq, (CL->S)->max_len+1); - - for ( s1=0; s1< (CL->S)->nseq-1; s1++) - for ( s2=s1+1; s2< (CL->S)->nseq; s2++) - for (r1=1; r1<=(CL->S)->len[s1]; r1++) - for (r2=1; r2<=(CL->S)->len[s2]; r2++) - { - w=(CL->evaluate_residue_pair)( CL, s1, r1, s2, r2); - tot_extended_weight[s1][r1]+=w; - tot_extended_weight[s2][r2]+=w; - } - } - return tot_extended_weight; - } -int **list2residue_partial_extended_weight ( Constraint_list *CL) - { - /*Returns - tot_extended_weight[nseq][maxlen] - where each residue is associated with the total of its weights in CL - ####IMPORTANT - - -the numbering of the residues goes from 1 to L: - -the numbering of the sequences goes from 0 to N-1: - */ - - int **tot_extended_weight; - int s1, s2, r1, r2, w1, w2, a; - - - tot_extended_weight=declare_int ( (CL->S)->nseq, (CL->S)->max_len+1); - for ( a=0; ane; a++) - { - r1=vread_clist(CL,a,R1); - r2=vread_clist(CL,a,R2); - s1=vread_clist(CL,a,SEQ1); - s2=vread_clist(CL,a,SEQ2); - w1=(CL->evaluate_residue_pair)( CL, s1, r1, s2, r2); - w2=(CL->evaluate_residue_pair)( CL, s2, r2, s1, r1); - if ( w1!=w2)fprintf ( stderr, "*"); - - tot_extended_weight[s1][r1]+=w1; - tot_extended_weight[s2][r2]+=w2; - } - return tot_extended_weight; - } - - -/*******************************************************************************************/ -/* */ -/* */ -/* clean functions */ -/* */ -/* */ -/* */ -/*******************************************************************************************/ -Constraint_list *clean ( char *clean_mode,Constraint_list *CL,int start, int len) - { - - if ( strm ( clean_mode, "shadow")) CL=clean_shadow (CL,start,len); - else if ( strm5( clean_mode, "","NO","no","No","default")); - else add_warning ( CL->local_stderr, "\nWARNING: The %s CLEANING MODE DOES NOT EXIST\n", clean_mode); - - return CL; - } - - -Constraint_list * clean_shadow ( Constraint_list *CL, int start, int len) - { - int s1, s2, r1, a, b, end; - int max, min; - - s1=vread_clist (CL, start, SEQ1); - s2=vread_clist (CL, start, SEQ2); - r1=vread_clist (CL, start, R1); - - - for ( a=start; a<(start+len);) - { - - max=min=vread_clist (CL, a, WE); - while ( ane && vread_clist (CL, a, SEQ1)==s1 && vread_clist (CL, a, SEQ2)==s2 && vread_clist (CL, a, R1)==r1) - { - max=(vread_clist (CL, a, WE)>max)?vread_clist (CL, a, WE):max; - min=(vread_clist (CL, a, WE)1) - { - for ( b=start; bne) - { - s1=vread_clist (CL, start, SEQ1); - s2=vread_clist (CL, start, SEQ2); - r1=vread_clist (CL, start, R1); - } - } - CL=sort_constraint_list_inv (CL, start, (CL->ne-start)); - CL=sort_constraint_list (CL,start,(CL->ne-start) ); - - return CL; - } -/*********************************************************************/ -/* */ -/* LIST FUNCTIONS */ -/* */ -/* */ -/*********************************************************************/ - -static Constraint_list *fast_merge_constraint_list ( Constraint_list *SL, Constraint_list *ML, char *mode); -static Constraint_list *slow_merge_constraint_list ( Constraint_list *SL, Constraint_list *ML, char *mode); - -Constraint_list *merge_constraint_list ( Constraint_list *SL, Constraint_list *ML, char *mode) -{ - - - if ( !ML) - { - return SL; - } - else if ( SL->S == ML->S) - { - return fast_merge_constraint_list (SL, ML, mode); - } - else - { - return slow_merge_constraint_list (SL, ML, mode); - } -} - -Constraint_list *fast_merge_constraint_list ( Constraint_list *SL, Constraint_list *ML, char *mode) -{ - int l; - - l=ML->ne+SL->ne; - - ML->max_L_len=l; - ML->seq_indexed=0; - ML->residue_indexed=0; - if (ML->ne==0 || !ML->L)ML->L=vcalloc (l*ML->entry_len, sizeof (int)); - else ML->L=vrealloc (ML->L, sizeof (int)*l*ML->entry_len); - - memcpy (ML->L+(ML->ne*ML->entry_len), SL->L, SL->ne*sizeof (int)*SL->entry_len); - - ML->ne+=SL->ne; - return ML; - -} - -Constraint_list *slow_merge_constraint_list ( Constraint_list *SL, Constraint_list *ML, char *mode) -{ - int a, s1, s2; - Sequence *S1, *S2; - int **name_index; - int **seq_index; - CLIST_TYPE *entry=NULL; - - if ( !ML)return SL; - - S1=SL->S;S2=ML->S; - - name_index=index_seq_name(S1,S2); - seq_index=index_seq_res (S1,S2, name_index); - - for ( a=0; a< SL->ne; a++) - { - - entry=extract_entry ( entry, a, SL); - //HERE ("BEF: %d %d %d %d", entry[SEQ1], entry[SEQ2], entry[R1], entry[R2]); - s1=entry[SEQ1]; s2=entry[SEQ2]; - - if ( S1==S2) - { - add_entry2list(entry, ML); - } - else if (name_index[s1][0]==-1 || name_index[s2][0]==-1 || !seq_index[s1] || !seq_index[s2]); - else - { - int r1, r2; - r1=seq_index[s1][entry[R1]-1]; - r2=seq_index[s2][entry[R2]-1]; - - entry[SEQ1]=name_index[s1][0]; - entry[SEQ2]=name_index[s2][0]; - if ( r1!=-1 && r2!=-1 && entry[SEQ1]!=entry[SEQ2]) - { - entry[R1]=r1+1; entry[R2]=r2+1; - add_entry2list(entry, ML); - } - //HERE ("AFT: %d %d %d %d", entry[SEQ1], entry[SEQ2], entry[R1], entry[R2]); - } - } - free_int (name_index, -1); - free_int ( seq_index, -1); - vfree ( entry); - return ML; -} - -Constraint_list *modify_weight( Constraint_list *CL,int start, int end, char *modify_mode) - { - int a; - CLIST_TYPE x; - - if ( strm(modify_mode, "default"))return CL; - for ( a=start; ane, compact_mode, start, len); - - if ( len==0 || strm3(compact_mode, "no", "No", "NO"))return CL; - else if ( strm2(compact_mode,"mirror","mirror_sum")); - else if ( strm4(compact_mode, "default","shrink","shrink_best","shrink_worst")) - { - - for ( a=start; a<(start+len) ; a++) - { - - if ( vread_clist(CL, a, SEQ1)> vread_clist(CL, a, SEQ2) ||\ - ( vread_clist(CL, a, SEQ1)==vread_clist(CL, a, SEQ2) &&\ - vread_clist(CL, a, R1) > vread_clist(CL, a, R2) )) - - - { - s1=vread_clist(CL, a, SEQ1); - s2=vread_clist(CL, a, SEQ2); - r1=vread_clist(CL, a, R1); - r2=vread_clist(CL, a, R2); - vwrite_clist(CL, a, SEQ1,s2); - vwrite_clist(CL, a, SEQ2,s1); - vwrite_clist(CL, a, R1,r2); - vwrite_clist(CL, a, R2,r1); - } - } - } - - if (debug_compact)fprintf ( stderr, "\n[2: %d %s start=%d len=%d", CL->ne, compact_mode, start, len); - - sort_constraint_list ( CL, start, len); - - rs1=vread_clist(CL, start, SEQ1); - rs2=vread_clist(CL, start, SEQ2); - rr1=vread_clist(CL, start, R1); - rr2=vread_clist(CL, start, R2); - ra=start; - - if (debug_compact)fprintf ( stderr, "\n[3: %d %s start=%d len=%d", CL->ne, compact_mode, start, len); - - - if ( (rs1==rs2) && (rr1==rr2))vwrite_clist(CL, start, SEQ1,-1); - for ( a=start+1; a<(start+len); a++) - { - s1=vread_clist(CL, a, SEQ1); - s2=vread_clist(CL, a, SEQ2); - r1=vread_clist(CL, a, R1); - r2=vread_clist(CL, a, R2); - - //if ( (s1==s2) && (r1==r2))vwrite_clist(CL, a, SEQ1, -1); - if ( s1==rs1 && s2==rs2 && r1==rr1 && r2==rr2) - { - x=vread_clist(CL, ra, WE); - if (strm ( compact_mode, "shrink")); - else if (strm( compact_mode,"default"))//default is set to best - vwrite_clist(CL, ra, WE,MAX(vread_clist(CL, a, WE),x)); - else if ( strm ( compact_mode,"mirror_sum")) - vwrite_clist(CL, ra, WE, vread_clist(CL, a, WE)+x); - else if (strm2 ( compact_mode,"best", "shrink_best")) - vwrite_clist(CL, ra, WE,MAX(vread_clist(CL, a, WE),x)); - else if (strm2 ( compact_mode, "worst","shrink_worst")) - vwrite_clist(CL, ra, WE,MIN(vread_clist(CL, a, WE), vread_clist(CL, a, WE))); - - if ( strm(compact_mode, "shrink")); - else - { - vwrite_clist(CL, ra, CONS, vread_clist(CL, ra, CONS)+ vread_clist(CL, a, CONS)); - vwrite_clist(CL, ra, MISC, vread_clist(CL, ra, MISC)+ vread_clist(CL, a, MISC)); - } - vwrite_clist(CL,a, SEQ1, -1); - - } - else - { - rs1=s1; - rs2=s2; - rr1=r1; - rr2=r2; - ra=a; - } - } - - - sort_constraint_list_inv(CL,0,CL->ne); - - sort_constraint_list (CL,0,CL->ne); - - - if ( strm3 (compact_mode, "consPwe", "wePcons","cons")) - { - for ( a=start; a<(start+len); a++) - { - if ( strm2(compact_mode,"consPwe", "wePcons")) - vwrite_clist(CL, a, WE, vread_clist(CL,a,WE)* vread_clist(CL,a,CONS)); - else if (strm (compact_mode, "cons")) - vwrite_clist(CL, a, WE, vread_clist(CL,a,CONS)*100); - } - } - if (debug_compact)fprintf ( stderr, "....OUT: %d]\n", CL->ne); - return CL; - } - - -Constraint_list *rescale_list_simple (Constraint_list *CL,int start, int len,int new_min, int new_max) - { - int a, min, max; - double x; - /*Rescales between 0 and max2 - Any value above max1 is set to max1 first and then to max2 - */ - - - - min=max=vread_clist ( CL,start, WE); - - for ( a=start; a<(start+len); a++) - { - x=(double)vread_clist ( CL,a, WE); - if ( x>max)max=(int)x; - if ( x< min)min=(int)x; - } - - fprintf ( CL->local_stderr, "\n[%d-%d]=>[%d-%d]", min, max, new_min, new_max); - - for ( a=start; a<(start+len); a++) - { - - x=vread_clist(CL,a, WE); - - if ((max-min)==0)x=100; - else x=(((x-min)/(max-min))*new_max)+new_min; - - vwrite_clist(CL, a, WE,(CLIST_TYPE) x); - } - return CL; - } -Constraint_list *rescale_list (Constraint_list *CL,int start, int len,int max1, int max2) - { - int a, min_val, max_val; - CLIST_TYPE x; - - /*Rescales between 0 and max2 - Any value above max1 is set to max1 first and then to max2 - */ - - - min_val=0; - max_val=max1; - - - - for ( a=start; amax1)vwrite_clist(CL, a, WE, max1); - } - - for ( a=start; a0)field=WE; - else if ( T<0) - { - field=CONS; - T=-T; - } - - - for ( a=start; ane); - CL=sort_constraint_list (CL, 0, CL->ne); - return CL; - } - - - - - -Constraint_list *undefine_list (Constraint_list *CL) - { - int a, b; - int undefined_flag; - - for ( a=0;ane; a++) - { - for ( b=0, undefined_flag=0; b< LIST_N_FIELDS; b++) - { - - if ( vread_clist (CL, a, b)==UNDEFINED)undefined_flag=1; - if ( undefined_flag) - { - for ( b=0; b< LIST_N_FIELDS; b++) - if ( b!=SEQ1 && b!=SEQ2 && b!=R1 && b!=R2) - { - vwrite_clist(CL, a, b, UNDEFINED); - } - } - } - } - return CL; - } - -int ** seq2defined_residues ( Sequence *S, Constraint_list *CL) -{ - int **seq_count; - int *entry=NULL; - int a; - - seq_count=declare_int (S->nseq, S->max_len+1); - for (a=0; a< CL->ne; a++) - { - entry=extract_entry(entry, a, CL); - seq_count[entry[SEQ1]][entry[R1]]++; - seq_count[entry[SEQ2]][entry[R2]]++; - } - vfree (entry); - return seq_count; -} - -int ** aln2defined_residues ( Alignment *A, Constraint_list *CL) -{ - - int **seq_count; - int **aln_count; - int **pos; - int a,ra, b; - - pos=aln2pos_simple(A, A->nseq); - seq_count=seq2defined_residues(CL->S, CL); - aln_count=declare_int (A->nseq, A->len_aln); - for (a=0; a< A->nseq; a++) - { - ra=name_is_in_list(A->name[a], (CL->S)->name, (CL->S)->nseq, 100); - if ( ra==-1) continue; - for ( b=0; blen_aln; b++) - if (pos[a][b]>0 && seq_count[ra][pos[a][b]]>0)aln_count[a][b]=1; - } - - free_int (seq_count, -1); - free_int (pos,-1); - return aln_count; -} - -/*********************************************************************/ -/* */ -/* DEBUG CONSTRAINT_LIST */ -/* */ -/* */ -/*********************************************************************/ -void check_seq_pair_in_list(Constraint_list *CLin,int seq1, int seq2) - { - int a, s1, s2, r1, r2; - - for ( a=0; a< CLin->ne; a++) - { - s1=vread_clist(CLin,a,SEQ1); - s2=vread_clist(CLin,a,SEQ2); - if ( s1==seq1 && s2==seq2) - { - r1=vread_clist(CLin,a,R1); - r2=vread_clist(CLin,a,R2); - fprintf ( stderr, "\n[%d][%d %d] [%d %d]",a,s1, r1, s2, r2); - } - } - } - -void print_CL_mem(Constraint_list *CL, char *function) - { - fprintf ( stderr,"%s\n", function); - if ( CL->fp==NULL && CL->L==NULL) fprintf ( stderr, "\n\tNOTHING"); - if ( CL->fp)fprintf ( stderr, "\n\tFILE SET"); - if ( CL->L)fprintf ( stderr, "\n\tMEM SET\n"); - } - -int constraint_list_is_sorted ( Constraint_list *CL) - { - int a,b, x1, x2; - for ( a=0; a< CL->ne-1; a++) - { - for ( b=0; b< CL->entry_len; b++) - { - x1=vread_clist( CL, a, b); - x2=vread_clist( CL, a+1,b); - if ( x1x2) - { - fprintf ( stderr, "\n[%d][%d]=>%d\n[%d][%d]=>%d\n\n",a, b, x1, a+1, b, x2); - return 0; - } - - } - } - return 1; - } -/*********************************************************************/ -/* */ -/* PRUNE CONSTRAINT_LIST */ -/* */ -/* */ -/*********************************************************************/ -char * list2prune_list_old ( Sequence *S, int **sm) -{ - int a, b, i; - char *aln, *seq, *file; - FILE *fp; - Sequence *subS; - Alignment *A; - - aln=vtmpnam (NULL); - seq=vtmpnam (NULL); - file=vtmpnam (NULL); - - output_fasta_seq (seq, A=seq2aln (S,NULL,RM_GAP)); - free_aln (A); - - printf_system ( "t_coffee %s -in Xblosum62mt -outfile=%s -msa_mode iterative_tree_aln", seq, aln); - printf_system ( "t_coffee -other_pg seq_reformat -in %s -action +trim _aln_n5 -output fasta_seq > %s",aln, seq ); - subS=main_read_seq ( seq); - - fp=vfopen (file, "w"); - for ( a=0; a< subS->nseq; a++) - { - i=name_is_in_list (subS->name[a], S->name, S->nseq, 100); - if ( i==-1) continue; - for ( b=0; bnseq; b++) - if (i!=b)fprintf ( fp, "\n2 %d %d",i, b); - } - vfclose (fp); - return file; -} - -char * list2prune_list ( Sequence *S, int **sm) -{ - int a, b, c; - int **mat, *used, *keep; - int nk=0, n=0; - int ns=4; - char *file; - FILE *fp; - - n=S->nseq; - - if (get_string_variable("prune_lib_mode")) - ns=atoi(get_string_variable("prune_lib_mode")); - - HERE ("NS=%d", ns); - if (ns==0)ns=n; - else if (ns<0)ns=-(n*ns)/100; - else if (ns>=n)ns=n; - - - HERE ("NS=%d", ns); - - - keep=vcalloc (n, sizeof (int)); - used=vcalloc (n, sizeof (int)); - mat=declare_int (n, n); - file=vtmpnam (NULL); - - //1-Identify the seed sequence: the one on average the further away from the rest - for (a=0; anseq; b++) - if (keep[a]!=b) - { - fprintf ( fp, "\n2 %d %d", keep[a], b); - } - } - vfclose (fp); - vfree (keep); vfree (used);free_int (mat, -1); - - return file; -} - - -char * list2prune_list_old3 ( Sequence *S, int **sm) -{ - int **keep; - int a,b,c,n,s1,s2, tot, nseq; - - char *file; - FILE *fp; - nseq=S->nseq; - - keep=declare_int (nseq, nseq); - - for (a=0; a< nseq; a++) - for (b=a+1; b< nseq; b++) - { - int bc=0,bsim=0; - s1=sm[a][b]; - for (c=0; c< nseq; c++) - { - if ( c==a || c==b) continue; - s2=MIN(sm[a][c], sm[c][b]); - - if (s2>s1 && s2>bsim) - { - bsim=s2; - bc=c; - } - } - if ( bsim) - { - keep[a][bc]=1; - keep[bc][a]=1; - keep[b][bc]=1; - keep[bc][b]=1; - } - } - - file=vtmpnam (NULL); - fp=vfopen (file, "w"); - - for (n=0,tot=0,a=0; a< nseq; a++) - for ( b=a+1; b< nseq; b++) - { - if (keep[a][b]) - fprintf (fp, "\n 2 %d %d", a, b); - } - vfclose (fp); - return file; -} - -/*********************************************************************/ -/* */ -/* WEIGHT CONSTRAINT_LIST */ -/* */ -/* */ -/*********************************************************************/ - -Constraint_list *weight_constraint_list(Constraint_list * CL, char *seq_weight) - - { - Weights *W; - - if ( CL->ne==0)return CL; - else if ( strm(seq_weight, "t_coffee")) W=compute_t_coffee_weight(CL); - else if (check_file_exists (seq_weight)) - { - W=read_seq_weight ((CL->S)->name, (CL->S)->nseq, seq_weight); - } - else - { - int a; - W=declare_weights((CL->S)->nseq); - sprintf ( W->mode, "no_seq_weight"); - for ( a=0; a<(CL->S)->nseq; a++) - { - sprintf ( W->seq_name[a], "%s", (CL->S)->name[a]); - W->SEQ_W[a]=1; - } - CL->W=W; - return CL; - } - - CL=re_weight_constraint_list (CL,W); - - CL->W=W; - - - return CL; - - - } - - - -Weights* compute_t_coffee_weight(Constraint_list * CL) - { - int a, b; - float p, d; - Weights *W; - int nseq; - - - - - - if (!CL->L)return NULL; - - nseq=(CL->S)->nseq; - W=declare_weights(nseq); - sprintf ( W->mode, "t_coffee"); - for ( a=0; a< nseq; a++) - { - sprintf ( W->seq_name[a], "%s", (CL->S)->name[a]); - W->SEQ_W[a]=1; - } - - - for (a=0; a< (CL->S)->nseq-1; a++) - for ( b=a+1; b< (CL->S)->nseq; b++) - { - if ( b==a){d=1;} - else if ( !(CL->S)->len[b] || !(CL->S)->len[a])d=1; - else - { - d=((float)(CL->DM)->similarity_matrix[a][b]/MAXID)*10; - } - p=pow(d,3); - - W->SEQ_W[a]+=p; - W->SEQ_W[b]+=p; - - } - - for ( p=0,b=0; b< (CL->S)->nseq; b++) - { - if ((CL->S)->len[b]==0)W->SEQ_W[b]=0; - else W->SEQ_W[b]=2/W->SEQ_W[b]; - p+=W->SEQ_W[b]; - } - for ( b=0; b< (CL->S)->nseq; b++) - { - W->SEQ_W[b]=W->SEQ_W[b]*((float)W->nseq/p); - } - - - return W; - } - -Constraint_list *re_weight_constraint_list(Constraint_list * CL,Weights *W) - { - int a; - float w; - float *weight; - int sA, sB; - - - - weight=W->SEQ_W; - - if (!CL->L)return CL; - - - - for ( a=0; a< CL->ne; a++) - { - sA=CL->L[a*CL->entry_len+SEQ1]; - sB=CL->L[a*CL->entry_len+SEQ2]; - - w=MIN(weight[sA], weight[sB]); - - CL->L[a*CL->entry_len+WE]*=w; - } - CL=evaluate_constraint_list_reference (CL); - return CL; - } - -Distance_matrix* cl2distance_matrix (Constraint_list *CL, Alignment *A, char *in_mode, char *in_sim_mode, int print) -{ - - char mode[100]; - char sim_mode [100]; - - - - if ( !CL)return NULL; - sprintf ( mode, "%s", (CL && in_mode==NULL)?CL->distance_matrix_mode:in_mode); - sprintf ( sim_mode, "%s", (CL && in_sim_mode==NULL)?CL->distance_matrix_sim_mode:in_sim_mode); - - if ( !CL->DM ||!strm ((CL->DM)->mode, mode) || !strm ((CL->DM)->sim_mode, sim_mode) || A ) - { - return seq2distance_matrix (CL, A, mode, sim_mode, print); - } - else - { - - return CL->DM; - } -} - - -Distance_matrix *seq2distance_matrix (Constraint_list *CL, Alignment *A,char *mode, char *sim_mode, int print) -{ - /*Compute the distance matrix associated with the Constraint List and the sequences*/ - /*Computation only occurs if the similiraty matrix is undefined : CL->similarity_matrix*/ - /*Undefine CL->similarity_matrix to force computation*/ - - int a, b; - Alignment *B; - Constraint_list *NCL; - float score=0; - int *ns; - int **l_s; - float id; - int max_name=0; - int id_score; - static float **g_matrix; - float ref=0; - int n_coor=0; - Distance_matrix *DM; - int **sim_table=NULL; - - //mode: computation mode - //sim_mode: mode for computing the similarity - - //Composite modes - - if (strm (mode, "ktup2")) - { - B=seq2aln ( CL->S, NULL, 1); - B=very_fast_aln (B, B->nseq,NULL); - sprintf ( CL->distance_matrix_mode, "aln"); - DM=cl2distance_matrix (CL, B, NULL, NULL, 1); - sprintf ( CL->distance_matrix_mode, "ktup2"); - sprintf ( DM->mode, "%s", mode); - sprintf ( DM->sim_mode, "%s", sim_mode); - free_aln (B); - return DM; - } - - if ( !CL) return NULL; - else - { - for ( max_name=0,a=0; a< (CL->S)->nseq; a++)max_name=MAX(strlen ((CL->S)->name[a]), max_name); - - - if ( CL->DM)DM=CL->DM; - else - { - DM=vcalloc ( 1, sizeof (Distance_matrix)); - DM->nseq=(CL->S)->nseq; - DM->similarity_matrix=declare_int ( (CL->S)->nseq, (CL->S)->nseq); - DM->distance_matrix =declare_int ( (CL->S)->nseq, (CL->S)->nseq); - DM->score_similarity_matrix=declare_int ( (CL->S)->nseq, (CL->S)->nseq); - } - - sprintf ( DM->mode, "%s", mode); - sprintf ( DM->sim_mode, "%s", sim_mode); - - NCL=duplicate_constraint_list_soft (CL); - NCL->pw_parameters_set=1; - - if (!A) - { - if ( CL->tree_aln)B=CL->tree_aln; - else B=seq2aln ( NCL->S, NULL, 1); - } - else - { - B=copy_aln (A, NULL); - B=reorder_aln (B, (CL->S)->name, (CL->S)->nseq); - } - - if ( strm (mode, "very_fast")) - { - sprintf ( NCL->dp_mode, "very_fast_pair_wise"); - NCL->evaluate_residue_pair=evaluate_matrix_score; - if ( strm ((CL->S)->type, "DNA") ||strm ((CL->S)->type, "RNA") ) - { - NCL->M=read_matrice ("idmat"); - NCL->gop=-10; - NCL->gep=-1; - CL->ktup=6; - } - else - { - NCL->M=read_matrice ("blosum62mt"); - NCL->gop=get_avg_matrix_mm (NCL->M, AA_ALPHABET)*10; - NCL->gep=-1; - CL->ktup=2; - } - NCL->use_fragments=1; - CL->diagonal_threshold=6; - } - - else if ( strm (mode, "ktup")) - { - - NCL->ktup=6; - sim_table=ktup_dist_mat((CL->S)->seq,(CL->S)->nseq,NCL->ktup, (CL->S)->type); - } - - - else if (strm (mode, "aln")) - { - - sim_table=aln2sim_mat (A, sim_mode); - } - else if ( strm (mode, "fast") || strm ("idscore", mode)) - { - sprintf ( NCL->dp_mode, "myers_miller_pair_wise"); - NCL->evaluate_residue_pair=evaluate_matrix_score; - if ( strm ((CL->S)->type, "DNA") || strm ((CL->S)->type, "RNA")) - { - NCL->M=read_matrice ("idmat"); - NCL->gop=-10; - NCL->gep=-1; - } - else - { - NCL->M=read_matrice ("blosum62mt"); - NCL->gop=get_avg_matrix_mm (NCL->M, AA_ALPHABET)*10; - NCL->gep=-1; - } - } - else if ( strm (mode, "cscore")) - { - if (!CL || !CL->L || CL->ne==0) - return seq2distance_matrix (CL, A,"idscore",sim_mode, print); - } - else if ( strm (mode, "geometric") ); - else if (strm (mode, "slow")); - else if (strm (mode, "clustalw")); - else if (strm (mode, "no")) - print=1; - else if (strm (mode, "random")) - print=1; - else - { - fprintf ( stderr, "\nError: %s is an unknown distance_matrix_mode [FATAL:%s]", mode,PROGRAM); - crash (""); - } - - //Special Geometric Mode - if ( strm (NCL->distance_matrix_mode, "geometric")) - { - free_arrayN(g_matrix, 2); - g_matrix=declare_float ((CL->S)->nseq, 3); - n_coor=MIN(3,((CL->S)->nseq)); - - for ( a=0; a<(CL->S)->nseq; a++) - { - for (b=0; bS)->seq[a], (CL->S)->seq[b], "pam250mt", -10, -1, "fasta_pair_wise"); - g_matrix[a][b]=get_seq_sim ( B->seq_al[0], B->seq_al[1], "-", NULL); - free_aln(B);B=NULL; - } - } - ref=(float)sqrt((double)(10000*n_coor)); - } - - - ns=vcalloc ( 2, sizeof(int)); - l_s=declare_int ( 2, 1); - ns[0]=ns[1]=1; - l_s[0][0]=0; - l_s[1][0]=1; - - if (CL->local_stderr && print>0)fprintf ( (CL->local_stderr), "\nCOMPUTE PAIRWISE SIMILARITY [dp_mode: %s] [distance_matrix_mode: %s][Similarity Measure: %s] \n", NCL->dp_mode,mode, sim_mode); - - for (a=0; a< (CL->S)->nseq; a++) - { - if (CL->local_stderr && print>0)fprintf ( (CL->local_stderr), "\n\tSeq: %s", (CL->S)->name[a]); - for ( b=a; b< (CL->S)->nseq; b++) - { - if ( b==a){DM->similarity_matrix[a][b]=MAXID;} - else - { - l_s[0][0]=a; - l_s[1][0]=b; - if ( !strm(mode, "ktup2") && ! strm (mode, "geometric")) - { - ungap ( B->seq_al[a]); - ungap ( B->seq_al[b]); - } - - if ( strm (mode, "slow")) - { - - B->score_aln=pair_wise (B, ns, l_s,NCL); - - id=get_seq_sim ( B->seq_al[a], B->seq_al[b], "-", sim_mode); - if ( CL->L) - { - score=(int)(((float)B->score_aln)/(B->len_aln*SCORE_K)); - score=(int)(CL->L && CL->normalise)?((score*MAXID)/(CL->normalise)):(score); - } - else if ( CL->M)score=id; - - - if ( score>MAXID)score=(int)(CL->L)?sub_aln2sub_aln_score (B, CL, CL->evaluate_mode, ns, l_s):id; - - } - else if ( strm2 (mode,"fast", "very_fast")) - { - B->score_aln=pair_wise (B, ns, l_s,NCL); - id=get_seq_sim ( B->seq_al[a], B->seq_al[b], "-", sim_mode); - score=(int)(id)*SCORE_K; - } - else if ( strm (mode, "cscore")) - { - ungap ( B->seq_al[a]); - ungap ( B->seq_al[b]); - score=(int)linked_pair_wise (B, ns, l_s, NCL); - - - score/=(B->len_aln*SCORE_K); - id=score/SCORE_K; - } - else if ( strm (mode, "idscore")) - { - score=id=idscore_pairseq (B->seq_al[a], B->seq_al[b], NCL->gop, NCL->gep, NCL->M, sim_mode); - //HERE ("%s %d %d ->%d", sim_mode, a, b, (int)id); - } - else if (strm (mode, "ktup")) - { - id=sim_table[a][b]; - score=id*SCORE_K; - - } - else if (strm (mode, "aln")) - { - score=id=sim_table[a][b]; - score*=SCORE_K; - } - - else if ( strm (mode, "geometric")) - { - id=get_geometric_distance (g_matrix,n_coor, a, b, "euclidian"); - id=MAXID*(1-((id/ref))); - score=(int)(id)*SCORE_K; - } - else if ( strm (mode, "no")) - { - id=100; - score=id*SCORE_K; - } - else if ( strm (mode, "random")) - { - id=rand()%100; - score=id*SCORE_K; - } - else - { - id=B->score_aln=pair_wise (B, ns, l_s,NCL); - score=id*SCORE_K; - } - /*Sim mat*/ - DM->similarity_matrix[a][b]=DM->similarity_matrix[b][a]=(int)(id); - /*Dist mat*/ - DM->distance_matrix[a][b]=DM->distance_matrix[b][a]=MAXID-(int)(id); - /*Score mat*/ - - - DM->score_similarity_matrix[a][b]=DM->score_similarity_matrix[b][a]=(int)score; - id_score=id; - if (CL->local_stderr && print>1) fprintf (CL->local_stderr, "\n\t%-*s %-*s identity=%3d%% score=%3d", max_name,(CL->S)->name[a], max_name,(CL->S)->name[b], id_score, (int)score); - } - } - } - vfree (ns); - free_int(l_s, -1); - - } - - - if (CL->local_stderr) fprintf (CL->local_stderr, "\n"); - free_constraint_list (NCL); - - - - if (!CL->tree_aln) - { - free_aln (B); - } - - free_int (sim_table, -1); - - - - return DM; -} -/*********************************************************************/ -/* */ -/* RNA FUNCTIONS */ -/* */ -/* */ -/*********************************************************************/ -char * seq2rna_lib ( Sequence *S, char *name) -{ - int a; - FILE *fp; - - - if (!name)name=vtmpnam (NULL); - fp=vfopen (name, "w"); - for ( a=0; anseq; a++) - { - - fprintf (fp, "%s\n", rna_struc2rna_lib(S->name[a], S->seq[a], NULL)); - } - vfclose (fp); - - return name; -} - -Constraint_list *read_rna_lib ( Sequence *S, char *fname) -{ - Constraint_list *R; - char **list; - int n=0,a; - - - if (check_file_exists (fname)) - { - - list=read_lib_list ( fname, &n); - } - else - { - X_template *F; - - list=vcalloc (S->nseq, sizeof (char*)); - for ( a=0; anseq; a++) - { - if ((F=seq_has_template (S, a, "_F_"))) - { - list[n++]=F->template_file; - } - } - } - - R=declare_constraint_list ( S,NULL, NULL, 0,NULL, NULL); - - for (a=0; a< n; a++) - { - - if (list[a])R=fast_read_constraint_list_file (R, list[a]); - } - - R=index_res_constraint_list (R,WE); - - return R; -} - -Constraint_list * rna_lib_extension ( Constraint_list *CL, Constraint_list *R) -{ - CLIST_TYPE *entry=NULL; - int a,b,c,n1,n2, ne,s1, s2, r1, r2,w; - int list1[100], list2[100]; - - - entry=vcalloc ( CL->entry_len, CL->el_size); - ne=CL->ne; - - - for ( a=0; aresidue_index[s1][r1][0]; b+=3) - { - list1[n1++]=R->residue_index[s1][r1][b+1]; - } - list2[n2++]=r2; - for (b=1; bresidue_index[s2][r2][0]; b+=3) - { - list2[n2++]=R->residue_index[s2][r2][b+1]; - } - - for (b=1; b\n"); - fprintf ( fp, "PARAM &bnsp2>/dev/null\n"); - fprintf ( fp, "SEQ_TYPE S\n"); - fprintf ( fp, "ADDRESS %s\n", MAFFT_ADDRESS); - fprintf ( fp, "PROGRAM %s\n", MAFFT_4_TCOFFEE); - vfclose (fp);} - - - sprintf (list[n][0], "mafftdef_msa"); - sprintf (list[n][1], "%s", vtmpnam(NULL)); - n++;if (method==NULL || strm (method, list[n-1][0])){fp=vfopen (list[n-1][1], "w"); - fprintf ( fp, "EXECUTABLE mafft\n"); - fprintf ( fp, "DOC Mafft [%s]\n", MAFFT_ADDRESS); - fprintf ( fp, "ALN_MODE multiple\n"); - fprintf ( fp, "OUT_MODE aln\n"); - fprintf ( fp, "PARAM1 --localpair --maxiterate 1000 \n"); - fprintf ( fp, "IN_FLAG &bnsp\n"); - fprintf ( fp, "OUT_FLAG >\n"); - fprintf ( fp, "PARAM &bnsp2>/dev/null\n"); - fprintf ( fp, "SEQ_TYPE S\n"); - fprintf ( fp, "ADDRESS %s\n", MAFFT_ADDRESS); - fprintf ( fp, "PROGRAM %s\n", MAFFT_4_TCOFFEE); - vfclose (fp);} - - sprintf (list[n][0], "mafft_pair"); - sprintf (list[n][1], "%s", vtmpnam(NULL)); - n++;if (method==NULL || strm (method, list[n-1][0])){fp=vfopen (list[n-1][1], "w"); - fprintf ( fp, "EXECUTABLE mafft\n"); - fprintf ( fp, "DOC Mafft [%s]\n", MAFFT_ADDRESS); - fprintf ( fp, "ALN_MODE pairwise\n"); - fprintf ( fp, "OUT_MODE aln\n"); - fprintf ( fp, "PARAM1 --localpair --maxiterate 1000 \n"); - fprintf ( fp, "IN_FLAG &bnsp\n"); - fprintf ( fp, "OUT_FLAG >\n"); - fprintf ( fp, "PARAM &bnsp2>/dev/null\n"); - fprintf ( fp, "SEQ_TYPE S\n"); - fprintf ( fp, "ADDRESS %s\n", MAFFT_ADDRESS); - fprintf ( fp, "PROGRAM %s\n", MAFFT_4_TCOFFEE); - vfclose (fp);} - - sprintf (list[n][0], "mafft_msa"); - sprintf (list[n][1], "%s", vtmpnam(NULL)); - n++;if (method==NULL || strm (method, list[n-1][0])){fp=vfopen (list[n-1][1], "w"); - fprintf ( fp, "EXECUTABLE mafft\n"); - fprintf ( fp, "DOC Mafft [%s]\n", MAFFT_ADDRESS); - fprintf ( fp, "ALN_MODE multiple\n"); - fprintf ( fp, "OUT_MODE aln\n"); - fprintf ( fp, "PARAM1 --localpair --maxiterate 1000 \n"); - fprintf ( fp, "IN_FLAG &bnsp\n"); - fprintf ( fp, "OUT_FLAG >\n"); - fprintf ( fp, "PARAM &bnsp2>/dev/null\n"); - fprintf ( fp, "SEQ_TYPE S\n"); - fprintf ( fp, "ADDRESS %s\n", MAFFT_ADDRESS); - fprintf ( fp, "PROGRAM %s\n", MAFFT_4_TCOFFEE); - vfclose (fp);} - - sprintf (list[n][0], "mafftjtt_pair"); - sprintf (list[n][1], "%s", vtmpnam(NULL)); - n++;if (method==NULL || strm (method, list[n-1][0])){fp=vfopen (list[n-1][1], "w"); - fprintf ( fp, "DOC Mafft [%s]\n", MAFFT_ADDRESS); - fprintf ( fp, "EXECUTABLE mafft \n"); - fprintf ( fp, "ALN_MODE pairwise\n"); - fprintf ( fp, "OUT_MODE aln\n"); - fprintf ( fp, "IN_FLAG &bnsp\n"); - fprintf ( fp, "OUT_FLAG >\n"); - fprintf ( fp, "PARAM1 --jtt 250 --localpair --maxiterate 1000 \n"); - fprintf ( fp, "PARAM &bnsp2>/dev/null\n"); - fprintf ( fp, "SEQ_TYPE S\n"); - fprintf ( fp, "ADDRESS %s\n", MAFFT_ADDRESS); - fprintf ( fp, "PROGRAM %s\n", MAFFT_4_TCOFFEE); - vfclose (fp);} - - sprintf (list[n][0], "mafftjtt_msa"); - sprintf (list[n][1], "%s", vtmpnam(NULL)); - n++;if (method==NULL || strm (method, list[n-1][0])){fp=vfopen (list[n-1][1], "w"); - fprintf ( fp, "DOC Mafft [%s]\n", MAFFT_ADDRESS); - fprintf ( fp, "EXECUTABLE mafft \n"); - - fprintf ( fp, "ALN_MODE multiple\n"); - fprintf ( fp, "OUT_MODE aln\n"); - fprintf ( fp, "IN_FLAG &bnsp\n"); - fprintf ( fp, "OUT_FLAG >\n"); - fprintf ( fp, "PARAM1 --jtt 250 --localpair --maxiterate 1000 \n"); - fprintf ( fp, "PARAM &bnsp2>/dev/null\n"); - fprintf ( fp, "SEQ_TYPE S\n"); - fprintf ( fp, "ADDRESS %s\n", MAFFT_ADDRESS); - fprintf ( fp, "PROGRAM %s\n", MAFFT_4_TCOFFEE); - vfclose (fp);} - - sprintf (list[n][0], "mafftgins_pair"); - sprintf (list[n][1], "%s", vtmpnam(NULL)); - n++;if (method==NULL || strm (method, list[n-1][0])){fp=vfopen (list[n-1][1], "w"); - fprintf ( fp, "EXECUTABLE mafft\n"); - fprintf ( fp, "DOC Mafft [%s]\n", MAFFT_ADDRESS); - fprintf ( fp, "ALN_MODE pairwise\n"); - fprintf ( fp, "OUT_MODE aln\n"); - fprintf ( fp, "PARAM1 --globalpair --maxiterate 1000 \n"); - fprintf ( fp, "IN_FLAG &bnsp\n"); - fprintf ( fp, "OUT_FLAG >\n"); - fprintf ( fp, "PARAM &bnsp2>/dev/null\n"); - fprintf ( fp, "SEQ_TYPE S\n"); - fprintf ( fp, "ADDRESS %s\n", MAFFT_ADDRESS); - fprintf ( fp, "PROGRAM %s\n", MAFFT_4_TCOFFEE); - vfclose (fp);} - - sprintf (list[n][0], "mafftgins_msa"); - sprintf (list[n][1], "%s", vtmpnam(NULL)); - n++;if (method==NULL || strm (method, list[n-1][0])){fp=vfopen (list[n-1][1], "w"); - fprintf ( fp, "EXECUTABLE mafft\n"); - fprintf ( fp, "DOC Mafft [%s]\n", MAFFT_ADDRESS); - fprintf ( fp, "ALN_MODE multiple\n"); - fprintf ( fp, "OUT_MODE aln\n"); - fprintf ( fp, "PARAM1 --globalpair --maxiterate 1000 \n"); - fprintf ( fp, "IN_FLAG &bnsp\n"); - fprintf ( fp, "OUT_FLAG >\n"); - fprintf ( fp, "PARAM &bnsp2>/dev/null\n"); - fprintf ( fp, "SEQ_TYPE S\n"); - fprintf ( fp, "ADDRESS %s\n", MAFFT_ADDRESS); - fprintf ( fp, "PROGRAM %s\n", MAFFT_4_TCOFFEE); - vfclose (fp);} - - sprintf (list[n][0], "dialigntx_pair"); - sprintf (list[n][1], "%s", vtmpnam(NULL)); - n++;if (method==NULL || strm (method, list[n-1][0])){fp=vfopen (list[n-1][1], "w"); - fprintf ( fp, "DOC dialign-tx [%s]\n", DIALIGNTX_ADDRESS); - fprintf ( fp, "EXECUTABLE dialign-tx\n"); - fprintf ( fp, "ALN_MODE pairwise\n"); - fprintf ( fp, "OUT_MODE aln\n"); - if ( isdir (DIALIGNTX_DIR)) - fprintf ( fp, "PARAM1 %s \n", DIALIGNTX_DIR); - else - fprintf ( fp, "PARAM1 %s \n", get_mcoffee_4_tcoffee()); - fprintf ( fp, "IN_FLAG &bnsp\n"); - fprintf ( fp, "OUT_FLAG &bnsp\n"); - fprintf ( fp, "PARAM >/dev/null&bnsp2>/dev/null\n"); - fprintf ( fp, "SEQ_TYPE S\n"); - fprintf ( fp, "ADDRESS %s\n", DIALIGNTX_ADDRESS); - fprintf ( fp, "PROGRAM %s\n", DIALIGNTX_4_TCOFFEE); - vfclose (fp);} - - sprintf (list[n][0], "dialigntx_msa"); - sprintf (list[n][1], "%s", vtmpnam(NULL)); - n++;if (method==NULL || strm (method, list[n-1][0])){fp=vfopen (list[n-1][1], "w"); - fprintf ( fp, "DOC dialign-tx [%s]\n", DIALIGNTX_ADDRESS); - fprintf ( fp, "EXECUTABLE dialign-tx\n"); - fprintf ( fp, "ALN_MODE multiple\n"); - fprintf ( fp, "OUT_MODE aln\n"); - if ( isdir (DIALIGNTX_DIR)) - fprintf ( fp, "PARAM1 %s \n", DIALIGNTX_DIR); - else - fprintf ( fp, "PARAM1 %s \n", get_mcoffee_4_tcoffee()); - fprintf ( fp, "IN_FLAG &bnsp\n"); - fprintf ( fp, "OUT_FLAG &bnsp\n"); - fprintf ( fp, "PARAM >/dev/null&bnsp2>/dev/null\n"); - fprintf ( fp, "SEQ_TYPE S\n"); - fprintf ( fp, "ADDRESS %s\n", DIALIGNTX_ADDRESS); - fprintf ( fp, "PROGRAM %s\n", DIALIGNTX_4_TCOFFEE); - vfclose (fp);} - - sprintf (list[n][0], "dialignt_pair"); - sprintf (list[n][1], "%s", vtmpnam(NULL)); - n++;if (method==NULL || strm (method, list[n-1][0])){fp=vfopen (list[n-1][1], "w"); - fprintf ( fp, "DOC dialign-tx [%s]\n", DIALIGNT_ADDRESS); - fprintf ( fp, "EXECUTABLE dialign-tx\n"); - fprintf ( fp, "ALN_MODE pairwise\n"); - fprintf ( fp, "OUT_MODE aln\n"); - if ( isdir (DIALIGNT_DIR)) - fprintf ( fp, "PARAM1 %s \n", DIALIGNT_DIR); - else - fprintf ( fp, "PARAM1 %s \n", get_mcoffee_4_tcoffee()); - fprintf ( fp, "IN_FLAG &bnsp\n"); - fprintf ( fp, "OUT_FLAG &bnsp\n"); - fprintf ( fp, "PARAM >/dev/null&bnsp2>/dev/null\n"); - fprintf ( fp, "SEQ_TYPE S\n"); - fprintf ( fp, "ADDRESS %s\n", DIALIGNT_ADDRESS); - fprintf ( fp, "PROGRAM %s\n", DIALIGNT_4_TCOFFEE); - - vfclose (fp);} - - sprintf (list[n][0], "dialignt_msa"); - sprintf (list[n][1], "%s", vtmpnam(NULL)); - n++;if (method==NULL || strm (method, list[n-1][0])){fp=vfopen (list[n-1][1], "w"); - fprintf ( fp, "DOC dialign-tx [%s]\n", DIALIGNT_ADDRESS); - fprintf ( fp, "EXECUTABLE dialign-tx\n"); - fprintf ( fp, "ALN_MODE multiple\n"); - fprintf ( fp, "OUT_MODE aln\n"); - if ( isdir (DIALIGNT_DIR)) - fprintf ( fp, "PARAM1 %s \n", DIALIGNT_DIR); - else - fprintf ( fp, "PARAM1 %s \n", get_mcoffee_4_tcoffee()); - fprintf ( fp, "IN_FLAG &bnsp\n"); - fprintf ( fp, "OUT_FLAG &bnsp\n"); - fprintf ( fp, "PARAM >/dev/null&bnsp2>/dev/null\n"); - fprintf ( fp, "SEQ_TYPE S\n"); - fprintf ( fp, "ADDRESS %s\n", DIALIGNT_ADDRESS); - fprintf ( fp, "PROGRAM %s\n", DIALIGNT_4_TCOFFEE); - - vfclose (fp);} - - sprintf (list[n][0], "poa_pair"); - sprintf (list[n][1], "%s", vtmpnam(NULL)); - n++;if (method==NULL || strm (method, list[n-1][0])){fp=vfopen (list[n-1][1], "w"); - fprintf ( fp, "DOC Partial Order Graph Alignment [%s]\n", POA_ADDRESS); - fprintf ( fp, "EXECUTABLE poa\n"); - fprintf ( fp, "ALN_MODE pairwise\n"); - fprintf ( fp, "OUT_MODE aln\n"); - fprintf ( fp, "PARAM1 -toupper \n"); - fprintf ( fp, "IN_FLAG -read_fasta&bnsp\n"); - fprintf ( fp, "OUT_FLAG -clustal&bnsp\n"); - if (file_exists (POA_DIR, POA_FILE1)) - fprintf ( fp, "PARAM %s/%s&bnsp2>/dev/null\n",POA_DIR,POA_FILE1); - else - fprintf ( fp, "PARAM %s/%s&bnsp2>/dev/null\n", get_mcoffee_4_tcoffee(), POA_FILE1); - fprintf ( fp, "SEQ_TYPE S\n"); - fprintf ( fp, "ADDRESS %s\n", POA_ADDRESS); - fprintf ( fp, "PROGRAM %s\n",POA_4_TCOFFEE); - - vfclose (fp);} - - sprintf (list[n][0], "poa_msa"); - sprintf (list[n][1], "%s", vtmpnam(NULL)); - n++;if (method==NULL || strm (method, list[n-1][0])){fp=vfopen (list[n-1][1], "w"); - fprintf ( fp, "DOC Partial Order Graph Alignment [%s]\n", POA_ADDRESS); - fprintf ( fp, "EXECUTABLE poa\n"); - fprintf ( fp, "ALN_MODE multiple\n"); - fprintf ( fp, "OUT_MODE aln\n"); - fprintf ( fp, "PARAM1 -toupper \n"); - fprintf ( fp, "IN_FLAG -read_fasta&bnsp\n"); - fprintf ( fp, "OUT_FLAG -clustal&bnsp\n"); - if (file_exists (POA_DIR, POA_FILE1)) - fprintf ( fp, "PARAM %s/%s&bnsp2>/dev/null\n",POA_DIR,POA_FILE1); - else - fprintf ( fp, "PARAM %s/%s&bnsp2>/dev/null\n", get_mcoffee_4_tcoffee(), POA_FILE1); - fprintf ( fp, "SEQ_TYPE S\n"); - fprintf ( fp, "ADDRESS %s\n", POA_ADDRESS); - fprintf ( fp, "PROGRAM %s\n",POA_4_TCOFFEE); - vfclose (fp);} - - sprintf (list[n][0], "probcons_pair"); - sprintf (list[n][1], "%s", vtmpnam(NULL)); - n++;if (method==NULL || strm (method, list[n-1][0])){fp=vfopen (list[n-1][1], "w"); - fprintf ( fp, "DOC probcons [%s]\n", PROBCONS_ADDRESS); - fprintf ( fp, "ALN_MODE pairwise\n"); - fprintf ( fp, "OUT_MODE aln\n"); - fprintf ( fp, "IN_FLAG &bnsp\n"); - fprintf ( fp, "OUT_FLAG >\n"); - fprintf ( fp, "PARAM &bnsp2>/dev/null\n"); - fprintf ( fp, "SEQ_TYPE S\n"); - if ( strm (retrieve_seq_type(), "DNA") || strm (retrieve_seq_type(), "RNA"))fprintf ( fp, "EXECUTABLE probconsRNA\n"); - else fprintf ( fp, "EXECUTABLE probcons\n"); - fprintf ( fp, "ADDRESS %s\n", PROBCONS_ADDRESS); - fprintf ( fp, "PROGRAM %s\n",PROBCONS_4_TCOFFEE); - vfclose (fp);} - - sprintf (list[n][0], "probcons_msa"); - sprintf (list[n][1], "%s", vtmpnam(NULL)); - n++;if (method==NULL || strm (method, list[n-1][0])){fp=vfopen (list[n-1][1], "w"); - fprintf ( fp, "DOC probcons [%s]\n", PROBCONS_ADDRESS); - fprintf ( fp, "ALN_MODE multiple\n"); - fprintf ( fp, "OUT_MODE aln\n"); - fprintf ( fp, "IN_FLAG &bnsp\n"); - fprintf ( fp, "OUT_FLAG >\n"); - fprintf ( fp, "PARAM &bnsp2>/dev/null\n"); - fprintf ( fp, "SEQ_TYPE S\n"); - if ( strm (retrieve_seq_type(), "DNA") || strm (retrieve_seq_type(), "RNA"))fprintf ( fp, "EXECUTABLE probconsRNA\n"); - else fprintf ( fp, "EXECUTABLE probcons\n"); - fprintf ( fp, "ADDRESS %s\n", PROBCONS_ADDRESS); - fprintf ( fp, "PROGRAM %s\n",PROBCONS_4_TCOFFEE); - vfclose (fp);} - - sprintf (list[n][0], "probconsRNA_pair"); - sprintf (list[n][1], "%s", vtmpnam(NULL)); - n++;if (method==NULL || strm (method, list[n-1][0])){fp=vfopen (list[n-1][1], "w"); - fprintf ( fp, "DOC probcons [%s]\n", PROBCONSRNA_ADDRESS); - fprintf ( fp, "ALN_MODE pairwise\n"); - fprintf ( fp, "OUT_MODE aln\n"); - fprintf ( fp, "IN_FLAG &bnsp\n"); - fprintf ( fp, "OUT_FLAG >\n"); - fprintf ( fp, "PARAM &bnsp2>/dev/null\n"); - fprintf ( fp, "SEQ_TYPE S\n"); - fprintf ( fp, "EXECUTABLE probconsRNA\n"); - fprintf ( fp, "ADDRESS %s\n", PROBCONSRNA_ADDRESS); - fprintf ( fp, "PROGRAM %s\n",PROBCONSRNA_4_TCOFFEE); - vfclose (fp);} - - sprintf (list[n][0], "probconsRNA_msa"); - sprintf (list[n][1], "%s", vtmpnam(NULL)); - n++;if (method==NULL || strm (method, list[n-1][0])){fp=vfopen (list[n-1][1], "w"); - fprintf ( fp, "DOC probcons [%s]\n", PROBCONSRNA_ADDRESS); - fprintf ( fp, "ALN_MODE multiple\n"); - fprintf ( fp, "OUT_MODE aln\n"); - fprintf ( fp, "IN_FLAG &bnsp\n"); - fprintf ( fp, "OUT_FLAG >\n"); - fprintf ( fp, "PARAM &bnsp2>/dev/null\n"); - fprintf ( fp, "SEQ_TYPE S\n"); - fprintf ( fp, "EXECUTABLE probconsRNA\n"); - fprintf ( fp, "ADDRESS %s\n", PROBCONSRNA_ADDRESS); - fprintf ( fp, "PROGRAM %s\n",PROBCONSRNA_4_TCOFFEE); - vfclose (fp);} - - - - sprintf (list[n][0], "muscle_pair"); - sprintf (list[n][1], "%s", vtmpnam(NULL)); - n++;if (method==NULL || strm (method, list[n-1][0])){fp=vfopen (list[n-1][1], "w"); - fprintf ( fp, "DOC Muscle [%s]\n", MUSCLE_ADDRESS); - fprintf ( fp, "EXECUTABLE muscle\n"); - fprintf ( fp, "ALN_MODE pairwise\n"); - fprintf ( fp, "OUT_MODE aln\n"); - fprintf ( fp, "IN_FLAG -in&bnsp\n"); - fprintf ( fp, "OUT_FLAG -out&bnsp\n"); - fprintf ( fp, "SEQ_TYPE S\n"); - fprintf ( fp, "ADDRESS %s\n", MUSCLE_ADDRESS); - fprintf ( fp, "PROGRAM %s\n", MUSCLE_4_TCOFFEE); - vfclose (fp);} - - sprintf (list[n][0], "muscle_msa"); - sprintf (list[n][1], "%s", vtmpnam(NULL)); - n++;if (method==NULL || strm (method, list[n-1][0])){fp=vfopen (list[n-1][1], "w"); - fprintf ( fp, "DOC Muscle [%s]\n", MUSCLE_ADDRESS); - fprintf ( fp, "EXECUTABLE muscle\n"); - fprintf ( fp, "ALN_MODE multiple\n"); - fprintf ( fp, "OUT_MODE aln\n"); - fprintf ( fp, "IN_FLAG -in&bnsp\n"); - fprintf ( fp, "OUT_FLAG -out&bnsp\n"); - fprintf ( fp, "SEQ_TYPE S\n"); - fprintf ( fp, "ADDRESS %s\n", MUSCLE_ADDRESS); - fprintf ( fp, "PROGRAM %s\n", MUSCLE_4_TCOFFEE); - vfclose (fp);} - - sprintf (list[n][0], "t_coffee_pair"); - sprintf (list[n][1], "%s", vtmpnam(NULL)); - n++;if (method==NULL || strm (method, list[n-1][0])){fp=vfopen (list[n-1][1], "w"); - fprintf ( fp, "EXECUTABLE t_coffee\n"); - fprintf ( fp, "DOC T-Coffee [%s]\n", TCOFFEE_ADDRESS); - fprintf ( fp, "ALN_MODE pairwise\n"); - fprintf ( fp, "OUT_MODE aln\n"); - fprintf ( fp, "IN_FLAG -infile&bnsp\n"); - fprintf ( fp, "OUT_FLAG -outfile&bnsp\n"); - fprintf ( fp, "SEQ_TYPE S\n"); - fprintf ( fp, "ADDRESS %s\n", TCOFFEE_ADDRESS); - fprintf ( fp, "PROGRAM %s\n", TCOFFEE_4_TCOFFEE); - vfclose (fp);} - - sprintf (list[n][0], "t_coffee_msa"); - sprintf (list[n][1], "%s", vtmpnam(NULL)); - n++;if (method==NULL || strm (method, list[n-1][0])){fp=vfopen (list[n-1][1], "w"); - fprintf ( fp, "EXECUTABLE t_coffee\n"); - fprintf ( fp, "DOC T-Coffee [%s]\n", TCOFFEE_ADDRESS); - fprintf ( fp, "ALN_MODE multiple\n"); - fprintf ( fp, "OUT_MODE aln\n"); - fprintf ( fp, "IN_FLAG -infile&bnsp\n"); - fprintf ( fp, "OUT_FLAG -outfile&bnsp\n"); - fprintf ( fp, "SEQ_TYPE S\n"); - fprintf ( fp, "ADDRESS %s\n", TCOFFEE_ADDRESS); - fprintf ( fp, "PROGRAM %s\n", TCOFFEE_4_TCOFFEE); - vfclose (fp);} - - sprintf (list[n][0], "pcma_pair"); - sprintf (list[n][1], "%s", vtmpnam(NULL)); - n++;if (method==NULL || strm (method, list[n-1][0])){fp=vfopen (list[n-1][1], "w"); - fprintf ( fp, "DOC PCMA [%s]\n", PCMA_ADDRESS); - fprintf ( fp, "EXECUTABLE pcma\n"); - fprintf ( fp, "ALN_MODE pairwise\n"); - fprintf ( fp, "OUT_MODE aln\n"); - fprintf ( fp, "IN_FLAG -infile=\n"); - fprintf ( fp, "OUT_FLAG -outfile=\n"); - fprintf ( fp, "SEQ_TYPE S\n"); - fprintf ( fp, "ADDRESS %s\n", PCMA_ADDRESS); - fprintf ( fp, "PROGRAM %s\n", PCMA_4_TCOFFEE); - vfclose (fp);} - - sprintf (list[n][0], "pcma_msa"); - sprintf (list[n][1], "%s", vtmpnam(NULL)); - n++;if (method==NULL || strm (method, list[n-1][0])){fp=vfopen (list[n-1][1], "w"); - fprintf ( fp, "DOC PCMA [%s]\n", PCMA_ADDRESS); - fprintf ( fp, "EXECUTABLE pcma\n"); - fprintf ( fp, "ALN_MODE multiple\n"); - fprintf ( fp, "OUT_MODE aln\n"); - fprintf ( fp, "IN_FLAG -infile=\n"); - fprintf ( fp, "OUT_FLAG -outfile=\n"); - fprintf ( fp, "SEQ_TYPE S\n"); - fprintf ( fp, "ADDRESS %s\n", PCMA_ADDRESS); - fprintf ( fp, "PROGRAM %s\n", PCMA_4_TCOFFEE); - vfclose (fp);} - - sprintf (list[n][0], "kalign_pair"); - sprintf (list[n][1], "%s", vtmpnam(NULL)); - n++;if (method==NULL || strm (method, list[n-1][0])){fp=vfopen (list[n-1][1], "w"); - fprintf ( fp, "EXECUTABLE kalign\n"); - fprintf ( fp, "DOC kalign [%s]\n", KALIGN_ADDRESS); - fprintf ( fp, "ALN_MODE pairwise\n"); - fprintf ( fp, "OUT_MODE aln\n"); - fprintf ( fp, "IN_FLAG -i&bnsp\n"); - fprintf ( fp, "OUT_FLAG -o&bnsp\n"); - fprintf ( fp, "SEQ_TYPE S\n"); - fprintf ( fp, "ADDRESS %s\n", KALIGN_ADDRESS); - fprintf ( fp, "PROGRAM %s\n", KALIGN_4_TCOFFEE); - vfclose (fp);} - - sprintf (list[n][0], "kalign_msa"); - sprintf (list[n][1], "%s", vtmpnam(NULL)); - n++;if (method==NULL || strm (method, list[n-1][0])){fp=vfopen (list[n-1][1], "w"); - fprintf ( fp, "EXECUTABLE kalign\n"); - fprintf ( fp, "DOC kalign [%s]\n", KALIGN_ADDRESS); - fprintf ( fp, "ALN_MODE multiple\n"); - fprintf ( fp, "OUT_MODE aln\n"); - fprintf ( fp, "IN_FLAG -i&bnsp\n"); - fprintf ( fp, "OUT_FLAG -o&bnsp\n"); - fprintf ( fp, "SEQ_TYPE S\n"); - fprintf ( fp, "ADDRESS %s\n", KALIGN_ADDRESS); - fprintf ( fp, "PROGRAM %s\n", KALIGN_4_TCOFFEE); - vfclose (fp);} - - sprintf (list[n][0], "amap_pair"); - sprintf (list[n][1], "%s", vtmpnam(NULL)); - n++;if (method==NULL || strm (method, list[n-1][0])){fp=vfopen (list[n-1][1], "w"); - fprintf ( fp, "EXECUTABLE amap\n"); - fprintf ( fp, "DOC amap [%s]\n", AMAP_ADDRESS); - fprintf ( fp, "ALN_MODE pairwise\n"); - fprintf ( fp, "OUT_MODE aln\n"); - fprintf ( fp, "IN_FLAG &bnsp\n"); - fprintf ( fp, "OUT_FLAG >\n"); - fprintf ( fp, "PARAM &bnsp2>/dev/null\n"); - fprintf ( fp, "SEQ_TYPE S\n"); - fprintf ( fp, "ADDRESS %s\n", AMAP_ADDRESS); - fprintf ( fp, "PROGRAM %s\n", AMAP_4_TCOFFEE); - vfclose (fp);} - - sprintf (list[n][0], "amap_msa"); - sprintf (list[n][1], "%s", vtmpnam(NULL)); - n++;if (method==NULL || strm (method, list[n-1][0])){fp=vfopen (list[n-1][1], "w"); - fprintf ( fp, "EXECUTABLE amap\n"); - fprintf ( fp, "DOC amap [%s]\n", AMAP_ADDRESS); - fprintf ( fp, "ALN_MODE multiple\n"); - fprintf ( fp, "OUT_MODE aln\n"); - fprintf ( fp, "IN_FLAG &bnsp\n"); - fprintf ( fp, "OUT_FLAG >\n"); - fprintf ( fp, "PARAM &bnsp2>/dev/null\n"); - fprintf ( fp, "SEQ_TYPE S\n"); - fprintf ( fp, "ADDRESS %s\n", AMAP_ADDRESS); - fprintf ( fp, "PROGRAM %s\n", AMAP_4_TCOFFEE); - vfclose (fp);} - - sprintf (list[n][0], "proda_pair"); - sprintf (list[n][1], "%s", vtmpnam(NULL)); - n++;if (method==NULL || strm (method, list[n-1][0])){fp=vfopen (list[n-1][1], "w"); - fprintf ( fp, "DOC proda [%s]\n", PRODA_ADDRESS); - fprintf ( fp, "EXECUTABLE proda\n"); - fprintf ( fp, "ALN_MODE pairwise\n"); - fprintf ( fp, "OUT_MODE aln\n"); - fprintf ( fp, "IN_FLAG &bnsp\n"); - fprintf ( fp, "OUT_FLAG >\n"); - fprintf ( fp, "PARAM &bnsp2>/dev/null\n"); - fprintf ( fp, "SEQ_TYPE S\n"); - fprintf ( fp, "ADDRESS %s\n", PRODA_ADDRESS); - fprintf ( fp, "PROGRAM %s\n", PRODA_4_TCOFFEE); - vfclose (fp);} - - sprintf (list[n][0], "proda_msa"); - sprintf (list[n][1], "%s", vtmpnam(NULL)); - n++;if (method==NULL || strm (method, list[n-1][0])){fp=vfopen (list[n-1][1], "w"); - fprintf ( fp, "DOC proda [%s]\n", PRODA_ADDRESS); - fprintf ( fp, "EXECUTABLE proda\n"); - fprintf ( fp, "ALN_MODE multiple\n"); - fprintf ( fp, "OUT_MODE aln\n"); - fprintf ( fp, "IN_FLAG &bnsp\n"); - fprintf ( fp, "OUT_FLAG >\n"); - fprintf ( fp, "PARAM &bnsp2>/dev/null\n"); - fprintf ( fp, "SEQ_TYPE S\n"); - fprintf ( fp, "ADDRESS %s\n", PRODA_ADDRESS); - fprintf ( fp, "PROGRAM %s\n", PRODA_4_TCOFFEE); - vfclose (fp);} - - sprintf (list[n][0], "prank_pair"); - sprintf (list[n][1], "%s", vtmpnam(NULL)); - n++;if (method==NULL || strm (method, list[n-1][0])){fp=vfopen (list[n-1][1], "w"); - fprintf ( fp, "DOC prank [%s]\n", PRANK_ADDRESS); - fprintf ( fp, "EXECUTABLE tc_generic_method.pl\n"); - fprintf ( fp, "ALN_MODE pairwise\n"); - fprintf ( fp, "OUT_MODE aln\n"); - fprintf ( fp, "PARAM -method=%s -mode=seq_msa -tmpdir=%s\n",(getenv("PRANK_4_TCOFFEE"))?getenv("PRANK_4_TCOFFEE"):PRANK_4_TCOFFEE, get_tmp_4_tcoffee()); - fprintf ( fp, "IN_FLAG -infile=\n"); - fprintf ( fp, "OUT_FLAG -outfile=\n"); - fprintf ( fp, "SEQ_TYPE S\n"); - fprintf ( fp, "ADDRESS %s\n", PRANK_ADDRESS); - fprintf ( fp, "PROGRAM %s\n", PRANK_4_TCOFFEE); - vfclose (fp);} - - sprintf (list[n][0], "prank_msa"); - sprintf (list[n][1], "%s", vtmpnam(NULL)); - n++;if (method==NULL || strm (method, list[n-1][0])){fp=vfopen (list[n-1][1], "w"); - fprintf ( fp, "DOC prank [%s]\n", PRANK_ADDRESS); - fprintf ( fp, "EXECUTABLE tc_generic_method.pl\n"); - fprintf ( fp, "ALN_MODE multiple\n"); - fprintf ( fp, "OUT_MODE aln\n"); - fprintf ( fp, "PARAM -method=%s -mode=seq_msa -tmpdir=%s\n",(getenv("PRANK_4_TCOFFEE"))?getenv("PRANK_4_TCOFFEE"):PRANK_4_TCOFFEE, get_tmp_4_tcoffee()); - fprintf ( fp, "IN_FLAG -infile=\n"); - fprintf ( fp, "OUT_FLAG -outfile=\n"); - fprintf ( fp, "SEQ_TYPE S\n"); - fprintf ( fp, "ADDRESS %s\n", PRANK_ADDRESS); - fprintf ( fp, "PROGRAM %s\n", PRANK_4_TCOFFEE); - vfclose (fp);} - - - sprintf (list[n][0], "em"); - sprintf (list[n][1], "%s", vtmpnam(NULL)); - n++; - if (method==NULL || lstrstr (method,"em@")) - { - - fp=vfopen (list[n-1][1], "w"); - if (method) - { - char **l2; - l2=string2list2 ( method, "@"); - fprintf ( fp, "PARAM -method=%s -mode=seq_msa -tmpdir=%s\n",l2[2], get_tmp_4_tcoffee()); - fprintf ( fp, "ALN_MODE %s\n", l2[3]); - free_char (l2, -1); - } - fprintf ( fp, "EXECUTABLE tc_generic_method.pl\n"); - fprintf ( fp, "OUT_MODE aln\n"); - fprintf ( fp, "IN_FLAG -infile=\n"); - fprintf ( fp, "OUT_FLAG -outfile=\n"); - fprintf ( fp, "SEQ_TYPE S\n"); - fprintf ( fp, "ADDRESS %s\n", ADDRESS_BUILT_IN); - fprintf ( fp, "PROGRAM %s\n", PROGRAM_BUILT_IN); - vfclose (fp); - } - - sprintf (list[n][0], "consan_pair"); - sprintf (list[n][1], "%s", vtmpnam(NULL)); - n++;if (method==NULL || strm (method, list[n-1][0])){fp=vfopen (list[n-1][1], "w"); - fprintf ( fp, "DOC consan (sfold) RNA pairwise sequence aligner [%s]\n", CONSAN_ADDRESS); - fprintf ( fp, "EXECUTABLE fasta_seq2consan_aln.pl \n"); - fprintf ( fp, "ALN_MODE pairwise\n"); - fprintf ( fp, "OUT_MODE aln\n"); - fprintf ( fp, "IN_FLAG -i&bnsp\n"); - fprintf ( fp, "OUT_FLAG -o&bnsp\n"); - fprintf ( fp, "PARAM -d&bnsp%s&bnsp2>/dev/null\n",get_mcoffee_4_tcoffee()); - fprintf ( fp, "SEQ_TYPE S\n"); - fprintf ( fp, "ADDRESS %s\n", CONSAN_ADDRESS); - fprintf ( fp, "PROGRAM %s\n", CONSAN_4_TCOFFEE); - vfclose (fp);} - - sprintf (list[n][0], "RNAplfold"); - sprintf (list[n][1], "%s", vtmpnam(NULL)); - n++;if (method==NULL || strm (method, list[n-1][0])){fp=vfopen (list[n-1][1], "w"); - fprintf ( fp, "EXECUTABLE RNAplfold \n"); - fprintf ( fp, "ALN_MODE predict\n"); - fprintf ( fp, "SEQ_TYPE RNA\n"); - fprintf ( fp, "ADDRESS %s\n", RNAPLFOLD_ADDRESS); - fprintf ( fp, "PROGRAM %s\n", RNAPLFOLD_4_TCOFFEE); - vfclose (fp);} - - sprintf (list[n][0], "HMMtop"); - sprintf (list[n][1], "%s", vtmpnam(NULL)); - n++;if (method==NULL || strm (method, list[n-1][0])){fp=vfopen (list[n-1][1], "w"); - fprintf ( fp, "EXECUTABLE HMMtop \n"); - fprintf ( fp, "ALN_MODE predict\n"); - fprintf ( fp, "SEQ_TYPE PROTEIN\n"); - fprintf ( fp, "ADDRESS %s\n", HMMTOP_ADDRESS); - fprintf ( fp, "PROGRAM %s\n", HMMTOP_4_TCOFFEE); - vfclose (fp);} - - sprintf (list[n][0], "GOR4"); - sprintf (list[n][1], "%s", vtmpnam(NULL)); - n++;if (method==NULL || strm (method, list[n-1][0])){fp=vfopen (list[n-1][1], "w"); - fprintf ( fp, "EXECUTABLE GORIV \n"); - fprintf ( fp, "ALN_MODE predict\n"); - fprintf ( fp, "SEQ_TYPE PROTEIN\n"); - fprintf ( fp, "ADDRESS %s\n", GOR4_ADDRESS); - fprintf ( fp, "PROGRAM %s\n", GOR4_4_TCOFFEE); - vfclose (fp);} - - sprintf (list[n][0], "wublast_client"); - sprintf (list[n][1], "%s", vtmpnam(NULL)); - n++;if (method==NULL || strm (method, list[n-1][0])){fp=vfopen (list[n-1][1], "w"); - fprintf ( fp, "EXECUTABLE wublast.pl \n"); - fprintf ( fp, "ALN_MODE predict\n"); - fprintf ( fp, "SEQ_TYPE PROTEIN\n"); - fprintf ( fp, "ADDRESS %s\n", EBIWUBLASTc_ADDRESS); - fprintf ( fp, "PROGRAM %s\n", EBIWUBLASTc_4_TCOFFEE); - vfclose (fp);} - - sprintf (list[n][0], "blastpgp_client"); - sprintf (list[n][1], "%s", vtmpnam(NULL)); - n++;if (method==NULL || strm (method, list[n-1][0])){fp=vfopen (list[n-1][1], "w"); - fprintf ( fp, "EXECUTABLE blastpgp.pl \n"); - fprintf ( fp, "ALN_MODE predict\n"); - fprintf ( fp, "SEQ_TYPE PROTEIN\n"); - - fprintf ( fp, "ADDRESS %s\n", EBIBLASTPGPc_ADDRESS); - fprintf ( fp, "PROGRAM %s\n", EBIBLASTPGPc_4_TCOFFEE); - vfclose (fp);} - - sprintf (list[n][0], "ncbi_netblast"); - sprintf (list[n][1], "%s", vtmpnam(NULL)); - n++;if (method==NULL || strm (method, list[n-1][0])){fp=vfopen (list[n-1][1], "w"); - fprintf ( fp, "EXECUTABLE blastcl3 \n"); - fprintf ( fp, "ALN_MODE predict\n"); - fprintf ( fp, "SEQ_TYPE PROTEIN\n"); - - fprintf ( fp, "ADDRESS %s\n", NCBIWEBBLAST_ADDRESS); - fprintf ( fp, "PROGRAM %s\n", NCBIWEBBLAST_4_TCOFFEE); - vfclose (fp);} - - sprintf (list[n][0], "local_ncbiblast"); - sprintf (list[n][1], "%s", vtmpnam(NULL)); - n++;if (method==NULL || strm (method, list[n-1][0])){fp=vfopen (list[n-1][1], "w"); - fprintf ( fp, "EXECUTABLE blastall \n"); - fprintf ( fp, "ALN_MODE predict\n"); - fprintf ( fp, "SEQ_TYPE PROTEIN\n"); - - fprintf ( fp, "ADDRESS %s\n", NCBIBLAST_ADDRESS); - fprintf ( fp, "PROGRAM %s\n", NCBIBLAST_4_TCOFFEE); - vfclose (fp);} - - list[n]=NULL; - return list; -} - - - -/*********************************COPYRIGHT NOTICE**********************************/ -/*© Centro de Regulacio Genomica */ -/*and */ -/*Cedric Notredame */ -/*Tue Oct 27 10:12:26 WEST 2009. */ -/*All rights reserved.*/ -/*This file is part of T-COFFEE.*/ -/**/ -/* T-COFFEE is free software; you can redistribute it and/or modify*/ -/* it under the terms of the GNU General Public License as published by*/ -/* the Free Software Foundation; either version 2 of the License, or*/ -/* (at your option) any later version.*/ -/**/ -/* T-COFFEE is distributed in the hope that it will be useful,*/ -/* but WITHOUT ANY WARRANTY; without even the implied warranty of*/ -/* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the*/ -/* GNU General Public License for more details.*/ -/**/ -/* You should have received a copy of the GNU General Public License*/ -/* along with Foobar; if not, write to the Free Software*/ -/* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA*/ -/*............................................... |*/ -/* If you need some more information*/ -/* cedric.notredame@europe.com*/ -/*............................................... |*/ -/**/ -/**/ -/* */ -/*********************************COPYRIGHT NOTICE**********************************/ diff --git a/binaries/src/tcoffee/t_coffee_source/util_declare.c b/binaries/src/tcoffee/t_coffee_source/util_declare.c deleted file mode 100644 index 0168ee5..0000000 --- a/binaries/src/tcoffee/t_coffee_source/util_declare.c +++ /dev/null @@ -1,1832 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include -#include -#include - - -#include "io_lib_header.h" -#include "util_lib_header.h" -#include "define_header.h" -#include "dp_lib_header.h" - -void free_pair_wise() -{ - //Free static allocated memory - free_proba_pair_wise(); -} - -/************************************************************************/ -/* */ -/* CONSTRAINT_LIST */ -/* */ -/* */ -/************************************************************************/ -Constraint_list * declare_constraint_list_simple ( Sequence *S) -{ - return declare_constraint_list (S, NULL, NULL, 0, NULL, NULL); -} - -Constraint_list * declare_constraint_list ( Sequence *S, char *name, int *L, int ne,FILE *fp, int **M) - { - Constraint_list *CL; - - CL=vcalloc (1, sizeof ( Constraint_list)); - - - CL->S=S; - CL->M=M; - - if ( name!=NULL) - { - sprintf ( CL->list_name, "%s", name); - - } - CL->cpu=1; - CL->fp=fp; - CL->L=L; - CL->ne=ne; - CL->entry_len=LIST_N_FIELDS; - CL->el_size=sizeof (CLIST_TYPE); - CL->matrices_list=declare_char(20,20); - - CL->chunk=500; - CL->weight_field=WE; - if ( S)CL->seq_for_quadruplet=vcalloc ( S->nseq, sizeof (int)); - CL->Prot_Blast=vcalloc ( 1, sizeof ( Blast_param)); - CL->DNA_Blast=vcalloc ( 1, sizeof ( Blast_param)); - CL->Pdb_Blast=vcalloc ( 1, sizeof ( Blast_param)); - CL->TC=vcalloc (1, sizeof (TC_param)); - - return CL; - } - -Constraint_list *free_constraint_list4lib_computation (Constraint_list *CL) -{ - if (!CL)return NULL; - - vfree (CL->L); - free_int (CL->M, -1); - - vfree (CL); - return CL; -} -Constraint_list *duplicate_constraint_list4lib_computation (Constraint_list *CL) -{ - Constraint_list *SCL; - SCL=vcalloc (1, sizeof ( Constraint_list)); - SCL[0]=CL[0]; - SCL->S=CL->S; - SCL->RunName=CL->RunName; - SCL->L=NULL; - SCL->max_L_len=0; - SCL->M=NULL; - SCL->ne=0; - SCL->residue_indexed=0; - SCL->residue_index=NULL; - - return SCL; -} -Constraint_list *duplicate_constraint_list_soft (Constraint_list *CL) -{ - /*Duplication that does not copy the long lists*/ - return copy_constraint_list (CL,SOFT_COPY); -} -Constraint_list *duplicate_constraint_list (Constraint_list *CL) - { - /*Duplicate everything in the constraint_list*/ - return copy_constraint_list (CL,HARD_COPY); - } -Constraint_list *copy_constraint_list (Constraint_list *CL, int mode) - { - Constraint_list *NCL; - Sequence *S; - int a, b; - - - - /*Sequences*/ - - - S=(mode==HARD_COPY)?duplicate_sequence (CL->S):CL->S; - - if (mode==HARD_COPY) - NCL=declare_constraint_list (S, NULL, NULL,0, NULL, NULL); - else - { - NCL=vcalloc ( 1, sizeof (Constraint_list)); - NCL[0]=CL[0]; - } - - NCL->S=S; - NCL->copy_mode=mode; - if (mode==SOFT_COPY)NCL->pCL=CL; - - NCL->STRUC_LIST=(mode==HARD_COPY)?duplicate_sequence (CL->STRUC_LIST):CL->STRUC_LIST; - sprintf ( NCL->align_pdb_param_file, "%s", CL->align_pdb_param_file); - sprintf ( NCL->align_pdb_hasch_mode, "%s", CL->align_pdb_hasch_mode); - - - NCL->W=(mode==HARD_COPY)?duplicate_weights (CL->W):CL->W; - NCL->DM=(mode==HARD_COPY)?duplicate_distance_matrix (CL->DM):CL->DM; - NCL->ktupDM=(mode==HARD_COPY)?duplicate_distance_matrix (CL->ktupDM):CL->ktupDM; - NCL->RunName=CL->RunName; - - if ( mode==HARD_COPY && CL->translation){NCL->translation=vcalloc ((CL->S)->nseq, sizeof (int)); for ( a=0; a< (CL->S)->nseq; a++)NCL->translation[a]=CL->translation[a];} - else{NCL->translation=CL->translation;} - - NCL->out_aln_format=(mode==HARD_COPY)?duplicate_char (CL->out_aln_format, -1, -1):CL->out_aln_format; - NCL->n_out_aln_format=CL->n_out_aln_format; - - /*Packing Sequence: To use with domain analysis*/ - NCL->packed_seq_lu=(mode==HARD_COPY)?duplicate_int (CL->packed_seq_lu, -1, -1):CL->packed_seq_lu; - /*DATA*/ - if (CL->fp)(mode==HARD_COPY)?NCL->fp=vtmpfile():CL->fp; - - if ( mode==HARD_COPY) - { - for ( a=0; a< CL->ne; a++) - for ( b=0; b< CL->entry_len; b++) vwrite_clist(NCL, a, b, vread_clist(CL, a, b)); - } - else NCL->L=CL->L; - - - if ( mode==HARD_COPY) - { - NCL->M=copy_int ( CL->M,NCL->M,-1, -1); - } - else - NCL->M=CL->M; - - - /*List Information*/ - NCL->ne=CL->ne; - sprintf ( NCL->list_name, "%s", CL->list_name); - NCL->entry_len=CL->entry_len; - NCL->el_size=CL->el_size; - - /*Normalisation information*/ - NCL->filter_lib=CL->filter_lib; - NCL->normalise=CL->normalise; - NCL->overweight=CL->overweight; - NCL->max_ext_value=CL->max_ext_value; - NCL->max_value=CL->max_value; - - /*Pair wise alignment method*/ - NCL->pw_parameters_set=CL->pw_parameters_set; - NCL->gop=CL->gop; - NCL->f_gop=CL->f_gop; - NCL->gep=CL->gep; - NCL->f_gep=CL->f_gep; - - NCL->nomatch=CL->nomatch; - - NCL->TG_MODE=CL->TG_MODE; - NCL->F_TG_MODE=CL->F_TG_MODE; - - sprintf ( NCL->dp_mode, "%s", CL->dp_mode); - NCL->maximise=CL->maximise; - sprintf ( NCL->matrix_for_aa_group, "%s", CL->matrix_for_aa_group); - sprintf ( NCL->method_matrix, "%s", CL->method_matrix); - - NCL->diagonal_threshold=CL->diagonal_threshold; - NCL->ktup=CL->ktup; - - NCL->use_fragments=CL->use_fragments; - NCL->fasta_step=CL->fasta_step; - NCL->lalign_n_top=CL->lalign_n_top; - NCL->sw_min_dist=CL->sw_min_dist; - NCL->matrices_list=(mode==HARD_COPY)?duplicate_char (CL->matrices_list, -1, -1):CL->matrices_list; - NCL->n_matrices=CL->n_matrices; - - sprintf (NCL->distance_matrix_mode, "%s", CL->distance_matrix_mode); - sprintf (NCL->distance_matrix_sim_mode, "%s", CL->distance_matrix_sim_mode); - - sprintf (NCL->tree_mode, "%s", CL->tree_mode); - NCL->tree_aln=(mode==HARD_COPY)?copy_aln (CL->tree_aln, NULL):CL->tree_aln; - /*Functions used for dynamic programming and Evaluation*/ - NCL->no_overaln=CL->no_overaln; - NCL->profile_mode=CL->profile_mode; - sprintf ( NCL->profile_comparison, "%s",CL->profile_comparison); - NCL->get_dp_cost=CL->get_dp_cost; - NCL->evaluate_residue_pair=CL->evaluate_residue_pair; - NCL->pair_wise=CL->pair_wise; - - NCL->weight_field=CL->weight_field; - NCL->max_n_pair=CL->max_n_pair; - - /*threading parameters*/ - NCL->Prot_Blast=(mode==HARD_COPY)?duplicate_blast_param ( CL->Prot_Blast):CL->Prot_Blast; - NCL->DNA_Blast =(mode==HARD_COPY)?duplicate_blast_param ( CL->DNA_Blast):CL->DNA_Blast; - NCL->Pdb_Blast =(mode==HARD_COPY)?duplicate_blast_param ( CL->Pdb_Blast):CL->Pdb_Blast; - NCL->TC =(mode==HARD_COPY)?duplicate_TC_param ( CL->TC):CL->TC; - - /*Split parameters*/ - NCL->split=CL->split; - NCL->split_nseq_thres= CL->split_nseq_thres; - NCL->split_score_thres= CL->split_score_thres; - /*Structural status*/ - NCL->check_pdb_status=CL->check_pdb_status; - /*log*/ - sprintf ( NCL->method_log, "%s",CL->method_log); - sprintf ( NCL->evaluate_mode, "%s",CL->evaluate_mode); - - /*Parameters for domain extraction*/ - NCL->moca=(mode==HARD_COPY)?duplicate_moca ( CL->moca):CL->moca; - - - - /*Functions for hiding forbiden pairs of residues*/ - /* Copy only for soft_copy*/ - if (mode==SOFT_COPY) - { - NCL->forbiden_pair_list=CL->forbiden_pair_list; - } - /*extention properties:*/ - NCL->nseq_for_quadruplet=CL->nseq_for_quadruplet; - if (mode==HARD_COPY && CL->seq_for_quadruplet) - {NCL->seq_for_quadruplet=vcalloc ( S->nseq, sizeof(int)); - for ( a=0; a< S->nseq; a++) - NCL->seq_for_quadruplet[a]=CL->seq_for_quadruplet[a]; - } - else if (mode==SOFT_COPY) - { - NCL->seq_for_quadruplet=CL->seq_for_quadruplet; - } - - /*extention properties: Do only a soft copy*/ - /* Not To be copied yet */ - if ( mode==SOFT_COPY) - { - NCL->extend_jit=CL->extend_jit; - NCL->extend_threshold=CL->extend_threshold; - sprintf ( NCL->extend_clean_mode, "%s", CL->extend_clean_mode); - sprintf ( NCL->extend_compact_mode, "%s", CL->extend_compact_mode); - } - - /*Lookup table parameteres*/ - NCL->chunk= CL->chunk; - /* Do NOT copy NCL->seq_indexed, NCL->start_index, NCL->max_L_len, NCL->chunk*/ - /* - if ( mode==SOFT_COPY) - { - NCL->seq_indexed=CL->seq_indexed; - NCL->start_index=CL->start_index; - NCL->end_index=CL->start_index; - NCL->max_L_len=CL->max_L_len; - } - */ - /*PDB STRUCTURE ALIGNMENTS*/ - /* Do only a soft copy */ - if ( mode==SOFT_COPY) - { - NCL->T=CL->T; - } - /*MISC*/ - NCL->cpu=CL->cpu; - NCL->local_stderr=CL->local_stderr; - sprintf (NCL->multi_thread, "%s", CL->multi_thread); - - return NCL; - } -Constraint_list *free_constraint_list_full (Constraint_list *CL) -{ - free_sequence (free_constraint_list (CL), -1); - return NULL; -} -Sequence *free_constraint_list (Constraint_list *CL) - { - Sequence *S; - int a, b; - Constraint_list *pCL; - - - /*Prepare the selective freeing of the CL data structure: - If the CL has been obtained from copy, every pointer that is identical to the parent CL (CL->pCL) - will not be saved. - */ - - - if ( !CL)return NULL; - else S=CL->S; - - if ( CL->copy_mode==SOFT_COPY && !CL->pCL) - {vfree(CL); return S;} - else if ( CL->copy_mode==SOFT_COPY) - { - - pCL=CL->pCL; - CL->L=NULL; - - if ( CL->M ==pCL->M )CL->M=NULL; - - if (CL->start_index ==pCL->start_index )CL->start_index=NULL; - if (CL->end_index ==pCL->end_index )CL->end_index=NULL; - if (CL->residue_index ==pCL->residue_index )CL->residue_index=NULL; - - if ( CL->fp ==pCL->fp )CL->fp=NULL; - if ( CL->matrices_list ==pCL->matrices_list )CL->matrices_list=NULL; - - - if ( CL->STRUC_LIST ==pCL->STRUC_LIST )CL->STRUC_LIST=NULL; - if ( CL->W ==pCL->W )CL->W=NULL; - if ( CL->DM ==pCL->DM )CL->DM=NULL; - if ( CL->ktupDM ==pCL->ktupDM )CL->ktupDM=NULL; - - - if ( CL->translation ==pCL->translation )CL->translation=NULL; - if ( CL->moca ==pCL->moca )CL->moca=NULL; - if ( CL->Prot_Blast ==pCL->Prot_Blast )CL->Prot_Blast=NULL; - if ( CL->DNA_Blast ==pCL->DNA_Blast )CL->DNA_Blast=NULL; - if ( CL->Pdb_Blast ==pCL->Pdb_Blast )CL->Pdb_Blast=NULL; - if ( CL->seq_for_quadruplet ==pCL->seq_for_quadruplet )CL->seq_for_quadruplet=NULL; - if ( CL->TC ==pCL->TC )CL->TC=NULL; - - } - - - /*End of selective freeing of the CL data structure*/ - - - - if ( CL->L)vfree (CL->L); - if ( CL->M)free_int (CL->M, -1); - if ( CL->fp)vfclose (CL->fp); - if ( CL->matrices_list)free_char(CL->matrices_list,-1); - - - if ( CL->start_index)free_int ( CL->start_index,-1); - if ( CL->end_index)free_int ( CL->end_index,-1); - - if ( CL->residue_index) - { - for ( a=0; a< (CL->S)->nseq; a++) - { - for ( b=0; b<=(CL->S)->len[a]; b++) - vfree(CL->residue_index[a][b]); - vfree (CL->residue_index[a]); - } - vfree(CL->residue_index); - } - - - if ( CL->STRUC_LIST)free_sequence ( CL->STRUC_LIST, (CL->STRUC_LIST)->nseq); - if ( CL->W)free_weights (CL->W); - - CL->DM=free_distance_matrix (CL->DM); - CL->ktupDM=free_distance_matrix (CL->ktupDM); - - if ( CL->translation)vfree(CL->translation); - if ( CL->moca)free_moca (CL->moca); - if ( CL->Prot_Blast)free_blast_param ( CL->Prot_Blast); - if ( CL->DNA_Blast) free_blast_param ( CL->DNA_Blast); - if ( CL->Pdb_Blast) free_blast_param ( CL->Pdb_Blast); - if ( CL->TC) free_TC_param ( CL->TC); - - if (CL->seq_for_quadruplet)vfree (CL->seq_for_quadruplet); - - vfree(CL); - return S; - } - -Distance_matrix * free_distance_matrix ( Distance_matrix *DM) -{ - if (!DM)return NULL; - free_int ( DM->similarity_matrix,-1); - free_int ( DM->distance_matrix,-1); - free_int ( DM->score_similarity_matrix,-1); - vfree (DM); - return NULL; -} -Distance_matrix * duplicate_distance_matrix ( Distance_matrix *DMin) -{ - Distance_matrix *DM; - if (!DMin) return NULL; - - DM=vcalloc ( 1, sizeof (Distance_matrix)); - DM->similarity_matrix=duplicate_int ( DMin->similarity_matrix, -1, -1); - DM->distance_matrix=duplicate_int ( DMin->distance_matrix, -1, -1); - DM->score_similarity_matrix=duplicate_int ( DMin->score_similarity_matrix, -1, -1); - return DM; -} - -/************************************************************************/ -/* */ -/* MOCA Functions */ -/* */ -/* */ -/************************************************************************/ -Moca * duplicate_moca ( Moca *m) - { - Moca *nm; - - if ( m==NULL)return m; - - nm=vcalloc ( 1, sizeof (Moca)); - - nm->moca_scale=m->moca_scale; - nm->evaluate_domain=m->evaluate_domain; - nm->moca_threshold=m->moca_threshold; - nm->cache_cl_with_domain=m->cache_cl_with_domain; - if ( m->forbiden_residues)nm->forbiden_residues=copy_int (m->forbiden_residues,nm->forbiden_residues, -1, -1); - nm->make_nol_aln=m->make_nol_aln; - - - return nm; - } -Moca * free_moca ( Moca *m) - { - if ( m->forbiden_residues)free_int ( m->forbiden_residues, -1); - vfree ( m); - return NULL; - } -/************************************************************************/ -/* */ -/* TC_param Functions */ -/* */ -/* */ -/************************************************************************/ -TC_param * duplicate_TC_param ( TC_param*B) -{ - TC_param *N; - N=vcalloc (1, sizeof ( TC_param)); - memcpy(B, N, sizeof(TC_param)); - return N; - } -TC_param * free_TC_param ( TC_param*B) -{ - vfree (B); - return NULL; -} -/************************************************************************/ -/* */ -/* Blast_param Functions */ -/* */ -/* */ -/************************************************************************/ -Blast_param * duplicate_blast_param ( Blast_param*B) -{ - Blast_param *N; - N=vcalloc (1, sizeof ( Blast_param)); - sprintf ( N->blast_server, "%s", B->blast_server); - sprintf ( N->db, "%s", B->db); - N->min_id=B->min_id; - N->max_id=B->min_id; - N->min_cov=B->min_cov; - return N; -} -Blast_param * free_blast_param ( Blast_param*B) -{ - vfree (B); - return NULL; -} - -/************************************************************************/ -/* */ -/* PDB Functions */ -/* */ -/* */ -/************************************************************************/ -Structure* declare_structure ( int n, char **array) - { - Structure *S; - int a; - - S=vcalloc (1, sizeof (Structure)); - S->n_fields=1; - S->nseq=n; - - S->struc=vcalloc ( n, sizeof (int**)); - S->len=vcalloc ( n, sizeof (int)); - for ( a=0; a< n; a++) - { - S->len[a]=strlen(array[a]); - S->struc[a]=declare_int ( strlen ( array[a])+2, 1); - } - return S; - } - -Structure *extend_structure ( Structure *S) - { - int a, b; - - - for ( a=0; a< S->nseq; a++) - { - for ( b=0; b< S->len[a]; b++) - S->struc[a][b]=vrealloc ( S->struc[a][b],( S->n_fields+1)*sizeof (int)); - } - S->n_fields++; - return S; - } - -Sequence * declare_sequence ( int min, int max, int nseq) - { - Sequence *LS; - - - - LS=vcalloc (1, sizeof ( Sequence)); - - LS->seq_comment=declare_char ( nseq,COMMENT_SIZE); - LS->aln_comment=declare_char ( nseq,COMMENT_SIZE); - - LS->file=declare_char( nseq,STRING+1); - LS->seq=declare_char ( nseq, max+1); - LS->name=declare_char( nseq,MAXNAMES+1); - - LS->len=vcalloc ( nseq, sizeof (int)); - LS->max_len=max; - LS->min_len=min; - LS->nseq=nseq; - LS->max_nseq=nseq; - LS->type=vcalloc(30, sizeof (char)); - LS->T=declare_arrayN(2, sizeof (Template), nseq, 1); - LS->dc=declare_int (nseq, 2); - return LS; - } -Sequence * realloc_sequence (Sequence *OUT, int new_nseq, int max_len) - { - - - if ( new_nseqmax_nseq)return OUT; - - OUT->min_len =MIN(OUT->min_len,max_len); - OUT->max_len =MAX(OUT->max_len,max_len); - OUT->seq_comment =new_realloc_char ( OUT->seq_comment, new_nseq,COMMENT_SIZE); - OUT->aln_comment =new_realloc_char ( OUT->aln_comment, new_nseq,COMMENT_SIZE); - - OUT->seq =new_realloc_char ( OUT->seq, new_nseq,OUT->max_len+1); - OUT->name =new_realloc_char ( OUT->name, new_nseq,MAXNAMES+1); - - - OUT->file =new_realloc_char ( OUT->file, new_nseq,STRING+1); - OUT->len =vrealloc ( OUT->len, (new_nseq+1)*sizeof (int)); - - OUT->T=(Template**)realloc_arrayN (2, (void **)OUT->T,sizeof (Template), new_nseq, 1); - OUT->dc=(int **)realloc_arrayN (2, (void **)OUT->dc,sizeof (int), new_nseq, 2); - OUT->max_nseq=new_nseq; - return OUT; - } - -Sequence * duplicate_sequence (Sequence *S ) - { - Sequence *LS; - int a, b; - - - if (S==NULL)return S; - LS=declare_sequence (S->min_len, S->max_len, S->nseq); - for (b=0, a=0; anseq; a++) - { - if (S->seq && S->seq[a]) - { - - sprintf ( LS->file[b], "%s", S->file[a]); - if ( S->seq_comment && S->seq_comment[a])sprintf ( LS->seq_comment[b], "%s", S->seq_comment[a]); - if ( S->aln_comment && S->aln_comment[a])sprintf ( LS->aln_comment[b], "%s", S->aln_comment[a]); - if ( S->seq && S->seq[a])sprintf ( LS->seq[b], "%s", S->seq[a]); - if ( S->name&& S->name[a])sprintf ( LS->name[b], "%s", S->name[a]); - LS->dc[b][0]=S->dc[a][0]; - LS->dc[b][1]=S->dc[a][1]; - LS->len[b]=S->len[a]; - LS->T[b][0]=S->T[a][0]; - b++; - - } - } - - LS->max_len=S->max_len; - LS->min_len=S->min_len; - LS->nseq=b; - - if ( S->W)LS->W=duplicate_weights (S->W); - sprintf ( LS->type, "%s", S->type); - sprintf ( LS->template_file, "%s", S->template_file); - LS->max_nseq=S->nseq; - - return LS; - } - -void free_sequence ( Sequence *LS, int nseq) - { - - - if ( !LS) return; - - - free_char ( LS->file, -1); - free_char ( LS->seq_comment, -1); - free_char ( LS->aln_comment, -1); - free_char ( LS->seq, -1); - free_char ( LS->name,-1); - free_int (LS->dc, -1); - free_arrayN((void*)LS->T, 2); - vfree (LS->type); - vfree (LS->len); - free_weights (LS->W); - vfree (LS); - - } -/************************************************************************/ -/* */ -/* Weights Functions */ -/* */ -/* */ -/************************************************************************/ -Weights* declare_weights ( int nseq) - { - Weights *W; - - W=vcalloc ( 1, sizeof ( Weights)); - W->comments=vcalloc ( 1000, sizeof (char)); - W->nseq=nseq; - W->mode=vcalloc (FILENAMELEN, sizeof (char)); - W->seq_name= declare_char ( W->nseq*2, 200); - W->PW_SD=declare_float ( W->nseq, W->nseq); - W->PW_ID=declare_float ( W->nseq, W->nseq); - W->SEQ_W=vcalloc ( W->nseq, sizeof ( float)); - return W; - } -Weights* duplicate_weights (Weights *W) - { - Weights *NW; - int a, b, c; - - NW=declare_weights (W->nseq); - sprintf ( NW->comments, "%s", W->comments); - sprintf ( NW->mode, "%s", W->mode); - for (a=0, c=0; a< W->nseq; a++) - { - if ( W->seq_name[a]) - { - sprintf ( NW->seq_name[c], "%s", W->seq_name[a]); - NW->SEQ_W[c]=W->SEQ_W[a]; - for(b=0; b< W->nseq; b++) - { - NW->PW_SD[c][b]=W->PW_SD[a][b]; - NW->PW_ID[c][b]=W->PW_ID[a][b]; - } - c++; - } - } - return NW; - } -Weights* free_weights ( Weights* W) - { - - if ( !W)return NULL; - - vfree(W->comments); - - - vfree(W->mode); - - free_char(W->seq_name, -1); - free_float(W->PW_SD,-1); - free_float(W->PW_ID, -1); - vfree(W->SEQ_W); - vfree(W); - return NULL; - } - - -Alignment* copy_aln ( Alignment *A, Alignment *B) - { - int a, b; - - /* c[100]=10;*/ - - - - if ( A==NULL){free_aln(B); return NULL;} - - - if (B) - B=realloc_alignment2 (B, A->nseq, A->len_aln); - else if ( A->S && A->nseq>(A->S)->nseq) - { - B=declare_aln2(A->nseq, MAX((A->S)->max_len+1, A->len_aln+1)); - B->S=A->S; - } - else - B=declare_aln ((A->S)); - - - -/*SIZES*/ - B->max_len=A->max_len; - B->min_len=A->min_len; - B->declared_len=A->declared_len; - B->max_n_seq=A->max_n_seq; - - B->nseq=A->nseq; - B->len_aln=A->len_aln; - - -/*sequence Information*/ - if ( A->generic_comment) - { - vfree(B->generic_comment); - B->generic_comment=vcalloc (strlen(A->generic_comment)+1, sizeof (char)); - sprintf ( B->generic_comment, "%s", A->generic_comment); - } - if ( (A->S)==NULL){vfree (B->len); B->len=vcalloc ( A->max_n_seq, sizeof (int));} - ga_memcpy_int ( A->len, B->len, B->nseq); - - B->seq_comment=copy_char ( A->seq_comment, B->seq_comment, -1,-1); - B->aln_comment=copy_char ( A->aln_comment, B->aln_comment, -1,-1); - - B->name=copy_char ( A->name, B->name, -1,-1); - - B->file=copy_char ( A->file, B->file, -1,-1); - B->tree_order=copy_char ( A->tree_order, B->tree_order, -1,-1); - B->expanded_order=A->expanded_order; - free_char ( B->seq_al, -1); - B->seq_al=declare_char(B->max_n_seq, B->declared_len); - for ( a=0; a< A->max_n_seq; a++) - { - for ( b=0; b< A->declared_len; b++) - B->seq_al[a][b]=A->seq_al[a][b]; - } - - - - B->order=copy_int ( A->order, B->order, -1, -1); - B->S=A->S; - if (A->seq_cache) - { - B->seq_cache=copy_int ( A->seq_cache, B->seq_cache,-1,-1); - } - - if (A->cdna_cache) - { - B->cdna_cache=copy_int ( A->cdna_cache, B->cdna_cache,-1,-1); - } - - B->P=copy_profile (A->P); - - B->Dp_result=A->Dp_result; - -/*Score*/ - - if ( (A->S)==NULL){vfree (B->score_seq); B->score_seq=vcalloc ( A->max_n_seq, sizeof (int));} - ga_memcpy_int( A->score_seq,B->score_seq,B->nseq); - - - B->score_aln=A->score_aln; - B->score=A->score; - B->cpu=A->cpu; - B->finished=A->finished; - -/*Output Options*/ - B->output_res_num=A->output_res_num; - B->residue_case=A->residue_case; - B->expand=A->expand; - - B->CL=A->CL; - B->random_tag=A->random_tag; - -/*Make the function Recursive */ - if ( A->A) - { - B->A=copy_aln (A->A, NULL); - } - else B->A=NULL; - - return B; - } - -Alignment* shrink_aln ( Alignment *A, int nseq, int *list) - { - Alignment *B=NULL; - int a,seq; - - B=copy_aln (A, B); - for ( a=0; a< nseq; a++) - { - seq=list[a]; - sprintf ( A->seq_comment[a], "%s",B->seq_comment[seq]); - sprintf ( A->aln_comment[a], "%s",B->aln_comment[seq]); - - sprintf ( A->seq_al [a], "%s",B->seq_al [seq]); - A->order[a][0]=B->order[seq][0]; - A->order[a][1]=B->order[seq][1]; - A->order[a][2]=B->order[seq][2]; - A->order[a][3]=B->order[seq][3]; - A->order[a][4]=B->order[seq][4]; - - A->score_seq[a]=B->score_seq[seq]; - A->len[a]=B->len[seq]; - } - A->nseq=nseq; - A->len_aln=strlen (A->seq_al[0]); - free_aln (B); - return A; - } -Alignment* extract_sub_aln2 ( Alignment *B, int ns, char **ls) - { - int *list; - Alignment *A; - - list=name_array2index_array(ls, ns, B->name, B->nseq); - A=extract_sub_aln ( B,ns, list); - vfree (list); - return A; - } -Alignment* extract_sub_aln ( Alignment *B, int nseq, int *list) - { - Alignment *A=NULL; - int a,b,n,seq; - - A=declare_aln2(nseq, B->len_aln+1); - for ( n=0,a=0; a< nseq; a++) - { - seq=list[a]; - if ( seq==-1)continue; - else n++; - sprintf ( A->seq_comment[a], "%s",B->seq_comment[seq]); - sprintf ( A->aln_comment[a], "%s",B->aln_comment[seq]); - sprintf ( A->name[a], "%s",B->name[seq]); - - - for (b=0; b<=B->len_aln; b++)A->seq_al [a][b]=B->seq_al [seq][b]; - A->order[a][0]=B->order[seq][0]; - A->order[a][1]=B->order[seq][1]; - A->order[a][2]=B->order[seq][2]; - A->order[a][3]=B->order[seq][3]; - A->order[a][4]=B->order[seq][4]; - - A->score_seq[a]=B->score_seq[seq]; - A->len[a]=B->len[seq]; - } - A->nseq=n; - A->len_aln=B->len_aln; - return A; - } - -Alignment *declare_aln2 ( int nseq, int len) - { - Sequence *S; - Alignment *A; - - S=vcalloc ( 1, sizeof ( Sequence)); - S->nseq=nseq; - S->max_len=len; - - A=declare_aln (S); - A->S=NULL; - vfree(S); - return A; - } - - - -Alignment *declare_aln ( Sequence *S){return declare_Alignment(S);} - -Alignment *declare_Alignment ( Sequence *S) - { - Alignment *LA; - int a; - - /*ordre: - [x][0]= which is the xth seq of aln - [x][1]= how many deleted residues before the first one - */ - - - LA=vcalloc (1, sizeof ( Alignment)); - aln_stack (LA, DECLARE_ALN); - if ( S==NULL) - { - LA->declared_len=MAX_LEN_ALN; - LA->max_n_seq=MAX_N_SEQ; - } - else - { - LA->declared_len=2*S->max_len+1; - LA->max_n_seq=S->nseq+1; - } - LA->S=S; - - - LA->seq_comment=declare_char (LA->max_n_seq, COMMENT_SIZE); - LA->aln_comment=declare_char (LA->max_n_seq, COMMENT_SIZE); - - - LA->seq_al=declare_char ( LA->max_n_seq,LA->declared_len ); - LA->name=declare_char (LA->max_n_seq, MAXNAMES+1); - - - LA->file=declare_char (LA->max_n_seq, STRING); - LA->tree_order=declare_char (LA->max_n_seq, STRING); - LA->order= declare_int (LA->max_n_seq , 5); - //order[a][0]: sequence index in S - //order[a][1]: offset of the sequence - //order[a][2]: used by sw_gotoh_pair_wise - //order[a][3]: used by sw_gotoh_pair_wise - //order[a][4]: weight, -1 - LA->score_seq= vcalloc (LA->max_n_seq, sizeof (int)); - - for ( a=0; a< LA->max_n_seq; a++)LA->order[a][0]=a; - - LA->len_aln=0; - LA->score_aln=0; - LA->len=vcalloc (LA->max_n_seq, sizeof (int)); - - if (S && S->name)for ( a=0; anseq; a++) - { - sprintf ( LA->name[a], "%s", S->name[a]); - - } - - return LA; - - } -Alignment * realloc_aln ( Alignment *A, int new_len){return realloc_alignment(A, new_len);} -Alignment * realloc_alignment ( Alignment *A, int new_len) - { - if (A==NULL)A=declare_Alignment (NULL); - - return realloc_alignment2( A, A->max_n_seq,new_len); - } - -Alignment * realloc_aln2 ( Alignment *A, int n_nseq, int n_len){return realloc_alignment2(A, n_nseq, n_len);} - - - -Alignment * realloc_alignment2 ( Alignment *A, int n_nseq, int n_len) - { - int a; - int len, nseq; - int delta_len, delta_nseq; - - if ( A==NULL) A=declare_Alignment(NULL); - - n_len++; - n_nseq++; - - len=A->declared_len; - nseq=A->max_n_seq; - - n_len=MAX(len, n_len); - n_nseq=MAX(nseq,n_nseq); - delta_nseq=MAX(0,n_nseq-nseq); - delta_len =MAX(0,n_len-len); - - if ( delta_nseq<=0 && delta_len<=0)return A; - - - else - { - A->len =vrealloc( A->len , sizeof (int)*n_nseq); - for (a=nseq; a< n_nseq; a++)A->len[a]=0; - - A->declared_len =n_len; - A->max_n_seq =n_nseq; - - - A->seq_comment=new_realloc_char ( A->seq_comment, n_nseq, -1); - A->aln_comment=new_realloc_char ( A->aln_comment, n_nseq, -1); - - A->name =new_realloc_char ( A->name, n_nseq, -1); - - - A->file =new_realloc_char ( A->file, n_nseq, -1); - - A->tree_order =new_realloc_char ( A->tree_order, n_nseq, -1); - A->seq_al =new_realloc_char ( A->seq_al, n_nseq, n_len); - A->order =new_realloc_int ( A->order, n_nseq, -1); - - if ( A->seq_cache) A->seq_cache=new_realloc_int ( A->seq_cache, n_nseq,n_len); - if ( A->cdna_cache)A->cdna_cache=new_realloc_int ( A->cdna_cache, n_nseq,n_len); - - - A->score_seq =vrealloc( A->score_seq, sizeof (int)*(n_nseq)); - for ( a=nseq; a< n_nseq; a++)A->score_seq[a]=0; - - - - } - return A; - } - - -long aln_stack (Alignment *A, int mode) -{ - static long *list; - static int size; - static int max_size; - - - if (A==NULL) return 0; - else if ( mode==DECLARE_ALN) - { - if ( size==max_size) - { - max_size+=1000; - list=vrealloc (list, max_size*sizeof (long)); - } - list[size++]=(long)A; - return 0; - } - else if (mode==FREE_ALN) - { - int a, b; - for (a=0; aS field (sequences of A)*/ - - Sequence *S; - //aln_stack checks the alignment has not already been freed - if ( LA==NULL || !aln_stack(LA,FREE_ALN)){return NULL;} - - S=LA->S; - free_char ( LA->file, -1); - free_char ( LA->seq_al, -1); - free_int ( LA->seq_cache, -1); - free_int ( LA->cdna_cache, -1); - free_char ( LA->name,-1); - - free_char ( LA->tree_order,-1); - vfree ( LA->generic_comment); - free_char ( LA->seq_comment, -1); - free_char ( LA->aln_comment, -1); - - free_int ( LA->order, -1); - - vfree ( LA->score_seq); - vfree ( LA->len); - - free_profile (LA->P); - if ( LA->A){free_Alignment (LA->A);LA->A=NULL;} - - - vfree ( LA); - return S; - } - -Alignment * update_aln_random_tag ( Alignment *A) -{ - static int tag; - if ( !A) return A; - - A->random_tag=++tag; - return A; -} - -Profile *copy_profile (Profile *P1) -{ - - Profile *P; - - if ( !P1) return NULL; - P=declare_profile ( P1->alphabet, P1->max_len); - P->count=copy_int (P1->count, P->count, -1, -1); - P->count2=copy_int (P1->count2, P->count2, -1, -1); - P->count3=copy_int (P1->count3, P->count3, -1, -1); - - return P; - -} - - -Profile *declare_profile(char *alphabet, int len) -{ - Profile *P; - P=vcalloc ( 1, sizeof ( Profile)); - P->alp_size=strlen(alphabet); - P->max_len=len; - P->alphabet=vcalloc ( strlen (alphabet)+2, sizeof (char)); - sprintf ( P->alphabet, "%s", alphabet); - - P->count=declare_int( P->alp_size+2, len); - P->count2=declare_int(100, len); - P->count3=declare_int(100, len); - - return P; -} -Profile * free_profile ( Profile *P) -{ - if (!P) return NULL; - else - { - vfree (P->alphabet); - free_int ( P->count, -1); - free_int ( P->count2, -1); - vfree (P); - } - return NULL; -} - - -/************************************************************************/ -/* */ -/* ALLOCATION */ -/* */ -/* */ -/************************************************************************/ - - -double alloc_mem; -double max_mem; -double tot_mem; -Memcontrol *memlast; - -FILE* print_mem_usage (FILE *fp, char *comment) -{ - fprintf ( fp, "# %s Memory Usage: Current= %.3f Mb, Max= %.3f Mb\n", comment,(float)((float)alloc_mem/(1024*1024)),(float)((float)tot_mem/(1024*1024)) ); - return fp; -} -void set_max_mem (int m) -{ - max_mem=m*1024*1024; -} - -int verify_memory (int s) -{ - alloc_mem+=s; - - tot_mem=(alloc_mem>tot_mem)?alloc_mem:tot_mem; - - if (max_mem && alloc_mem>max_mem) - { - fprintf (stderr, "\n%s Requires Too Much Memory: %d Megabytes [FATAL:%s]\n", PROGRAM,(int)(alloc_mem/1024*1024),PROGRAM); - fprintf (stderr, "Tip: Rerun your Job with a smaller dataset\n"); - - myexit (EXIT_FAILURE); - } - else - return 1; - return 0; - -} - -int my_assert ( void *p, int index) -{ - static int warning; - - if (!warning) - { - fprintf ( stderr, "\n****************************************************************\n"); - fprintf ( stderr, "\n DEBUG MODE [Rebuild For Better Performances] \n"); - fprintf ( stderr, "\n*****************************************************************\n"); - warning=1; - } - - if ( !is_dynamic_memory(p)) return 1; - else if ( read_array_size_new (p)<=index) - { - fprintf ( stderr, "\nFaulty Allocation: Size=%d Access=%d\n", read_array_size (p,0),index); - return 0; - } - else - { - return 1; - } -} - - - -void * vmalloc ( size_t size) - { - void * x; - Memcontrol *M; - - verify_memory (size+2*sizeof (Memcontrol)); - - if ( size==0) - return NULL; /*crash ("\n0 bytes in vmalloc\n");*/ - else - { - x= malloc (size + 2*sizeof (Memcontrol)); - - if ( x==NULL) - { - printf_exit (EXIT_FAILURE,stderr, "\nFAILED TO ALLOCATE REQUIRED MEMORY (vmalloc)\n"); - - } - else - { - M=x; - M[0].size=size; - M[0].size_element=0; - sprintf ( M[0].check, "dy"); - M+=2; - x=M; - return x; - } - } - return NULL;} - - - -void *vcalloc (size_t nobj, size_t size) -{ - return sub_vcalloc (nobj,size, MEMSET0); -} -void *vcalloc_nomemset ( size_t nobj, size_t size) -{ - return sub_vcalloc (nobj, size, NO_MEMSET0); -} -void *sub_vcalloc ( size_t nobj, size_t size, int MODE) - { - void *x; - Memcontrol *M; - - if ( nobj<=0 || size<=0)return NULL;/*crash ("\n0 bytes in vmalloc\n");*/ - else x=vmalloc (nobj*size); - - - M=x;M-=2;M[0].size_element=size;M+=2;x=M; - - if ( x==NULL) - { - crash ( "\nFAILED TO ALLOCATE REQUIRED MEMORY (vcalloc)\n"); - return NULL; - } - else - { - if ( MODE==MEMSET0) - { - x=memset (x,0, nobj*size); - } - else - { - if (nobj)x=memset (x, 0, size); - } - return x; - } - } - -void *vrealloc ( void *p, size_t size) - { - void *x; - Memcontrol *M; - size_t i_size; - int a; - - - if ( p==NULL) - { - x=vmalloc (size); - memset (x, 0, size); - - return x; - } - else - { - M=p; - M-=2; - i_size=M[0].size; - p=M; - - - if ( size<=0){return NULL;vfree (p);return NULL;} - else - { - verify_memory (size - i_size); - x=realloc ( p, size+2*sizeof(Memcontrol)); - - if ( x==NULL){crash ( "\nFAILED TO ALLOCATE REQUIRED MEMORY (realloc)\n");return NULL;} - M=x; - M[0].size=size; - M+=2; - x=M; - for ( a=i_size; a< size; a++)((char*)x)[a]=0; - return x; - } - } - return NULL; - } -void vfree ( void *p) - { - Memcontrol *M; - size_t size; - - if ( !p)return; - else - { - M=p; - M-=2; - size=M[0].size; - - p=M; - free(p); - - verify_memory (-(size+2*sizeof(Memcontrol))); - } - } -void vfree_all (void *p) -{ - Memcontrol *n; - while (memlast) - { - n=memlast->p; - vfree (memlast+2); - memlast=n; - } -} -/*********************************************************************/ -/* */ -/* SIZES */ -/* */ -/* */ -/*********************************************************************/ -#define WRITE_SIZE(type,function)\ -void function ( int x, type *array, int os)\ - {\ - fprintf(stderr, "\nwrite_size is a deprecated function [Warning:%s]\n", PROGRAM);return;\ - } -WRITE_SIZE(short,write_size_short) -WRITE_SIZE(char,write_size_char) -WRITE_SIZE(int,write_size_int) -WRITE_SIZE(float,write_size_float) -WRITE_SIZE(double,write_size_double) - -#define READ_ARRAY_SIZE(type, function)\ -int function (void *array, size_t size)\ - {\ - return read_array_size (array, size);\ - } -READ_ARRAY_SIZE(short,read_size_short) -READ_ARRAY_SIZE(char,read_size_char) -READ_ARRAY_SIZE(int,read_size_int) -READ_ARRAY_SIZE(float,read_size_float) -READ_ARRAY_SIZE(double,read_size_double) - - -int read_array_size_new (void *array) -{ - return read_array_size ( array, 0); -} -int read_array_size (void *array, size_t size) - { - Memcontrol *p; - if (array==NULL)return 0; - p=(Memcontrol *)array; - p-=2; - if ( p[0].size_element ==0 && size==0) - { - fprintf ( stderr, "\nERROR in read_array_size: trying to read the size of a malloced block"); - } - else if ( size ==0) return (int)p[0].size/p[0].size_element; - - return (int)p[0].size/size; - - } -int is_dynamic_memory ( void *array) -{ - Memcontrol *p; - if (array==NULL)return 0; - p=(Memcontrol *)array; - p-=2; - if ( strm (p[0].check, "dy"))return 1; - return 0; -} - -/************************************************************************/ -/* */ -/* DECLARE 2d ARRAYS */ -/* */ -/* */ -/************************************************************************/ - -void * free_arrayN(void *p, int n) -{ - int a, s; - void **i; - - - if ( p==NULL) return NULL; - else if ( n==1)vfree ((void *)p); - else - { - i=(void**)p; - s=read_array_size ( (void *)p, sizeof ( void *)); - for ( a=0; a< s; a++)free_arrayN ((void *)i[a], n-1); - vfree (p); - } - return NULL; -} - -void * declare_arrayNnomemset (int ndim, size_t size, ...) -{ - va_list ap; - int *array; - void **p; - int a; - - va_start (ap, size); - - array=vcalloc (ndim, sizeof (int)); - for ( a=0; a< ndim; a++) - { - array[a]=va_arg (ap,int); - if ( array[a]<0){va_end(ap);return NULL;} - - } - va_end (ap); - - if ( ndim==2) - { - - p=vcalloc_nomemset (array[0], sizeof ( void*)); - for (a=0; a< array[0]; a++) - { - p[a]=vcalloc_nomemset (array[1], size); - } - } - else - { - p=declare_arrayN2nomemset (ndim, array, size); - } - vfree (array); - return p; -} - -void *declare_arrayN2nomemset ( int ndim, int *A, size_t size) -{ - int a; - void **p; - - if ( ndim>1) - { - p=vcalloc_nomemset (A[0], sizeof (void*)); - for ( a=0; a1) - { - p=vcalloc_nomemset (A[0], sizeof (void*)); - for ( a=0; a1) - { - o=read_array_size (p,sizeof (void*)); - if (A[0]>o)p=vrealloc (p, sizeof (void*)*A[0]); - n=(A[0]==-1)?o:A[0]; - for ( a=0; ao)p=vrealloc (p, size*A[0]); - } - return p; -} - - - -void ** realloc_array (void **array,size_t size, int first, int second, int ext1, int ext2) -{ - int a; - int d1, d2; - if ( array==NULL)return declare_array (((first==-1)?0:first)+ext1, ((second==-1)?0:second)+ext2, size); - else if ( first==-1) - { - first=read_array_size (array, sizeof (void *)); - } - if (second==-1)second=read_array_size(array[0], size); - - d1=first+ext1; - d2=second+ext2; - - for ( a=d1; a0) - { - array=vrealloc ( array, (sizeof (Alignment*))*(first+ext1)); - for ( a=first; a=(first+ext1);a--)free_Alignment (array[a]); - array=vrealloc ( array, (sizeof (Alignment*))*(first+ext1)); - } - return array; - } - -/************************************************************************/ -/* */ -/* free 2d ARRAYS */ -/* */ -/* */ -/************************************************************************/ -#define FREE_ARRAY(type,wf,rf,function) \ -type ** function (type **array, int first)\ - {\ - return free_arrayN((void*)array, 2);\ - } -FREE_ARRAY(short,write_size_short,read_size_short,free_short) -FREE_ARRAY(char,write_size_char,read_size_char,free_char) -FREE_ARRAY(int,write_size_int,read_size_int,free_int) -FREE_ARRAY(float,write_size_float,read_size_float,free_float) -FREE_ARRAY(double,write_size_double,read_size_double,free_double) - - - -Alignment ** free_aln_array (Alignment **array) - { - int a; - int len; - - - if ( array==NULL)return NULL; - len=read_array_size ( array, sizeof (Alignment *)); - for ( a=1; a< len; a++)free_Alignment(array[a]); - vfree ( array); - return NULL; - } - -Fname *declare_fname (int size) - { - Fname *F; - - size+=strlen (get_home_4_tcoffee())+FILENAMELEN+1; - - F=vcalloc ( 1, sizeof (Fname)); - F->name =vcalloc ( size, sizeof (char)); - F->path =vcalloc ( size, sizeof (char)); - F->suffix=vcalloc ( size, sizeof (char)); - F->full=vcalloc ( size, sizeof (char)); - return F; - } - -Fname *free_fname ( Fname *F) - { - vfree (F->name); - vfree (F->path); - vfree (F->suffix); - return NULL; - } -/*********************************COPYRIGHT NOTICE**********************************/ -/*© Centro de Regulacio Genomica */ -/*and */ -/*Cedric Notredame */ -/*Tue Oct 27 10:12:26 WEST 2009. */ -/*All rights reserved.*/ -/*This file is part of T-COFFEE.*/ -/**/ -/* T-COFFEE is free software; you can redistribute it and/or modify*/ -/* it under the terms of the GNU General Public License as published by*/ -/* the Free Software Foundation; either version 2 of the License, or*/ -/* (at your option) any later version.*/ -/**/ -/* T-COFFEE is distributed in the hope that it will be useful,*/ -/* but WITHOUT ANY WARRANTY; without even the implied warranty of*/ -/* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the*/ -/* GNU General Public License for more details.*/ -/**/ -/* You should have received a copy of the GNU General Public License*/ -/* along with Foobar; if not, write to the Free Software*/ -/* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA*/ -/*............................................... |*/ -/* If you need some more information*/ -/* cedric.notredame@europe.com*/ -/*............................................... |*/ -/**/ -/**/ -/* */ -/*********************************COPYRIGHT NOTICE**********************************/ diff --git a/binaries/src/tcoffee/t_coffee_source/util_domain_constraints_list.c b/binaries/src/tcoffee/t_coffee_source/util_domain_constraints_list.c deleted file mode 100644 index 343fa35..0000000 --- a/binaries/src/tcoffee/t_coffee_source/util_domain_constraints_list.c +++ /dev/null @@ -1,399 +0,0 @@ -#include -#include -#include -#include -#include - -#include "io_lib_header.h" -#include "util_lib_header.h" -#include "define_header.h" -/*********************************************************************/ -/* */ -/* MASKING LIST FUNCTIONS */ -/* */ -/* */ -/*********************************************************************/ -Constraint_list * mask_list_with_aln (Alignment *A,int start, int len,Constraint_list *CL, int new_value) - { - int a, c,d; - static int *entry; - int s1, s2, r1, r2; - int **pos; - int **cache; - int max_nseq; - int max_len; - - - - if ( A==NULL || A->len_aln==0 || A->nseq<=1)return CL; - if ( entry==NULL) entry=vcalloc (CL->entry_len , CL->el_size); - - cache=declare_int (return_max_int (A->order,read_size_int ( A->order,sizeof (int*)),0)+1,return_max_int (A->order,read_size_int ( A->order,sizeof (int)),1)+A->len_aln+1); - - max_nseq=return_max_int (A->order,read_size_int ( A->order,sizeof (int*)),0)+1; - max_len =return_max_int (A->order,read_size_int ( A->order,sizeof (int*)),1)+A->len_aln+1; - - pos=aln2pos_simple(A, A->nseq); - - for (a=0; a< A->nseq; a++) - for (c=start; c< (start+len); c++) - if ( pos[a][c]>0)cache[A->order[a][0]][pos[a][c]]=1; - - - if (!CL->M) - { - for ( d=0; dne; d++) - { - s1=vread_clist(CL, d, SEQ1); - s2=vread_clist(CL, d, SEQ2); - r1=vread_clist(CL, d, R1); - r2=vread_clist(CL, d, R2); - if ( s1>=max_nseq); - else if (r1>=max_len); - else if ( cache[s1][r1]==1) - { - mask_entry(CL,d,new_value); - } - - if ( s2>=max_nseq); - else if (r2>=max_len); - else if ( cache[s2][r2]==1) - { - mask_entry(CL,d,new_value); - } - } - - sort_constraint_list_inv (CL,0, CL->ne); - sort_constraint_list (CL,0, CL->ne); - free_int ( cache, -1); - } - else if ( CL->M) - { - - for (a=0; a< A->nseq; a++) - for (c=start; c< (start+len); c++) - if ( pos[a][c]>0) - { - vwrite_clist(CL,30+A->order[a][0],pos[a][c],UNDEFINED); - } - } - free_int (pos, -1); - return CL; - } -Constraint_list* mask_list_with_aln_pair (Alignment *A,int start, int len ,Constraint_list *CL,int new_value) - { - int a, b, p; - int *entry; - - - int l1, l2, r1, r2, s1, s2; - int x, y; - - if ( A==NULL || A->len_aln==0 || A->nseq<=1)return CL; - - if (CL->M) - { - fprintf ( stderr, "\nERROR: AA matrix cannot be masked with mask_list_with_aln_pair"); - myexit (EXIT_SUCCESS); - } - - - entry=vcalloc (CL->entry_len, sizeof (int)); - - for ( a=0; a< A->nseq-1; a++) - { - for (l1=A->order[a][1]+1,p=0 ; pseq_al[a][p]); - for (r1=l1-1 ,p=start; p<(start+len) ; p++)r1+=!is_gap(A->seq_al[a][p]); - s1=A->order[a][0]; - - for ( b=a+1; b< A->nseq; b++) - { - for (l2=A->order[b][1]+1,p=0 ; pseq_al[b][p]); - for (r2=l2-1 ,p=start; p<(start+len) ; p++)r2+=!is_gap(A->seq_al[b][p]); - s2=A->order[b][0]; - - - for ( x=l1; x<=r1; x++) - { - - for ( y=l2; y<=r2; y++) - { - - set_int(entry,4,x,R1,y,R2,s1,SEQ1,s2,SEQ2); - if ( (main_search_in_list_constraint ( entry,&p,4,CL))!=NULL) - { - mask_entry(CL,p,new_value); - } - set_int(entry,4,y,R1,x,R2,s2,SEQ1,s1,SEQ2); - if ( (main_search_in_list_constraint ( entry,&p,4,CL))!=NULL) - { - - mask_entry(CL,p,new_value); - } - } - } - } - } - - - vfree(entry); - - return CL; - } - -Constraint_list *mask_entry( Constraint_list *CL, int p, int new_value) - { - vwrite_clist(CL, p, WE, new_value); - vwrite_clist(CL, p, CONS, new_value); - vwrite_clist(CL, p, MISC, new_value); - return CL; - } -/*********************************************************************/ -/* */ -/* SEQUENCE CONCATENATION */ -/* */ -/* */ -/*********************************************************************/ -Constraint_list *prepare_list_and_seq4sw(Constraint_list *I, int n_seq, char **seq_name) - { - int a, b; - int len,l; - char **long_seq=NULL; - int **translation; - char **name; - int s1, s2; - Constraint_list *Out=NULL; - Sequence *S_Out; - - translation=declare_int ( (I->S)->nseq,2); - name=declare_char (1,STRING); - long_seq=declare_char(1, STRING); - - for (len=0,a=0; a< (I->S)->nseq; a++) - { - if((b=name_is_in_list ((I->S)->name[a],seq_name, n_seq, 100))!=-1) - { - l=strlen((I->S)->seq[a])+1; - long_seq[0]=vrealloc(long_seq[0],(len+l+1)*sizeof(char)); - long_seq[0]=strcat(long_seq[0], (I->S)->seq[a]); - long_seq[0]=strcat(long_seq[0], "O"); - - translation[a][0]=b; - translation[a][1]=len; - len+=l; - } - else translation[a][0]=-1; - } - - long_seq[0][len-1]='\0'; - len--; - sprintf ( name[0], "concatenat"); - S_Out=fill_sequence_struc(1, long_seq, name); - free_char(name, -1); - free_char(long_seq, -1); - - - if (!I->M) - { - if ( I->fp) Out=declare_constraint_list(S_Out, NULL, NULL, 0, vtmpfile(),NULL); - else if ( I->L)Out=declare_constraint_list(S_Out, NULL, NULL, 0, NULL ,NULL); - - for (a=0; ane; a++) - { - s1=vread_clist(I,a,SEQ1); - s2=vread_clist(I,a,SEQ2); - - if ( translation[s1][0]!=-1 && translation[s2][0]!=-1) - Out=add_list_entry2list(Out, Out->entry_len, SEQ1, 0, SEQ2, 0, R1,vread_clist(I,a,R1)+translation[s1][1], R2,vread_clist(I,a,R2)+translation[s2][1], WE,vread_clist(I,a,WE),CONS, vread_clist(I,a,CONS), MISC, vread_clist(I,a,MISC)); - } - - for ( a=0; a<(I->S)->nseq; a++) - { - if (translation[a][0]!=-1 && translation[a][1]!=0) - { - for (b=1; b<=len; b++) - { - add_list_entry2list(Out,Out->entry_len, SEQ1, 0, SEQ2, 0, R1, translation[a][1], R2, b, WE, UNDEFINED, CONS, 0,MISC, vread_clist(I,a,MISC)); - } - } - } - sort_constraint_list (Out, 0, Out->ne); - } - else if (I->M) - { - Out=declare_constraint_list(S_Out, NULL, NULL, 0, vtmpfile(),I->M); - vfree((Out->M)[30]); - (Out->M)[30]=vcalloc ( len+1, sizeof (int)); - for ( a=0; ane=SIZEOF_AA_MAT; - } - free_int (translation,-1); - return Out; - } -/*********************************************************************/ -/* */ -/* MISCEANELLOUS */ -/* */ -/* */ -/*********************************************************************/ -int ** get_undefined_list (Constraint_list *CL) - { - int **list; - int a; - CLIST_TYPE x; - - list=declare_int ( (CL->S)->nseq+1, (CL->S)->max_len+1); - - for ( a=0; a< CL->ne; a++) - { - x=vread_clist(CL, a, WE); - list[vread_clist(CL, a, SEQ1)][vread_clist(CL, a, R1)]=(x==UNDEFINED); - list[vread_clist(CL, a, SEQ2)][vread_clist(CL, a, R2)]=(x==UNDEFINED); - } - return list; - } -int is_never_undefined (Constraint_list *CL,int r) - { - int a; - for ( a=0; a< CL->ne; a++) - { - if ( (vread_clist(CL,a,R1)==r || vread_clist(CL,a,R2)==r) && vread_clist(CL,a,WE)==UNDEFINED)return 0; - } - return 1; - } - -int* do_analyse_list ( Constraint_list *CL) - { - int **seq_score; - int **seq_score2; - int *pos_L; - int a; - int n_res; - - - int n_it=4; - - double sum, sum2, tot; - int field=2; - double z; - int r1, r2; - int max_we=0; - - - - fprintf ( stderr, "\nDO ANALYSE"); - - n_res=(CL->S)->max_len; - - pos_L =vcalloc (n_res+2, sizeof (int)); - pos_L++; - pos_L[-1]=n_res; - seq_score=declare_int (n_res+1,n_it+1); - seq_score2=declare_int(n_res+1,n_it+1); - - - - for ( a=0; a< CL->ne; a++) - { - r1=vread_clist(CL, a, R1); - r2=vread_clist(CL, a, R2); - - seq_score[r1][0]+=vread_clist(CL, a, WE); - seq_score[r2][0]+=vread_clist(CL, a, WE); - - - seq_score[r1][1]+=vread_clist(CL, a, CONS); - seq_score[r2][1]+=vread_clist(CL, a, CONS); - - - seq_score[r1][2]+=vread_clist(CL, a, MISC); - seq_score[r2][2]+=vread_clist(CL, a, MISC); - - } - for ( a=1; a<=n_res; a++)max_we=MAX(seq_score[a][0],max_we); - for ( a=1; a<=n_res; a++) - { - if ( a!=n_res && seq_score[a][0]> seq_score[a-1][0]) fprintf ( stderr, "\n%4d %s", a, num2plot(seq_score[a][0],max_we,40)); - else fprintf ( stderr, "\n"); - } - - for ( a=0; a< CL->ne; a++)if ( vread_clist(CL, a, MISC)>1000)fprintf ( stderr, "\n%4d %4d %4d", vread_clist(CL, a,R1), vread_clist(CL, a, R2), vread_clist(CL, a, MISC)); - myexit (EXIT_SUCCESS); - - for ( a=0; ane; a++) - { - if ( vread_clist(CL, a, WE)!=UNDEFINED &&vread_clist(CL, a, MISC)>=1 ) - { - seq_score[vread_clist(CL, a, R1)][0]+=vread_clist(CL, a, CONS); - seq_score[vread_clist(CL, a, R1)][1]+=vread_clist(CL, a, WE); - seq_score[vread_clist(CL, a, R1)][2]+=vread_clist(CL, a, MISC); - - seq_score[vread_clist(CL, a, R2)][0]+=vread_clist(CL, a, CONS); - seq_score[vread_clist(CL, a, R2)][1]+=vread_clist(CL, a, WE); - seq_score[vread_clist(CL, a, R2)][2]+=vread_clist(CL, a, MISC); - } - } - - for (a=1, tot=0,sum=0, sum2=0; a<= n_res ; a++) - { - if ( seq_score[a][2]>0) - { - sum +=seq_score[a][field]; - sum2+=seq_score[a][field]*seq_score[a][field]; - tot++; - } - } - - fprintf ( stderr, "\n"); - - - - for (a=1; a<= n_res ; a++) - { - z=return_z_score (seq_score[a][field],sum, sum2, tot ); - if ( seq_score[a][2]>0) - { - - pos_L[a]=(int)(z*10); - pos_L[0]++; - - } - } - - - free_int (seq_score2,-1); - free_int (seq_score,-1); - return pos_L; - } -/*********************************COPYRIGHT NOTICE**********************************/ -/*© Centro de Regulacio Genomica */ -/*and */ -/*Cedric Notredame */ -/*Tue Oct 27 10:12:26 WEST 2009. */ -/*All rights reserved.*/ -/*This file is part of T-COFFEE.*/ -/**/ -/* T-COFFEE is free software; you can redistribute it and/or modify*/ -/* it under the terms of the GNU General Public License as published by*/ -/* the Free Software Foundation; either version 2 of the License, or*/ -/* (at your option) any later version.*/ -/**/ -/* T-COFFEE is distributed in the hope that it will be useful,*/ -/* but WITHOUT ANY WARRANTY; without even the implied warranty of*/ -/* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the*/ -/* GNU General Public License for more details.*/ -/**/ -/* You should have received a copy of the GNU General Public License*/ -/* along with Foobar; if not, write to the Free Software*/ -/* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA*/ -/*............................................... |*/ -/* If you need some more information*/ -/* cedric.notredame@europe.com*/ -/*............................................... |*/ -/**/ -/**/ -/* */ -/*********************************COPYRIGHT NOTICE**********************************/ diff --git a/binaries/src/tcoffee/t_coffee_source/util_domain_dp.c b/binaries/src/tcoffee/t_coffee_source/util_domain_dp.c deleted file mode 100644 index 87617f5..0000000 --- a/binaries/src/tcoffee/t_coffee_source/util_domain_dp.c +++ /dev/null @@ -1,569 +0,0 @@ -#include -#include -#include -#include -#include - -#include "io_lib_header.h" -#include "util_lib_header.h" -#include "define_header.h" - -#include "dp_lib_header.h" - -int domain_pair_wise (Alignment *A,int*in_ns, int **in_l_s,Constraint_list *CL ) - { -/*******************************************************************************/ -/* SEQ_DOMAIN DP */ -/* */ -/* makes DP between the the ns[0] sequences and the ns[1] */ -/* */ -/* for MODE, see the function get_dp_cost */ -/*******************************************************************************/ - - int scale, gop, gep, maximise; - int a, b, i, j,l,x; - int best_j; - - int lenal[2], len; - int sub; - int match; - - int *ns, **l_s; - - - int *f; - int *pf; - - int *dd; - int *dd_len; - - int * e; - int *pe; - int * e_len; - int *pe_len; - - int fop; - int **pos0; - - int **al=NULL; - int **pos_al=NULL; - - - - int ala,LEN; - char *buffer; - char *char_buf; - - -/*trace back variables */ - TRACE_TYPE *buf_trace=NULL; - TRACE_TYPE **trace; - TRACE_TYPE k; - TRACE_TYPE *tr; - int **result_aln; - int nseq; -/*Test Varaibles*/ - int score; - -/*Prepare l_s and ns*/ - - - ns=vcalloc( 2, sizeof (int)); - l_s=vcalloc ( 2, sizeof (int*)); - - ns[0]=in_ns[1]; - ns[1]=in_ns[0]; - - l_s[0]=in_l_s[1]; - l_s[1]=in_l_s[0]; - lenal[0]=strlen (A->seq_al[l_s[0][0]]); - lenal[1]=strlen (A->seq_al[l_s[1][0]]); - len=lenal[0]+lenal[1]+2; -/********************************/ -gop=(CL->gop)*SCORE_K; -gep=(CL->gep)*SCORE_K; -maximise=CL->maximise; -scale=(lenal[1]*SCORE_K*(CL->moca)->moca_scale); -/*******************************/ - - - -/*DO MEMORY ALLOCATION FOR DP*/ - - - - - - buf_trace=vcalloc ( len, sizeof (TRACE_TYPE)); - buffer=vcalloc ( 2*len, sizeof (char)); - - al =declare_int (2, 2*len); - pos_al=declare_int (2, 2*len); - result_aln=declare_int (1,len); - char_buf= vcalloc (2*len, sizeof (char)); - - f =vcalloc (len, sizeof (int)); - pf =vcalloc (len, sizeof (int)); - e =vcalloc (len, sizeof (int)); - pe =vcalloc (len, sizeof (int)); - e_len =vcalloc (len, sizeof (int)); - pe_len =vcalloc (len, sizeof (int)); - dd =vcalloc (len, sizeof (int)); - dd_len=vcalloc (len, sizeof (int)); - - - trace=declare_int (lenal[0]+2, lenal[1]+2); - -/*END OF MEMORY ALLOCATION*/ - - - /* - 0(s) +(dd) - \ | - \ | - \ | - \ | - \ | - \ | - \| - -(e)----O - */ - - pos0=aln2pos_simple ( A,-1, ns, l_s); - - for ( i=0; i<=lenal[0]+1; i++) - { - tr=trace[i]; - - - for ( sub=0,j=0; j<=lenal[1]; j++) - { - if (i==0 && j==0){tr[j]=1;pe[j]=dd[j]=gop;} - else if (i==0) {e[j]=pe[j]=dd[j]=gop;dd_len[j]=e_len[j]=pe_len[j]=f[j]=pf[j]=0;tr[j]=-1;} - else if (j==0) - { - for (f[j]=pf[0],best_j=0,a=1; a<=lenal[1]; a++) - { - if (f[j]!=MAX(pf[a]+scale,f[j])) - { - f[j]=pf[a]+scale; - best_j=a; - } - } - - - dd [j]=e[j]=pe[j]=gop; - dd_len[j]=e_len[j]=0; - tr [j]=best_j; - - } - else if (i>lenal[0]); - else - { - sub=(CL->get_dp_cost) (A, pos0, ns[0], l_s[0], i-1, pos0, ns[1], l_s[1],j-1,CL); - - - - match=pf[j-1]+sub; - if (a_better_than_b(pf[j]+gop, pe[j]+gep,maximise)) - { - e [j]=pf[j]+gop; - e_len[j]=1; - } - else - { - e [j]=pe [j]+gep; - e_len[j]=pe_len[j]+1; - } - - - if (a_better_than_b(f[j-1]+gop, dd[j-1]+gep,maximise)) - { - dd [j]=f[j-1]+gop; - dd_len[j]=1; - } - else - { - dd [j]=dd [j-1]+gep; - dd_len[j]=dd_len[j-1]+1 ; - } - - - - if ( sub!=UNDEFINED) - { - - f[j] =best_int(4,maximise,&fop,e[j],match,dd[j],f[0]); - fop-=1; - - if (fop==-1)fop= e_len[j]*fop; - else if (fop==1 )fop=dd_len[j]*fop; - else if (fop==2 )fop=UNDEFINED; - - - - } - else - { - dd[j]=e[j]=match=-10000; - f[j]=f[0]; - fop=UNDEFINED; - } - pe [j] =e [j]; - pf [j-1]=f [j-1]; - pe_len[j] =e_len[j]; - tr[j] =fop; - } - - } - - pf [j-1]=f [j-1]; - pe [j] =e [j]; - pe_len[j] =e_len[j]; - } - score=f[0]; - i=lenal[0]+1; - j=0; - ala=0; - - - while (i!=0) - { - - - k=trace[i][j]; - if (j==0 && i<=lenal[0]) - { - - pos_al[0][ala]=i; - pos_al[1][ala]=j; - al[0][ala]=MATCH; - al[1][ala]=UNALIGNED; - i--; - j=k; - ala++; - } - else if ( j==0 && i>lenal[0]) - { - j=k; - i--; - - } - else if (k==0) - { - pos_al[0][ala]=i; - pos_al[1][ala]=j; - al[0][ala]=MATCH; - al[1][ala]=MATCH; - i--; - j--; - - ala++; - } - else if (k!=UNDEFINED && k<0 && i>0) - { - for (x=0; x< -k && i>0; x++) - { - pos_al[0][ala]=i; - pos_al[1][ala]=j; - al[0][ala]=MATCH; - al[1][ala]=GAP; - i--; - ala++; - } - } - else if (k!=UNDEFINED && k>0 && j>0) - { - for ( x=0; x< k && j>0; x++) - { - pos_al[0][ala]=i; - pos_al[1][ala]=j; - al[0][ala]=GAP; - al[1][ala]=MATCH; - j--; - ala++; - } - } - else if ( k==UNDEFINED){j=0;} - - } - - - LEN=ala; - - invert_list_int ( pos_al[0], LEN); - invert_list_int ( pos_al[1], LEN); - invert_list_int ( al[0], LEN); - invert_list_int ( al[1], LEN); - - - /*O: TARGET SEQUENCE (long)*/ - /*1: PATTERN SEQUENCE (short)*/ - - - - for ( b=0; b0) - { - result_aln=realloc_int ( result_aln, read_size_int ( result_aln,sizeof (int*)),len, 1, 0); - nseq++; - l=0; - for ( b=0; b0)nseq++; - - - - - A=domain_match_list2aln ( A,ns,l_s,result_aln,nseq,lenal[1]); - - - vfree (f); - vfree (pf); - vfree (e); - vfree (pe); - vfree (e_len); - vfree (pe_len); - vfree (dd_len); - vfree (dd); - free_int (pos0, -1); - vfree (buffer); - vfree (char_buf); - vfree (buf_trace); - free_int ( pos_al, -1); - pos_al=NULL; - - free_int ( al, -1); - - free_int (trace,-1); - free_int ( result_aln, -1); - return score; - } - - -Alignment *domain_match_list2aln ( Alignment *A,int *ns,int **l_s,int **ml, int nseq, int len) - { - - /* - function documentation: start - - This function edits the alignment given the results obtained by DP - ns: ns[0]->number of sequences serarched (TARGET) - ns[1]->number of sequences in the pattern (PATTERN SEQ) - l_s: - l_s[0]->list of sequences in the TARGET... - - nseq: number of occurences of PATTERN in TARGET - len: length of the PATTERN - - ml: detail of the nseq matches - ml[x][y]=k-> residue k of TARGET matches residue y of pattern - -> k=-1 means a gap; - - NOTE: This implementation can only match ONE target sequence with the PATTERN - The Pattern can either be one sequence or a profile. - - function documentation: end - */ - - - - - int a, b, c, d, e; - Alignment *B=NULL; - int **new_ml; - int *max_ml; - int *start_ml; - int tot_nseq; - int max_len,seq; - char *buf; - - if ( len==0 || nseq==0) - { - A->nseq=0; - A->len_aln=0; - } - else - { - B=copy_aln(A, B); - /*1 Extract the sequence used as a pattern, put it on the top*/ - - - A=shrink_aln (A, ns[1], l_s[1]); - A=realloc_aln2(A, ns[1]+ns[0]*nseq,len+A->len_aln+1); - - - - new_ml =declare_int ( nseq, 3*len); - max_ml =vcalloc ( nseq, sizeof (int)); - for ( a=0; a=0) - { - new_ml[c][b]=ml[c][a]; - if ( max_ml[c]<0) start_ml[c]=max_ml[c]=ml[c][a]; - for ( d=a+1; d=0){ max_ml[c]= ml[c][d];break;}} - if (max_ml[c]!=new_ml[c][b]) - { - new_ml[c][b+1]=max_ml[c]-new_ml[c][b]-1; - } - max_len=MAX( max_len, new_ml[c][b+1]); - } - else - { - new_ml[c][b]=ml[c][a]; - new_ml[c][b+1]=0; - } - } - - for ( c=0; c< nseq; c++){new_ml[c][b+2]=max_len;} - } - - tot_nseq=ns[1]+ns[0]*nseq; - - for ( a=0, b=0; a< len ;a++) - { - - /*1: Place the Match Column*/ - for ( c=0; c< ns[1]; c++) - { - A->seq_al[c][b]=B->seq_al[l_s[1][c]][a]; - A->seq_al[c][b+1]='\0'; - } - for ( e=0,c=ns[1]; cseq_al[c+d][b]=B->seq_al[l_s[0][d]][new_ml[e][3*a]]; - else - A->seq_al[c+d][b]='-'; - A->seq_al[c+d][b+1]='\0'; - - } - b++; - - /*2: Add the Gaps before the next_column*/ - if ( new_ml[0][3*a+2]>0) - { - for ( c=0; c< ns[1]; c++) - { - buf=generate_null(new_ml[0][3*a+2]); - strcat ( A->seq_al[c],buf); - vfree (buf); - } - for (e=0,c=ns[1];c< tot_nseq; c+=ns[0], e++) - { - buf=extract_char (B->seq_al[l_s[0][0]], new_ml[e][3*a]+1, new_ml[e][3*a+1]); - strcat ( A->seq_al[c],buf); - vfree (buf); - buf=generate_null(new_ml[e][3*a+2]-new_ml[e][3*a+1]); - strcat ( A->seq_al[c],buf); - - vfree (buf); - - } - } - b+=new_ml[0][3*a+2]; - } - - for (e=0,a=ns[1]; a< tot_nseq; a+=ns[0],e++) - { - for ( b=0; border[a+b][0]=B->order[seq][0]; - A->order[a+b][1]=B->order[seq][1]; - for ( c=0; corder[a+b][1]+=!is_gap(B->seq_al[seq][c]); - sprintf ( A->name[a+b], "Repeat_%d", a+b); - } - } - - free_aln(B); - A->nseq=tot_nseq; - A->len_aln=strlen ( A->seq_al[0]); - - } - return A; - } -Alignment * domain_seq2domain (Constraint_list *CL,int scale,int gop,int gep,Alignment *SEQ_DOMAIN, Alignment *TARGET) - { - static Alignment *A; - int *n_groups; - int **group_list; - int a,b,c; - - A=copy_aln (TARGET, A); - A=stack_aln( A, SEQ_DOMAIN); - - - n_groups=vcalloc ( 2, sizeof (int)); - group_list=declare_int (2, A->nseq); - - n_groups[0]=TARGET->nseq; - n_groups[1]=SEQ_DOMAIN->nseq; - for (c=0, a=0; a< 2; a++) - { - for (b=0; b< n_groups[a]; b++, c++) - { - group_list[a][b]=c; - } - } - A->score_aln=domain_pair_wise (A, n_groups, group_list,CL); - - SEQ_DOMAIN=copy_aln (A, SEQ_DOMAIN); - vfree (n_groups); - free_int (group_list,-1); - return SEQ_DOMAIN; - } - - -/*********************************COPYRIGHT NOTICE**********************************/ -/*© Centro de Regulacio Genomica */ -/*and */ -/*Cedric Notredame */ -/*Tue Oct 27 10:12:26 WEST 2009. */ -/*All rights reserved.*/ -/*This file is part of T-COFFEE.*/ -/**/ -/* T-COFFEE is free software; you can redistribute it and/or modify*/ -/* it under the terms of the GNU General Public License as published by*/ -/* the Free Software Foundation; either version 2 of the License, or*/ -/* (at your option) any later version.*/ -/**/ -/* T-COFFEE is distributed in the hope that it will be useful,*/ -/* but WITHOUT ANY WARRANTY; without even the implied warranty of*/ -/* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the*/ -/* GNU General Public License for more details.*/ -/**/ -/* You should have received a copy of the GNU General Public License*/ -/* along with Foobar; if not, write to the Free Software*/ -/* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA*/ -/*............................................... |*/ -/* If you need some more information*/ -/* cedric.notredame@europe.com*/ -/*............................................... |*/ -/**/ -/**/ -/* */ -/*********************************COPYRIGHT NOTICE**********************************/ diff --git a/binaries/src/tcoffee/t_coffee_source/util_domain_dp_drivers.c b/binaries/src/tcoffee/t_coffee_source/util_domain_dp_drivers.c deleted file mode 100644 index 36b3484..0000000 --- a/binaries/src/tcoffee/t_coffee_source/util_domain_dp_drivers.c +++ /dev/null @@ -1,681 +0,0 @@ -#include -#include -#include -#include -#include - -#include "io_lib_header.h" -#include "util_lib_header.h" -#include "define_header.h" -#include "dp_lib_header.h" - -/******************************************************************/ -/* MOCA DRIVER */ -/* */ -/* */ -/******************************************************************/ -Constraint_list *prepare_cl_for_moca ( Constraint_list *CL) - { - int a, b, c; - int tot_l, l; - char **name, **seq; - Sequence *NS=NULL; - - /*Prepare the constraint list*/ - CL->do_self=1; - CL->get_dp_cost=moca_slow_get_dp_cost; - CL->evaluate_residue_pair=moca_residue_pair_extended_list; - - /*Prepare the moca parameters*/ - (CL->moca)->evaluate_domain=evaluate_moca_domain; - (CL->moca)->cache_cl_with_domain=cache_cl_with_moca_domain; - (CL->moca)->make_nol_aln=make_moca_nol_aln; - - /*Prepare the packing of the sequences*/ - for ( a=0, b=1; a< (CL->S)->nseq; a++)b+=strlen ( (CL->S)->seq[a])+1; - - seq =declare_char ( 1,b+1); - name=declare_char( 1,30); - CL->packed_seq_lu =declare_int ( b, 2); - - - for (tot_l=1,a=0; a< (CL->S)->nseq; a++) - { - strcat (seq[0], (CL->S)->seq[a]); - strcat (seq[0], "X"); - l=strlen((CL->S)->seq[a]); - for ( c=1; c<= l; c++, tot_l++) - { - CL->packed_seq_lu[tot_l][0]=a; - CL->packed_seq_lu[tot_l][1]=c; - } - CL->packed_seq_lu[tot_l++][0]=UNDEFINED; - } - sprintf ( name[0], "catseq"); - NS=fill_sequence_struc(1, seq, name); - CL->S=add_sequence (NS, CL->S, 0); - free_char( seq, -1); - free_char(name, -1); - free_sequence (NS, NS->nseq); - - - return CL; - } - -Alignment ** moca_aln ( Constraint_list *CL) - { - /* - function documentation: start - - Alignment ** moca_aln ( Constraint_list *CL) - - This function inputs CL and outputs a series of local multiple alignments - contained in aln_list; - - The terminator of aln_list is set to NULL; - - function documentation: end - */ - - - static int max_n_domains=1000; - int n_domains=0; - - Alignment **aln_list; - - - - aln_list=vcalloc (max_n_domains, sizeof (Alignment *)); - if ((CL->moca)->moca_interactive)aln_list[n_domains++]=extract_domain ( CL); - else - { - while ( (aln_list[n_domains++]=extract_domain ( CL))!=NULL) - { - if ((CL->moca)->moca_len)break; - if ( n_domains==max_n_domains) - { - n_domains+=1000; - aln_list=vrealloc (aln_list, max_n_domains*sizeof (Alignment*)); - } - } - } - return aln_list; - } - -Alignment * extract_domain ( Constraint_list *CL) - { - /* - function documentation: start - Alignment * extract_domain ( Constraint_list *CL) - - given a CL, this function extracts the next best scoring local multiple alignment - It returns a CL where the aligned residues have been indicated in (CL->moca)->forbiden_residues; - - the local alignment is extracted with the dp function indicated by - CL->dp_mode: (gotoh_sw_pair_wise) - Evaluation: - CL->get_dp_cost=slow_get_dp_cost; - CL->evaluate_residue_pair=sw_residue_pair_extended_list; - Continuation: - (CL->moca)->evaluate_domain=evaluate_moca_domain; - Cache of CL: - (CL->moca)->cache_cl_with_domain=cache_cl_with_moca_domain; - Domain post processing: - (CL->moca)->make_nol_aln=make_moca_nol_aln; - function documentation: end - */ - int min_start, max_start, start,min_len, max_len, len, score; - int step; - Alignment *C=NULL; - Alignment *RESULT=NULL; - Alignment *EA=NULL; - - - - - /*CASE 1: Non Automatic Domain Extraction*/ - if ((CL->moca)->moca_interactive) - { - return interactive_domain_extraction (CL); - } - else if ((CL->moca)->moca_len) - { - while ((C=extract_domain_with_coordinates (C,(CL->moca)->moca_start,(CL->moca)->moca_len,CL))->nseq==0)(CL->moca)->moca_scale=(CL->moca)->moca_scale*0.9; - RESULT=copy_aln ( C, RESULT); - unpack_seq_aln (RESULT, CL); - output_format_aln ("mocca_aln",RESULT,EA=fast_coffee_evaluate_output(RESULT, CL),"stdout"); - free_aln(EA); - - return RESULT; - } - else if ( !(CL->moca)->moca_len) - { - analyse_sequence (CL); - myexit (EXIT_FAILURE); - } - - /*CASE 2: Automatic Domain Extraction: Find Coordinates*/ - - - start=500; - - step=10; - min_start=0; - max_start=strlen ((CL->S)->seq[0]); - min_len=20; - max_len=strlen ((CL->S)->seq[0]); - - C=extract_domain_with_coordinates (C,13,30,CL); - C->output_res_num=1; - print_aln (C); - - (CL->moca)->moca_scale=-180; - C=add_seq2aln (CL,C, CL->S); - print_aln (C); - - (CL->moca)->moca_scale=-160; - C=add_seq2aln (CL,C, CL->S); - print_aln (C); - - myexit (EXIT_FAILURE); - - while ( step>0) - { - C=approximate_domain (min_start,max_start,step,min_len,max_len, step,&start, &len, &score, CL); - min_start=start-step; - max_start=start+step; - min_len=len-step; - max_len=len+step; - step=step/2; - } - - C=extract_domain_with_coordinates (C,start-10, len+20,CL); - C->output_res_num=1; - print_aln (C); - - myexit (EXIT_FAILURE); - return C; - - - } -Alignment * interactive_domain_extraction ( Constraint_list *CL) - { - int LEN=0; - int START=1; - int SCALE=2; - int GOPP=3; - - int iteration=0; - char *choice; - int a,b, c; - int index; - char *s; - char last_start[100]; - char out_format[100]; - Alignment *RESULT=NULL; - Alignment *PREVIOUS=NULL; - Alignment *C=NULL; - Alignment *EA=NULL; - - int **parameters; - - - choice=vcalloc ( 100, sizeof (char)); - parameters=declare_int (10000, 4); - - parameters[0][START]=(CL->moca)->moca_start; - parameters[0][LEN]= (CL->moca)->moca_len; - parameters[0][SCALE]=(CL->moca)->moca_scale; - parameters[0][GOPP]=CL->gop; - iteration=0; - sprintf ( last_start, "%d", (CL->moca)->moca_start); - sprintf ( out_format, "mocca_aln"); - - print_moca_interactive_choices (); - while ( !strm4 (choice, "Q","X", "q", "x" )) - { - c=choice[0]; - - if (c=='b' || c=='B') - { - iteration-=atoi(choice+1)+1; - - if (iteration<0)iteration=1; - } - else - { - iteration++; - parameters[iteration][START]=parameters[iteration-1][START]; - parameters[iteration][LEN]=parameters[iteration-1][LEN]; - parameters[iteration][SCALE]=parameters[iteration-1][SCALE]; - parameters[iteration][GOPP]=parameters[iteration-1][GOPP]; - - if ( c=='>')parameters[iteration][LEN]=atoi(choice+1); - else if ( c=='|') - { - sprintf ( last_start, "%s", choice); - parameters[iteration][START]=0; - s=strrchr(choice, ':'); - - if (s==NULL) - { - parameters[iteration][START]=atoi(choice+1); - } - else - { - - s[0]='\0'; - - if((index=name_is_in_list (choice+1,(CL->S)->name,(CL->S)->nseq,100))==-1) - { - fprintf ( stderr, "\n\tERROR: %s NOT in Sequence Set",choice+1); - continue; - } - - for ( a=0; a< index; a++) - { - parameters[iteration][START]+=(CL->S)->len[a]+1; - } - parameters[iteration][START]+=atoi(s+1)-1; - } - - } - else if ( c=='C'||c=='c')parameters[iteration][SCALE]=atoi(choice+1); - else if ( c=='G'||c=='g') - { - parameters[iteration][GOPP]=atoi(choice+1); - CL->gop=parameters[iteration][GOPP]; - } - else if ( c=='F'||c=='f') - { - sprintf ( out_format, "%s", choice+1); - } - else if ( c=='S'||c=='s') - { - if (choice[1]=='\0')sprintf ( choice, "default.domain_aln.%d", iteration); - output_format_aln (out_format,RESULT,EA=fast_coffee_evaluate_output(RESULT, CL),choice+1); - fprintf (stderr, "\tOutput file [%15s] in [%10s] format\n",choice+1,out_format); - free_aln (EA); - } - else if (c=='\0') - { - if ( parameters[iteration][SCALE]>0) - { - fprintf ( stderr, "\nWARNING: THRESHOLD RESET to 0"); - parameters[iteration][SCALE]=0; - } - - (CL->moca)->moca_scale=parameters[iteration][SCALE]; - CL->gop=parameters[iteration][GOPP]; - - C=extract_domain_with_coordinates (C,parameters[iteration][START],parameters[iteration][LEN],CL); - - if ( C==NULL) - { - fprintf ( stderr, "\nERROR: ILLEGAL COORDINATES! SEQUENCE BOUNDARY CROSSED\n"); - for ( b=1,a=0; a< (CL->S)->nseq-1; a++) - { - - fprintf ( stderr, "\n\t%15s=> Abs:[%d %d] Rel:[0 %d]", (CL->S)->name[a],b, b+(CL->S)->len[a]-1,(CL->S)->len[a]); - b+=(CL->S)->len[a]; - } - fprintf ( stderr, "\n"); - } - else if (parameters[iteration][START]==0 && parameters[iteration][LEN]==0) - { - fprintf ( stderr, "\n\tEnter the following parameters:\n\n\t\tSTART value: |x [Return]\n\t\tLENgth value: >y [Return]\n\t\ttype [Return]\n\n"); - fprintf ( stderr, "\n\n\tSTART is measured on the total length of the concatenated sequences\n\tx and y are positive integers\n\n"); - } - - else if ( C->nseq==0) - { - fprintf ( stderr, "\nNO MATCH FOUND: LOWER THE SCALE (C)\n"); - } - else - { - RESULT=copy_aln ( C, RESULT); - unpack_seq_aln (RESULT, CL); - RESULT->output_res_num=1; - - output_format_aln (out_format,RESULT,EA=fast_coffee_evaluate_output(RESULT, CL),"stdout"); - free_aln(EA); - PREVIOUS=copy_aln ( RESULT, PREVIOUS); - free_aln (C); - print_moca_interactive_choices (); - - } - } - - fprintf ( stderr, "\t[ITERATION %3d][START=%s][LEN=%3d][GOPP=%3d][SCALE=%4d]\t",iteration,last_start,parameters[iteration][LEN],parameters[iteration][GOPP],parameters[iteration][SCALE]); - a=0; - fprintf ( stderr, "Your Choice: "); - while ( (c=fgetc(stdin))!='\n')choice[a++]=c; - choice[a]=0; - } - } - - if (!RESULT)myexit(EXIT_SUCCESS); - if ( RESULT)RESULT->output_res_num=0; - return RESULT; - } - -int print_moca_interactive_choices () -{ - fprintf ( stderr, "\n**************************************************************"); - fprintf ( stderr, "\n******************** MOCCA: %s ***********",VERSION); - fprintf ( stderr, "\n**************************************************************"); - - -fprintf ( stderr, "\nMENU: Type Flag[number] and Return: ex |10"); - -fprintf ( stderr, "\n\t|x -->Set the START to x"); -fprintf ( stderr, "\n\t 100 start=100 on concatenated sequences"); -fprintf ( stderr, "\n\t human:100 start=100 on human sequence"); -fprintf ( stderr, "\n\t>x -->Set the LEN to x"); -fprintf ( stderr, "\n\tGx -->Set the Gap Opening Penalty to x"); -fprintf ( stderr, "\n\tCx -->Set the sCale to x"); -fprintf ( stderr, "\n\tSname -->Save the Alignment "); -fprintf ( stderr, "\n\tFformat -->Save the Alignment Format"); - -fprintf ( stderr, "\n\treturn -->Compute the Alignment"); - -fprintf ( stderr, "\n\tX -->eXit\n\n"); - -return 0; -} - -Alignment * approximate_domain ( int min_start, int max_start, int step_start,int min_len, int max_len, int step_len, int *best_start, int *best_len, int *best_score, Constraint_list *CL) - { - Alignment *C=NULL; - int start; - int len; - int score; - - /*1 Extract the first*/ - best_score[0]=UNDEFINED; - best_start[0]=min_start; - best_len[0]=min_len; - - for (start=min_start; start< max_start; start+=step_start) - { - for ( len=min_len; lenmoca)->evaluate_domain)(C, CL); - fprintf ( stderr, "\nSTART=%d LEN=%3d SCORE=%5d [%d]",start,len,score, C->nseq); - - - if ( best_score[0]==UNDEFINED)best_score[0]=score; - if ( score>best_score[0]) - { - best_score[0]=score; - best_start[0]=start; - best_len[0]=len; - } - } - } - - C=extract_domain_with_coordinates (C,best_start[0], best_len[0],CL); - C->output_res_num=1; - return C; - } -int measure_domain_length ( Constraint_list *CL,Alignment *IN, int start, int min_len, int max_len, int step) - { - Alignment *C=NULL; - int score, best_score,best_len,a, b, l; - int *score_matrix, *len_matrix; - int n_val, best_val; - - score_matrix=vcalloc ( max_len, sizeof (int)); - len_matrix=vcalloc ( max_len, sizeof (int)); - - - l=strlen ( (CL->S)->seq[0]); - - min_len=MAX(0, min_len); - min_len=MIN(l-start, min_len); - - if ( !IN)C=extract_domain_with_coordinates (C,start,min_len, CL); - else - { - C=copy_aln (IN, C); - C->len_aln=min_len; - for ( a=0; a< C->nseq; a++)C->seq_al[a][min_len]='\0'; - C=add_seq2aln (CL,C, CL->S); - } - - best_score= score=((CL->moca)->evaluate_domain)(C, CL); - - - min_len=MAX(0, min_len); - for ( best_len=best_val=n_val=0,b=min_len; blen_aln=min_len; - for ( a=0; a< C->nseq; a++)C->seq_al[a][b]='\0'; - C=add_seq2aln (CL,C, CL->S); - } - if ( C->len_aln>0 )score=((CL->moca)->evaluate_domain)(C, CL); - else score=-1; - - if ( score< -3000)break; - - fprintf ( stderr, "\n\t%d %d=>%d (%d, %d)[%d]",start, b, score, C->nseq, C->len_aln, step); - score_matrix[n_val]=score; - len_matrix [n_val]=b; - if ( score>best_score) - { - best_score=score; - best_len=b; - best_val=n_val; - } - } - free_aln(C); - - for ( a=best_val; abest_score/2)best_len=len_matrix[a]; - else break; - } - vfree ( score_matrix); - vfree ( len_matrix); - - return best_len; - } - -Alignment *extract_domain_with_coordinates ( Alignment *RESULT,int start, int len, Constraint_list *CL) -{ - int a; - char *buf; - Alignment *SEQ_DOMAIN=NULL; - - - - - - /*ADJUST THE DIRECTION OF THE DOMAIN: len<0:left and len>0:right*/ - - if (len>0); - else if (len<0) - { - len=len*-1; - start=start-len+1; - } - - /*CHECK THAT THE BOUNDARY CONDITIONS*/ - - - if (start<0 || (!CL->packed_seq_lu && (start+len)>strlen((CL->S)->seq[0])) ||(CL->packed_seq_lu && (start+len)>strlen((CL->S)->seq[(CL->S)->nseq-1])) )return NULL; - else - { - for ( a=start; a< start+len; a++) - { - if ((CL->moca)->forbiden_residues && (CL->moca)->forbiden_residues[0][a+1]==UNDEFINED) - { - fprintf ( stderr, "*"); - return NULL; - } - } - } - - /*EXTRACT THE DOMAIN*/ - - SEQ_DOMAIN=add_seq2aln (CL,SEQ_DOMAIN, CL->S); - buf=extract_char (SEQ_DOMAIN->seq_al[0], start, len); - - for (a=0; aseq_al[0], "%s", buf); - SEQ_DOMAIN->order[0][1]=start; - SEQ_DOMAIN=add_seq2aln (CL,SEQ_DOMAIN, CL->S); - - - - return SEQ_DOMAIN; -} - - - - -int get_starting_point ( Constraint_list *CL) -{ - int a; - - - int l; - - - - - - int **seq; - int start; - int *entry=NULL; - - l=strlen ( (CL->S)->seq[0]); - - seq=declare_int ( l, 2); - - for ( a=0; ane;a++) - { - entry=extract_entry (entry, a, CL); - - seq[entry[R1]][1]=entry[R1]; - seq[entry[R2]][1]=entry[R2]; - if ((CL->moca) && (CL->moca)->forbiden_residues && ((CL->moca)->forbiden_residues[0][entry[R1]]==UNDEFINED||(CL->moca)->forbiden_residues[0][entry[R2]]==UNDEFINED ))continue; - else - { - seq[entry[R1]][0]+=entry[MISC]; - seq[entry[R2]][0]+=entry[MISC]; - } - } - - sort_int_inv ( seq, 2, 0, 0, l-1); - fprintf ( stderr, "\nStart=%d %d", seq[0][1], seq[0][0]); - start=seq[0][1]; - CL=index_res_constraint_list (CL,WE); - - free_int ( seq, -1); - return start; - - -} - - -int * analyse_sequence ( Constraint_list *CL) -{ - int a, p; - int len, start, n_dots; - int left, right, tw, r, w; - int best_tw, best_start=0, best_len=0; - int l; - int max_len=200; - - CL=index_res_constraint_list ( CL, WE); - l=strlen (( CL->S)->seq[0]); - - for ( best_tw=UNDEFINED,start=0; startresidue_index[0][p+start+1][0]; - - for ( a=1; aresidue_index[0][p+start+1][a+1]; - w=CL->residue_index[0][p+start+1][a+2]; - - if (rright)tw+=w; - } - } - - if ( tw> best_tw || best_tw==UNDEFINED) - { - best_tw=tw; - best_start=start; - best_len=len; - } - } - } - fprintf ( stderr, "\nStart=%d Len=%d", best_start, best_len); - return NULL; -} - -/*********************************COPYRIGHT NOTICE**********************************/ -/*© Centre National de la Recherche Scientifique (CNRS) */ -/*and */ -/*Cedric Notredame */ -/*Fri Aug 8 19:03:27 MDT 2003. */ -/*All rights reserved.*/ -/*NOTICE: |*/ -/* This file is an integral part of the */ -/* T-COFFEE Software. */ -/* Its content is protected and all */ -/* the conditions mentioned in the licensing */ -/* agreement of the software apply to this file.*/ -/*............................................... |*/ -/* If you need some more information, or if you */ -/* wish to obtain a full license, please contact: */ -/* cedric.notredame@europe.com*/ -/*............................................... |*/ -/**/ -/**/ -/* */ -/*********************************COPYRIGHT NOTICE**********************************/ -/*********************************COPYRIGHT NOTICE**********************************/ -/*© Centro de Regulacio Genomica */ -/*and */ -/*Cedric Notredame */ -/*Tue Oct 27 10:12:26 WEST 2009. */ -/*All rights reserved.*/ -/*This file is part of T-COFFEE.*/ -/**/ -/* T-COFFEE is free software; you can redistribute it and/or modify*/ -/* it under the terms of the GNU General Public License as published by*/ -/* the Free Software Foundation; either version 2 of the License, or*/ -/* (at your option) any later version.*/ -/**/ -/* T-COFFEE is distributed in the hope that it will be useful,*/ -/* but WITHOUT ANY WARRANTY; without even the implied warranty of*/ -/* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the*/ -/* GNU General Public License for more details.*/ -/**/ -/* You should have received a copy of the GNU General Public License*/ -/* along with Foobar; if not, write to the Free Software*/ -/* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA*/ -/*............................................... |*/ -/* If you need some more information*/ -/* cedric.notredame@europe.com*/ -/*............................................... |*/ -/**/ -/**/ -/* */ -/*********************************COPYRIGHT NOTICE**********************************/ diff --git a/binaries/src/tcoffee/t_coffee_source/util_dp_cdna_fasta_nw.c b/binaries/src/tcoffee/t_coffee_source/util_dp_cdna_fasta_nw.c deleted file mode 100644 index 7ae6c3a..0000000 --- a/binaries/src/tcoffee/t_coffee_source/util_dp_cdna_fasta_nw.c +++ /dev/null @@ -1,974 +0,0 @@ -#include -#include -#include -#include -#include - -#include "io_lib_header.h" -#include "util_lib_header.h" -#include "define_header.h" -#include "dp_lib_header.h" - - -int cfasta_cdna_pair_wise (Alignment *A,int*ns, int **l_s,Constraint_list *CL) - { - -/*TREATMENT OF THE TERMINAL GAP PENALTIES*/ -/*TG_MODE=0---> gop and gep*/ -/*TG_MODE=1---> --- gep*/ -/*TG_MODE=2---> --- ---*/ - int maximise; - -/*VARIABLES FOR THE MULTIPLE SEQUENCE ALIGNMENT*/ - int **tot_diag; - - int *diag; - int ktup; - static int n_groups; - static char **group_list; - int score, new_score; - int n_chosen_diag=0; - int step; - int max_n_chosen_diag; - int l0, l1; - Alignment *B; - /********Prepare Penalties******/ - - maximise=CL->maximise; - ktup=CL->ktup; - - /********************************/ - if ( !group_list) - { - - group_list=make_group_aa (&n_groups, CL->matrix_for_aa_group); - } - B=dna_aln2_3frame_cdna_aln(A, ns, l_s); - l0=strlen(B->seq_al[0]); - l1=strlen(B->seq_al[3]); - tot_diag=evaluate_diagonals_cdna ( B, ns, l_s, CL, maximise,n_groups,group_list, ktup); - - max_n_chosen_diag=100; - n_chosen_diag=step=10 ; - n_chosen_diag=MIN(n_chosen_diag, max_n_chosen_diag); - - - diag=extract_N_diag (l0,l1, tot_diag, n_chosen_diag,2); - score =make_fasta_cdna_pair_wise ( A,B, ns, l_s, CL, diag); - - - new_score=0; - vfree ( diag); - - while (new_score!=score && n_chosen_diag< max_n_chosen_diag ) - { - - score=new_score; - ungap_sub_aln ( A, ns[0], l_s[0]); - ungap_sub_aln ( A, ns[1], l_s[1]); - - - n_chosen_diag+=step; - n_chosen_diag=MIN(n_chosen_diag, max_n_chosen_diag); - - diag =extract_N_diag (l0,l1, tot_diag, n_chosen_diag,3); - new_score=make_fasta_cdna_pair_wise ( A, B,ns, l_s, CL, diag); - vfree ( diag); - } - - score=new_score; - free_int (tot_diag, -1); - free_aln(B); - return score; - } - - -int fasta_cdna_pair_wise (Alignment *A,int*ns, int **l_s,Constraint_list *CL) - { -/*TREATMENT OF THE TERMINAL GAP PENALTIES*/ -/*TG_MODE=0---> gop and gep*/ -/*TG_MODE=1---> --- gep*/ -/*TG_MODE=2---> --- ---*/ - - - int maximise; - int l0, l1; -/*VARIABLES FOR THE MULTIPLE SEQUENCE ALIGNMENT*/ - int **tot_diag; - int *diag; - int ktup; - static int n_groups; - static char **group_list; - int score; - Alignment *B; - - /********Prepare Penalties******/ - - - maximise=CL->maximise; - ktup=CL->ktup; - /********************************/ - - - - if ( !group_list) - { - - group_list=make_group_aa (&n_groups, CL->matrix_for_aa_group); - } - - B=dna_aln2_3frame_cdna_aln(A, ns, l_s); - B->nseq=6; - - l0=strlen ( B->seq_al[0]); - l1=strlen ( B->seq_al[3]); - - - tot_diag=evaluate_diagonals_cdna( B, ns, l_s, CL, maximise,n_groups,group_list, ktup); - - - diag=extract_N_diag (l0, l1, tot_diag,20,1); - score=make_fasta_cdna_pair_wise ( A,B, ns, l_s, CL, diag); - - free_aln(B); - free_int (tot_diag, -1); - vfree (diag); - return score; - } - -Dp_Model* initialize_dna_dp_model (Constraint_list *CL) - { - Dp_Model *M; - int a, b, c,d; - int f0, f1; - int deltaf1, deltaf0,deltatype; - int type, type1, type0; - - M=vcalloc ( 1, sizeof (Dp_Model)); - - for (M->nstate=0,f0=0; f0<3; f0++) - for ( f1=0; f1<3; f1++)M->nstate+=3; - - M->UM=M->nstate++; - M->START=M->nstate++; - M->END =M->nstate++; - - M->TG_MODE=CL->TG_MODE; - M->F_TG_MODE=0; - M->gop=CL->gop*SCORE_K; - M->gep=CL->gep*SCORE_K; - - - - M->f_gop=CL->f_gop*SCORE_K; - M->f_gep=CL->f_gep*SCORE_K; - - - M->bounded_model=declare_int (M->nstate+1, M->nstate+1); - M->model=declare_int (M->nstate+1, M->nstate+1); - for ( a=0; a<=M->nstate; a++) - for ( b=0; b<= M->nstate; b++) - M->model[a][b]=UNDEFINED; - M->model_properties=declare_int ( M->nstate, 10); - - a=0; - M->TYPE=a++;M->F0=a++;M->F1=a++; M->LEN_I=a++; M->LEN_J=a++; M->DELTA_I=a++;M->DELTA_J=a++;M->EMISSION=a++;M->TERM_EMISSION=a++; - a=M->nstate; - M->NON_CODING=a++; M->INSERTION=a++; M->DELETION=a++; M->CODING0=a++; M->CODING1=a++;M->CODING2=a++; - - - for ( a=0,f0=0; f0<3; f0++) - for ( f1=0; f1<3; f1++, a+=3) - { - M->model_properties[a+0][M->TYPE]=M->CODING0; - M->model_properties[a+0][M->F0]=f0; - M->model_properties[a+0][M->F1]=f1; - M->model_properties[a+0][M->LEN_I]=1; - M->model_properties[a+0][M->LEN_J]=1; - M->model_properties[a+0][M->DELTA_I]=-1; - M->model_properties[a+0][M->DELTA_J]= 0; - M->model_properties[a+0][M->EMISSION]=0; - M->model_properties[a+0][M->TERM_EMISSION]=0; - - M->model_properties[a+1][M->TYPE]=M->DELETION; - M->model_properties[a+1][M->F0]=f0; - M->model_properties[a+1][M->F1]=f1; - M->model_properties[a+1][M->LEN_I]=1; - M->model_properties[a+1][M->LEN_J]=0; - M->model_properties[a+1][M->DELTA_I]=-1; - M->model_properties[a+1][M->DELTA_J]=+1; - M->model_properties[a+1][M->EMISSION]=M->gep; - M->model_properties[a+1][M->TERM_EMISSION]=(M->TG_MODE==2)?0:M->gep; - - M->model_properties[a+2][M->TYPE]=M->INSERTION; - M->model_properties[a+2][M->F0]=f0; - M->model_properties[a+2][M->F1]=f1; - M->model_properties[a+2][M->LEN_I]=0; - M->model_properties[a+2][M->LEN_J]=1; - M->model_properties[a+2][M->DELTA_I]= 0; - M->model_properties[a+2][M->DELTA_J]=-1; - M->model_properties[a+2][M->EMISSION]=M->gep; - M->model_properties[a+2][M->TERM_EMISSION]=(M->TG_MODE==2)?0:M->gep; - } - - /*UM" Unmatched State*/ - M->model_properties[a][M->TYPE]=M->NON_CODING; - M->model_properties[a][M->F0]=0; - M->model_properties[a][M->F1]=0; - M->model_properties[a][M->LEN_I]=1; - M->model_properties[a][M->LEN_J]=1; - M->model_properties[a][M->DELTA_I]=-1; - M->model_properties[a][M->DELTA_J]=0; - M->model_properties[a][M->EMISSION]=M->f_gep; - M->model_properties[a][M->TERM_EMISSION]=(M->F_TG_MODE==2)?0:M->f_gep; - - - M->model_properties[M->START][M->TYPE]=M->NON_CODING; - M->model_properties[a+1][M->F0]=0; - M->model_properties[a+1][M->F1]=0; - M->model_properties[a+1][M->LEN_I]=0; - M->model_properties[a+1][M->LEN_J]=0; - M->model_properties[a+1][M->DELTA_I]=0 ; - M->model_properties[a+1][M->DELTA_J]=0; - M->model_properties[a+1][M->EMISSION]=0; - M->model_properties[a+1][M->TERM_EMISSION]=0; - - M->model_properties[M->END][M->TYPE]=M->NON_CODING; - M->model_properties[a+2][M->F0]=0; - M->model_properties[a+2][M->F1]=0; - M->model_properties[a+2][M->LEN_I]=0; - M->model_properties[a+2][M->LEN_J]=0; - M->model_properties[a+2][M->DELTA_I]=0 ; - M->model_properties[a+2][M->DELTA_J]=0; - M->model_properties[a+2][M->EMISSION]=0; - M->model_properties[a+2][M->TERM_EMISSION]=0; - - /*1: SET THE INDEL PENALTIES*/ - - - for ( a=0; a< M->START; a++) - { - deltaf0=M->model_properties[M->START][M->F0]-M->model_properties[a][M->F0]; - deltaf1=M->model_properties[M->START][M->F1]-M->model_properties[a][M->F1]; - if ( deltaf0==0 && deltaf1==0)deltatype=0; - else if ( deltaf0<=0 && deltaf1<=0)deltatype=1; - else deltatype=-1; - type=M->model_properties[a][M->TYPE]; - - if ( type==M->NON_CODING) M->model[a][M->END]=M->model[M->START][a]=(M->F_TG_MODE==0)?M->f_gop:0; - else if ( type==M->CODING0 && deltatype==0)M->model[a][M->END]=M->model[M->START][a]=ALLOWED; - else if ( type==M->CODING0 && deltatype==1)M->model[a][M->END]=M->model[M->START][a]=(M->F_TG_MODE==0)?M->f_gop:0; - else if ( type==M->INSERTION && deltatype==0)M->model[a][M->END]=M->model[M->START][a]=(M->TG_MODE==0)?M->gop:0; - else if ( type==M->INSERTION && deltatype==1)M->model[a][M->END]=M->model[M->START][a]=(M->TG_MODE==0)?M->gop:0+(M->F_TG_MODE==0)?M->f_gop:0; - else if ( type==M->DELETION && deltatype==0)M->model[a][M->END]=M->model[M->START][a]=(M->TG_MODE==0)?M->gop:0; - else if ( type==M->DELETION && deltatype==1)M->model[a][M->END]=M->model[M->START][a]=(M->TG_MODE==0)?M->gop:0+(M->F_TG_MODE==0)?M->f_gop:0; - else M->model[a][M->END]=M->model[M->START][a]=UNDEFINED; - - /* - if (type==M->NON_CODING ||M->model_properties[a][M->F0] ||M->model_properties[a][M->F1]) M->model[M->START][a]=M->model[a][M->END]=(M->F_TG_MODE==0)?M->f_gop:0; - else if (type==M->INSERTION || type==M->DELETION)M->model[M->START][a]=M->model[a][M->END]=( M->TG_MODE==0)?M->gop:0; - else M->model[M->START][a]=M->model[a][M->END]=ALLOWED; - */ - - for ( b=0; b< M->START; b++) - { - - deltaf0=M->model_properties[a][M->F0]-M->model_properties[b][M->F0]; - deltaf1=M->model_properties[a][M->F1]-M->model_properties[b][M->F1]; - type0=M->model_properties[a][M->TYPE]; - type1=M->model_properties[b][M->TYPE]; - - if ( deltaf0==0 && deltaf1==0)deltatype=0; - else if ( deltaf0<=0 && deltaf1<=0)deltatype=1; - else deltatype=-1; - - - - if ( type0==M->NON_CODING && type1==M->NON_CODING )M->model[a][b]=UNDEFINED; - else if ( type0==M->NON_CODING && type1==M->CODING0 )M->model[a][b]=ALLOWED ; - else if ( type0==M->NON_CODING && type1==M->INSERTION )M->model[a][b]=M->gop; - else if ( type0==M->NON_CODING && type1==M->DELETION )M->model[a][b]=M->gop; - - else if ( type0==M->CODING0 && type1==M->NON_CODING )M->model[a][b]=M->f_gop; - else if ( type0==M->CODING0 && type1==M->CODING0 && deltatype==0 )M->model[a][b]=ALLOWED; - else if ( type0==M->CODING0 && type1==M->CODING0 && deltatype==1 )M->model[a][b]=M->f_gop; - else if ( type0==M->CODING0 && type1==M->INSERTION && deltatype==0 )M->model[a][b]=M->gop; - else if ( type0==M->CODING0 && type1==M->INSERTION && deltatype==1 )M->model[a][b]=M->gop+M->f_gop; - else if ( type0==M->CODING0 && type1==M->DELETION && deltatype==0 )M->model[a][b]=M->gop; - else if ( type0==M->CODING0 && type1==M->DELETION && deltatype==1 )M->model[a][b]=M->gop+M->f_gop; - - else if ( type0==M->INSERTION && type1==M->NON_CODING )M->model[a][b]=M->f_gop; - else if ( type0==M->INSERTION && type1==M->CODING0 && deltatype==0 )M->model[a][b]=ALLOWED; - else if ( type0==M->INSERTION && type1==M->CODING0 && deltatype==1 )M->model[a][b]=M->f_gop; - else if ( type0==M->INSERTION && type1==M->INSERTION && deltatype==0 )M->model[a][b]=ALLOWED; - else if ( type0==M->INSERTION && type1==M->INSERTION && deltatype==1 )M->model[a][b]=M->f_gop; - else if ( type0==M->INSERTION && type1==M->DELETION && deltatype==0 )M->model[a][b]=M->gop; - else if ( type0==M->INSERTION && type1==M->DELETION && deltatype==1 )M->model[a][b]=M->gop+M->f_gop; - - else if ( type0==M->DELETION && type1==M->NON_CODING )M->model[a][b]=M->f_gop; - else if ( type0==M->DELETION && type1==M->CODING0 && deltatype==0 )M->model[a][b]=ALLOWED; - else if ( type0==M->DELETION && type1==M->CODING0 && deltatype==1 )M->model[a][b]=M->f_gop; - else if ( type0==M->DELETION && type1==M->INSERTION && deltatype==0 )M->model[a][b]=M->gop; - else if ( type0==M->DELETION && type1==M->INSERTION && deltatype==1 )M->model[a][b]=M->gop+M->f_gop; - else if ( type0==M->DELETION && type1==M->DELETION && deltatype==0 )M->model[a][b]=ALLOWED; - else if ( type0==M->DELETION && type1==M->DELETION && deltatype==1 )M->model[a][b]=M->f_gop; - - else {M->model[a][b]=UNDEFINED;} - - } - } - - - /*2 SET THE FRAMESHIFT PENALTIES - - for ( a=0; a< M->START; a++) - { - type=M->model_properties[a][M->TYPE]; - - for ( b=0; b< M->START; b++) - { - deltaf0=M->model_properties[a][M->F0]-M->model_properties[b][M->F0]; - deltaf1=M->model_properties[a][M->F1]-M->model_properties[b][M->F1]; - - - - - if (b==M->UM) M->model[a][b]+=M->f_gop; - else if (a==M->UM) M->model[a][b]+=ALLOWED; - else if (deltaf1==0 && deltaf0==0)M->model[a][b]+=ALLOWED; - else if (deltaf1<=0 && deltaf0<=0)M->model[a][b]+=M->f_gop; - else M->model[a][b]=UNDEFINED; - } - - } - M->model[M->UM][M->UM]=UNDEFINED; - */ - - - for (c=0,a=0, d=0; a< M->START; a++) - for ( b=0; bSTART; b++, d++) - { - if (M->model[a][b]!=UNDEFINED) - { - M->bounded_model[b][1+M->bounded_model[b][0]++]=a; - c++; - } - } - return M; - } -int make_fasta_cdna_pair_wise (Alignment *B,Alignment *A,int*in_ns, int **l_s,Constraint_list *CL, int *diag) - { - int a,c,p,k; - Dp_Result *DPR; - static Dp_Model *M; - int l0, l1; - int len_i, len_j; - int f0=0, f1=0; - int deltaf0, deltaf1, delta; - int nr1, nr2; - int ala, alb, aa0, aa1; - int type; - - char **al; - int **tl_s; - int *tns; - /*DEBUG*/ - int debug_cdna_fasta=0; - Alignment *DA; - int score; - int state,prev_state; - int t, e; - int a1, a2; - - - l0=strlen ( B->seq_al[l_s[0][0]]); - l1=strlen ( B->seq_al[l_s[1][0]]); - - al=declare_char (2, l0+l1+1); - B=realloc_aln2 (B,B->nseq,l0+l1+1); - - - free_int (B->cdna_cache, -1); - B->cdna_cache=declare_int(1, l0+l1+1); - - if ( !M)M=initialize_dna_dp_model (CL); - - - M->diag=diag; - - tl_s=declare_int (2, 2);tns=vcalloc(2, sizeof(int));tl_s[0][0]=0;tl_s[1][0]=3;tns[0]=tns[1]=1; - DPR=make_fast_dp_pair_wise (A,tns, tl_s,CL,M); - vfree(tns);free_int(tl_s, -1); - - - - /*new_trace_back*/ - a=p=0; - aa0=aa1=ala=alb=0; - while ( (k=DPR->traceback[a++])!=M->START); - while ( (k=DPR->traceback[a++])!=M->END) - { - - f0=M->model_properties[k][M->F0]; - f1=M->model_properties[k][M->F1]; - - len_i=M->model_properties[k][M->LEN_I]; - len_j=M->model_properties[k][M->LEN_J]; - - type=M->model_properties[k][M->TYPE]; - - - - if (type==M->CODING0) - { - deltaf0=(aa0*3+f0)-ala; - deltaf1=(aa1*3+f1)-alb; - - delta=MAX(deltaf0, deltaf1); - - for (nr1=0, nr2=0,c=0; cseq_al[l_s[0][0]][ala++]; - else al[0][p]='-'; - - if (nr2seq_al[l_s[1][0]][alb++]; - else al[1][p]='-'; - - B->cdna_cache[0][p]=M->NON_CODING; - if ( is_gap(al[1][p]) && is_gap(al[0][p]))p--; - else if ( debug_cdna_fasta)fprintf (stderr, "\nUM: %c %c", al[0][p], al[1][p]); - } - for ( c=0; c< 3; c++, p++) - { - if ( c==0)B->cdna_cache[0][p]=M->CODING0; - else if ( c==1)B->cdna_cache[0][p]=M->CODING1; - else if ( c==2)B->cdna_cache[0][p]=M->CODING2; - if (alaseq_al[l_s[0][0]][ala++]; - else al[0][p]='-'; - - if (albseq_al[l_s[1][0]][alb++]; - else al[1][p]='-'; - - if ( is_gap(al[1][p]) && is_gap(al[0][p]))p--; - else if ( debug_cdna_fasta)fprintf (stderr, "\n%d: %c %c",k, al[0][p], al[1][p]); - } - } - - aa0+=len_i; - aa1+=len_j; - } - - deltaf0=(aa0*3+f0)-ala; - deltaf1=(aa1*3+f1)-alb; - delta=MAX(deltaf0, deltaf1); - for (nr1=0, nr2=0,c=0; cseq_al[l_s[0][0]][ala++]; - else al[0][p]='-'; - - if (nr2seq_al[l_s[1][0]][alb++]; - else al[1][p]='-'; - - B->cdna_cache[0][p]=M->NON_CODING; - if ( is_gap(al[1][p]) && is_gap(al[0][p]))p--; - else if ( debug_cdna_fasta)fprintf (stderr, "\nUM: %c %c", al[0][p], al[1][p]); - } - - - /*End New traceback*/ - - - - - al[0][p]='\0'; - al[1][p]='\0'; - - - sprintf( B->seq_al[l_s[0][0]], "%s", al[0]); - sprintf( B->seq_al[l_s[1][0]], "%s", al[1]); - B->len_aln=strlen (al[0]); - B->nseq=2; - - - - - if ( debug_cdna_fasta) - { - fprintf ( stderr, "\nA-A=%d, %d", CL->M['a'-'A']['a'-'A'], CL->M['a'-'A']['a'-'A'] *SCORE_K); - for ( a=1; agop, M->gep, M->TG_MODE); - fprintf ( stderr, "\nF_GOP=%d F_GEP=%d F_TG_MODE=%d", M->gop, M->gep, M->F_TG_MODE); - - DA=copy_aln (B, NULL); - DA=realloc_aln2 (DA,6,(DA->len_aln+1)); - - - for ( a=0; alen_aln; a++) - { - - fprintf ( stderr, "\n%d", DA->cdna_cache[0][a]); - if (DA->cdna_cache[0][a]>=M->CODING0)DA->seq_al[DA->nseq][a]=DA->cdna_cache[0][a]-M->nstate+'0'; - else DA->seq_al[DA->nseq][a]=DA->cdna_cache[0][a]-M->nstate+'0'; - - if (DA->cdna_cache[0][a]==M->CODING0) - { - DA->seq_al[DA->nseq+1][a]=translate_dna_codon (DA->seq_al[0]+a,'*'); - DA->seq_al[DA->nseq+2][a]=translate_dna_codon (DA->seq_al[1]+a,'*'); - } - else - { - DA->seq_al[DA->nseq+1][a]='-'; - DA->seq_al[DA->nseq+2][a]='-'; - } - - } - DA->nseq+=3; - print_aln (DA); - - free_aln(DA); - score=0; - - - for (prev_state=M->START,a=0; a< DA->len_aln;) - { - state=DA->cdna_cache[0][a]; - t=M->model[prev_state][state]; - if ( DA->cdna_cache[0][a]==M->CODING0) - { - a1=translate_dna_codon (A->seq_al[0]+a,'x'); - a2=translate_dna_codon (A->seq_al[1]+a,'x'); - - if ( a1!='x' && a2!='x') - { - e=CL->M[a1-'A'][a2-'A']*SCORE_K; - } - } - else if ( DA->cdna_cache[0][a]>M->CODING0); - else - { - e=M->model_properties[B->cdna_cache[0][a]][M->EMISSION]; - } - if ( e==UNDEFINED || t==UNDEFINED) fprintf ( stderr, "\nPROBLEM %d\n", a); - - fprintf ( stderr, "\n[%c..%c: %d(e)+%d(t)=%d]", A->seq_al[0][a], A->seq_al[1][a], e,t,e+t); - score+=e+t; - prev_state=state; - - if (B->cdna_cache[0][a]==M->NON_CODING)a++; - else a+=3; - - } - - } - - for ( a=0; alen_aln; a++) - { - - if ( B->cdna_cache[0][a]CODING0)B->cdna_cache[0][a]=0; - else B->cdna_cache[0][a]=1; - } - - free_char ( al, -1); - return DPR->score; - - } - - - -Dp_Result * make_fast_dp_pair_wise (Alignment *A,int*ns, int **l_s, Constraint_list *CL,Dp_Model *M) - { - - /*SIZE VARIABLES*/ - - int ndiag; - int l0, l1, len_al,len_diag; - static int max_len_al, max_len_diag; - static int mI, mJ; - - - /*EVALUATION*/ - int **mat; - int a1, a2; - - /*DP VARIABLES*/ - static int *Mat, *LMat, *trace; - int a, i, j,l; - int state, cur_state, prev_state; - int pos_i, pos_j; - int last_i=0, last_j=0; - int prev_i, prev_j; - int len_i, len_j, len; - int t, e, em; - - int prev_score; - int pc, best_pc; - - int *prev; - int model_index; - /*TRACEBACK*/ - Dp_Result *DPR; - int k=0, next_k; - int new_i, new_j; - - - ndiag=M->diag[0]; - - l0=strlen (A->seq_al[l_s[0][0]]); - l1=strlen (A->seq_al[l_s[1][0]]); - len_al =l0+l1+1; - len_diag=ndiag+4; - - if ( (len_al>max_len_al || len_diag>max_len_diag)) - { - - vfree (Mat); - vfree (LMat); - vfree(trace); - max_len_diag=max_len_al=0; - } - - if (max_len_al==0) - { - max_len_al=len_al; - max_len_diag=len_diag; - mI=max_len_al*max_len_diag; - mJ=max_len_diag; - - - Mat =vcalloc ( M->nstate*max_len_al*max_len_diag, sizeof (int)); - LMat =vcalloc ( M->nstate*max_len_al*max_len_diag, sizeof (int)); - trace=vcalloc ( M->nstate*max_len_al*max_len_diag, sizeof (int)); - - } - - prev=vcalloc ( M->nstate, sizeof (int)); - DPR=vcalloc ( 1, sizeof ( Dp_Result)); - DPR->traceback=vcalloc (max_len_al, sizeof (int)); - -/*PREPARE THE EVALUATION*/ - if (ns[0]+ns[1]>2) - { - fprintf ( stderr, "\nERROR: function make_fasta_cdna_pair_wise can only handle two sequences at a time [FATAL:%s]",PROGRAM); - crash (""); - } - mat=CL->M; - -/*INITIALIZATION OF THE DP MATRICES*/ - - for (i=0; i<=l0;i++) - { - for (j=0; j<=ndiag;j++) - { - for ( state=0; statenstate; state++) - { - Mat [state*mI+i*mJ+j]=UNDEFINED; - LMat [state*mI+i*mJ+j]=UNDEFINED; - trace [state*mI+i*mJ+j]=M->START; - } - } - } - - M->diag[0]=0; - - for (i=0; i<=l0; i++) - for ( j=0; j<=ndiag; j++) - { - pos_j=M->diag[j]-l0+i; - pos_i=i; - if (!(pos_j==0 || pos_i==0))continue; - if ( pos_j<0 || pos_i<0)continue; - if ( pos_i==0 && pos_j==0) - { - for ( a=0; a< M->nstate; a++) - { - Mat [a*mI+i*mJ+j]=0; - LMat [a*mI+i*mJ+j]=0; - trace[a*mI+i*mJ+j]=M->START; - } - } - else - { - l=MAX(pos_i,pos_j); - for ( state=0; stateSTART; state++) - { - if (pos_j==0 && M->model_properties[state][M->LEN_J])continue; - if (pos_i==0 && M->model_properties[state][M->LEN_I])continue; - - - t=M->model[M->START][state]; - e=M->model_properties[state][M->TERM_EMISSION]; - Mat [state*mI+i*mJ+j]=t+e*l; - LMat [state*mI+i*mJ+j]=l; - trace [state*mI+i*mJ+j]=M->START; - } - } - } - -/*DYNAMIC PROGRAMMING: Forward Pass*/ - - - - for (i=1; i<=l0;i++) - { - for (j=1; j<=ndiag;j++) - { - pos_j=M->diag[j]-l0+i; - pos_i=i; - - if (pos_j<=0 || pos_j>l1 )continue; - last_i=i; - last_j=j; - - for (cur_state=0; cur_stateSTART; cur_state++) - { - if (M->model_properties[cur_state][M->DELTA_J]) - { - prev_j=j+M->model_properties[cur_state][M->DELTA_J]; - prev_i=i+M->model_properties[cur_state][M->DELTA_I]*FABS((M->diag[j]-M->diag[prev_j])); - } - else - { - prev_j=j; - prev_i=i+M->model_properties[cur_state][M->DELTA_I]; - } - len_i=FABS((i-prev_i)); - len_j=FABS((M->diag[prev_j]-M->diag[j])); - len=MAX(len_i, len_j); - a1=A->seq_al[M->model_properties[cur_state][M->F0] ][pos_i-1]; - a2=A->seq_al[M->model_properties[cur_state][M->F1]+3][pos_j-1]; - - if (M->model_properties[cur_state][M->TYPE]==M->CODING0) - { - if ( a1=='o' || a2=='o')em=-(mat['w'-'A']['w'-'A'])*SCORE_K; - else if (a1=='x' || a2=='x')em=UNDEFINED; - else if ( a1==0 || a2==0)exit (0); - else - { - em=(mat[a1-'A'][a2-'A'])*SCORE_K; - } - } - else - { - em=M->model_properties[cur_state][M->EMISSION]; - } - - - - for (pc=best_pc=UNDEFINED, model_index=1; model_index<=M->bounded_model[cur_state][0]; model_index++) - { - prev_state=M->bounded_model[cur_state][model_index]; - - if(prev_i<0 || prev_j<0 ||prev_i>l0 || prev_j>ndiag || len==UNDEFINED)prev_score=UNDEFINED; - else prev_score=Mat[prev_state*mI+prev_i*mJ+prev_j]; - t=M->model[prev_state][cur_state]; - e=em; - - if (prev_score==UNDEFINED || len==UNDEFINED)e=UNDEFINED; - else if (len==0|| e==UNDEFINED)e=UNDEFINED; - else e=e*len; - - if (is_defined_int(3,prev_score,e, t)) - { - pc=prev_score+t+e; - } - else pc=UNDEFINED; - - /*Identify the best previous score*/ - if (best_pc==UNDEFINED || (pc>best_pc && pc!=UNDEFINED)) - { - prev[cur_state]=prev_state; - best_pc=pc; - - } - } - - Mat[cur_state*mI+i*mJ+j]=best_pc; - - - - if ( Mat[cur_state*mI+i*mJ+j]==UNDEFINED) - { - LMat[cur_state*mI+i*mJ+j]=UNDEFINED; - trace[cur_state*mI+i*mJ+j]=UNDEFINED; - continue; - } - - else if ( prev[cur_state]==cur_state) - { - LMat [cur_state*mI+i*mJ+j]= LMat [cur_state*mI+prev_i*mJ+prev_j]+len; - trace[cur_state*mI+i*mJ+j]= trace[cur_state*mI+prev_i*mJ+prev_j]; - } - else - { - LMat[cur_state*mI+i*mJ+j]=len; - trace[cur_state*mI+i*mJ+j]=prev[cur_state]; - } - } - } - } - - - i=last_i; - j=last_j; - for (pc=best_pc=UNDEFINED, state=0; stateSTART; state++) - { - t=M->model[state][M->END]; - e=M->model_properties[state][M->TERM_EMISSION]; - l=LMat[state*mI+i*mJ+j]; - - - if (!is_defined_int(4,t,e,Mat[state*mI+i*mJ+j],l))Mat[state*mI+i*mJ+j]=UNDEFINED; - else Mat[state*mI+i*mJ+j]+=t+e*(l); - pc=Mat[state*mI+i*mJ+j]; - - - if (best_pc==UNDEFINED || (pc>best_pc && pc!=UNDEFINED)) - { - k=state; - best_pc=pc; - } - } - DPR->score=best_pc; - -/*TRACEBACK*/ - - - e=0; - len=0; - - - while (k!=M->START) - { - next_k=trace[k*mI+i*mJ+j]; - new_i=i; - new_j=j; - l=LMat[k*mI+i*mJ+j]; - - for (a=0; a< l; a++) - { - DPR->traceback[len++]=k; - } - new_i+=M->model_properties[k][M->DELTA_I]*l; - - - if ( M->model_properties[k][M->DELTA_J]) - { - while ( next_k!=M->START && FABS((M->diag[j]-M->diag[new_j]))!=l)new_j+=M->model_properties[k][M->DELTA_J]; - } - - i=new_i; - j=new_j; - k=next_k; - } - DPR->len=len; - DPR->traceback[DPR->len++]=M->START; - invert_list_int (DPR->traceback,DPR->len); - DPR->traceback[DPR->len]=M->END; - - vfree (prev); - - return DPR; - - - } - - - -int ** evaluate_diagonals_cdna ( Alignment *B, int *ns, int **l_s, Constraint_list *CL,int maximise,int n_groups, char **group_list, int ktup) - { - int f1, f2, c; - int **diag; - char *s1, *s2; - int p1, p2; - int **tot_diag; - int n_tot_diag; - int l0, l1; - - - - - - - if ( ns[0]!=1 || ns[1]!=1) - { - fprintf ( stderr, "\nERROR 2 SEQUENCES ONLY [FATAL:%s", PROGRAM); - crash (""); - } - - - - - - l0=strlen ( B->seq_al[0]); - l1=strlen ( B->seq_al[3]); - n_tot_diag=(l0+l1-1); - - tot_diag=declare_int ( n_tot_diag+1, 2); - for ( c=0; c<= n_tot_diag; c++)tot_diag[c][0]=c; - - for (f1=0; f1< 3; f1++) - { - for ( f2=0; f2< 3; f2++) - { - s1=B->seq_al[f1]; - s2=B->seq_al[3+f2]; - - - p1=strlen (s1); - p2=strlen (s2); - - - diag=evaluate_diagonals_for_two_sequences( s1, s2, maximise,NULL,ktup); - for (c=1; c<=(p1+p2-1); c++) - { - tot_diag[diag[c][0]][1]+=diag[c][1]*diag[c][1]; - } - free_int (diag, -1); - - } - } - - - - sort_int (tot_diag+1, 2, 1,0, n_tot_diag-1); - - return tot_diag; - - } - - - - -/*********************************COPYRIGHT NOTICE**********************************/ -/*© Centro de Regulacio Genomica */ -/*and */ -/*Cedric Notredame */ -/*Tue Oct 27 10:12:26 WEST 2009. */ -/*All rights reserved.*/ -/*This file is part of T-COFFEE.*/ -/**/ -/* T-COFFEE is free software; you can redistribute it and/or modify*/ -/* it under the terms of the GNU General Public License as published by*/ -/* the Free Software Foundation; either version 2 of the License, or*/ -/* (at your option) any later version.*/ -/**/ -/* T-COFFEE is distributed in the hope that it will be useful,*/ -/* but WITHOUT ANY WARRANTY; without even the implied warranty of*/ -/* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the*/ -/* GNU General Public License for more details.*/ -/**/ -/* You should have received a copy of the GNU General Public License*/ -/* along with Foobar; if not, write to the Free Software*/ -/* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA*/ -/*............................................... |*/ -/* If you need some more information*/ -/* cedric.notredame@europe.com*/ -/*............................................... |*/ -/**/ -/**/ -/* */ -/*********************************COPYRIGHT NOTICE**********************************/ diff --git a/binaries/src/tcoffee/t_coffee_source/util_dp_clean_maln.c b/binaries/src/tcoffee/t_coffee_source/util_dp_clean_maln.c deleted file mode 100644 index 5ba495d..0000000 --- a/binaries/src/tcoffee/t_coffee_source/util_dp_clean_maln.c +++ /dev/null @@ -1,384 +0,0 @@ -#include -#include -#include -#include -#include - -#include "io_lib_header.h" -#include "util_lib_header.h" -#include "define_header.h" -#include "dp_lib_header.h" - -Alignment *clean_maln ( Alignment *A, Alignment *I, int T, int n_it) - { - Alignment *C=NULL; - int a, b; - int in_una, in_aln, in_gap, gap, una, aln; - int Sstart,Rstate, Sstate; - int n_segment=0; - int **segment_list; - - - add_warning ( stderr, "\nWARNING: -clean_aln is not supported anymore [PROGRAM:%s]\n", PROGRAM); - return A; - - - - /*Initialization*/ - a=0; - in_una=a++;in_gap=a++;in_aln=a++; aln=a++;gap=a++;una=a++; - segment_list=declare_int ( A->len_aln*A->nseq, 3); - - - /*1: Identify the segments*/ - C=copy_aln(A, C); - for ( a=0; a< A->nseq; a++) - { - Sstate=in_aln; - for ( b=0; blen_aln; b++) - { - if (is_gap(A->seq_al[a][b]))Rstate=gap; - else if ( I->seq_al[a][b]<=T){Rstate=una;} - else if ( I->seq_al[a][b]==NO_COLOR_RESIDUE)Rstate=una; - else Rstate=aln; - - if (Rstate==una)C->seq_al[a][b]='-'; - - if (Sstate==in_aln) - { - if ( Rstate==gap) - {Sstate=in_gap; - Sstart=b; - } - else if ( Rstate==una) - { - Sstate=in_una; - Sstart=b; - } - else if ( Rstate==aln) - Sstate=in_aln; - } - else if ( Sstate==in_gap) - { - if ( Rstate==gap); - else if ( Rstate==una)Sstate=in_una; - else if ( Rstate==aln)Sstate=in_aln; - } - else if ( Sstate==in_una) - { - if ( Rstate==gap); - else if ( Rstate==una); - else if ( Rstate==aln) - { - segment_list[n_segment][0]=a; - segment_list[n_segment][1]=Sstart; - segment_list[n_segment][2]=b-Sstart; - Sstate=in_aln; - n_segment++; - } - } - } - if (Sstate==in_una) - { - segment_list[n_segment][0]=a; - segment_list[n_segment][1]=Sstart; - segment_list[n_segment][2]=b-Sstart; - Sstate=in_aln; - n_segment++; - } - } - - /*2 Realign the segments*/ - - for ( b=0; b< n_it; b++) - { - for ( a=0; a< n_segment; a++) - { - HERE ("1"); - A=realign_segment ( segment_list[a][0], segment_list[a][1], segment_list[a][2], A, C); - - } - } - free_aln (C); - free_int ( segment_list, -1); - make_fast_generic_dp_pair_wise (NULL, NULL, NULL, NULL); - - return A; - } -Alignment *realign_segment (int seq, int start, int len,Alignment *A, Alignment *C) - { - Alignment *S1=NULL, *S2=NULL, *S3=NULL; - int *ns, **ls; - int a,b; - static Constraint_list *CL; - - - /*1 Prepare the Constraint list*/ - if ( !CL) - { - CL=vcalloc ( 1, sizeof (Constraint_list)); - CL->extend_jit=0; - CL->pw_parameters_set=1; - CL->M=read_matrice ("blosum62mt"); - CL->gop=-20; - CL->gep=-1; - CL->evaluate_residue_pair=evaluate_matrix_score; - sprintf ( CL->dp_mode, "myers_miller_pair_wise"); - } - - S1=copy_aln(A,S1); - S1=extract_aln (S1,0,start); - S2=copy_aln(A,S2); - S2=extract_aln (S2, start, start+len); - S3=copy_aln(A,S3); - S3=extract_aln (S3, start+len,A->len_aln); - - - /*for (a=0; anseq; a++){S2->order[a][1]=0;S2->order[a][0]=a;}*/ - - - ungap ( S2->seq_al[seq]); - CL->S=A->S;/*aln2seq(S2);*/ - /*3 Prepare Sequence Presentation*/ - ns=vcalloc (2, sizeof (int)); - ls=declare_int (2,S2->nseq); - - ns[0]=A->nseq-1; - for ( a=0,b=0; a< S2->nseq; a++)if (a!=seq)ls[0][b++]=a; - ns[1]=1; - ls[1][0]=seq; - - pair_wise (S2, ns, ls, CL); - - A=realloc_aln (A, strlen (S1->seq_al[0])+ strlen (S2->seq_al[0])+ strlen (S3->seq_al[0])+1); - for ( a=0; a< A->nseq; a++) - { - sprintf ( A->seq_al[a], "%s%s%s", S1->seq_al[a], S2->seq_al[a], S3->seq_al[a]); - } - - free_aln (S1); - free_aln (S2); - free_aln (S3); - vfree(ns);free_int(ls, -1); - - return A; - } -Alignment *realign_segment_old (int seq, int start, int len,Alignment *A, Alignment *C) - { - Alignment *S=NULL; - int *ns, **ls; - char *sub_seq; - static Dp_Model *M=NULL; - static Constraint_list *CL=NULL; - Dp_Result *R=NULL; - int a,b, c; - - - /*1 Prepare the Constraint list*/ - if ( !CL) - { - CL=vcalloc ( 1, sizeof (Constraint_list)); - CL->extend_jit=0; - CL->pw_parameters_set=1; - CL->M=read_matrice ("blosum62mt"); - CL->gop=-20; - CL->gep=-1; - CL->evaluate_residue_pair=evaluate_matrix_score; - - } - S=copy_aln(C,S); - S=extract_aln (S, start, start+len); - S->len_aln=strlen(S->seq_al[0]); - sub_seq=extract_char (A->seq_al[seq], start, len); - - ungap(sub_seq); - - sprintf ( S->seq_al[seq],"%s", sub_seq); - CL->S=aln2seq(S); - - - - /*2 Prepare the Model*/ - M=initialize_seg2prf_model((start==0)?2:0,(start+len==A->len_aln)?2:0,CL); - M->diag=vcalloc ( 2*len+1, sizeof (int)); - M->diag[0]=len+strlen (sub_seq)-1; - for ( a=1; a<=M->diag[0]; a++)M->diag[a]=a; - - /*3 Prepare Sequence Presentation*/ - ns=vcalloc (2, sizeof (int)); - ls=declare_int (2,A->nseq); - - ns[0]=A->nseq-1; - for ( a=0,b=0; a< A->nseq; a++)if (a!=seq)ls[0][b++]=a; - ns[1]=1; - ls[1][0]=seq; - - if ( strlen (sub_seq)!=len) - { - - - R=make_fast_generic_dp_pair_wise(S, ns, ls, M); - - for (c=0, b=1,a=start; a< start+len; b++,a++) - { - if (R->traceback[b]==0) - { - A->seq_al[seq][a]=sub_seq[c]; - C->seq_al[seq][a]=sub_seq[c]; - c++; - } - else - { - A->seq_al[seq][a]='-'; - C->seq_al[seq][a]='-'; - } - } - } - - free_dp_model (M); - free_aln (S); - free_dp_result (R); - vfree(sub_seq); - vfree(ns); - free_int (ls, -1); - free_sequence (CL->S, (CL->S)->nseq); - - - return A; - } - -Dp_Model * initialize_seg2prf_model(int left_tg_mode, int right_tg_mode, Constraint_list *CL) - { - - Dp_Model *M; - int a, b, c,d; - - M=vcalloc ( 1, sizeof (Dp_Model)); - M->nstate=2; - M->START=M->nstate++; - M->END =M->nstate++; - - M->TG_MODE=1; - M->F_TG_MODE=0; - M->gop=CL->gop*SCORE_K; - M->gep=CL->gep*SCORE_K; - - M->bounded_model=declare_int (M->nstate+1, M->nstate+1); - M->model=declare_int (M->nstate+1, M->nstate+1); - for ( a=0; a<=M->nstate; a++) - for ( b=0; b<= M->nstate; b++) - M->model[a][b]=UNDEFINED; - - a=0; - M->TYPE=a++;M->LEN_I=a++; M->LEN_J=a++; M->DELTA_I=a++;M->DELTA_J=a++; M->CODING0=a++;M->DELETION=a++; - M->model_properties=declare_int ( M->nstate, 10); - - a=0; - M->EMISSION=a++;M->TERM_EMISSION=a++;M->START_EMISSION=a++; - M->model_emission_function=vcalloc(M->nstate, sizeof (int (**)(Alignment*, int **, int, int*, int, int **, int, int*, int, struct Constraint_list *))); - for ( a=0; a< M->nstate; a++) - M->model_emission_function[a]=vcalloc(3, sizeof (int (*)(Alignment*, int **, int, int*, int, int **, int, int*, int, struct Constraint_list *))); - - - /*Substitution*/ - M->model_properties[0][M->TYPE]=M->CODING0; - M->model_properties[0][M->LEN_I]=1; - M->model_properties[0][M->LEN_J]=1; - M->model_properties[0][M->DELTA_I]=-1; - M->model_properties[0][M->DELTA_J]= 0; - - M->model_emission_function[0][M->EMISSION] =cw_profile_get_dp_cost; - M->model_emission_function[0][M->START_EMISSION]=get_start_gep_cost; - M->model_emission_function[0][M->TERM_EMISSION] =get_start_gep_cost; - - /*Deletions*/ - M->model_properties[1][M->TYPE]=M->DELETION; - M->model_properties[1][M->LEN_I]=1; - M->model_properties[1][M->LEN_J]=0; - M->model_properties[1][M->DELTA_I]=-1; - M->model_properties[1][M->DELTA_J]=+1; - M->model_emission_function[1][M->EMISSION]=get_gep_cost; - - if (left_tg_mode ==2) - M->model_emission_function[1][M->START_EMISSION]=get_start_gep_cost; - else M->model_emission_function[1][M->START_EMISSION]=get_gep_cost; - - if (right_tg_mode ==2) - M->model_emission_function[1][M->TERM_EMISSION]=get_term_gep_cost; - else M->model_emission_function[1][M->TERM_EMISSION]=get_gep_cost; - - /*Transitions*/ - M->model[0][M->END]=M->model[M->START][0]=ALLOWED; - M->model[0][1]=M->gop; - M->model[0][0]=ALLOWED; - - M->model[1][M->END]= (right_tg_mode==0)?0:-M->gop; - M->model[M->START][1]=( left_tg_mode==0)?M->gop:0; - M->model[1][1]=ALLOWED; - M->model[1][0]=ALLOWED; - - - - - /*Prune the model*/ - - for (c=0,a=0, d=0; a< M->START; a++) - for ( b=0; bSTART; b++, d++) - { - if (M->model[a][b]!=UNDEFINED) - { - M->bounded_model[b][1+M->bounded_model[b][0]++]=a; - c++; - } - } - M->CL=CL; - - return M; - } - -int get_gep_cost (Alignment *A, int**pos1, int ns1, int*list1, int col1, int**pos2, int ns2, int*list2, int col2, Constraint_list *CL) -{ - return CL->gep*SCORE_K; -} - -int get_start_gep_cost (Alignment *A, int**pos1, int ns1, int*list1, int col1, int**pos2, int ns2, int*list2, int col2, Constraint_list *CL) -{ - return 0; -} -int get_term_gep_cost (Alignment *A, int**pos1, int ns1, int*list1, int col1, int**pos2, int ns2, int*list2, int col2, Constraint_list *CL) -{ - return CL->gep*SCORE_K*-1; -} - - - - -/*********************************COPYRIGHT NOTICE**********************************/ -/*© Centro de Regulacio Genomica */ -/*and */ -/*Cedric Notredame */ -/*Tue Oct 27 10:12:26 WEST 2009. */ -/*All rights reserved.*/ -/*This file is part of T-COFFEE.*/ -/**/ -/* T-COFFEE is free software; you can redistribute it and/or modify*/ -/* it under the terms of the GNU General Public License as published by*/ -/* the Free Software Foundation; either version 2 of the License, or*/ -/* (at your option) any later version.*/ -/**/ -/* T-COFFEE is distributed in the hope that it will be useful,*/ -/* but WITHOUT ANY WARRANTY; without even the implied warranty of*/ -/* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the*/ -/* GNU General Public License for more details.*/ -/**/ -/* You should have received a copy of the GNU General Public License*/ -/* along with Foobar; if not, write to the Free Software*/ -/* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA*/ -/*............................................... |*/ -/* If you need some more information*/ -/* cedric.notredame@europe.com*/ -/*............................................... |*/ -/**/ -/**/ -/* */ -/*********************************COPYRIGHT NOTICE**********************************/ diff --git a/binaries/src/tcoffee/t_coffee_source/util_dp_drivers.c b/binaries/src/tcoffee/t_coffee_source/util_dp_drivers.c deleted file mode 100644 index 37d4356..0000000 --- a/binaries/src/tcoffee/t_coffee_source/util_dp_drivers.c +++ /dev/null @@ -1,4749 +0,0 @@ -#include -#include -#include -#include -#include - -#include "io_lib_header.h" -#include "util_lib_header.h" -#include "define_header.h" -#include "dp_lib_header.h" - -int count_threshold_nodes (Alignment *A, NT_node P, int t); -int set_node_score (Alignment *A, NT_node P, char *mode); -char *split_nodes_nseq (Alignment *A, NT_node P, int max_nseq, char *list); -char *split_nodes_idmax (Alignment *A, NT_node P, int max_id, char *list); -/******************************************************************/ -/* MAIN DRIVER */ -/* */ -/* */ -/******************************************************************/ - - -Constraint_list *profile2list (Job_TC *job, int nprf) -{ - int *seqlist, *cache1, *cache2; - - static CLIST_TYPE *entry; - Alignment *A1, *A2, *A; - int a, b, c; - Constraint_list *SCL, *SCL2; - static int *L; - static int max_L_len; - int max, n_pairs; - int **score, n, s1, s2, si, r1, r2; - char *seqlistb; - int debug=0; - int cons, cons_thres, max_n_pairs, tot_n_pairs; - int *cons_table; - Constraint_list *CL; - char *command; - char *seq; - char *weight; - TC_method *M; - int *iA1, *iA2; - static char *buf1; - - if ( !buf1) buf1=vcalloc (1000, sizeof (char)); - - /*initialize the structure*/ - CL=(job->io)->CL; - M=(job->param)->TCM; - command=M->executable; - weight=M->weight; - seq=(job->param)->seq_c; - - debug=(getenv("DEBUG_TCOFFEE_profile2list")!=NULL)?1:0; - - - if ( debug)print_mem_usage (stderr, "IN"); - seqlistb=vcalloc (10, sizeof (char)); - - seqlist=string2num_list (seq)+1; - - - - if (!entry)entry=vcalloc (CL->entry_len, sizeof (int)); - entry[SEQ1]=seqlist[1]; - entry[SEQ2]=seqlist[2]; - - - A1=seq2profile(CL->S, seqlist[1]); - A2=seq2profile(CL->S, seqlist[2]); - - SCL=copy_constraint_list ( CL, SOFT_COPY); - - SCL->L=L;SCL->max_L_len=max_L_len; - SCL->fp=NULL; - SCL->M=NULL; - SCL->ne=0; - - SCL2=copy_constraint_list ( CL, SOFT_COPY); - SCL2->L=L;SCL2->max_L_len=max_L_len; - SCL2->fp=NULL; - SCL2->M=NULL; - SCL2->ne=0; - /*Merge Sequences*/ - - SCL->S=merge_seq (A1->S, NULL ); - SCL->S=merge_seq (A2->S, SCL->S); - - /*1: Compare the two profiles and identify the N master pairs*/ - n=(A1->nseq*A2->nseq); - max=(nprf==0 ||method_uses_structure (M))?n:MIN(nprf,n); - - if ( maxnseq*A2->nseq,3); - - for (n=0,a=0; anseq; a++) - for ( b=0; bnseq; b++,n++) - { - score[n][0]=a; - score[n][1]=b; - score[n][2]=(int)get_seq_fsim ( A->seq_al[a], A->seq_al[b+A1->nseq], "-", NOGROUP, NOMATRIX, AVERAGE_POSITIONS); - } - free_aln (A); - sort_int_inv (score,3,2,0, n-1); - } - else - { - score=declare_int (n,3); - for (n=0,a=0; anseq; a++) - for ( b=0; bnseq; b++,n++) - { - score[n][0]=a; - score[n][1]=b; - } - } - - iA1=get_name_index (A1->name,A1->nseq, (SCL->S)->name, (SCL->S)->nseq); - iA2=get_name_index (A2->name,A2->nseq, (SCL->S)->name, (SCL->S)->nseq); - - /*submit the N pairs*/ - for ( n_pairs=1,a=0; aseq_c=%s",buf1); - - /*2: Compute the pairewise library*/ - (job->io)->CL=SCL; - - SCL=seq2list (job); - /*Unwind the pointer counter:*/job->np--; - - if (debug)fprintf ( stderr, "\n\tProfile aln %s %s %s [(%d,%d):%d %%id]", (SCL->S)->name[iA1[s1]],(SCL->S)->name[iA2[s2]], command, score[a][0], score[a][1],score[a][2]); - - - /*3: Update the main library with the pairwise library*/ - cache1=seq2inv_pos (A1->seq_al[s1]); - cache2=seq2inv_pos (A2->seq_al[s2]); - - - if (debug)fprintf ( stderr, " =>%d pairs", SCL->ne); - n_pairs+=(SCL->ne>0)?1:0; - for (c=0; c< SCL->ne; c++) - { - si=vread_clist (SCL, c, SEQ1); - r1=vread_clist(SCL, c, (si==iA1[s1])?R1:R2); - r2=vread_clist(SCL, c, (si==iA1[s1])?R2:R1); - - entry[R1]=cache1[r1]; - entry[R2]=cache2[r2]; - - entry[WE]=vread_clist(SCL,c,WE); - entry[CONS]=1; - add_entry2list(entry, SCL2); - } - - SCL->ne=0; - vfree ( cache1);vfree ( cache2); - compact_list (SCL2, 0, SCL2->ne, "default"); - if (debug)fprintf ( stderr, " =>%d pairs", SCL2->ne); - - } - - free_sequence (SCL->S,-1); - vfree (iA1); vfree (iA2); - - if (debug)fprintf ( stderr, "\nNPairs=%d", n_pairs); - - - compact_list (SCL2, 0, SCL2->ne, "default"); - /*get the concistency distribution*/ - cons_table=vcalloc ( 101, sizeof (int)); - for (c=0; c< SCL2->ne; c++) - { - entry[R1]=vread_clist(SCL2,c,R1); - entry[R2]=vread_clist(SCL2,c,R2); - entry[WE]=vread_clist(SCL2,c,WE); - entry[CONS]=vread_clist(SCL2,c,CONS); - cons=(entry[CONS]*100)/n_pairs; - cons_table[cons]++; - } - - /*Identify the threshold*/ - max_n_pairs=(int)((float)(MIN(A1->len_aln, A2->len_aln)*2)); - - for (cons_thres=0,tot_n_pairs=0,c=100; c>=0; c--) - { - - tot_n_pairs+=cons_table[c]; - if ( tot_n_pairs>=max_n_pairs){cons_thres=c;c=-1;} - } - vfree (cons_table); - - /*Produce the library*/ - for (c=0; c< SCL2->ne; c++) - { - entry[R1]=vread_clist(SCL2,c,R1); - entry[R2]=vread_clist(SCL2,c,R2); - entry[WE]=vread_clist(SCL2,c,WE); - entry[CONS]=vread_clist(SCL2,c,CONS); - entry[WE]/=entry[CONS]; - cons=(entry[CONS]*100)/n_pairs; - - - if (cons>=cons_thres)add_entry2list(entry, CL); - } - - if ( !seq2R_template_profile (CL->S,seqlist[1]))free_aln (A1); - if ( !seq2R_template_profile (CL->S,seqlist[2]))free_aln (A2); - - vfree(seqlistb); - vfree(seqlist-1) ; - vfree (SCL->L);free_constraint_list ( SCL); - - - if (debug)print_mem_usage (stderr, "BEF FREE SCL2"); - vfree(SCL2->L);free_constraint_list ( SCL2); - - if (debug) - { - - fprintf ( stderr, "\nCL->ne=%d", CL->ne); - print_mem_usage (stderr, "OUT"); - } - - (job->io)->CL=CL; - return CL; -} - -int method_uses_structure(TC_method *M) -{ - if ( strchr (M->seq_type, 'P'))return 1; - else return 0; -} -int method_uses_profile(TC_method *M) -{ - if ( strchr (M->seq_type, 'R'))return 1; - else return 0; -} - - -Constraint_list *seq2list ( Job_TC *job) - { - char *mode; - Alignment *A=NULL; - Constraint_list *PW_CL; - Constraint_list *RCL=NULL; - - int full_prf, nprf; - int *seqlist; - - - - - static char *s1, *s2; - Sequence *S, *STL; - Constraint_list *CL; - - - char *seq; - char *weight; - TC_method *M; - - - M=(job->param)->TCM; - mode=M->executable; - weight=M->weight; - - PW_CL=M->PW_CL; - - CL=(job->io)->CL; - seq=(job->param)->seq_c; - - S=(CL)?CL->S:NULL; - STL=(CL)?CL->STRUC_LIST:NULL; - - - - - seqlist=string2num_list (seq)+1; - - if (!s1)s1=vcalloc ( MAXNAMES+1, sizeof (char)); - if (!s2)s2=vcalloc ( MAXNAMES+1, sizeof (char)); - - - sprintf (s1, "%s", (CL->S)->name[seqlist[1]]); - sprintf (s2, "%s", (CL->S)->name[seqlist[2]]); - -/*Proteins*/ - - - if ( strncmp (CL->profile_comparison, "full", 4)==0) - { - full_prf=1; - if ( CL->profile_comparison[4])nprf=atoi ( CL->profile_comparison+4); - else - nprf=0; - } - else - { - full_prf=0; - } - - if ((method_uses_structure (M)) && profile2P_template_file (CL->S, seqlist[1]) && profile2P_template_file (CL->S, seqlist[2])) - { - RCL=profile2list (job, nprf); - } - else if ( strm (mode, "ktup_msa")) - { - RCL=hasch2constraint_list (CL->S, CL); - } - else if ( strm (mode, "test_pair") || strm ( mode,"fast_pair") || strm (mode, "ifast_pair") \ - || strm ( mode, "diag_fast_pair")|| strm (mode, "idiag_fast_pair")\ - || strm ( mode, "blast_pair") || strm (mode, "lalign_blast_pair") \ - || strm ( mode, "viterbi_pair") || strm (mode, "slow_pair") || strm(mode, "glocal_pair") || strm (mode, "biphasic_pair") \ - || strm ( mode, "islow_pair") || strm (mode, "tm_slow_pair") || strm (mode, "r_slow_pair") \ - || strm ( mode, "lalign_id_pair")|| strm (mode, "tm_lalign_id_pair") || strm (mode , "lalign_len_pair") \ - || strm (mode, "prrp_aln") || strm ( mode, "test_pair") \ - || strm (mode, "cdna_fast_pair") || strm (mode, "diaa_slow_pair") || strm (mode, "monoaa_slow_pair")\ - || strncmp (mode,"cdna_fast_pair",14)==0 \ - ) - { - - A=fast_pair (job); - RCL=aln2constraint_list ((A->A)?A->A:A, CL,weight); - } - - else if ( strm ( mode, "subop1_pair") || strm ( mode, "subop2_pair") ) - { - A=fast_pair (job); - RCL=A->CL; - - } - else if ( strm ( mode, "proba_pair") ) - { - A=fast_pair (job); - RCL=A->CL; - } - else if ( strm ( mode, "best_pair4prot")) - { - RCL=best_pair4prot (job); - } - else if ( strm ( mode, "best_pair4rna")) - { - RCL=best_pair4rna (job); - } - else if ( strm ( mode, "exon2_pair")) - { - - char weight2[1000]; - - A=fast_pair (job); - sprintf ( weight2, "%s_subset_objOBJ-",weight); - RCL=aln2constraint_list (A, CL,weight2); - } - else if ( strm ( mode, "exon_pair")) - { - - A=fast_pair (job); - RCL=aln2constraint_list (A, CL,weight); - - } - else if ( strm ( mode, "exon3_pair")) - { - - char weight2[1000]; - - A=fast_pair (job); - sprintf ( weight2, "%s_subset_objOBJ-",weight); - RCL=aln2constraint_list (A, CL,weight2); - } - -/*STRUCTURAL METHODS*/ - - else if ( strm (mode, "seq_msa")) - { - RCL=seq_msa(M, seq, CL); - } -/*STRUCTURAL METHODS*/ - else if (strm (mode, "profile_pair") || strm (mode, "hh_pair")) - { - RCL=profile_pair (M, seq, CL); - } - - else if ( strm (mode, "sap_pair")) - { - RCL=sap_pair (seq, weight, CL); - } - else if ( strm (mode, "thread_pair")) - { - RCL=thread_pair (M,seq, CL); - } - else if ( strm (mode, "pdb_pair")) - { - RCL=pdb_pair (M,seq, CL); - } - else if (strm (mode, "rna_pair")) - { - RCL=rna_pair(M, seq, CL); - } - else if ( strm (mode, "pdbid_pair")) - { - RCL=pdbid_pair (M,seq, CL); - } - else if ( strm (mode, "fugue_pair")) - { - RCL=thread_pair (M,seq, CL); - } - else if ( strm (mode, "lsqman_pair")) - { - RCL=lsqman_pair(seq, CL); - } - else if ( strm ( mode, "align_pdb_pair")) - { - RCL=align_pdb_pair ( seq,"gotoh_pair_wise", CL->align_pdb_hasch_mode,CL->align_pdb_param_file,CL, job); - } - else if ( strm ( mode, "lalign_pdb_pair")) - { - RCL=align_pdb_pair ( seq,"sim_pair_wise_lalign", CL->align_pdb_hasch_mode,CL->align_pdb_param_file,CL, job); - } - else if ( strm ( mode, "align_pdb_pair_2")) - { - RCL=align_pdb_pair_2 ( seq, CL); - } - else - { - fprintf ( CL->local_stderr, "\nERROR: THE FUNCTION %s DOES NOT EXIST [FATAL:%s]\n", mode, PROGRAM);crash(""); - } - add_method_output2method_log (NULL,NULL, (A&&A->len_aln)?A:NULL,RCL, NULL); - RCL=(RCL==NULL)?CL:RCL; - - - vfree ( seqlist-1); - free_aln (A); - return RCL; - } - -Constraint_list *method2pw_cl (TC_method *M, Constraint_list *CL) - { - char *mode; - Constraint_list *PW_CL=NULL; - Sequence *S; - char mat[100], *m; - char group_mat[100]; - - - - mode=M->executable; - PW_CL=copy_constraint_list ( CL, SOFT_COPY); - PW_CL->pw_parameters_set=1; - - - - S=(PW_CL)?PW_CL->S:NULL; - - /*DNA or Protein*/ - m=PW_CL->method_matrix; - if ( strm ((PW_CL->S)->type, "PROTEIN")) - { - - sprintf ( mat, "%s", (strm(m, "default"))?"blosum62mt":m); - sprintf (group_mat, "vasiliky"); - PW_CL->ktup=2; - - } - else if ( strm ((PW_CL->S)->type, "DNA") || strm ((PW_CL->S)->type, "RNA") ) - { - - sprintf(group_mat, "idmat"); - sprintf ( mat, "%s", (strm(m, "default"))?"dna_idmat":m); - PW_CL->ktup=5; - - } - if ( M->matrix[0])sprintf ( mat, "%s", M->matrix); - - PW_CL->M=read_matrice (mat); - - - if ( M->gop!=UNDEFINED) {PW_CL->gop=M->gop;} - else - { - PW_CL->gop= get_avg_matrix_mm (PW_CL->M, AA_ALPHABET)*10; - } - - if ( M->gep!=UNDEFINED)PW_CL->gep=M->gep; - else PW_CL->gep=-1; - - - - if ( strm2 ( mode,"fast_pair", "ifast_pair")) - { - PW_CL->maximise=1; - PW_CL->TG_MODE=1; - PW_CL->use_fragments=0; - if ( !PW_CL->use_fragments)PW_CL->diagonal_threshold=0; - else PW_CL->diagonal_threshold=6; - - sprintf (PW_CL->dp_mode, "fasta_pair_wise"); - sprintf (PW_CL->matrix_for_aa_group, group_mat); - - if ( strm ( mode, "fast_pair")) - { - PW_CL->L=NULL; - - PW_CL->get_dp_cost=slow_get_dp_cost; - PW_CL->evaluate_residue_pair=evaluate_matrix_score; - PW_CL->extend_jit=0; - } - } - else if ( strm2 ( mode,"diag_fast_pair","idiag_fast_pair")) - { - PW_CL->L=NULL; - PW_CL->maximise=1; - PW_CL->TG_MODE=1; - PW_CL->S=CL->S; - - PW_CL->use_fragments=1; - PW_CL->diagonal_threshold=3; - - sprintf (PW_CL->dp_mode, "fasta_pair_wise"); - PW_CL->ktup=1; - sprintf (PW_CL->matrix_for_aa_group, group_mat); - - PW_CL->get_dp_cost=slow_get_dp_cost; - PW_CL->evaluate_residue_pair=evaluate_matrix_score; - - PW_CL->extend_jit=0; - } - else if ( strm ( mode,"blast_pair")) - { - PW_CL->L=NULL; - PW_CL->maximise=1; - PW_CL->TG_MODE=1; - - PW_CL->use_fragments=0; - - PW_CL->pair_wise=gotoh_pair_wise; - PW_CL->evaluate_residue_pair=evaluate_blast_profile_score; - sprintf (PW_CL->matrix_for_aa_group, group_mat); - PW_CL->extend_jit=0; - } - else if ( strm ( mode,"lalign_blast_pair")) - { - PW_CL->L=NULL; - PW_CL->maximise=1; - PW_CL->TG_MODE=1; - - PW_CL->use_fragments=0; - PW_CL->pair_wise=sim_pair_wise_lalign; - PW_CL->evaluate_residue_pair=evaluate_blast_profile_score; - PW_CL->lalign_n_top=10; - - sprintf (PW_CL->matrix_for_aa_group, group_mat); - PW_CL->extend_jit=0; - } - else if ( strm ( mode,"viterbi_pair")) - { - PW_CL->maximise=1; - PW_CL->TG_MODE=1; - PW_CL->use_fragments=0; - sprintf (PW_CL->dp_mode, "viterbi_pair_wise"); - PW_CL->L=NULL; - PW_CL->get_dp_cost=slow_get_dp_cost; - PW_CL->evaluate_residue_pair=evaluate_matrix_score; - PW_CL->extend_jit=0; - } - else if ( strm ( mode,"glocal_pair")) - { - PW_CL->maximise=1; - PW_CL->TG_MODE=1; - PW_CL->use_fragments=0; - sprintf (PW_CL->dp_mode, "glocal_pair_wise"); - sprintf (PW_CL->matrix_for_aa_group, group_mat); - - PW_CL->L=NULL; - PW_CL->get_dp_cost=slow_get_dp_cost; - PW_CL->evaluate_residue_pair=evaluate_matrix_score; - PW_CL->extend_jit=0; - } - else if ( strm ( mode,"test_pair")) - { - PW_CL->maximise=1; - PW_CL->TG_MODE=1; - PW_CL->use_fragments=0; - sprintf (PW_CL->dp_mode, "test_pair_wise"); - } - else if ( strm ( mode,"sticky_pair")) - { - PW_CL->maximise=1; - PW_CL->TG_MODE=1; - PW_CL->use_fragments=0; - PW_CL->L=NULL; - PW_CL->get_dp_cost=cw_profile_get_dp_cost; - PW_CL->evaluate_residue_pair=evaluate_matrix_score; - PW_CL->extend_jit=0; - sprintf (PW_CL->dp_mode, "gotoh_pair_wise_lgp_sticky"); - sprintf (PW_CL->matrix_for_aa_group, group_mat); - } - - else if ( strm ( mode,"slow_pair")|| strm (mode, "islow_pair" ) ) - { - - PW_CL->maximise=1; - PW_CL->TG_MODE=1; - PW_CL->use_fragments=0; - sprintf (PW_CL->dp_mode, "myers_miller_pair_wise"); - sprintf (PW_CL->matrix_for_aa_group, group_mat); - - if ( strm ( "islow_pair", mode)) - { - PW_CL->get_dp_cost=slow_get_dp_cost; - PW_CL->evaluate_residue_pair=residue_pair_extended_list; - PW_CL->extend_jit=1; - } - else if ( strm ("slow_pair", mode) ) - { - PW_CL->L=NULL; - PW_CL->get_dp_cost=cw_profile_get_dp_cost; - PW_CL->evaluate_residue_pair=evaluate_matrix_score; - PW_CL->extend_jit=0; - } - } - else if ( strm (mode, "subop1_pair")) - { - - PW_CL->maximise=1; - PW_CL->TG_MODE=1; - PW_CL->use_fragments=0; - sprintf (PW_CL->dp_mode, "subop1_pair_wise"); - sprintf (PW_CL->matrix_for_aa_group, group_mat); - PW_CL->L=NULL; - PW_CL->get_dp_cost=slow_get_dp_cost; - PW_CL->evaluate_residue_pair=evaluate_matrix_score; - PW_CL->extend_jit=0; - } - else if ( strm (mode, "biphasic_pair")) - { - PW_CL->maximise=1; - PW_CL->TG_MODE=1; - PW_CL->use_fragments=0; - sprintf (PW_CL->dp_mode, "biphasic_pair_wise"); - sprintf (PW_CL->matrix_for_aa_group, group_mat); - PW_CL->L=NULL; - PW_CL->get_dp_cost=cw_profile_get_dp_cost; - PW_CL->evaluate_residue_pair=evaluate_matrix_score; - PW_CL->extend_jit=0; - } - else if ( strm (mode, "proba_pair")) - { - - PW_CL->maximise=1; - PW_CL->TG_MODE=1; - PW_CL->use_fragments=0; - sprintf (PW_CL->dp_mode, "proba_pair_wise"); - sprintf (PW_CL->matrix_for_aa_group, group_mat); - PW_CL->L=NULL; - PW_CL->get_dp_cost=slow_get_dp_cost; - PW_CL->evaluate_residue_pair=evaluate_matrix_score; - PW_CL->extend_jit=0; - } - - else if ( strm (mode, "diaa_slow_pair")) - { - - PW_CL->maximise=1; - PW_CL->TG_MODE=1; - PW_CL->use_fragments=0; - sprintf (PW_CL->dp_mode, "gotoh_pair_wise_lgp"); - sprintf (PW_CL->matrix_for_aa_group, group_mat); - PW_CL->L=NULL; - PW_CL->get_dp_cost=slow_get_dp_cost; - PW_CL->evaluate_residue_pair=evaluate_diaa_matrix_score; - PW_CL->extend_jit=0; - } - else if ( strm (mode, "r_slow_pair")) - { - PW_CL->maximise=1; - PW_CL->TG_MODE=1; - PW_CL->use_fragments=0; - sprintf (PW_CL->dp_mode, "gotoh_pair_wise_lgp"); - sprintf (PW_CL->matrix_for_aa_group, group_mat); - PW_CL->L=NULL; - PW_CL->get_dp_cost=slow_get_dp_cost; - PW_CL->evaluate_residue_pair=evaluate_matrix_score; - PW_CL->extend_jit=0; - PW_CL->reverse_seq=1; - } - else if ( strm (mode, "tm_slow_pair")) - { - PW_CL->maximise=1; - PW_CL->TG_MODE=1; - PW_CL->use_fragments=0; - sprintf (PW_CL->dp_mode, "myers_miller_pair_wise"); - sprintf (PW_CL->matrix_for_aa_group, group_mat); - PW_CL->L=NULL; - PW_CL->get_dp_cost=slow_get_dp_cost; - PW_CL->evaluate_residue_pair=evaluate_tm_matrix_score; - PW_CL->extend_jit=0; - } - else if ( strm (mode, "monoaa_slow_pair")) - { - - PW_CL->maximise=1; - PW_CL->TG_MODE=1; - PW_CL->use_fragments=0; - sprintf (PW_CL->dp_mode, "gotoh_pair_wise_lgp"); - sprintf (PW_CL->matrix_for_aa_group, group_mat); - PW_CL->L=NULL; - PW_CL->get_dp_cost=slow_get_dp_cost; - PW_CL->evaluate_residue_pair=evaluate_monoaa_matrix_score; - PW_CL->extend_jit=0; - } - else if ( strm (mode, "subop2_pair")) - { - - PW_CL->maximise=1; - PW_CL->TG_MODE=1; - PW_CL->use_fragments=0; - sprintf (PW_CL->dp_mode, "subop2_pair_wise"); - sprintf (PW_CL->matrix_for_aa_group, group_mat); - PW_CL->L=NULL; - PW_CL->get_dp_cost=slow_get_dp_cost; - PW_CL->evaluate_residue_pair=evaluate_matrix_score; - PW_CL->extend_jit=0; - } - - else if (strm ( mode, "exon2_pair")) - { - int a; - PW_CL->maximise=1; - PW_CL->TG_MODE=1; - - PW_CL->use_fragments=0; - sprintf (PW_CL->dp_mode, "myers_miller_pair_wise"); - sprintf (PW_CL->matrix_for_aa_group, group_mat); - PW_CL->L=NULL; - - for ( a=0; a<60; a++) - { - PW_CL->M['x'-'A'][a]=0; - PW_CL->M[a]['x'-'A']=0; - PW_CL->M['X'-'A'][a]=0; - PW_CL->M[a]['X'-'A']=0; - } - PW_CL->evaluate_residue_pair=evaluate_matrix_score; - PW_CL->extend_jit=0; - } - else if (strm ( mode, "exon3_pair")) - { - int a; - PW_CL->maximise=1; - PW_CL->TG_MODE=1; - - PW_CL->use_fragments=0; - sprintf (PW_CL->dp_mode, "myers_miller_pair_wise"); - sprintf (PW_CL->matrix_for_aa_group, group_mat); - PW_CL->L=NULL; - - for ( a=0; a<60; a++) - { - PW_CL->M['x'-'A'][a]=0; - PW_CL->M[a]['x'-'A']=0; - PW_CL->M['X'-'A'][a]=0; - PW_CL->M[a]['X'-'A']=0; - } - PW_CL->evaluate_residue_pair=evaluate_matrix_score; - PW_CL->extend_jit=0; - } - else if (strm ( mode, "exon_pair")) - { - int a; - PW_CL->maximise=1; - PW_CL->TG_MODE=1; - - PW_CL->use_fragments=0; - sprintf (PW_CL->dp_mode, "myers_miller_pair_wise"); - sprintf (PW_CL->matrix_for_aa_group, group_mat); - PW_CL->L=NULL; - - for ( a=0; a<60; a++) - { - PW_CL->M['x'-'A'][a]=0; - PW_CL->M[a]['x'-'A']=0; - PW_CL->M['X'-'A'][a]=0; - PW_CL->M[a]['X'-'A']=0; - } - PW_CL->evaluate_residue_pair=evaluate_matrix_score; - PW_CL->extend_jit=0; - } - else if ( strm ( mode , "lalign_len_pair")) - { - PW_CL->L=NULL; - PW_CL->maximise=1; - PW_CL->TG_MODE=1; - PW_CL->use_fragments=0; - PW_CL->pair_wise=sim_pair_wise_lalign; - PW_CL->evaluate_residue_pair=evaluate_matrix_score; - PW_CL->get_dp_cost=slow_get_dp_cost; - PW_CL->lalign_n_top=CL->lalign_n_top; - sprintf (PW_CL->matrix_for_aa_group, group_mat); - PW_CL->extend_jit=0; - } - else if ( strm ( mode , "lalign_id_pair")) - { - PW_CL->L=NULL; - PW_CL->maximise=1; - PW_CL->TG_MODE=1; - PW_CL->use_fragments=0; - PW_CL->pair_wise=sim_pair_wise_lalign; - PW_CL->evaluate_residue_pair=evaluate_matrix_score; - PW_CL->get_dp_cost=slow_get_dp_cost; - PW_CL->lalign_n_top=CL->lalign_n_top; - sprintf (PW_CL->matrix_for_aa_group, group_mat); - PW_CL->extend_jit=0; - } - else if ( strm ( mode , "tm_lalign_id_pair")) - { - PW_CL->L=NULL; - PW_CL->maximise=1; - PW_CL->TG_MODE=1; - PW_CL->use_fragments=0; - PW_CL->pair_wise=sim_pair_wise_lalign; - PW_CL->evaluate_residue_pair=evaluate_tm_matrix_score; - PW_CL->get_dp_cost=slow_get_dp_cost; - PW_CL->lalign_n_top=CL->lalign_n_top; - sprintf (PW_CL->matrix_for_aa_group, group_mat); - PW_CL->extend_jit=0; - } -/*CDNA*/ - else if ( strm ( mode, "cdna_cfast_pair")) - { - PW_CL->L=NULL; - PW_CL->maximise=1; - PW_CL->TG_MODE=1; - PW_CL->S=CL->S; - PW_CL->use_fragments=0; - sprintf (PW_CL->dp_mode, "cfasta_cdna_pair_wise"); - - PW_CL->M=read_matrice (strcpy ( mat, "blosum62mt")); - PW_CL->extend_jit=0; - - PW_CL->f_gop=CL->f_gop; - PW_CL->f_gep=CL->f_gep; - PW_CL->get_dp_cost=get_dp_cost; - PW_CL->evaluate_residue_pair=evaluate_cdna_matrix_score; - PW_CL->ktup=1; - } - else if ( strm ( mode, "cdna_fast_pair") || strncmp (mode,"cdna_fast_pair",14)==0) - { - - PW_CL->L=NULL; - PW_CL->maximise=1; - PW_CL->TG_MODE=1; - PW_CL->use_fragments=0; - sprintf (PW_CL->dp_mode, "fasta_cdna_pair_wise"); - - PW_CL->extend_jit=0; - PW_CL->gop=-5; - PW_CL->gep=-1; - PW_CL->f_gop=-15; - PW_CL->f_gep=0; - - PW_CL->get_dp_cost=get_dp_cost; - PW_CL->evaluate_residue_pair=evaluate_cdna_matrix_score; - PW_CL->ktup=1; - } - else - { - free_constraint_list (PW_CL); - PW_CL=NULL; - } - - - if (!strm (CL->method_evaluate_mode, "default")) - { - choose_extension_mode (CL->method_evaluate_mode, PW_CL); - } - return PW_CL; - } -/******************************************************************/ -/* MULTIPLE ALIGNMENTS */ -/* */ -/* */ -/******************************************************************/ -Alignment * compute_prrp_aln (Alignment *A, Constraint_list *CL) - { - char *tmpseq=NULL; - char *tmpaln=NULL; - char command[10000]; - Sequence *S; - - tmpseq=vtmpnam(NULL); - tmpaln=vtmpnam(NULL); - - - A=seq2aln ( CL->S, A, 1); - output_gotoh_seq (tmpseq, A); - sprintf ( command, "prrp -E/dev/null -o%s -F9 %s >/dev/null", tmpaln, tmpseq); - my_system (command); - if (!check_file_exists(tmpaln)){return NULL;} - S=get_fasta_sequence (tmpaln, NULL); - - S->contains_gap=0; - A=seq2aln(S, A,0); - free_sequence (S, S->nseq); - - return A; - } - -Alignment *seq2clustalw_aln (Sequence *S) -{ - return aln2clustalw_aln (seq2aln (S, NULL,RM_GAP), NULL); -} - -Alignment * aln2clustalw_aln (Alignment *B, Constraint_list *CL) -{ - char *seq=NULL,*aln=NULL, command[1000]; - - output_fasta_seq (seq=vtmpnam (NULL), B); - sprintf ( command, "clustalw -infile=%s -outorder=input -outfile=%s %s", seq, aln=vtmpnam (NULL), TO_NULL_DEVICE); - my_system (command); - - if (!check_file_exists(aln)) - return NULL; - else{B->nseq=0;return main_read_aln(aln,B);} -} -Alignment * compute_tcoffee_aln_quick (Alignment *A, Constraint_list *CL) - { - char *tmpseq=NULL; - char *tmpaln=NULL; - char command[10000]; - - - tmpseq=vtmpnam(NULL); - tmpaln=vtmpnam(NULL); - - if ( CL)A=seq2aln ( CL->S, A, 1); - output_fasta_seq (tmpseq, A); - - sprintf ( command, "t_coffee -seq %s -very_fast -outfile %s -quiet ",tmpseq,tmpaln); - - my_system (command); - if (!check_file_exists(tmpaln))return NULL; - A->nseq=0; - A=main_read_aln(tmpaln,A); - - vremove( tmpseq); - vremove (tmpaln); - return A; - } - -Alignment * compute_clustalw_aln (Alignment *A, Constraint_list *CL) - { - char *tmpseq=NULL; - char *tmpaln=NULL; - char command[10000]; - - - tmpseq=vtmpnam(NULL); - tmpaln=vtmpnam(NULL); - - A=seq2aln ( CL->S, A, 1); - output_fasta_seq (tmpseq, A); - - sprintf ( command, "clustalw %sinfile=%s %soutfile=%s %s",CWF, tmpseq,CWF, tmpaln,TO_NULL_DEVICE); - - my_system (command); - if (!check_file_exists(tmpaln))return NULL; - A->nseq=0; - A=main_read_aln(tmpaln,A); - - vremove( tmpseq); - vremove (tmpaln); - return A; - } - -Alignment * realign_block ( Alignment *A, int col1, int col2, char *pg) -{ - /*Uses pg: (pg -infile= -outfile= to realign the block [col1 col2[ - Only guaranteed if pg can handle empty sequences - set pg to NULL to use the default program - */ - - - Alignment *L, *M, *R; - char *seq_name; - char *aln_name; - char command[1000], script[1000]; - - - - seq_name=vtmpnam(NULL); - aln_name=vtmpnam (NULL); - - L=copy_aln (A, NULL); - M=copy_aln (A, NULL); - R=copy_aln (A, NULL); - - L=extract_aln ( L, 0, col1); - M=extract_aln ( M, col1, col2); - R=extract_aln ( R, col2, A->len_aln); - output_fasta_seq (seq_name, M); - - sprintf ( script, "%s", (pg==NULL)?"t_coffee":pg); - - sprintf ( command, "%s -infile=%s -outfile=%s %s", script,seq_name, aln_name, TO_NULL_DEVICE); - my_system ( command); - free_aln (M); - M=main_read_aln (aln_name, NULL); - - - M=reorder_aln (M, L->name,L->nseq); - L=aln_cat (L, M); - L=aln_cat (L, R); - A=copy_aln (L, A); - free_aln (L);free_aln (M); free_aln (R); - - return A; -} - - - - - -/******************************************************************/ -/* DNA */ -/* */ -/* */ -/******************************************************************/ - - -/******************************************************************/ -/* STRUCTURES */ -/* */ -/* */ -/******************************************************************/ - - - - - -Constraint_list * align_pdb_pair_2 (char *seq, Constraint_list *CL) - { - char *tmp_name=NULL; - int s1, s2; - - - static char *command; - static char *program; - - tmp_name=vtmpnam ( NULL); - - if ( !program)program=vcalloc ( LONG_STRING, sizeof (char)); - if ( !command)command=vcalloc ( LONG_STRING, sizeof (char)); - -#ifndef ALIGN_PDB_4_TCOFFEE - if ( getenv ( "ALIGN_PDB_4_TCOFFEE")==NULL)crash ("ALIGN_PDB_4_TCOFFEE IS NOT DEFINED"); - else sprintf ( program, "%s", (getenv ( "ALIGN_PDB_4_TCOFFEE"))); -#else - if ( getenv ( "ALIGN_4_TCOFFEE")==NULL)sprintf (program, "%s", ALIGN_PDB_4_TCOFFEE); - else sprintf ( program, "%s", (getenv ( "ALIGN_PDB_4_TCOFFEE"))); -#endif - - atoi(strtok (seq,SEPARATORS)); - s1=atoi(strtok (NULL,SEPARATORS)); - s2=atoi(strtok (NULL,SEPARATORS)); - - sprintf ( command , "%s -in P%s P%s -gapopen=-40 -max_delta=2.5 -gapext=0 -scale=0 -hasch_mode=hasch_ca_trace_bubble -maximum_distance=10 -output pdb_constraint_list -outfile stdout> %s%s",program, (CL->S)->file[s1], (CL->S)->file[s2], get_cache_dir(),tmp_name); - - my_system ( command); - CL=read_constraint_list_file(CL, tmp_name); - - - vremove ( tmp_name); - - - return CL; - } - -Constraint_list *align_pdb_pair (char *seq_in, char *dp_mode,char *evaluate_mode, char *file, Constraint_list *CL, Job_TC *job) - { - int s1, s2; - char seq[1000]; - char name1[1000]; - char name2[1000]; - - - Constraint_list *PWCL; - Alignment *F; - - sprintf ( seq, "%s",seq_in); - atoi(strtok (seq,SEPARATORS)); - s1=atoi(strtok (NULL,SEPARATORS)); - s2=atoi(strtok (NULL,SEPARATORS)); - - - - sprintf (name1, "%s%s_%s.%s.align_pdb", get_cache_dir(),(CL->S)->name[s1], (CL->S)->name[s2], dp_mode); - sprintf (name2, "%s%s_%s.%s.align_pdb", get_cache_dir(),(CL->S)->name[s2], (CL->S)->name[s1], dp_mode); - - - if ( check_file_exists (name1) && is_lib(name1))CL=read_constraint_list_file(CL,name1); - else if ( check_file_exists (name2) && is_lib(name2))CL=read_constraint_list_file(CL,name2); - else - { - PWCL=set_constraint_list4align_pdb ( CL,s1,dp_mode, evaluate_mode, NULL); - PWCL=set_constraint_list4align_pdb ( CL,s2,dp_mode, evaluate_mode, NULL); - ((job->param)->TCM)->PW_CL=PWCL; - F=fast_pair (job); - output_constraints (name1, "100", F); - CL=aln2constraint_list (F, CL, "100"); - free_aln (F); - } - return CL; - } - -Constraint_list * profile_pair (TC_method *M , char *in_seq, Constraint_list *CL) - { - - char seq[1000]; - int a, s1, s2; - char *result,*prf1_file, *prf2_file; - Alignment *F=NULL, *A1, *A2; - FILE *fp; - char command[10000]; - char *param; - - if ( M->executable2[0]=='\0') - fprintf ( stderr, "\nERROR: profile_pair requires a method: thread_pair@EP@executable2@ [FATAL:%s]\n", PROGRAM); - - - sprintf ( seq, "%s", in_seq); - atoi(strtok (seq,SEPARATORS)); - s1=atoi(strtok (NULL,SEPARATORS)); - s2=atoi(strtok (NULL,SEPARATORS)); - - A1=seq2R_template_profile(CL->S,s1); - A2=seq2R_template_profile(CL->S,s2); - - - prf1_file=vtmpnam (NULL); - fp=vfopen (prf1_file, "w"); - if ( A1) - { - fprintf (fp, ">%s\n%s\n",(CL->S)->name[s1], aln2cons_seq_mat(A1, "blosum62mt")); - for ( a=0; a< A1->nseq; a++)fprintf (fp, ">prf_seq1_%d\n%s\n", a, A1->seq_al[a]); - } - else - { - fprintf ( fp, ">%s\n%s\n", (CL->S)->name[s1], (CL->S)->seq[s1]); - } - vfclose (fp); - - prf2_file=vtmpnam (NULL); - fp=vfopen (prf2_file, "w"); - if (A2) - { - fprintf (fp, ">%s\n%s\n",(CL->S)->name[s2], aln2cons_seq_mat(A2, "blosum62mt")); - for ( a=0; a< A2->nseq; a++)fprintf (fp, ">prf_seq2_%d\n%s\n", a, A2->seq_al[a]); - } - else - { - fprintf ( fp, ">%s\n%s\n", (CL->S)->name[s2], (CL->S)->seq[s2]); - } - vfclose (fp); - - result=vtmpnam (NULL); - if ( M->param) - { - param=vcalloc(strlen (M->param)+1, sizeof (char)); - sprintf ( param, "%s", M->param); - param=substitute ( param, " ", ""); - param=substitute ( param, "\n", ""); - } - - sprintf ( command, "tc_generic_method.pl -mode=profile_pair -method=%s %s%s %s%s %s%s -param=%s -tmpdir=%s", M->executable2,M->in_flag,prf1_file, M->in_flag2,prf2_file,M->out_flag, result, param, get_tmp_4_tcoffee()); - - my_system ( command); - - - - if ( !check_file_exists (result)) - { - fprintf ( stderr, "\n\tprofile_pair/%s failed:\n\t%s\n",M->executable2, command); - myexit (EXIT_FAILURE); - } - else if ( is_lib (result)) - { - CL=read_constraint_list_file(CL,result); - } - else if ( is_aln (result)) - { - F=main_read_aln (result, NULL); - char *name1, *name2; - name1=(CL->S)->name[s1]; - name2=(CL->S)->name[s2]; - - fp=vfopen (result, "w"); - for ( a=0; a< F->nseq; a++) - if (strm ( F->name[a], name1) || strm (F->name[a], name2)) - fprintf ( fp, ">%s\n%s\n", F->name[a], F->seq_al[a]); - vfclose (fp); - free_aln (F); - F=main_read_aln (result, NULL); - CL=aln2constraint_list (F, CL, "sim"); - free_aln (F); - } - return CL; - } -Constraint_list * pdbid_pair (TC_method *M , char *in_seq, Constraint_list *CL) - { - - char seq[1000]; - char *alternative_method; - char *current_,method; - - int s1, s2; - char *result, *pdb1,*pdb1_file, *pdb2, *pdb2_file; - Alignment *F=NULL; - char *command; - - - if ( M->executable2[0]=='\0') - { - fprintf ( stderr, "\nERROR: pdbid_pair requires a structural alignment method: pdb_pair@EP@EXECUTABLE2@ [FATAL:%s]\n", PROGRAM); - myexit (EXIT_FAILURE); - } - sprintf ( seq, "%s", in_seq); - - - atoi(strtok (seq,SEPARATORS)); - s1=atoi(strtok (NULL,SEPARATORS)); - s2=atoi(strtok (NULL,SEPARATORS)); - - pdb1=seq2P_pdb_id(CL->S,s1); - pdb2=seq2P_pdb_id(CL->S,s2); - - if (!is_pdb_name (pdb1) || !is_pdb_name(pdb2)) - { - return CL; - } - - - result=vtmpnam (NULL); - command = vcalloc ( 1000, sizeof (char)); - - sprintf ( command, "tc_generic_method.pl -mode=pdbid_pair -method=%s %s%s %s%s %s%s -email=%s -cache=%s -tmpdir=%s", M->executable2,M->in_flag,pdb1, M->in_flag2,pdb2,M->out_flag, result, Email(ENV,SET),get_cache_dir(), get_tmp_4_tcoffee()); - my_system ( command); - vfree (command); - if (file_is_empty (result))return CL; - else - { - F=main_read_aln (result, NULL); - - if ( !F) - { - fprintf ( stderr, "\n\tpdb_pair/%s failed:\n\t%s\n",M->executable2, command); - } - else - { - - sprintf ( F->name[0],"%s", (CL->S)->name[s1]); - sprintf ( F->name[1],"%s", (CL->S)->name[s2]); - F=fix_aln_seq (F, CL->S); - CL=aln2constraint_list (F, CL, "sim"); - } - free_aln (F); - } - return CL; - } - -Constraint_list * pdb_pair (TC_method *M , char *in_seq, Constraint_list *CL) - { - - char seq[1000]; - int s1, s2; - char *result, *pdb1,*pdb1_file, *pdb2, *pdb2_file; - Alignment *F=NULL; - - - char command[10000]; - - if ( M->executable2[0]=='\0') - { - fprintf ( stderr, "\nERROR: pdb_pair requires a structural alignment method: pdb_pair@EP@EXECUTABLE2@ [FATAL:%s]\n", PROGRAM); - myexit (EXIT_FAILURE); - } - - sprintf ( seq, "%s", in_seq); - - - atoi(strtok (seq,SEPARATORS)); - s1=atoi(strtok (NULL,SEPARATORS)); - s2=atoi(strtok (NULL,SEPARATORS)); - - pdb1=seq2P_template_file(CL->S,s1); - pdb2=seq2P_template_file(CL->S,s2); - if ( !pdb1 || !pdb2) return CL; - - - pdb1_file=vtmpnam (NULL); - pdb2_file=vtmpnam (NULL); - - - - sprintf ( command, "extract_from_pdb -infile %s -atom ALL -chain FIRST -nodiagnostic > %s", pdb1, pdb1_file); - my_system (command); - - - - sprintf ( command, "extract_from_pdb -infile %s -atom ALL -chain FIRST -nodiagnostic > %s", pdb2, pdb2_file); - my_system (command); - - - result=vtmpnam (NULL); - - sprintf ( command, "tc_generic_method.pl -mode=pdb_pair -method=%s %s%s %s%s %s%s -tmpdir=%s", M->executable2,M->in_flag,pdb1_file, M->in_flag2,pdb2_file,M->out_flag, result, get_tmp_4_tcoffee()); - my_system ( command); - - F=main_read_aln (result, NULL); - - if ( !F) - { - fprintf ( stderr, "\n\tpdb_pair/%s failed:\n\t%s\n",M->executable2, command); - } - else - { - - sprintf ( F->name[0],"%s", (CL->S)->name[s1]); - sprintf ( F->name[1],"%s", (CL->S)->name[s2]); - F=fix_aln_seq (F, CL->S); - CL=aln2constraint_list (F, CL, "sim"); - } - - - free_aln (F); - return CL; - } - -Constraint_list * seq_msa (TC_method *M , char *in_seq, Constraint_list *CL) -{ - char seq[1000]; - char *infile, *outfile; - int a, n, s; - Alignment *F=NULL; - FILE *fp; - char command[1000]; - - - infile=vtmpnam (NULL); - outfile=vtmpnam (NULL); - - sprintf ( seq, "%s", in_seq); - - n=atoi(strtok (seq,SEPARATORS)); - - fp=vfopen (infile, "w"); - for ( a=0; a%s\n%s\n", (CL->S)->name[s], (CL->S)->seq[s]); - } - vfclose (fp); - - sprintf ( command, "t_coffee -other_pg tc_generic_method.pl -mode=seq_msa -method=%s %s%s %s%s -tmpdir=%s", M->executable2, M->in_flag, infile, M->out_flag, outfile, get_tmp_4_tcoffee()); - my_system (command); - - - if ( strm (M->out_mode, "aln") || strm (M->out_mode, "A")) - { - F=main_read_aln (outfile, NULL); - if ( !F) - { - fprintf ( stderr, "\n\tseq_msa/%s failed:\n\t%s\n", M->executable2,command); - } - else - { - CL=aln2constraint_list (F, CL, "sim"); - } - free_aln (F); - } - else if ( strm (M->out_mode, "fL")|| strm (M->out_mode, "lib")) - { - Constraint_list *NCL; - NCL=read_constraint_list_file(CL,outfile); - if ( !NCL) - { - fprintf ( stderr, "\n\tseq_msa/%s failed:\n\t%s\n", M->executable2,command); - } - else - { - CL=NCL; - } - } - return CL; -} -Constraint_list * thread_pair (TC_method *M , char *in_seq, Constraint_list *CL) - { - - char seq[1000]; - int s1, s2; - - if ( M->executable2[0]=='\0') - { - fprintf ( stderr, "\nERROR: thread_pair requires a threading method: pdb_pair@EP@EXECUTABLE2@ [FATAL:%s]\n", PROGRAM); - myexit (EXIT_FAILURE); - } - - sprintf ( seq, "%s", in_seq); - atoi(strtok (seq,SEPARATORS)); - s1=atoi(strtok (NULL,SEPARATORS)); - s2=atoi(strtok (NULL,SEPARATORS)); - - CL=thread_pair2(M,s1, s2, CL); - CL=thread_pair2(M,s2, s1, CL); - - return CL; - } - - - -Constraint_list* thread_pair2 ( TC_method *M, int s1, int s2, Constraint_list *CL) - { - char *result, *pep, *pdb, *pdb1; - Alignment *F=NULL; - Sequence *STL; - FILE *fp; - char command[10000]; - - STL=(CL)?CL->STRUC_LIST:NULL; - - - if ( !(CL->S) || !((CL->S)->T[s1]) || !((CL->S)->T[s1])->P || !seq2P_template_file(CL->S,s1))return CL; - else pdb1=seq2P_template_file(CL->S,s1); - - - pdb=vtmpnam (NULL); - result=vtmpnam (NULL); - pep=vtmpnam (NULL); - - sprintf ( command, "extract_from_pdb -infile %s -atom ALL -chain FIRST -nodiagnostic > %s", pdb1, pdb); - my_system (command); - - - - fp=vfopen (pep, "w"); - fprintf ( fp, ">%s\n%s\n",(CL->S)->name[s2],(CL->S)->seq[s2] ); - vfclose (fp); - sprintf ( command, "tc_generic_method.pl -mode=thread_pair -method=%s %s%s %s%s %s%s -tmpdir=%s", M->executable2,M->in_flag,pep, M->in_flag2,pdb,M->out_flag, result, get_tmp_4_tcoffee()); - my_system ( command); - F=main_read_aln (result, NULL); - - if ( !F) - { - fprintf ( stderr, "\n\tthread_pair/%s failed:\n\t%s\n", M->executable2,command); - } - else - { - sprintf ( F->name[0],"%s", (CL->S)->name[s1]); - sprintf ( F->name[1],"%s", (CL->S)->name[s2]); - F=fix_aln_seq (F, CL->S); - CL=aln2constraint_list (F, CL, "sim"); - } - - - free_aln (F); - return CL; - } - -Constraint_list * lsqman_pair ( char *in_seq, Constraint_list *CL) - { - FILE *fp; - static CLIST_TYPE *entry; - char command[STRING]; - char seq[1000]; - int s1, s2; - char *seq_file, *lsqman_result, *tmp_name1; - Alignment *F=NULL; - int n_failure=0; - - sprintf ( seq, "%s", in_seq); - - - if ( !entry)entry=vcalloc ( LIST_N_FIELDS, sizeof ( CLIST_TYPE )); - - atoi(strtok (seq,SEPARATORS)); - s1=atoi(strtok (NULL,SEPARATORS)); - s2=atoi(strtok (NULL,SEPARATORS)); - - - tmp_name1=vcalloc (100, sizeof (char)); - sprintf ( tmp_name1, "%s_%s.lsqman_aln", (CL->S)->name[s1], (CL->S)->name[s2]); - if ( check_file_exists ( tmp_name1) && (F=main_read_aln(tmp_name1, NULL))!=NULL) - { - free_aln(F); - lsqman_result=tmp_name1; - } - - else - { - seq_file=vtmpnam (NULL); - lsqman_result=tmp_name1; - fp=vfopen (seq_file, "w"); - fprintf ( fp, ">%s\n%s\n",(CL->S)->name[s1],(CL->S)->seq[s2] ); - vfclose (fp); - sprintf ( command, "%s -pdb %s -pep %s > %s%s", LSQMAN_4_TCOFFEE, (CL->S)->name[s1], seq_file,get_cache_dir(), lsqman_result); - - while (!F) - { - my_system ( command); - F=main_read_aln (lsqman_result, NULL); - if ( !F) - { - fprintf ( stderr, "\n\tlsqman failed: will be retried"); - if ( n_failure==0)fprintf ( stderr, "\n\t%s", command); - n_failure++; - if ( n_failure==10) - { - fprintf ( stderr, "\nCould not run Fugue: will replace it with slow_pair\n"); - vremove (lsqman_result); - return NULL; - } - } - free_aln (F); - } - vremove( seq_file); - - } - - - F=main_read_aln(lsqman_result, NULL); - /*sprintf ( F->name[0],"%s", (CL->S)->name[s1]); - sprintf ( F->name[1],"%s", (CL->S)->name[s2]); - */ - F=fix_aln_seq (F, CL->S); - - - - - - CL=aln2constraint_list (F, CL, "100"); - free_aln (F); - return CL; - } - -Constraint_list * sap_pair (char *seq, char *weight, Constraint_list *CL) - { - register int a; - FILE *fp; - char full_name[FILENAMELEN]; - char *tmp_pdb1, *tmp_pdb2; - char *sap_seq1, *sap_seq2; - char *sap_lib, *tmp_name, *tmp_name1, *tmp_name2; - char *buf=NULL; - int s1, s2, r1=0, r2=0; - int sim=0, tot=0, score=0; - - char program[STRING]; - char *string1, *string2, *string3, *string4, *string5; - int max_struc_len=10000; - char *template1, *template2; - - /*check_program_is_installed ( "sap" ,SAP_4_TCOFFEE, "SAP_4_TCOFFEE",MAIL, IS_FATAL);*/ - - - - - atoi(strtok (seq,SEPARATORS)); - s1=atoi(strtok (NULL,SEPARATORS)); - s2=atoi(strtok (NULL,SEPARATORS)); - - template1=seq2T_value(CL->S,s1, "template_name", "_P_"); - template2=seq2T_value(CL->S,s2, "template_name", "_P_"); - - - if (!template1 || !template2) return CL; - - - declare_name (string1); - declare_name (string2); - declare_name (string3); - declare_name (string4); - declare_name (string5); - -#ifndef SAP_4_TCOFFEE - if ( getenv ( "SAP_4_TCOFFEE")==NULL)crash ("SAP_4_TCOFFEE IS NOT DEFINED"); - else sprintf ( program, "%s", (getenv ( "SAP_4_TCOFFEE"))); -#else - if ( getenv ( "SAP_4_TCOFFEE")==NULL)sprintf (program, "%s", SAP_4_TCOFFEE); - else sprintf ( program, "%s", (getenv ( "SAP_4_TCOFFEE"))); -#endif - - - - tmp_name1=vcalloc (100, sizeof (char)); - sprintf ( tmp_name1, "%s_%s.sap_results",template1,template2); - tmp_name2=vcalloc (100, sizeof (char)); - sprintf ( tmp_name2, "%s_%s.sap_results",template2,template1); - - - - if (is_sap_file (tmp_name1)) - { - tmp_name=tmp_name1; - } - else if ( is_sap_file (tmp_name2)) - { - tmp_name=tmp_name2; - SWAP (s1, s2); - } - else - { - tmp_name=tmp_name1; - - tmp_pdb1=normalize_pdb_file(seq2P_template_file(CL->S,s1),(CL->S)->seq[s1], vtmpnam (NULL)); - tmp_pdb2=normalize_pdb_file(seq2P_template_file(CL->S,s2),(CL->S)->seq[s2], vtmpnam (NULL)); - sprintf ( full_name, "%s%s", get_cache_dir (), tmp_name); - printf_system ("%s %s %s >%s",program,tmp_pdb1,tmp_pdb2, full_name); - - if ( !check_file_exists (full_name) || !is_sap_file(full_name)) - { - add_warning ( stderr, "WARNING: SAP failed to align: %s against %s [%s:WARNING]\n", seq2P_template_file(CL->S,s1),seq2P_template_file(CL->S,s2), PROGRAM); - if ( check_file_exists (full_name))add2file2remove_list (full_name); - return CL; - } - if ( flag_file2remove_is_on())add2file2remove_list (full_name); - remove ("super.pdb"); - } - - - - sap_seq1=vcalloc (max_struc_len, sizeof (char)); - sap_seq2=vcalloc (max_struc_len, sizeof (char)); - - - fp=find_token_in_file ( tmp_name, NULL, "Percent"); - fp=find_token_in_file ( tmp_name, fp , "Percent"); - while ( (fgetc (fp))!='\n'); - while ((buf=vfgets (buf, fp))) - { - - if ( !strstr (buf, "eighted")) - { - remove_charset (buf, "!alnum"); - r1=buf[0]; - r2=buf[strlen(buf)-1]; - } - - sim+=(r1==r2)?1:0; - if ( tot>max_struc_len) - {max_struc_len+=max_struc_len; - sap_seq1=vrealloc ( sap_seq1, sizeof(char)*max_struc_len); - sap_seq2=vrealloc ( sap_seq2, sizeof(char)*max_struc_len); - } - - sap_seq1[tot]=r1; - sap_seq2[tot]=r2; - tot++; - } - sim=(sim*100)/tot; - - if ( is_number (weight))score=atoi(weight); - else if ( strstr ( weight, "OW")) - { - int ow; - sscanf ( weight, "OW%d", &ow); - score=sim*ow; - } - else - { - score=sim; - } - - - vfclose (fp); - sap_seq1[tot]=sap_seq2[tot]='\0'; - - - fp=vfopen ( sap_lib=vtmpnam(NULL), "w"); - fprintf (fp, "! TC_LIB_FORMAT_01\n"); - fprintf (fp, "2\n"); - fprintf (fp, "%s %d %s\n", (CL->S)->name[s2],(int)strlen (sap_seq1), sap_seq1); - fprintf (fp, "%s %d %s\n", (CL->S)->name[s1],(int)strlen (sap_seq2), sap_seq2); - fprintf (fp, "#1 2\n"); - - for ( a=0; a< tot; a++) - { - - fprintf (fp, "%d %d %d 1 0\n", a+1, a+1, score); - - } - fprintf (fp, "! CPU 0\n"); - fprintf (fp, "! SEQ_1_TO_N\n"); - vfclose (fp); - CL=read_constraint_list_file(CL,sap_lib); - vremove (sap_lib); - - vfree ( string1);vfree ( string2);vfree ( string3);vfree ( string4);vfree ( string5); - vfree (sap_seq1); vfree(sap_seq2);vfree (tmp_name1); vfree(tmp_name2); - vfree (buf); - return CL; - } - - - -Constraint_list *rna_pair (TC_method *M , - char *in_seq, - Constraint_list *CL) -{ - char seq[1000]; - int s1, s2; - char *result, *pdb1, *pdb2, *pdb1_file, *pdb2_file; - Alignment *F=NULL; - - - - char command[10000]; - - if ( M->executable2[0]=='\0') - { - fprintf ( stderr, "\nERROR: rna_pair requires a structural alignment method: pdb_pair@EP@EXECUTABLE2@ [FATAL:%s]\n", PROGRAM); - myexit (EXIT_FAILURE); - } - - sprintf ( seq, "%s", in_seq); - - - atoi(strtok (seq,SEPARATORS)); - s1=atoi(strtok (NULL,SEPARATORS)); - s2=atoi(strtok (NULL,SEPARATORS)); - - pdb1=seq2P_template_file(CL->S,s1); - pdb1_file=vtmpnam (NULL); - sprintf ( command, "extract_from_pdb -infile %s -atom ALL -chain FIRST -nodiagnostic > %s", pdb1, pdb1_file); - my_system (command); - // - pdb2=seq2P_template_file(CL->S,s2); - pdb2_file=vtmpnam (NULL); - sprintf ( command, "extract_from_pdb -infile %s -atom ALL -chain FIRST -nodiagnostic > %s", pdb2, pdb2_file); - my_system (command); - - result=vtmpnam (NULL); - - sprintf ( command, "tc_generic_method.pl -mode=rna_pair -method=%s %s%s %s%s %s%s -tmpdir=%s", M->executable2,M->in_flag,pdb1_file, M->in_flag2,pdb2_file,M->out_flag, result, get_tmp_4_tcoffee()); - - my_system ( command); - - F=main_read_aln (result, NULL); - - - if ( !F) - { - fprintf ( stderr, "\n\trna_pair/%s failed:\n\t%s\n",M->executable2, command); - } - else - { - sprintf ( F->name[0],"%s", (CL->S)->name[s1]); - sprintf ( F->name[1],"%s", (CL->S)->name[s2]); - F=fix_aln_seq (F, CL->S); - CL=aln2constraint_list (F, CL, "sim"); - } - - - free_aln (F); - return CL; -} - - -/******************************************************************/ -/* GENERIC PAIRWISE METHODS */ -/* */ -/* */ -/******************************************************************/ - - - -Constraint_list * best_pair4rna(Job_TC *job) -{ - int n,a; - - - static char *seq; - Alignment *A; - Constraint_list *PW_CL; - Constraint_list *CL, *RCL; - char *seq_in; - Sequence *S; - TC_method *M, *sara_pairM, *proba_pairM; - int*seqlist; - - int id, s1, s2; - Template *T1, *T2; - int ml=0; - struct X_template *r1, *r2, *p1, *p2; - static int **blosum; - - if (!seq)seq=vcalloc (100, sizeof (char)); - - A=(job->io)->A; - M=(job->param)->TCM; - CL=(job->io)->CL; - S=CL->S; - for (a=0; anseq; a++)ml=MAX(ml, strlen (S->name[a])); - - - if ( !strm ( retrieve_seq_type(), "RNA") ) - printf_exit (EXIT_FAILURE, stderr, "ERROR: RNA Sequences Only with best4rna_pair [FATAL:%s]\n",PROGRAM); - - - seq_in=(job->param)->seq_c; - sprintf (seq, "%s", seq_in); - seqlist=string2num_list (seq); - n=seqlist[1]; - if ( n!=2){fprintf ( stderr, "\nERROR: best_pair can only handle two seq at a time [FATAL]\n");myexit (EXIT_FAILURE);} - s1=seqlist[2]; - s2=seqlist[3]; - - T1=S->T[s1]; - T2=S->T[s2]; - r1=T1->R; - r2=T2->R; - p1=T1->P; - p2=T2->P; - - PW_CL=((job->param)->TCM)->PW_CL; - CL=(job->io)->CL; - - if (!blosum)blosum=read_matrice ("blosum62mt"); - -// id=idscore_pairseq (S->seq[s1], S->seq[s2],-10,-1,blosum, "sim"); - - - proba_pairM=method_file2TC_method(method_name2method_file ("proba_pair")); - proba_pairM->PW_CL=method2pw_cl(proba_pairM, CL); - - sara_pairM=method_file2TC_method(method_name2method_file ("sara_pair")); - sara_pairM->PW_CL=method2pw_cl(sara_pairM, CL); - - if ( p1 && p2) - { - //Avoid Structural Tem - T1->R=NULL; - T2->R=NULL; - fprintf ( stderr, "\n\t%-*s %-*s: Structure Based Alignment\n", ml,S->name[s1], ml,S->name[s2]); - (job->param)->TCM=sara_pairM; - } - else - { - fprintf ( stderr, "\n\t%-*s %-*s: Direct Sequence Alignment\n", ml,S->name[s1], ml,S->name[s2]); - (job->param)->TCM=proba_pairM; - } - - RCL=seq2list (job); - T1->R=r1; - T2->R=r2; - - return RCL; -} - - - - - - - - - - - - - - - -Constraint_list * best_pair4prot (Job_TC *job) -{ - int n,a; - - - static char *seq; - Alignment *A; - Constraint_list *PW_CL; - Constraint_list *CL, *RCL; - char *seq_in; - Sequence *S; - TC_method *M, *sap_pairM, *proba_pairM; - int*seqlist; - - int id, s1, s2; - Template *T1, *T2; - int ml=0; - struct X_template *r1, *r2, *p1, *p2; - static int **blosum; - - if (!seq)seq=vcalloc (100, sizeof (char)); - - A=(job->io)->A; - M=(job->param)->TCM; - CL=(job->io)->CL; - S=CL->S; - for (a=0; anseq; a++)ml=MAX(ml, strlen (S->name[a])); - - - if ( strm ( retrieve_seq_type(), "DNA") ||strm ( retrieve_seq_type(), "RNA") )printf_exit (EXIT_FAILURE, stderr, "ERROR: Protein Sequences Only with bestprot_pair [FATAL:%s]\n",PROGRAM); - - - seq_in=(job->param)->seq_c; - sprintf (seq, "%s", seq_in); - seqlist=string2num_list (seq); - n=seqlist[1]; - if ( n!=2){fprintf ( stderr, "\nERROR: best_pair can only handle two seq at a time [FATAL]\n");myexit (EXIT_FAILURE);} - s1=seqlist[2]; - s2=seqlist[3]; - - T1=S->T[s1]; - T2=S->T[s2]; - r1=T1->R; - r2=T2->R; - p1=T1->P; - p2=T2->P; - - PW_CL=((job->param)->TCM)->PW_CL; - CL=(job->io)->CL; - - if (!blosum)blosum=read_matrice ("blosum62mt"); - - id=idscore_pairseq (S->seq[s1], S->seq[s2],-10,-1,blosum, "sim"); - - - proba_pairM=method_file2TC_method(method_name2method_file ("proba_pair")); - proba_pairM->PW_CL=method2pw_cl(proba_pairM, CL); - - sap_pairM=method_file2TC_method(method_name2method_file ("sap_pair")); - sap_pairM->PW_CL=method2pw_cl(sap_pairM, CL); - - if ( id>80) - { - //Hide The Template - T1->R=NULL; - T2->R=NULL; - fprintf ( stderr, "\n\t%-*s %-*s: Direct Sequence Alignment\n", ml,S->name[s1], ml,S->name[s2]); - (job->param)->TCM=proba_pairM; - } - else if ( p1 && p2) - { - //Avoid Structural Tem - T1->R=NULL; - T2->R=NULL; - fprintf ( stderr, "\n\t%-*s %-*s: Structure Based Alignment\n", ml,S->name[s1], ml,S->name[s2]); - (job->param)->TCM=sap_pairM; - } - else if ( r1 || r2) - { - fprintf ( stderr, "\n\tt%-*s %-*s: PSIBLAST Profile Alignment\n", ml,S->name[s1], ml,S->name[s2]); - (job->param)->TCM=proba_pairM; - } - else - { - fprintf ( stderr, "\n\t%-*s %-*s: Direct Sequence Alignment (No Profile)\n", ml,S->name[s1], ml,S->name[s2]); - (job->param)->TCM=proba_pairM; - } - - RCL=seq2list (job); - T1->R=r1; - T2->R=r2; - - return RCL; -} - - -Alignment * fast_pair (Job_TC *job) - { - int s, n,a; - static int **l_s; - static int *ns; - char seq[1000]; - Alignment *A; - Constraint_list *PW_CL; - Constraint_list *CL; - char *seq_in; - Sequence *S; - TC_method *M; - int*seqlist; - char **buf; - - A=(job->io)->A; - - M=(job->param)->TCM; - PW_CL=((job->param)->TCM)->PW_CL; - CL=(job->io)->CL; - seq_in=(job->param)->seq_c; - - - sprintf (seq, "%s", seq_in); - seqlist=string2num_list (seq); - n=seqlist[1]; - if ( n!=2){fprintf ( stderr, "\nERROR: fast_pw_aln can only handle two seq at a time [FATAL]\n");myexit (EXIT_FAILURE);} - - S=(CL)->S; - - if (!A) {A=declare_aln (CL->S);} - if ( !ns) - { - ns=vcalloc ( 2, sizeof (int)); - l_s=declare_int (2,(CL->S)->nseq); - } - buf=vcalloc ( S->nseq, sizeof (char*)); - - for ( a=0; a< n; a++) - { - s=seqlist[a+2]; - if ( strm (M->seq_type, "G")) - { - buf[s]=S->seq[s]; - S->seq[s]=((((S->T[s])->G)->VG)->S)->seq[0]; - } - else - buf[s]=S->seq[s]; - - - sprintf ( A->seq_al[a], "%s",S->seq[s]); - sprintf ( A->name[a], "%s", (CL->S)->name[s]); - A->order[a][0]=s; - } - - A->S=CL->S; - PW_CL->S=CL->S; - A->CL=CL; - ns[0]=ns[1]=1; - l_s[0][0]=0; - l_s[1][0]=1; - - - if (PW_CL->reverse_seq) - { - invert_string2(A->seq_al[0]); - invert_string2(A->seq_al[1]); - invert_string2 ((CL->S)->seq[A->order[0][0]]); - invert_string2 ((CL->S)->seq[A->order[1][0]]); - - } - - pair_wise ( A, ns, l_s, PW_CL); - - if (PW_CL->reverse_seq) - { - - invert_string2(A->seq_al[0]); - invert_string2(A->seq_al[1]); - invert_string2 ((CL->S)->seq[A->order[0][0]]); - invert_string2 ((CL->S)->seq[A->order[1][0]]); - } - A->nseq=n; - - for ( a=0; anseq; a++) - { - if ( !buf[a] || buf[a]==S->seq[a]); - else S->seq[a]=buf[a]; - } - vfree (buf);vfree (seqlist); - return A; - - } -Alignment * align_two_aln ( Alignment *A1, Alignment *A2, char *in_matrix, int gop, int gep, char *in_align_mode) - { - Alignment *A=NULL; - Constraint_list *CL; - Sequence *S; - int a; - int *ns; - int **ls; - static char *matrix; - static char *align_mode; - - if (!matrix)matrix=vcalloc ( 100, sizeof (char)); - if (!align_mode)align_mode=vcalloc ( 100, sizeof (char)); - sprintf ( matrix, "%s", in_matrix); - sprintf ( align_mode, "%s", in_align_mode); - - CL=vcalloc ( 1, sizeof (Constraint_list)); - CL->pw_parameters_set=1; - CL->M=read_matrice (matrix); - CL->matrices_list=declare_char (10, 10); - - CL->evaluate_residue_pair=evaluate_matrix_score; - CL->get_dp_cost=consensus_get_dp_cost; - CL->normalise=1; - - CL->extend_jit=0; - CL->maximise=1; - CL->gop=gop; - CL->gep=gep; - CL->TG_MODE=2; - sprintf (CL->matrix_for_aa_group, "vasiliky"); - CL->use_fragments=0; - CL->ktup=5; - if ( !CL->use_fragments)CL->diagonal_threshold=0; - else CL->diagonal_threshold=6; - - sprintf (CL->dp_mode, "%s", align_mode); - - A=copy_aln (A1, A); - A=stack_aln (A, A2); - CL->S=fill_sequence_struc(A->nseq, A->seq_al,A->name); - - ns=vcalloc ( 2, sizeof(int)); - ls=declare_int ( 2,A->nseq); - ns[0]=A1->nseq; - ns[1]=A2->nseq; - for ( a=0; anseq; - - A->score_aln=pair_wise (A, ns, ls,CL); - - vfree (ns); - free_int (ls, -1); - S=free_constraint_list (CL); - free_sequence (S,-1); - A->S=NULL; - return A; - } - -static int align_two_seq_keep_case; -void toggle_case_in_align_two_sequences(int value) -{ - align_two_seq_keep_case=value; -} -Alignment * align_two_sequences ( char *seq1, char *seq2, char *in_matrix, int gop, int gep, char *in_align_mode) - { - static Alignment *A; - Constraint_list *CL; - Sequence *S; - - int *ns; - int **l_s; - - char **seq_array; - char **name_array; - static char *matrix; - static int **M; - - static char *align_mode; - - if (!matrix)matrix=vcalloc ( 100, sizeof (char)); - if (!align_mode)align_mode=vcalloc ( 100, sizeof (char)); - sprintf ( align_mode, "%s", in_align_mode); - - CL=vcalloc ( 1, sizeof (Constraint_list)); - - CL->pw_parameters_set=1; - - CL->matrices_list=declare_char (10, 10); - - - if ( !strm (matrix, in_matrix)) - { - sprintf ( matrix,"%s", in_matrix); - M=CL->M=read_matrice (matrix); - - } - else - { - CL->M=M; - } - - if (strstr (in_align_mode, "cdna")) - CL->evaluate_residue_pair=evaluate_cdna_matrix_score; - else - CL->evaluate_residue_pair=evaluate_matrix_score; - - CL->get_dp_cost=get_dp_cost; - CL->extend_jit=0; - CL->maximise=1; - CL->gop=gop; - CL->gep=gep; - CL->TG_MODE=2; - sprintf (CL->matrix_for_aa_group, "vasiliky"); - CL->use_fragments=0; - CL->ktup=3; - if ( !CL->use_fragments)CL->diagonal_threshold=0; - else CL->diagonal_threshold=6; - - sprintf (CL->dp_mode, "%s", align_mode); - - seq_array=declare_char ( 2, MAX(strlen(seq1), strlen (seq2))+1); - sprintf (seq_array[0], "%s",seq1); - sprintf (seq_array[1],"%s", seq2); - ungap_array(seq_array,2); - if (align_two_seq_keep_case !=KEEP_CASE)string_array_lower(seq_array,2); - - name_array=declare_char (2, STRING); - sprintf ( name_array[0], "A"); - sprintf ( name_array[1], "B"); - - - ns=vcalloc ( 2, sizeof(int)); - l_s=declare_int ( 2, 1); - ns[0]=ns[1]=1; - l_s[0][0]=0; - l_s[1][0]=1; - - - - CL->S=fill_sequence_struc(2, seq_array, name_array); - - A=seq2aln(CL->S, NULL, 1); - - ungap (A->seq_al[0]); - ungap (A->seq_al[1]); - - - - A->score_aln=pair_wise (A, ns, l_s,CL); - - vfree (ns); - free_int (l_s, -1); - free_char (name_array, -1);free_char ( seq_array,-1); - - CL->M=NULL; - S=free_constraint_list (CL); - free_sequence (S,-1); - A->S=NULL; - return A; - } - - -NT_node make_root_tree ( Alignment *A,Constraint_list *CL,int gop, int gep,Sequence *S, char *tree_file,int maximise) -{ - NT_node **T=NULL; - T=make_tree (A, CL, gop, gep,S,tree_file,maximise); - (T[3][0])->nseq=S->nseq; - return T[3][0]; -} -NT_node ** make_tree ( Alignment *A,Constraint_list *CL,int gop, int gep,Sequence *S, char *tree_file,int maximise) - { - int a, b, ra, rb; - NT_node **T=NULL; - int **distances; - int out_nseq; - char **out_seq_name; - char **out_seq; - - - if ( !CL || !CL->tree_mode || !CL->tree_mode[0]) - { - fprintf ( stderr, "\nERROR: No CL->tree_mode specified (make_tree::util_dp_drivers.c [FATAL:%s]", PROGRAM); - myexit (EXIT_FAILURE); - } - else - fprintf (CL->local_stderr , "\nMAKE GUIDE TREE \n\t[MODE=%s][",CL->tree_mode); - - - if ( A->nseq==2) - { - int tot_node; - char *tmp; - FILE *fp; - fprintf (CL->local_stderr, "---Two Sequences Only: Make Dummy Pair-Tree ---]"); - tmp=vtmpnam (NULL); - fp=vfopen (tmp,"w"); - fprintf ( fp, "(%s:0.1, %s:0.1):0.1;\n",S->name[0], S->name[1]); - vfclose (fp); - T=read_tree (tmp, &tot_node, (CL->S)->nseq,(CL->S)->name); - - return T; - } - else if ( strm (CL->tree_mode, "cwph")) - { - return seq2cw_tree ( S, tree_file); - } - else if (strm ( CL->tree_mode, "upgma") || strm ( CL->tree_mode, "nj")) - { - out_nseq=S->nseq; - out_seq_name=S->name; - out_seq=S->seq; - - CL->DM=cl2distance_matrix (CL, NOALN,NULL,NULL,0); - - if ( CL->S!=S) - { - /*Shrink the distance matrix so that it only contains the required sequences*/ - distances=declare_int (S->nseq, S->nseq); - for (a=0; a< S->nseq; a++) - { - ra=name_is_in_list ((S)->name[a],(CL->S)->name, (CL->S)->nseq, 100); - for ( b=0; b< S->nseq; b++) - { - rb=name_is_in_list ((S)->name[b],(CL->S)->name, (CL->S)->nseq, 100); - distances[a][b]=(CL->DM)->score_similarity_matrix[ra][rb]; - } - } - } - else - { - distances=duplicate_int ( (CL->DM)->score_similarity_matrix, -1, -1); - } - - - - distances=sim_array2dist_array (distances, MAXID*SCORE_K); - distances=normalize_array (distances, MAXID*SCORE_K, 100); - if ( strm (CL->tree_mode, "order")) - { - for ( a=0; a< S->nseq; a++) - for ( b=0; b< S->nseq; b++) - distances[b][a]=100; - T=make_nj_tree (A,distances,gop,gep,out_seq,out_seq_name,out_nseq, tree_file, CL->tree_mode); - } - else if ( strm (CL->tree_mode, "nj")) - { - T=make_nj_tree (A,distances,gop,gep,out_seq,out_seq_name,out_nseq, tree_file, CL->tree_mode); - } - else if ( strm (CL->tree_mode, "upgma")) - T=make_upgma_tree (A,distances,gop,gep,out_seq,out_seq_name,out_nseq, tree_file, CL->tree_mode); - else - { - printf_exit (EXIT_FAILURE, stderr, "ERROR: %s is an unknown tree computation mode [FATAL:%s]", CL->tree_mode, PROGRAM); - } - free_int (distances, out_nseq); - - } - - fprintf (CL->local_stderr , "DONE]\n"); - return T; - } - - -Alignment *recompute_local_aln (Alignment *A, Sequence *S,Constraint_list *CL, int scale, int gep) - { - int **coor; - int a; - Alignment *B; - - sort_constraint_list (CL, 0, CL->ne); - coor=declare_int (A->nseq, 3); - for ( a=0; a< A->nseq; a++) - { - coor[a][0]=A->order[a][0]; - coor[a][1]=A->order[a][1]+1; - coor[a][2]=strlen(S->seq[A->order[a][0]])-coor[a][1]; - } - B=stack_progressive_nol_aln_with_seq_coor(CL,0,0,S,coor,A->nseq); - A=copy_aln ( B, A); - - CL=compact_list (CL, 0,CL->ne, "shrink"); - free_Alignment(B); - return A; - } - - -Alignment *stack_progressive_nol_aln_with_seq_coor(Constraint_list *CL,int gop, int gep,Sequence *S, int **seq_coor, int nseq) - { - - static int ** local_coor1; - static int ** local_coor2; - if ( local_coor1!=NULL)free_int (local_coor1, -1); - if ( local_coor2!=NULL)free_int (local_coor2, -1); - - local_coor1=get_nol_seq ( CL,seq_coor, nseq, S); - local_coor2=minimise_repeat_coor ( local_coor1, nseq, S); - - return stack_progressive_aln_with_seq_coor(CL,gop, gep,S, local_coor2,nseq); - } - - -Alignment *stack_progressive_aln_with_seq_coor (Constraint_list*CL,int gop, int gep, Sequence *S, int **coor, int nseq) - { - Alignment *A=NULL; - - A=seq_coor2aln (S,NULL, coor, nseq); - - return stack_progressive_aln ( A,CL, gop, gep); - } - -Alignment *est_progressive_aln(Alignment *A, Constraint_list *CL, int gop, int gep) - { - int a,n; - int**group_list; - int *n_groups; - char *seq; - n_groups=vcalloc ( 2, sizeof (int)); - group_list=declare_int ( 2, A->nseq); - - n=A->nseq; - - n_groups[0]=1; - n_groups[1]=1; - group_list[0][0]=0; - group_list[0][1]=1; - - group_list[1][0]=1; - fprintf ( stderr, "\n"); - for ( a=1; aseq_al[1], "%s", A->seq_al[a]); - fprintf ( stderr, "\t[%30s]->[len=%5d]", A->name[a],(int)strlen ( A->seq_al[0])); - pair_wise ( A,n_groups, group_list, CL); - - seq=dna_aln2cons_seq(A); - - sprintf ( A->seq_al[0], "%s", seq); - vfree (seq); - fprintf ( stderr, "\n"); - } - - A->nseq=1; - return A; - } - -void analyse_seq ( Alignment *A, int s) - { - int a, b, c; - int r; - - int len=0; - - - int state=0; - int pstate=-1; - float score=0; - - for ( a=0; a< A->len_aln; a++) - { - for ( b=0, c=0; b< s; b++) - if ( !is_gap(A->seq_al[b][a])){c=1; break;} - - r=!is_gap(A->seq_al[s][a]); - - if ( r && c) state=1; - else if ( !r && !c) state=2; - else if ( !r && c) state=3; - else if ( r && !c) state=4; - - if ( state !=pstate) - { - score+=len*len; - len=0; - } - len+=r; - pstate=state; - } - score=score/(float)(((A->S)->len[s]*(A->S)->len[s])); - fprintf ( stderr, "[%.2f]", score); - - return; - } - -Alignment *realign_aln ( Alignment*A, Constraint_list *CL) -{ - int a, b, c; - int *ns, **ls; - A=reorder_aln (A, (CL->S)->name,(CL->S)->nseq); - - ns=vcalloc (2, sizeof(int)); - ls=declare_int ( 2, A->nseq); - - for (a=0; a< A->nseq; a++) - { - ns[0]=A->nseq-1; - for (c=0,b=0; bnseq; b++)if (b!=a)ls[0][c++]=b; - ungap_sub_aln ( A, ns[0], ls[0]); - - ns[1]=1; - ls[1][0]=a; - ungap_sub_aln ( A, ns[1], ls[1]); - A->score_aln=pair_wise (A, ns, ls,CL); - } - - vfree (ns); free_int (ls, -1); - return A; -} - -Alignment *realign_aln_random_bipart ( Alignment*A, Constraint_list *CL) -{ - int *ns; - int **l_s; - - int a,g; - - ns=vcalloc (2, sizeof (int)); - l_s=declare_int (2,A->nseq); - - for ( a=0; a< A->nseq; a++) - { - g=rand()%2; - l_s[g][ns[g]++]=a; - } - - fprintf ( stderr, "\n"); - ungap_sub_aln ( A, ns[0], l_s[0]); - ungap_sub_aln ( A, ns[1], l_s[1]); - - /* //Display Groups - for (a=0;a<2; a++) - for (b=0; bscore_aln=pair_wise (A, ns, l_s,CL); - - vfree(ns);free_int(l_s, -1); - return A; -} -Alignment *realign_aln_random_bipart_n ( Alignment*A, Constraint_list *CL, int n) -{ - int *ns; - int **ls; - int *used; - - int a,b,c, p; - - if (n>=A->nseq)n=A->nseq/2; - used=vcalloc (A->nseq, sizeof (int)); - c=0; - while (cnseq; - if (!used[p]){used[p]=1;c++;} - } - ns=vcalloc (2, sizeof (int)); - ls=declare_int (2,A->nseq); - for (a=0; a<2; a++) - { - for (b=0; bnseq; b++) - if (used[b]==a)ls[a][ns[a]++]=b; - } - ungap_sub_aln ( A, ns[0], ls[0]); - ungap_sub_aln ( A, ns[1], ls[1]); - - - A->score_aln=pair_wise (A, ns, ls,CL); - vfree(ns);free_int(ls, -1);vfree (used); - return A; -} -int ** seq2ecl_mat (Constraint_list *CL); -int ** seq2ecl_mat (Constraint_list *CL) - -{ - int a, b, n; - - Alignment *A; - int *ns, **ls; - int **dm; - - ns=vcalloc (2, sizeof (int)); - ls=declare_int ((CL->S)->nseq, 2); - - A=seq2aln (CL->S,NULL, RM_GAP); - n=(CL->S)->nseq; - dm=declare_int (n, n); - for (a=0; a<(CL->S)->nseq-1; a++) - for (b=a+1; b<(CL->S)->nseq; b++) - { - ns[0]=ns[1]=1; - ls[0][0]=a; - ls[1][0]=b; - ungap (A->seq_al[a]); - ungap (A->seq_al[b]); - dm[a][b]=dm[b][a]=linked_pair_wise (A, ns, ls, CL); - } - - return dm; -} -Alignment *realign_aln_clust ( Alignment*A, Constraint_list *CL) -{ - int *ns; - int **ls; - - int a, b, c,n; - static int **rm, **dm, **target; - int score; - - - - if (!A) - { - free_int (dm, -1); free_int (rm, -1);free_int (target, -1); - dm=rm=target=NULL; - } - - - if (!rm)rm=seq2ecl_mat(CL); - if (!dm)dm=declare_int (A->nseq, A->nseq); - if (!target)target=declare_int (A->nseq*A->nseq, 3); - - ns=vcalloc (2, sizeof (int)); - ls=declare_int (2,A->nseq); - - - for (a=0; anseq-1; a++) - for (b=a+1; bnseq; b++) - { - ns[0]=2; - ls[0][0]=a; - ls[0][1]=b; - score=sub_aln2ecl_raw_score (A, CL, ns[0], ls[0]); - dm[a][b]=dm[b][a]=MAX(0,(rm[a][b]-score)); - } - for (n=0,a=0; anseq; a++) - { - for (b=a; bnseq; b++, n++) - { - - target[n][0]=a; - target[n][1]=b; - for ( c=0; cnseq; c++) - { - if (c!=a && c!=b)target[n][2]+=dm[a][c]+dm[b][c]; - } - } - } - sort_int_inv (target,3, 2, 0, n-1); - - for (a=0; anseq; a++) - { - if (target[a][0]==target[a][1]) - { - ns[0]=1; - ls[0][0]=target[a][0]; - } - else - { - ns[0]=2; - ls[0][0]=target[a][0]; ls[0][1]=target[a][1]; - } - - for (ns[1]=0,b=0; bnseq; b++) - { - if (b!=target[a][0] && b!=target[a][1])ls[1][ns[1]++]=b; - } - - ungap_sub_aln (A, ns[0], ls[0]); - ungap_sub_aln (A, ns[1], ls[1]); - - A->score_aln=pair_wise (A, ns, ls,CL); - fprintf ( stderr, "\nSEQ: %d %d SCORE=%d\n",target[a][0],target[a][1], aln2ecl_raw_score(A, CL)); - } - return A; -} - -int get_best_group ( int **used, Constraint_list *CL); -int seq_aln_thr1(Alignment *A, int **used, int threshold, Constraint_list *CL); -int seq_aln_thr2( Alignment*A, int **used, int threshold, int g, Constraint_list *CL); - -int get_best_group ( int **used, Constraint_list *CL) -{ - int a,b,c,d,n, tot,stot, best_tot, best_seq, nseq; - int ns[2]; - int *ls[2]; - - best_seq=0; - nseq=((CL->S)->nseq); - tot=best_tot=0; - for (a=0; aDM)->similarity_matrix[ls[0][c]][ls[1][d]]; - } - if (n>0)stot/=n; - tot+=stot; - } - if (tot>best_tot) - { - best_tot=tot; - best_seq=a; - } - } - return best_seq; -} - - -char ** list_file2dpa_list_file (char **list, int *len,int maxnseq, Sequence *S) -{ - char **nlist, **profile_list; - int nl, l, a, np, has_lib; - char *seq, *profile; - Alignment *A, *F; - - - nlist=declare_char (read_array_size_new ((void *)list), read_array_size_new ((void *)list[0])); - nl=0; - - profile_list=declare_char (read_array_size_new ((void *)list), read_array_size_new ((void *)list[0])); - np=0; - - l=len[0]; - for (a=0; anseq; a++)fprintf ( fp, ">%s %s\n%s\n", A->name[a],A->seq_comment[a], A->seq_al[a]); - vfclose (fp); - - sprintf (profile_list[np++], "%s", file); - A=A->A; - } - free_aln (F); - - for (has_lib=0,a=0; alocal_stderr, "\n##### DPA ##### Compute Fast Alignment"); - A=iterative_tree_aln (A,1, CL); - fprintf (CL->local_stderr, "\n##### DPA ##### Identify Nodes"); - P=make_root_tree (A, CL, CL->gop, CL->gep,CL->S,NULL, 1); - set_node_score (A, P, "idmat_sim"); - fprintf (CL->local_stderr, "\n##### DPA ##### Split Nodes"); - list=split_nodes_nseq (A,P,N, list=vcalloc (P->nseq*200, sizeof (char))); - - list2=string2list (list); - fprintf (CL->local_stderr, "\n##### DPA ##### Save Nodes"); - - F=A; - for (a=1; aA=main_read_aln(list2[a], NULL); - A=A->A; - } - fprintf (CL->local_stderr, "\n##### DPA ##### Finished"); - vfree (list); free_char (list2, -1); - - A=F; - while (A) - { - A=A->A; - } - - return F; -} - - - - -Alignment * seq_aln ( Alignment*A, int n,Constraint_list *CL) -{ - - int **used, a, t,n1, nseq; - - - n1=nseq=(CL->S)->nseq; - used=declare_int (nseq, nseq+3); - - - for (a=0; a< nseq; a++) - { - used[a][1]=1; - used[a][2]=a; - } - - - for (t=50; t>=0 && nseq>1; t-=5) - { - nseq=seq_aln_thr1 (A, used,t, CL); - } - - vfree (used); - return A; -} - -int seq_aln_thrX(Alignment *A, int **used, int threshold, Constraint_list *CL) -{ - int n=0,a; - seq_aln_thr1(A,used,threshold,CL); - for ( a=0; a< (CL->S)->nseq; a++) - n+=(used[a][1]>0)?1:0; - - return n; -} -int seq_aln_thr1(Alignment *A, int **used, int threshold, Constraint_list *CL) -{ - int a,g, nseq, n_groups; - nseq=(CL->S)->nseq; - - g=get_best_group(used, CL); - - used[g][0]=1; - - - - while ( seq_aln_thr2 (A, used, threshold,g, CL)!=0) - { - g=get_best_group (used, CL); - used[g][0]=1; - } - - for (n_groups=0,a=0; a< nseq; a++) - if ( used[a][1]!=0) - { - n_groups++; - used[a][0]=0; - } - return n_groups; -} - - -int seq_aln_thr2( Alignment*A, int **used, int threshold, int g, Constraint_list *CL) -{ - int a, b,c,d; - int ns[2], *ls[2]; - int nseq, n_members; - double sim; - - n_members=0; - - nseq=((CL->S)->nseq); - used[g][0]=1; - ns[0]=used[g][1]; - ls[0]=used[g]+2; - - for ( a=0; a< nseq; a++) - { - if (used[a][0]!=0); - else - { - ns[1]=used[a][1]; - ls[1]=used[a]+2; - - ungap_sub_aln (A, ns[0], ls[0]); - ungap_sub_aln (A, ns[1], ls[1]); - - A->score_aln=pair_wise (A, ns, ls,CL); - - for (sim=0,b=0; bseq_al[ls[0][b]], A->seq_al[ls[1][c]], NULL,"idmat_sim2"); - } - } - sim/=(double)(ns[0]*ns[1]); - if (sim>=threshold) - { - - used[g][1]+=ns[1]; - for (d=0; d0)used[g][0]=-1; - return n_members; -} -/****************************************************************************/ -/* */ -/* */ -/* Alignment Methods */ -/* */ -/* */ -/****************************************************************************/ - -Alignment * tsp_aln (Alignment *A, Constraint_list *CL, Sequence *S) -{ - int a, b ; - int ** distances; - int *ns, **ls; - int **used; - - A=reorder_aln (A, (CL->S)->name,(CL->S)->nseq); - ns=vcalloc (2, sizeof (int)); - ls=declare_int (2, (CL->S)->nseq); - used=declare_int ( A->nseq, 2); - - - CL->DM=cl2distance_matrix (CL, NOALN,NULL,NULL,0); - distances=declare_int (A->nseq+1, A->nseq+1); - distances=duplicate_int ( (CL->DM)->score_similarity_matrix, -1, -1); - - for (a=0; a< A->nseq; a++) - { - used[a][0]=a; - for (b=0; b< A->nseq; b++) - { - used[a][1]+=distances[a][b]; - } - } - - sort_int_inv (used,2,1,0,(CL->S)->nseq-1); - - ls[0][ns[0]++]=used[0][0]; - ns[1]=1; - - for (a=1; a< S->nseq; a++) - { - fprintf ( stderr, "\n%s %d", (CL->S)->name[used[a][0]], used[a][1]); - ls[1][0]=used[a][0]; - pair_wise ( A,ns,ls, CL); - ls[0][ns[0]++]=used[a][0]; - } - - A->nseq=(CL->S)->nseq; - return A; - -} - -Alignment *stack_progressive_aln(Alignment *A, Constraint_list *CL, int gop, int gep) - { - int a,n; - int**group_list; - int *n_groups; - char dp_mode[100]; - - - sprintf ( dp_mode, "%s", CL->dp_mode); - sprintf (CL->dp_mode, "gotoh_pair_wise"); - - n_groups=vcalloc ( 2, sizeof (int)); - group_list=declare_int ( 2, A->nseq); - - n=A->nseq; - - for ( a=0; aseq_al[a]); - for ( a=1; a[%d]", a,(int)strlen ( A->seq_al[0])); - } - fprintf (stderr, "\n"); - vfree(n_groups); - free_int ( group_list, -1); - sprintf (CL->dp_mode, "%s",dp_mode); - - return A; - } -Alignment *realign_aln_clust ( Alignment*A, Constraint_list *CL); -Alignment *realign_aln_random_bipart_n ( Alignment*A, Constraint_list *CL, int n); -Alignment *iterate_aln ( Alignment*A, int nit, Constraint_list *CL) -{ - int it; - int mode=1; - int score, iscore, delta; - fprintf ( CL->local_stderr, "Iterated Refinement: %d cycles START: score= %d\n", nit,iscore=aln2ecl_raw_score (A, CL) ); - - - if ( nit==-1)nit=A->nseq*2; - if ( A->len_aln==0)A=very_fast_aln (A, A->nseq, CL); - A=reorder_aln (A,(CL->S)->name, A->nseq); - - for (it=0; it< nit; it++) - { - //CL->local_stderr=output_completion (CL->local_stderr,it, nit,1, ""); - if (mode==0)A=realign_aln (A, CL); - else if (mode ==1)A=realign_aln_random_bipart (A, CL); - else if (mode ==2)A=realign_aln_clust (A, CL); - else if (mode ==3)A=realign_aln_random_bipart_n (A, CL,2); - - - score=aln2ecl_raw_score (A, CL); - delta=iscore-score; - fprintf (CL->local_stderr, "\n\tIteration Cycle: %d Score=%d Improvement= %d", it+1,score, delta); - } - fprintf ( CL->local_stderr, "\nIterated Refinement: Completed Improvement=%d\n", delta); - return A; -} - -int get_next_best (int seq, int nseq, int *used, int **dm); -int get_next_best (int seq, int nseq, int *used, int **dm) -{ - int a,set, d, bd, bseq; - - for (set=0,a=0; a< nseq; a++) - { - if (used[a] || seq==a)continue; - d=dm[seq][a]; - if (set==0 || d>bd) - { - bseq=a; - bd=d; - set=1; - } - } - return bseq; -} -Alignment * full_sorted_aln (Alignment *A, Constraint_list *CL) -{ - int a,b; - A=sorted_aln_seq (0, A, CL); - print_aln(A); - for (a=1; anseq; a++) - { - A=A->A=copy_aln (A, NULL); - for (b=0; bnseq; b++)ungap(A->seq_al[b]); - A=sorted_aln_seq (a, A, CL); - print_aln(A); - } - return A; -} -Alignment * sorted_aln (Alignment *A, Constraint_list *CL) -{ - return sorted_aln_seq (-1, A, CL); -} -Alignment * sorted_aln_seq (int new_seq, Alignment *A, Constraint_list *CL) -{ - int a, b=0, nseq; - int *ns, **ls, **score, *used, **dm; - int old_seq; - - dm=(CL->DM)->score_similarity_matrix; - nseq=(CL->S)->nseq; - score=declare_int (nseq, 3); - used=vcalloc (nseq, sizeof (int)); - ls=declare_int (2, nseq); - ns=vcalloc (2, sizeof (int)); - - - if ( new_seq==-1) - { - for (a=0; ascore_aln=pair_wise (A, ns, ls,CL); - - } - return A; -} - -Alignment * ungap_aln4tree (Alignment *A); -Alignment * ungap_aln4tree (Alignment *A) -{ - int t, n, max_sim, sim; - Alignment *B; - - - - n=35; - max_sim=60; - - t=A->len_aln/10; - - B=copy_aln (A, NULL); - B=ungap_aln_n(B, n); - return B; - - sim=aln2sim (B, "idmat"); - while (B->len_alnmax_sim && n>0) - { - n-=10; - B=copy_aln (A, B); - B=ungap_aln_n(B, n); - sim=aln2sim (B, "idmat"); - } - if ( B->len_alnmax_sim)B=copy_aln (A, B); - return B; -} - - - -Alignment * iterative_tree_aln (Alignment *A,int n, Constraint_list *CL) -{ - NT_node **T=NULL; - int a; - - T=make_tree (A, CL, CL->gop, CL->gep,CL->S,NULL, 1); - tree_aln ((T[3][0])->left,(T[3][0])->right,A,(CL->S)->nseq, CL); - for ( a=0; a< n; a++) - { - - Alignment *B; - - B=copy_aln (A, NULL); - B=ungap_aln_n (B, 20); - sprintf ( CL->distance_matrix_mode, "aln"); - - CL->DM=cl2distance_matrix ( CL,B,NULL,NULL, 1); - free_aln (B); - - degap_aln (A); - T=make_tree (A, CL, CL->gop, CL->gep,CL->S,NULL, 1); - - tree_aln ((T[3][0])->left,(T[3][0])->right,A,(CL->S)->nseq, CL); - } - return A; -} - -Alignment *profile_aln (Alignment *A, Constraint_list *CL) -{ - int a,nseq,nseq2; - int **ls, *ns; - nseq=A->nseq; - nseq2=2*nseq; - ls=declare_int (2, nseq2); - ns=vcalloc (2, sizeof (int)); - - A=realloc_aln2(A,nseq2, A->len_aln); - for (a=0; a< nseq; a++) - ls[0][ns[0]++]=a; - for ( a=0; aseq_al[a+nseq], "%s", (CL->S)->seq[a]); - sprintf (A->name[a+nseq], "%s", (CL->S)->name[a]); - A->order[a+nseq][0]=a; - } - - ns[1]=1; - for (a=0; ascore_aln=pair_wise (A, ns, ls,CL); - - ls[0][ns[0]++]=a+nseq; - } - for (a=0; a< nseq; a++) - { - sprintf (A->seq_al[a], "%s", A->seq_al[a+nseq]); - } - A->nseq=nseq; - return A; -} - -Alignment * iterative_aln ( Alignment*A, int n,Constraint_list *CL) -{ - int *ns,**ls, **score, **dm; - int a,b, nseq, max; - ls=declare_int (2, A->nseq); - ns=vcalloc (2, sizeof (int)); - ls[0][ns[0]++]=0; - - - - - - nseq=(CL->S)->nseq; - score=declare_int (nseq,2); - dm=(CL->DM)->score_similarity_matrix; - for (a=0; aS)->name[score[a][0]],A->name[score[a][0]], score[a][1]); - ns[1]=1; - ls[1][0]=score[a][0]; - ungap_sub_aln ( A, ns[0], ls[0]); - ungap_sub_aln ( A, ns[1], ls[1]); - A->score_aln=pair_wise (A, ns, ls,CL); - ls[0][ns[0]++]=a; - } - - return A; -} -Alignment *simple_progressive_aln (Sequence *S, NT_node **T, Constraint_list *CL, char *mat) -{ - int a; - Alignment *A; - - - A=seq2aln (S, NULL, RM_GAP); - - if ( !CL) - { - - CL=declare_constraint_list (S, NULL, NULL, 0, NULL, NULL); - sprintf ( CL->dp_mode, "myers_miller_pair_wise"); - sprintf ( CL->tree_mode, "nj"); - sprintf ( CL->distance_matrix_mode, "idscore"); - CL=choose_extension_mode ("matrix", CL); - CL->gop=-10; - CL->gep=-1; - if (mat)CL->M=read_matrice (mat); - CL->pw_parameters_set=1; - CL->local_stderr=stderr; - } - - if ( !T)T=make_tree (A, CL, CL->gop, CL->gep,S, NULL,MAXIMISE); - for ( a=0; a< A->nseq; a++)ungap (A->seq_al[a]); - - tree_aln ((T[3][0])->left,(T[3][0])->right,A,(CL->S)->nseq, CL); - A=reorder_aln ( A,A->tree_order,A->nseq); - - return A; -} - -Alignment *very_fast_aln ( Alignment*A, int nseq, Constraint_list *CL) -{ -char command[10000]; -char *tmp_seq; -char *tmp_aln; -FILE *fp; - -if ( CL && CL->local_stderr)fp=CL->local_stderr; -else fp=stderr; - - fprintf (fp, "\n[Computation of an Approximate MSA..."); - tmp_seq= vtmpnam (NULL); - tmp_aln= vtmpnam (NULL); - output_fasta_seq ((tmp_seq=vtmpnam (NULL)), A); - sprintf ( command, "t_coffee -infile=%s -special_mode quickaln -outfile=%s %s -outorder=input", tmp_seq, tmp_aln, TO_NULL_DEVICE); - my_system ( command); - A->nseq=0; - A=main_read_aln (tmp_aln,A); - fprintf (fp, "]\n"); - return A; -} - -static NT_node* SNL; -NT_node* tree_aln ( NT_node LT, NT_node RT, Alignment*A, int nseq, Constraint_list *CL) -{ - if ( strm ((CL->TC)->use_seqan, "NO"))return local_tree_aln (LT, RT, A, nseq, CL); - else return seqan_tree_aln (LT, RT, A, nseq, CL); -} - -NT_node* seqan_tree_aln ( NT_node LT, NT_node RT, Alignment*A, int nseq, Constraint_list *CL) - { - - - Alignment *B; - - - char *tree, *lib, *seq, *new_aln; - - - //Output tree - tree=vtmpnam (NULL); - print_newick_tree (LT->parent, tree); - - - //Output seq - main_output_fasta_seq (seq=vtmpnam (NULL),B=seq2aln (CL->S,NULL,RM_GAP), NO_HEADER); - free_aln (B); - - //Output lib - new_aln=vtmpnam (NULL); - vfclose (save_constraint_list ( CL, 0, CL->ne,lib=vtmpnam(NULL), NULL, "ascii",CL->S)); - - fprintf (CL->local_stderr, "\n********* USE EXTERNAL ALIGNER: START:\n\tCOMMAND: %s -lib %s -seq %s -usetree %s -outfile %s\n", (CL->TC)->use_seqan,lib, seq, tree, new_aln); - printf_system ( "%s -lib %s -seq %s -usetree %s -outfile %s", (CL->TC)->use_seqan,lib, seq, tree, new_aln); - fprintf (CL->local_stderr, "\n********* USE EXTERNAL ALIGNER: END\n"); - - - main_read_aln (new_aln, A); - return tree2ao (LT,RT, A, A->nseq, CL); - - - - } -NT_node rec_local_tree_aln ( NT_node P, Alignment*A, Constraint_list *CL, int print); -NT_node* local_tree_aln ( NT_node l, NT_node r, Alignment*A,int nseq, Constraint_list *CL) -{ - int a; - NT_node P, *NL; - int **min=NULL; - - if (!r && !l) return NULL; - else if (!r)P=l; - else if (!l)P=r; - else P=r->parent; - - fprintf ( CL->local_stderr, "\nPROGRESSIVE_ALIGNMENT [Tree Based]\n"); - - //1: make sure the Alignment and the Sequences are labeled the same way - if (CL->translation)vfree (CL->translation); - CL->translation=vcalloc ( (CL->S)->nseq, sizeof (int)); - for ( a=0; a< (CL->S)->nseq; a++) - CL->translation[a]=name_is_in_list ( (CL->S)->name[a], (CL->S)->name, (CL->S)->nseq, MAXNAMES); - A=reorder_aln (A, (CL->S)->name,(CL->S)->nseq); - A->nseq=(CL->S)->nseq; - - //2 Make sure the tree is in the same order - recode_tree (P, (CL->S)); - index_tree_node(P); - - if ( get_nproc()>1 && strstr (CL->multi_thread, "msa")) - { - int max_fork; - max_fork=get_nproc()/2;//number of nodes forked, one node =>two jobs - tree2nnode (P); - NL=tree2node_list (P, NULL); - min=declare_int (P->node+1,3); - for (a=0; a<=P->node; a++) - { - NT_node N; - N=NL[a]; - min[a][0]=a; - if (!N); - else if (N && N->nseq==1)min[a][1]=0; - else - { - min[a][1]=MIN(((N->left)->nseq),((N->right)->nseq))*A->nseq+MAX(((N->left)->nseq),((N->right)->nseq));//sort on min and break ties on max - min[a][2]=MIN(((N->left)->nseq),((N->right)->nseq)); - } - } - sort_int_inv (min,3, 1, 0, P->node); - for (a=0; a<=P->node && a1)(NL[min[a][0]])->fork=1; - } - } - free_int (min, -1); - rec_local_tree_aln (P, A,CL, 1); - for (a=0; anseq; a++)sprintf (A->tree_order[a], "%s", (CL->S)->name[P->lseq[a]]); - A->len_aln=strlen (A->seq_al[0]); - - fprintf ( CL->local_stderr, "\n\n"); - - return NULL; -} - -NT_node rec_local_tree_aln ( NT_node P, Alignment*A, Constraint_list *CL,int print) -{ - NT_node R,L; - int score; - - - if (!P || P->nseq==1) return NULL; - R=P->right;L=P->left; - - if (P->fork ) - { - int s, pid1, pid2; - char *tmp1, *tmp2; - tmp1=vtmpnam (NULL); - tmp2=vtmpnam (NULL); - - pid1=vfork(); - if (pid1==0) - { - if (print==1) - if (L->nseq>R->nseq)print=1; - - initiate_vtmpnam (NULL); - rec_local_tree_aln (L, A, CL, print); - dump_msa (tmp1,A, L->nseq, L->lseq); - exit (EXIT_SUCCESS); - } - else - { - pid2=vfork(); - if (pid2==0) - { - if (print==1) - if (L->nseq>R->nseq)print=0; - - - initiate_vtmpnam (NULL); - rec_local_tree_aln (R, A, CL, print); - dump_msa (tmp2, A, R->nseq, R->lseq); - exit (EXIT_SUCCESS); - } - } - vwaitpid (pid1, &s, 0); - vwaitpid (pid2, &s, 0); - - - undump_msa (A,tmp1); - undump_msa (A,tmp2); - } - else - { - rec_local_tree_aln (L, A, CL, print); - rec_local_tree_aln (R, A, CL, print); - } - - P->score=A->score_aln=score=profile_pair_wise (A,L->nseq, L->lseq,R->nseq,R->lseq,CL); - A->len_aln=strlen (A->seq_al[P->lseq[0]]); - score=node2sub_aln_score (A, CL, CL->evaluate_mode,P); - if (print)fprintf(CL->local_stderr, "\n\tGroup %4d: [Group %4d (%4d seq)] with [Group %4d (%4d seq)]-->[Score=%4d][Len=%5d][PID:%d]%s",P->index,R->index,R->nseq,L->index,L->nseq,score, A->len_aln,getpid(),(P->fork==1)?"[Forked]":"" ); - - return P; -} - - - -NT_node* tree2ao ( NT_node LT, NT_node RT, Alignment*A, int nseq, Constraint_list *CL) - { - int *n_s; - int ** l_s; - int a, b; - - static int n_groups_done, do_split=0; - int nseq2align=0; - int *translation; - - - NT_node P=NULL; - - - - - if (n_groups_done==0) - { - if (SNL)vfree(SNL); - SNL=vcalloc ( (CL->S)->nseq, sizeof (NT_node)); - - if (CL->translation)vfree(CL->translation); - CL->translation=vcalloc ( (CL->S)->nseq, sizeof (int)); - - for ( a=0; a< (CL->S)->nseq; a++) - CL->translation[a]=name_is_in_list ( (CL->S)->name[a], (CL->S)->name, (CL->S)->nseq, MAXNAMES); - - n_groups_done=(CL->S)->nseq; - A=reorder_aln (A, (CL->S)->name,(CL->S)->nseq); - A->nseq=nseq; - } - - translation=CL->translation; - n_s=vcalloc (2, sizeof ( int)); - l_s=declare_int ( 2, nseq); - - - if ( RT->parent !=LT->parent)fprintf ( stderr, "Tree Pb [FATAL:%s]", PROGRAM); - else P=RT->parent; - - if ( LT->leaf==1 && RT->leaf==0) - tree2ao ( RT->left, RT->right,A, nseq,CL); - - else if ( RT->leaf==1 && LT->leaf==0) - tree2ao ( LT->left, LT->right,A,nseq,CL); - - else if (RT->leaf==0 && LT->leaf==0) - { - tree2ao ( LT->left, LT->right,A,nseq,CL); - tree2ao ( RT->left, RT->right,A,nseq,CL); - } - - if ( LT->leaf==1 && RT->leaf==1) - { - /*1 Identify the two groups of sequences to align*/ - - nseq2align=LT->nseq+RT->nseq; - n_s[0]=LT->nseq; - for ( a=0; a< LT->nseq; a++)l_s[0][a]=translation[LT->lseq[a]]; - if ( LT->nseq==1)LT->group=l_s[0][0]; - - n_s[1]=RT->nseq; - for ( a=0; a< RT->nseq; a++)l_s[1][a]=translation[RT->lseq[a]]; - if ( RT->nseq==1)RT->group=l_s[1][0]; - - - P->group=n_groups_done++; - - if (nseq2align==nseq) - { - for (b=0, a=0; a< n_s[0]; a++, b++)sprintf ( A->tree_order[b],"%s", (CL->S)->name[l_s[0][a]]); - for (a=0; a< n_s[1] ; a++, b++)sprintf ( A->tree_order[b], "%s",(CL->S)->name[l_s[1][a]]); - n_groups_done=0; - } - } - if (P->parent)P->leaf=1; - if ( LT->isseq==0)LT->leaf=0; - if ( RT->isseq==0)RT->leaf=0; - - if (RT->isseq){SNL[translation[RT->lseq[0]]]=RT;RT->score=100;} - if (LT->isseq){SNL[translation[LT->lseq[0]]]=LT;LT->score=100;} - - do_split=split_condition (nseq2align,A->score_aln,CL); - if (CL->split && do_split) - { - - for (a=0; a< P->nseq; a++)SNL[CL->translation[P->lseq[a]]]=NULL; - SNL[CL->translation[RT->lseq[0]]]=P; - - } - - vfree ( n_s); - free_int ( l_s, 2); - return SNL; - - } - -NT_node* tree_realn ( NT_node LT, NT_node RT, Alignment*A, int nseq, Constraint_list *CL) - { - int *n_s; - int ** l_s; - int a, b; - int score; - static int n_groups_done; - int nseq2align=0; - int *translation; - - - NT_node P=NULL; - - - - - if (n_groups_done==0) - { - if (SNL)vfree(SNL); - SNL=vcalloc ( (CL->S)->nseq, sizeof (NT_node)); - - if (CL->translation)vfree(CL->translation); - CL->translation=vcalloc ( (CL->S)->nseq, sizeof (int)); - - for ( a=0; a< (CL->S)->nseq; a++) - CL->translation[a]=name_is_in_list ( (CL->S)->name[a], (CL->S)->name, (CL->S)->nseq, MAXNAMES); - if (nseq>2)fprintf ( CL->local_stderr, "\nPROGRESSIVE_ALIGNMENT [Tree Based]\n"); - else fprintf ( CL->local_stderr, "\nPAIRWISE_ALIGNMENT [No Tree]\n"); - n_groups_done=(CL->S)->nseq; - A=reorder_aln (A, (CL->S)->name,(CL->S)->nseq); - A->nseq=nseq; - } - - translation=CL->translation; - n_s=vcalloc (2, sizeof ( int)); - l_s=declare_int ( 2, nseq); - - - if ( nseq==2) - { - n_s[0]=n_s[1]=1; - l_s[0][0]=name_is_in_list ((CL->S)->name[0],(CL->S)->name, (CL->S)->nseq, MAXNAMES); - l_s[1][0]=name_is_in_list ((CL->S)->name[1],(CL->S)->name, (CL->S)->nseq, MAXNAMES); - A->score_aln=score=pair_wise (A, n_s, l_s,CL); - - vfree ( n_s); - free_int ( l_s, 2); - return SNL; - } - else - { - if ( RT->parent !=LT->parent)fprintf ( stderr, "Tree Pb [FATAL:%s]", PROGRAM); - else P=RT->parent; - - if ( LT->leaf==1 && RT->leaf==0) - tree_realn ( RT->left, RT->right,A, nseq,CL); - - else if ( RT->leaf==1 && LT->leaf==0) - tree_realn ( LT->left, LT->right,A,nseq,CL); - - else if (RT->leaf==0 && LT->leaf==0) - { - tree_realn ( LT->left, LT->right,A,nseq,CL); - tree_realn ( RT->left, RT->right,A,nseq,CL); - } - - if ( LT->leaf==1 && RT->leaf==1 && (RT->nseq+LT->nseq)nseq; a++) - { - s=translation[LT->lseq[a]]; - list[s]=1; - } - for (a=0; anseq; a++) - { - s=translation[RT->lseq[a]]; - list[s]=1; - } - for (a=0; ascore=A->score_aln=score=pair_wise (A, n_s, l_s,CL); - id2=sub_aln2sim (A, n_s, l_s, "idmat_sim"); - - - - - if (nseq2align==nseq) - { - for (b=0, a=0; a< n_s[0]; a++, b++)sprintf ( A->tree_order[b],"%s", (CL->S)->name[l_s[0][a]]); - for (a=0; a< n_s[1] ; a++, b++)sprintf ( A->tree_order[b], "%s",(CL->S)->name[l_s[1][a]]); - n_groups_done=0; - } - } - if (P->parent)P->leaf=1; - //Recycle the tree - if ( LT->isseq==0)LT->leaf=0; - if ( RT->isseq==0)RT->leaf=0; - - if (RT->isseq){SNL[translation[RT->lseq[0]]]=RT;RT->score=100;} - if (LT->isseq){SNL[translation[LT->lseq[0]]]=LT;LT->score=100;} - - vfree ( n_s); - free_int ( l_s, 2); - return SNL; - } - - - } - - - -Alignment* profile_tree_aln ( NT_node P,Alignment*A,Constraint_list *CL, int threshold) -{ - int *ns, **ls, a, sim; - NT_node LT, RT, D, UD; - Alignment *F; - static NT_node R; - static int n_groups_done; - - - //first pass - //Sequences must be in the same order as the tree sequences - if (!P->parent) - { - R=P; - n_groups_done=P->nseq+1; - } - - LT=P->left; - RT=P->right; - - if (LT->leaf==0)A=delayed_tree_aln1 (LT, A,CL, threshold); - if (RT->leaf==0)A=delayed_tree_aln1 (RT, A,CL, threshold); - - ns=vcalloc (2, sizeof (int)); - ls=declare_int ( 2,R->nseq); - - if ( LT->nseq==1) - { - ls[0][ns[0]++]=LT->lseq[0]; - LT->group=ls[0][0]+1; - } - else - node2seq_list (LT,&ns[0], ls[0]); - - if ( RT->nseq==1) - { - ls[1][ns[1]++]=RT->lseq[0]; - RT->group=ls[1][0]+1; - } - else - node2seq_list (RT,&ns[1], ls[1]); - - - P->group=++n_groups_done; - fprintf (CL->local_stderr, "\n\tGroup %4d: [Group %4d (%4d seq)] with [Group %4d (%4d seq)]-->",P->group,RT->group, ns[1],LT->group, ns[0]); - - P->score=A->score_aln=pair_wise (A, ns, ls,CL); - sim=sub_aln2sim(A, ns, ls, "idmat_sim1"); - - if ( simaligned=1; - D->aligned=0; - - fprintf (CL->local_stderr, "[Delayed (Sim=%4d). Kept Group %4d]",sim,UD->group); - - - ungap_sub_aln (A, ns[0],ls[0]); - ungap_sub_aln (A, ns[1],ls[1]); - A->nseq=MAX(ns[0],ns[1]); - - F=A; - while (F->A)F=F->A; - F->A=main_read_aln (output_fasta_sub_aln (NULL, A, ns[(D==LT)?0:1], ls[(D==LT)?0:1]), NULL); - if ( P==R) - { - F=F->A; - F->A=main_read_aln (output_fasta_sub_aln (NULL, A, ns[(D==LT)?1:0], ls[(D==LT)?1:0]), NULL); - } - if (F->A==NULL) - { - printf_exit (EXIT_FAILURE, stderr, "\nError: Empty group"); - } - } - else - { - LT->aligned=1; RT->aligned=1; - fprintf (CL->local_stderr, "[Score=%4d][Len=%5d]",sub_aln2sub_aln_score (A, CL, CL->evaluate_mode,ns, ls), strlen ( A->seq_al[ls[0][0]])); - A->nseq=ns[0]+ns[1]; - if (P==R) - { - F=A; - while (F->A)F=F->A; - F->A=main_read_aln (output_fasta_sub_aln2 (NULL, A, ns, ls), NULL); - } - } - P->nseq=0; - for (a=0; anseq;a++)P->lseq[P->nseq++]=LT->lseq[a]; - for (a=0; anseq;a++)P->lseq[P->nseq++]=RT->lseq[a]; - - P->aligned=1; - - vfree ( ns); - free_int ( ls,-1); - return A; -}//////////////////////////////////////////////////////////////////////////////////////// -// -// Frame Tree Aln -// -//////////////////////////////////////////////////////////////////////////////////////// - -//Alignment *frame_tree_aln (Alignment *A, Constraint_list *CL) -//{ - - -//////////////////////////////////////////////////////////////////////////////////////// -// -// Delayed Tree Aln -// -//////////////////////////////////////////////////////////////////////////////////////// -int delayed_pair_wise (Alignment *A, int *ns, int **ls,Constraint_list *CL); -NT_node* delayed_tree_aln_mode1 ( NT_node LT, NT_node RT, Alignment*A, int nseq, Constraint_list *CL); -NT_node* delayed_tree_aln_mode2 ( NT_node LT, NT_node RT, Alignment*A, int nseq, Constraint_list *CL); -int paint_nodes2aligned ( NT_node P,char **list, int n); - -int reset_visited_nodes ( NT_node P); -int reset_visited_nodes2 ( NT_node P); -Alignment * make_delayed_tree_aln (Alignment *A,int n, Constraint_list *CL) -{ - NT_node **T=NULL; - int a; - - T=make_tree (A, CL, CL->gop, CL->gep,CL->S,NULL, 1); - delayed_tree_aln_mode1 ((T[3][0])->left,(T[3][0])->right,A,(CL->S)->nseq, CL); - - for ( a=0; a< n; a++) - { - - sprintf ( CL->distance_matrix_mode, "aln"); - CL->DM=cl2distance_matrix ( CL,A,NULL,NULL, 1); - degap_aln (A); - T=make_tree (A, CL, CL->gop, CL->gep,CL->S,NULL, 1); - delayed_tree_aln_mode1 ((T[3][0])->left,(T[3][0])->right,A,(CL->S)->nseq, CL); - } - - return A; -} -NT_node* delayed_tree_aln_mode1 ( NT_node LT, NT_node RT, Alignment*A, int nseq, Constraint_list *CL) -{ - NT_node P; - - - - P=LT->parent;P->nseq=nseq; - paint_nodes2aligned (P, NULL, 0); - - A=delayed_tree_aln1 (P, A, CL,50); - A=delayed_tree_aln2 (P, A, CL, 0); - return NULL; -} - -NT_node* delayed_tree_aln_mode2 ( NT_node LT, NT_node RT, Alignment*A, int nseq, Constraint_list *CL) -{ - NT_node P; - - int thr=50; - - P=LT->parent;P->nseq=nseq; - - A=delayed_tree_aln1 (P, A, CL,thr); - thr-=10; - while (thr>=0) - { - A=delayed_tree_aln2 (P, A, CL, thr); - thr-=10; - } - return NULL; -} - -Alignment* delayed_tree_aln1 ( NT_node P,Alignment*A,Constraint_list *CL, int threshold) -{ - int *ns, **ls, a, sim; - NT_node LT, RT, D, UD; - - static NT_node R; - static int n_groups_done; - - - //first pass - //Sequences must be in the same order as the tree sequences - if (!P->parent) - { - R=P; - n_groups_done=P->nseq+1; - } - - LT=P->left; - RT=P->right; - - if (LT->leaf==0)A=delayed_tree_aln1 (LT, A,CL, threshold); - if (RT->leaf==0)A=delayed_tree_aln1 (RT, A,CL, threshold); - - ns=vcalloc (2, sizeof (int)); - ls=declare_int ( 2,R->nseq); - - - node2seq_list (LT,&ns[0], ls[0]); - if ( LT->nseq==1)LT->group=LT->lseq[0]+1; - - node2seq_list (RT,&ns[1], ls[1]); - if ( RT->nseq==1)RT->group=RT->lseq[0]+1; - - - P->group=++n_groups_done; - - - if ( ns[0]==0 || ns[1]==0) - { - fprintf (CL->local_stderr, "\n\tF-Group %4d: [Group %4d (%4d seq)] with [Group %4d (%4d seq)]-->Skipped",P->group,RT->group, ns[1],LT->group, ns[0]); - - LT->aligned=(ns[0]==0)?0:1; - RT->aligned=(ns[1]==0)?0:1; - } - else - { - fprintf (CL->local_stderr, "\n\tF-Group %4d: [Group %4d (%4d seq)] with [Group %4d (%4d seq)]-->",P->group,RT->group, ns[1],LT->group, ns[0]); - P->score=A->score_aln=pair_wise (A, ns, ls,CL); - sim=sub_aln2max_sim(A, ns, ls, "idmat_sim1"); - - - if ( simaligned=1; - D->aligned=0; - - fprintf (CL->local_stderr, "[Delayed (Sim=%4d). Kept Group %4d]",sim,UD->group); - - ungap_sub_aln (A, ns[0],ls[0]); - ungap_sub_aln (A, ns[1],ls[1]); - A->nseq=MAX(ns[0],ns[1]); - } - else - { - LT->aligned=1; RT->aligned=1; - fprintf (CL->local_stderr, "[Score=%4d][Len=%5d]",sub_aln2sub_aln_score (A, CL, CL->evaluate_mode,ns, ls), strlen ( A->seq_al[ls[0][0]])); - A->nseq=ns[0]+ns[1]; - } - P->nseq=0; - for (a=0; anseq;a++)P->lseq[P->nseq++]=LT->lseq[a]; - for (a=0; anseq;a++)P->lseq[P->nseq++]=RT->lseq[a]; - - P->aligned=1; - } - vfree ( ns); - free_int ( ls,-1); - return A; -} - -Alignment* delayed_tree_aln2 ( NT_node P,Alignment*A,Constraint_list *CL, int thr) -{ - - NT_node LT, RT, D; - - static NT_node R; - - - LT=P->left; - RT=P->right; - if (!P->parent) - { - R=P; - fprintf (CL->local_stderr, "\n"); - } - if (!LT->aligned && !RT->aligned) - { - printf_exit (EXIT_FAILURE, stderr, "ERROR: Unresolved Node On Groups %d [FATAL:%s]\n", P->group,PROGRAM); - } - else if (!LT->aligned || !RT->aligned) - { - int *ns, **ls, sim; - ns=vcalloc (2, sizeof (int)); - ls=declare_int (2, R->nseq); - - node2seq_list (R,&ns[0], ls[0]); - - D=(!LT->aligned)?LT:RT; - D->aligned=1; - node2seq_list (D,&ns[1], ls[1]); - - fprintf (CL->local_stderr, "\tS-Delayed Group %4d: [Group %4d (%4d seq)] with [Group %4d (%4d seq)]-->",P->group,D->group, ns[1],R->group, ns[0]); - P->score=A->score_aln=pair_wise (A, ns, ls,CL); - sim=sub_aln2max_sim(A, ns, ls, "idmat_sim1"); - if (simlocal_stderr, " [Further Delayed]\n"); - ungap_sub_aln (A, ns[0],ls[0]); - ungap_sub_aln (A, ns[1],ls[1]); - D->aligned=0; - } - else - { - fprintf (CL->local_stderr, "[Score=%4d][Len=%5d][thr=%d]\n",sub_aln2sub_aln_score (A, CL, CL->evaluate_mode,ns, ls), (int)strlen ( A->seq_al[ls[0][0]]), thr); - D->aligned=1; - } - vfree (ns);free_int (ls, -1); - } - else - { - ; - } - - if (LT->leaf==0)A=delayed_tree_aln2 (LT, A,CL, thr); - if (RT->leaf==0)A=delayed_tree_aln2 (RT, A,CL, thr); - - return A; -} - -int delayed_pair_wise (Alignment *A, int *ns, int **ls,Constraint_list *CL) -{ - int s,s1, s2, a, b; - int **sim; - - - pair_wise (A, ns, ls, CL); - - sim=fast_aln2sim_list (A, "sim3", ns, ls); - - sort_int_inv ( sim,3, 2,0, ns[0]*ns[1]-1); - - for (a=0; a< 2; a++) - for ( b=0; b< ns[a]; b++) - A->order[ls[a][b]][4]=-1; - - for (a=0; a< 10 && sim[a][0]!=-1; a++) - { - s1=sim[a][0]; - s2=sim[a][1]; - A->order[s1][4]=0; - A->order[s2][4]=0; - } - - ungap_sub_aln (A, ns[0],ls[0]); - ungap_sub_aln (A, ns[1],ls[1]); - - s=pair_wise (A, ns, ls, CL); - - for (a=0; a< 2; a++) - for ( b=0; b< ns[a]; b++) - A->order[ls[a][b]][4]=0; - - free_int (sim, -1); - return s; -} - -int node2seq_list2 (NT_node P, int *ns, int *ls) -{ - - if ( !P || P->visited ) return ns[0]; - else P->visited=1; - - if ( P->isseq) - { - ls[ns[0]++]=P->lseq[0]; - } - - if (P->left && (P->left) ->aligned)node2seq_list2 (P->left, ns,ls); - if (P->right && (P->right)->aligned)node2seq_list2 (P->right,ns,ls); - if (P->aligned && P->parent)node2seq_list2 (P->parent,ns,ls); - - - return ns[0]; -} - -int node2seq_list (NT_node P, int *ns, int *ls) -{ - - if ( P->isseq && P->aligned) - { - ls[ns[0]++]=P->lseq[0]; - } - else - { - if (P->left && (P->left) ->aligned)node2seq_list (P->left, ns,ls); - if (P->right && (P->right)->aligned)node2seq_list (P->right,ns,ls); - } - return ns[0]; -} -int paint_nodes2aligned ( NT_node P,char **list, int n) -{ - int r=0; - if ( P->leaf) - { - if ( list==NULL) - P->aligned=1; - else if ( name_is_in_list ( P->name, list, n, 100)!=-1) - P->aligned=1; - else - P->aligned=0; - return P->aligned; - } - else - { - r+=paint_nodes2aligned (P->left, list, n); - r+=paint_nodes2aligned (P->right, list, n); - } - return r; -} - -int reset_visited_nodes ( NT_node P) -{ - while (P->parent)P=P->parent; - return reset_visited_nodes2 (P); -} -int reset_visited_nodes2 ( NT_node P) -{ - int r=0; - if (P->left)r+=reset_visited_nodes2(P->left); - if (P->right)r+=reset_visited_nodes2(P->right); - r+=P->visited; - P->visited=0; - return r; -} - -//////////////////////////////////////////////////////////////////////////////////////// -// -// DPA_MSA -// -//////////////////////////////////////////////////////////////////////////////////////// - -Alignment* dpa_msa2 ( NT_node P,Alignment*A,Constraint_list *CL); -Alignment *dpa_align_node (NT_node P,Alignment*A,Constraint_list *CL); -char *node2profile_list (NT_node P,Alignment*A,Constraint_list *CL, char *list); -char * output_node_aln (NT_node P, Alignment *A, char *name); -int node2nleaf ( NT_node P); - -Alignment* dpa_aln (Alignment*A,Constraint_list *CL) -{ - NT_node P; - - A=iterative_tree_aln (A,1, CL); - P=make_root_tree (A, CL, CL->gop, CL->gep,CL->S,NULL, 1); - degap_aln (A); - while (!P->leaf) - A=dpa_msa2(P, A, CL); - return A; -} - -int node2nleaf ( NT_node P) -{ - int n=0; - if ( P->leaf) return 1; - else - { - n+=node2nleaf ( P->left); - n+=node2nleaf ( P->right); - } - return n; -} -Alignment* dpa_msa2 ( NT_node P,Alignment*A,Constraint_list *CL) -{ - int maxnseq=20; - int n, n_l, n_r; - n=node2nleaf (P); - - - if ( n>maxnseq) - { - n_l=node2nleaf (P->left); - n_r=node2nleaf (P->right); - if (n_l>n_r) - { - return dpa_msa2 (P->left, A, CL); - } - else - { - return dpa_msa2 (P->right, A, CL); - } - } - A=dpa_align_node (P, A, CL); - P->leaf=1; - return A; -} - -Alignment *dpa_align_node (NT_node P,Alignment*A,Constraint_list *CL) -{ - - char *list, *tmp_aln; - int a, b; - Alignment *B; - - - list=vcalloc ( P->nseq*100, sizeof (char)); - list=node2profile_list (P,A, CL, list); - - printf_system ( "t_coffee -profile %s -outfile=%s -dp_mode gotoh_pair_wise_lgp -msa_mode iterative_tree_aln -quiet", list,tmp_aln=vtmpnam (NULL)); - B=main_read_aln (tmp_aln, NULL); - A=realloc_aln (A, B->len_aln+1); - for ( a=0; a< B->nseq; a++) - if ( (b=name_is_in_list (B->name[a], A->name, A->nseq, 100))!=-1) - sprintf (A->seq_al[b], "%s", B->seq_al[a]); - A->len_aln=B->len_aln; - free_aln (B); - vfree (list); - return A; -} -char *node2profile_list (NT_node P,Alignment*A,Constraint_list *CL, char *list) -{ - if (!P->leaf) - { - list=node2profile_list (P->left, A, CL, list); - list=node2profile_list (P->right, A, CL, list); - } - else - { - - list=strcatf (list," %s", output_node_aln (P, A, NULL)); - if ( !P->isseq)P->leaf=0; - } - return list; -} -char * output_node_aln (NT_node P, Alignment *A, char *name) -{ - FILE *fp; - int a; - if (name==NULL) name=vtmpnam (NULL); - fp=vfopen (name, "w"); - - for (a=0; a< P->nseq; a++) - fprintf ( fp, ">%s\n%s", A->name[P->lseq[a]], A->seq_al[P->lseq[a]]); - vfclose (fp); - return name; -} -//////////////////////////////////////////////////////////////////////////////////////// -// -// NEW_DPA_MSA -// -//////////////////////////////////////////////////////////////////////////////////////// - -Alignment * new_dpa_aln (Alignment *A,Constraint_list *CL) -{ - NT_node P; - - char *tmp_aln; - char *list; - - A=make_delayed_tree_aln (A,1, CL); - P=make_root_tree (A, CL, CL->gop, CL->gep,CL->S,NULL, 1); - set_node_score (A, P, "idmat_sim"); - - - list=split_nodes_nseq (A,P,15, list=vcalloc (P->nseq*200, sizeof (char))); - printf_system ( "t_coffee -profile %s -outfile=%s -dp_mode gotoh_pair_wise_lgp -msa_mode iterative_tree_aln", list,tmp_aln=vtmpnam (NULL)); - return main_read_aln (tmp_aln, NULL); -} - -char *split_nodes_nseq (Alignment *A, NT_node P, int nseq, char *list) -{ - int a,n; - - n=P->nseq; - a=100; - while ( n>=nseq) - { - a--; - n=count_threshold_nodes (A, P, a); - } - - return split_nodes_idmax (A, P, a,list); -} -char *split_nodes_idmax (Alignment *A, NT_node P, int t, char *list) -{ - if (P->isseq || P->score>=t) - { - list=strcatf (list," %s", output_node_aln (P, A, NULL)); - } - else if ( P->scoreleft,t, list); - list=split_nodes_idmax (A, P->right,t,list); - } - return list; -} -int count_threshold_nodes (Alignment *A, NT_node P, int t) -{ - int s=0; - - if (P->isseq || P->score>=t) - { - s=1; - } - else if ( P->scoreleft,t); - s+=count_threshold_nodes (A, P->right,t); - } - - return s; -} -int set_node_score (Alignment *A, NT_node P, char *mode) -{ - int a; - int ns[2], *ls[2]; - - if (P->isseq) return 0; - for (a=0; a<2; a++) - { - NT_node N; - N=(a==0)?P->left:P->right; - ns[a]=N->nseq; - ls[a]=N->lseq; - } - P->score=sub_aln2max_sim(A, ns, ls,mode); - set_node_score (A,P->left, mode); - set_node_score (A,P->right, mode); - return 1; -} -///////////////////////////////////////////////////////////////////////////////////////// -int split_condition (int nseq, int score, Constraint_list *CL) -{ - int cond1=1, cond2=1; - - - if ( CL->split_nseq_thres)cond1 =(nseq<=CL->split_nseq_thres)?1:0; - if ( CL->split_score_thres)cond2=(score>=CL->split_score_thres)?1:0; - - return (cond1 && cond2); -} -int profile_pair_wise (Alignment *A, int n1, int *l1, int n2, int *l2, Constraint_list *CL) -{ - static int *ns; - static int **ls; - - if (!ns) - { - ns=vcalloc (2, sizeof (int)); - ls=vcalloc (2, sizeof (int*)); - } - ns[0]=n1; - ls[0]=l1; - ns[1]=n2; - ls[1]=l2; - return pair_wise (A, ns, ls, CL); -} -int pair_wise (Alignment *A, int*ns, int **l_s,Constraint_list *CL ) - { - /* - CL->maximise - CL->gop; - CL->gep - CL->TG_MODE; - */ - int score; - - int glocal; - Pwfunc function; - - - - /*Make sure evaluation functions update their cache if needed*/ - A=update_aln_random_tag (A); - - if (! CL->pw_parameters_set) - { - fprintf ( stderr, "\nERROR pw_parameters_set must be set in pair_wise [FATAL]\n" );crash(""); - } - - - function=get_pair_wise_function(CL->pair_wise, CL->dp_mode,&glocal); - if ( CL->get_dp_cost==NULL)CL->get_dp_cost=get_dp_cost; - - if (strlen ( A->seq_al[l_s[0][0]])==0 || strlen ( A->seq_al[l_s[1][0]])==0) - score=empty_pair_wise ( A, ns, l_s, CL, glocal); - else - score=function ( A, ns, l_s, CL); - - return score; - } - -int empty_pair_wise ( Alignment *A, int *ns, int **l_s, Constraint_list *CL, int glocal) -{ - int n=0, a, b; - int *l=NULL; - char *string; - int l0, l1, len; - - if ( glocal==GLOBAL) - { - l0=strlen (A->seq_al[l_s[0][0]]); - l1=strlen (A->seq_al[l_s[1][0]]); - len=MAX(l1,l0); - - if ( len==0)return 0; - else if (l0>l1){n=ns[1];l=l_s[1];} - else if (l0seq_al[l[a]], "%s", string); - A->score=A->score_aln=0; - A->len_aln=len; - vfree ( string); - return 0; - } - else if ( glocal==LALIGN) - { - A->A=declare_aln (A->S); - (A->A)->len_aln=0; - for ( a=0; a< 2; a++) - for ( b=0; bseq_al[l_s[a][b]][0]='\0'; - (A->A)->score_aln=(A->A)->score=0; - return 0; - } - else return 0; -} - - - - -Pwfunc get_pair_wise_function (Pwfunc pw,char *dp_mode, int *glocal) - { - /*Returns a function and a mode (Glogal, Local...)*/ - - - - int a; - static int npw; - static Pwfunc *pwl; - static char **dpl; - static int *dps; - - /*The first time: initialize the list of pairwse functions*/ - if ( npw==0) - { - pwl=vcalloc ( 100, sizeof (Pwfunc)); - dpl=declare_char (100, 100); - dps=vcalloc ( 100, sizeof (int)); - - pwl[npw]=fasta_cdna_pair_wise; - sprintf (dpl[npw], "fasta_cdna_pair_wise"); - dps[npw]=GLOBAL; - npw++; - - pwl[npw]=cfasta_cdna_pair_wise; - sprintf (dpl[npw], "cfasta_cdna_pair_wise"); - dps[npw]=GLOBAL; - npw++; - - pwl[npw]=idscore_pair_wise; - sprintf (dpl[npw], "idscore_pair_wise"); - dps[npw]=GLOBAL; - npw++; - - pwl[npw]=gotoh_pair_wise; - sprintf (dpl[npw], "gotoh_pair_wise"); - dps[npw]=GLOBAL; - npw++; - - pwl[npw]=gotoh_pair_wise_lgp; - sprintf (dpl[npw], "gotoh_pair_wise_lgp"); - dps[npw]=GLOBAL; - npw++; - - pwl[npw]=gotoh_pair_wise_lgp_sticky; - sprintf (dpl[npw], "gotoh_pair_wise_lgp_sticky"); - dps[npw]=GLOBAL; - npw++; - - pwl[npw]=proba_pair_wise; - sprintf (dpl[npw], "proba_pair_wise"); - dps[npw]=GLOBAL; - npw++; - - pwl[npw]=biphasic_pair_wise; - sprintf (dpl[npw], "biphasic_pair_wise"); - dps[npw]=GLOBAL; - npw++; - - pwl[npw]=subop1_pair_wise; - sprintf (dpl[npw], "subop1_pair_wise"); - dps[npw]=GLOBAL; - npw++; - - pwl[npw]=subop2_pair_wise; - sprintf (dpl[npw], "subop2_pair_wise"); - dps[npw]=GLOBAL; - npw++; - - pwl[npw]=myers_miller_pair_wise; - sprintf (dpl[npw], "myers_miller_pair_wise"); - dps[npw]=GLOBAL; - npw++; - - pwl[npw]=test_pair_wise; - sprintf (dpl[npw], "test_pair_wise"); - dps[npw]=GLOBAL; - npw++; - - pwl[npw]=fasta_gotoh_pair_wise; - sprintf (dpl[npw], "fasta_pair_wise"); - dps[npw]=GLOBAL; - npw++; - pwl[npw]=cfasta_gotoh_pair_wise; - sprintf (dpl[npw], "cfasta_pair_wise"); - dps[npw]=GLOBAL; - npw++; - - pwl[npw]=very_fast_gotoh_pair_wise; - sprintf (dpl[npw], "very_fast_pair_wise"); - dps[npw]=GLOBAL; - npw++; - - pwl[npw]=gotoh_pair_wise_sw; - sprintf (dpl[npw], "gotoh_pair_wise_sw"); - dps[npw]=LOCAL; - npw++; - - pwl[npw]=cfasta_gotoh_pair_wise_sw; - sprintf (dpl[npw], "cfasta_sw_pair_wise"); - dps[npw]=LOCAL; - npw++; - - pwl[npw]=gotoh_pair_wise_lalign; - sprintf (dpl[npw], "gotoh_pair_wise_lalign"); - dps[npw]=LALIGN; - npw++; - - pwl[npw]=sim_pair_wise_lalign; - sprintf (dpl[npw], "sim_pair_wise_lalign"); - dps[npw]=LALIGN; - npw++; - - pwl[npw]=domain_pair_wise; - sprintf (dpl[npw], "domain_pair_wise"); - dps[npw]=MOCCA; - npw++; - - pwl[npw]=gotoh_pair_wise; - sprintf (dpl[npw], "ssec_pair_wise"); - dps[npw]=GLOBAL; - npw++; - - pwl[npw]=ktup_pair_wise; - sprintf (dpl[npw], "ktup_pair_wise"); - dps[npw]=LOCAL; - npw++; - - pwl[npw]=precomputed_pair_wise; - sprintf (dpl[npw], "precomputed_pair_wise"); - dps[npw]=GLOBAL; - npw++; - - pwl[npw]=myers_miller_pair_wise; - sprintf (dpl[npw], "default"); - dps[npw]=GLOBAL; - npw++; - - pwl[npw]=viterbi_pair_wise; - sprintf (dpl[npw], "viterbi_pair_wise"); - dps[npw]=GLOBAL; - npw++; - - pwl[npw]=viterbiL_pair_wise; - sprintf (dpl[npw], "viterbiL_pair_wise"); - dps[npw]=GLOBAL; - npw++; - - pwl[npw]=viterbiD_pair_wise; - sprintf (dpl[npw], "viterbiD_pair_wise"); - dps[npw]=GLOBAL; - npw++; - - pwl[npw]=seq_viterbi_pair_wise; - sprintf (dpl[npw], "seq_viterbi_pair_wise"); - dps[npw]=GLOBAL; - npw++; - - pwl[npw]=pavie_pair_wise; - sprintf (dpl[npw], "pavie_pair_wise"); - dps[npw]=GLOBAL; - npw++; - - pwl[npw]=glocal_pair_wise; - sprintf (dpl[npw], "glocal_pair_wise"); - dps[npw]=GLOBAL; - npw++; - - pwl[npw]=linked_pair_wise; - sprintf (dpl[npw], "linked_pair_wise"); - dps[npw]=GLOBAL; - npw++; - - pwl[npw]=clinked_pair_wise; - sprintf (dpl[npw], "clinked_pair_wise"); - dps[npw]=GLOBAL; - npw++; - - /* - pwl[npw]=viterbiDGL_pair_wise; - sprintf (dpl[npw], "viterbiDGL_pair_wise"); - dps[npw]=GLOBAL; - npw++; - */ - } - - for ( a=0; a< npw; a++) - { - if ( (dp_mode && strm (dpl[a], dp_mode)) || pwl[a]==pw) - { - pw=pwl[a]; - if (dp_mode)sprintf (dp_mode,"%s", dpl[a]); - glocal[0]=dps[a]; - return pw; - } - } - fprintf ( stderr, "\n[%s] is an unknown mode for dp_mode[FATAL]\n", dp_mode); - crash ( "\n"); - return NULL; - } - - -/*******************************************************************************/ -/* */ -/* */ -/* Util Functions */ -/* */ -/* */ -/*******************************************************************************/ - -char *build_consensus ( char *seq1, char *seq2, char *dp_mode) - { - Alignment *A; - char *buf; - int a; - char c1, c2; - static char *mat; - - - if ( !mat) mat=vcalloc ( STRING, sizeof (char)); - - - A=align_two_sequences (seq1, seq2, strcpy(mat,"idmat"), 0, 0,dp_mode); - buf=vcalloc ( A->len_aln+1, sizeof (char)); - - for ( a=0; a< A->len_aln; a++) - { - c1=A->seq_al[0][a]; - c2=A->seq_al[1][a]; - if (is_gap(c1) && is_gap(c2))buf[a]='-'; - else if (is_gap(c1))buf[a]=c2; - else if (is_gap(c2))buf[a]=c1; - else if (c1!=c2){vfree (buf);buf=NULL;free_aln(A);return NULL;} - else buf[a]=c1; - } - buf[a]='\0'; - free_sequence (free_aln (A), -1); - return buf; - } - - - -#ifdef FASTAL - -combine_profile () Comobine two profiles into one, using the edit sequence produce by the DP - edit_sequence () insert the gaps using the - -int fastal (int argv, char **arg) -{ - Sequence *S; - int a, b; - SeqHasch *H=NULL; - int ktup=2; - - S=get_fasta_sequence (arg[1], NULL); - - - - for (a=0; anseq-1; a++) - { - for (b=a+1; bnseq; b++) - { - mat[b][a]=mat[a][b]addrand()%100; - } - } - - int_dist2nj_tree (s, S->name, S->nseq, tree_name); - T=main_read_tree (BT); - =fastal_tree_aln (T->L,T->R,S); -} - - - - -NT_node fastal_tree_aln ( NT_node P, Sequence *S) -{ - int score; - - - if (!P || P->nseq==1) return NULL; - R=P->right;L=P->left; - - fastal_tree_aln (P->left,S); - fastal_tree_aln (P->right,S); - fastal_pair_wise (P); - return P; -} - - -NT_node fastal_pair_wise (NT_node P) -{ - //X- 1 - //-X 2 - //XX 3 - //-- 4 - - tb=fastal_align_profile ((P->right)->prf, (P->left)->prf); - - l=strlen (tb); - for (a=0; a< l; a++) - { - pr1=pr2=0; - if (tb[a]== 1 || tb[a] ==3)pr1=1; - if (tb[a]== 2 || tb[a] ==3)pr2=1; - - for (b=0; b<20; b++) - P->prf[a][b]=((pr1==1)?(P->right)->prf[ppr1][b]:0) + ((pr2==1)?(P->left)->prf[ppr2][b]:0); - ppr1+=pr1; - ppr2+=pr2; - } - free_int ((P->left)->prf, -1); - free_int ((P->right)->prf, -1); -} -#endif -/*********************************COPYRIGHT NOTICE**********************************/ -/*© Centro de Regulacio Genomica */ -/*and */ -/*Cedric Notredame */ -/*Tue Oct 27 10:12:26 WEST 2009. */ -/*All rights reserved.*/ -/*This file is part of T-COFFEE.*/ -/**/ -/* T-COFFEE is free software; you can redistribute it and/or modify*/ -/* it under the terms of the GNU General Public License as published by*/ -/* the Free Software Foundation; either version 2 of the License, or*/ -/* (at your option) any later version.*/ -/**/ -/* T-COFFEE is distributed in the hope that it will be useful,*/ -/* but WITHOUT ANY WARRANTY; without even the implied warranty of*/ -/* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the*/ -/* GNU General Public License for more details.*/ -/**/ -/* You should have received a copy of the GNU General Public License*/ -/* along with Foobar; if not, write to the Free Software*/ -/* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA*/ -/*............................................... |*/ -/* If you need some more information*/ -/* cedric.notredame@europe.com*/ -/*............................................... |*/ -/**/ -/**/ -/* */ -/*********************************COPYRIGHT NOTICE**********************************/ diff --git a/binaries/src/tcoffee/t_coffee_source/util_dp_est.c b/binaries/src/tcoffee/t_coffee_source/util_dp_est.c deleted file mode 100644 index a575e02..0000000 --- a/binaries/src/tcoffee/t_coffee_source/util_dp_est.c +++ /dev/null @@ -1,89 +0,0 @@ -#include -#include -#include -#include -#include - -#include "io_lib_header.h" -#include "util_lib_header.h" -#include "define_header.h" -#include "dp_lib_header.h" - - - -int evaluate_est_order (Sequence *S, char *concat, Constraint_list *CL, int ktuple) - { - int a; - static char *alphabet; - int *hasched_seq, *hasched_seq1, *hasched_seq2; - int *lu_seq, *lu_seq1, *lu_seq2; - int pos_ktup1, pos_ktup2; - double score=0; - int n_ktup; - int n_dots=0; - - if ( !alphabet)alphabet=get_alphabet ( concat, alphabet); - n_ktup=(int)pow ( (double)alphabet[0]+1, (double)ktuple); - - hasch_seq (concat,&hasched_seq, &lu_seq,ktuple, alphabet); - hasched_seq1=hasched_seq2=hasched_seq; - lu_seq1=lu_seq2=lu_seq; - - - - for ( a=1; a< n_ktup; a++) - { - pos_ktup1=lu_seq1[a]; - - while (TRUE) - { - - if (!pos_ktup1)break; - pos_ktup2=lu_seq2[a]; - while (pos_ktup2) - { - score+=abs ((int)(pos_ktup1-pos_ktup2)); - pos_ktup2=hasched_seq2[pos_ktup2]; - n_dots++; - } - pos_ktup1=hasched_seq1[pos_ktup1]; - } - } - - score=(score/(double)(n_dots*strlen(concat)))*100000; - vfree ( hasched_seq); - vfree(lu_seq); - - - return score; - } - -/*********************************COPYRIGHT NOTICE**********************************/ -/*© Centro de Regulacio Genomica */ -/*and */ -/*Cedric Notredame */ -/*Tue Oct 27 10:12:26 WEST 2009. */ -/*All rights reserved.*/ -/*This file is part of T-COFFEE.*/ -/**/ -/* T-COFFEE is free software; you can redistribute it and/or modify*/ -/* it under the terms of the GNU General Public License as published by*/ -/* the Free Software Foundation; either version 2 of the License, or*/ -/* (at your option) any later version.*/ -/**/ -/* T-COFFEE is distributed in the hope that it will be useful,*/ -/* but WITHOUT ANY WARRANTY; without even the implied warranty of*/ -/* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the*/ -/* GNU General Public License for more details.*/ -/**/ -/* You should have received a copy of the GNU General Public License*/ -/* along with Foobar; if not, write to the Free Software*/ -/* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA*/ -/*............................................... |*/ -/* If you need some more information*/ -/* cedric.notredame@europe.com*/ -/*............................................... |*/ -/**/ -/**/ -/* */ -/*********************************COPYRIGHT NOTICE**********************************/ diff --git a/binaries/src/tcoffee/t_coffee_source/util_dp_fasta_nw.c b/binaries/src/tcoffee/t_coffee_source/util_dp_fasta_nw.c deleted file mode 100644 index b2887c2..0000000 --- a/binaries/src/tcoffee/t_coffee_source/util_dp_fasta_nw.c +++ /dev/null @@ -1,1840 +0,0 @@ -#include -#include -#include -#include -#include -#include - -#include "io_lib_header.h" -#include "util_lib_header.h" -#include "define_header.h" -#include "dp_lib_header.h" - - -int commonsextet( short *table, int *pointt ); -void makecompositiontable( short *table, int *pointt ); -int *code_seq (char *seq, char *type); -int * makepointtable( int *pointt, int *n, int ktup ); - -static int tsize; - -/** -* calculates the number of common tuples -*/ -int commonsextet( short *table, int *pointt ) -{ - int value = 0; - short tmp; - int point; - static short *memo = NULL; - static int *ct = NULL; - static int *cp; - - if( !memo ) - { - memo = vcalloc( tsize+1, sizeof( short ) ); - ct = vcalloc( tsize+1, sizeof( int ) ); - } - - cp = ct; - while( ( point = *pointt++ ) != END_ARRAY ) - { - tmp = memo[point]++; - if( tmp < table[point] ) - value++; - if( tmp == 0 ) - { - *cp++ = point; - } - } - *cp = END_ARRAY; - - cp = ct; - while( *cp != END_ARRAY ) - memo[*cp++] = 0; - - return( value ); -} - -/** -* calculates how many of each tupble exist -*/ -void makecompositiontable( short *table, int *pointt ) -{ - int point; - - while( ( point = *pointt++ ) != END_ARRAY ) - { - table[point]++; - } -} - -int *code_seq (char *seq, char *type) -{ - static int *code; - static int *aa, ng; - int a, b, l; - - - if (!aa) - { - char **gl; - if ( strm (type, "DNA") || strm (type, "RNA")) - { - gl=declare_char (4,5); - sprintf ( gl[ng++], "Aa"); - sprintf ( gl[ng++], "Gg"); - sprintf ( gl[ng++], "TtUu"); - sprintf ( gl[ng++], "Cc"); - } - else - { - - gl=make_group_aa ( &ng, "mafft"); - } - aa=vcalloc ( 256, sizeof (int)); - for ( a=0; amaximise=1; - sprintf ( CL->matrix_for_aa_group, "vasiliky"); - CL->M=read_matrice ("blosum62mt"); - CL->evaluate_residue_pair=evaluate_cdna_matrix_score; - CL->get_dp_cost=slow_get_dp_cost; - type=get_string_type(seq1); - - if ( strm (type, "CDNA")) - CL->evaluate_residue_pair= evaluate_matrix_score; - else if ( strm(type, "PROTEIN")) - CL->evaluate_residue_pair=evaluate_matrix_score; - else if ( strm (type, "DNA") || strm (type, "RNA")) - CL->evaluate_residue_pair= evaluate_matrix_score; - vfree(type); - } - else - { - in_cl=1; - } - - - - - if ( !gl) - { - gl=make_group_aa (&ng, CL->matrix_for_aa_group); - ns=vcalloc (2, sizeof (int)); - ns[0]=ns[1]=1; - l_s=declare_int (2, 2); - l_s[0][0]=0; - l_s[1][0]=1; - } - - - A=strings2aln (2, "A",seq1,"B", seq2); - ungap(A->seq_al[0]); - ungap(A->seq_al[1]); - - CL->S=A->S; - - diag=evaluate_diagonals ( A,ns, l_s, CL,maximise, ng, gl, ktup); - free_sequence (A->S, (A->S)->nseq); - free_aln (A); - if (!in_cl) - { - free_int (CL->M, -1); - vfree (CL); - } - - - return diag; - } - - -int ** evaluate_diagonals ( Alignment *A, int *ns, int **l_s, Constraint_list *CL,int maximise,int n_groups, char **group_list, int ktup) - { - int **tot_diag; - - - - if ( CL->L) - { - tot_diag=evaluate_diagonals_with_clist ( A, ns, l_s, CL, maximise,n_groups,group_list, ktup); - } - else if ( CL->use_fragments) - { - - tot_diag=evaluate_segments_with_ktup ( A, ns, l_s, CL, maximise,n_groups,group_list, ktup); - } - else - { - - tot_diag=evaluate_diagonals_with_ktup ( A, ns, l_s, CL, maximise,n_groups,group_list, ktup); - } - - return tot_diag; - } -int ** evaluate_segments_with_ktup ( Alignment *A, int *ns, int **l_s, Constraint_list *CL,int maximise,int n_groups, char **group_list, int ktup) - { - /* - Reads in an alignmnet A, with two groups of sequences marked. - 1-Turn each group into a conscensus, using the group list identifier. - -if the group list is left empty original symbols are used - 2-hash groupc the two sequences - 3-score each diagonal, sort the list and return it (diag_list) - */ - - char *seq1, *seq2, *alphabet=NULL; - int a,b,l1, l2, n_ktup,pos_ktup1, pos_ktup2, **pos; - int *hasched_seq1, *hasched_seq2,*lu_seq1,*lu_seq2; - int n_diag, **diag, current_diag, **dot_list, n_dots, cost; - int l,delta_diag, delta_res; - - - pos=aln2pos_simple ( A,-1, ns, l_s); - seq1=aln2cons_seq (A, ns[0], l_s[0], n_groups, group_list); - seq2=aln2cons_seq (A, ns[1], l_s[1], n_groups, group_list); - - - - alphabet=get_alphabet (seq1,alphabet); - alphabet=get_alphabet (seq2,alphabet); - - - - l1=strlen ( seq1); - l2=strlen ( seq2); - - n_diag=l1+l2-1; - diag=declare_int ( n_diag+2, 3); - n_ktup=(int)pow ( (double)alphabet[0]+1, (double)ktup); - - hasch_seq(seq1, &hasched_seq1, &lu_seq1,ktup, alphabet); - hasch_seq(seq2, &hasched_seq2, &lu_seq2,ktup, alphabet); - - - - /*EVALUATE THE DIAGONALS*/ - for ( a=0; a<= n_diag; a++)diag[a][0]=a; - for ( n_dots=0,a=1; a<= n_ktup; a++) - { - pos_ktup1=lu_seq1[a]; - while (TRUE) - { - if (!pos_ktup1)break; - pos_ktup2=lu_seq2[a]; - while (pos_ktup2) - { - n_dots++; - pos_ktup2=hasched_seq2[pos_ktup2]; - } - pos_ktup1=hasched_seq1[pos_ktup1]; - } - } - - if ( n_dots==0) - { - vfree (seq1); - vfree (seq2); - vfree (alphabet); - vfree (hasched_seq1); - vfree (hasched_seq2); - vfree (lu_seq1); - vfree (lu_seq2); - free_int (diag, -1); - return evaluate_segments_with_ktup (A,ns,l_s,CL,maximise,n_groups, group_list,ktup-1); - } - - dot_list=declare_int ( n_dots,3); - - for ( n_dots=0,a=1; a<= n_ktup; a++) - { - pos_ktup1=lu_seq1[a]; - while (TRUE) - { - if (!pos_ktup1)break; - pos_ktup2=lu_seq2[a]; - while (pos_ktup2) - { - current_diag=(pos_ktup2-pos_ktup1+l1); - dot_list[n_dots][0]=current_diag; - dot_list[n_dots][1]=pos_ktup1; - dot_list[n_dots][2]=pos_ktup2; - pos_ktup2=hasched_seq2[pos_ktup2]; - n_dots++; - } - pos_ktup1=hasched_seq1[pos_ktup1]; - } - } - - - - hsort_list_array ((void **)dot_list, n_dots, sizeof (int), 3, 0, 3); - current_diag= (int)dot_list[0][0]; - - for ( b=0; b< ktup; b++)diag[current_diag][2]+=(CL->get_dp_cost) ( A, pos, ns[0], l_s[0], dot_list[0][1]+b-1, pos,ns[1], l_s[1], dot_list[0][2]+b-1, CL); - - - for ( l=0,a=1; a< n_dots; a++) - { - - delta_diag=dot_list[a][0]-dot_list[a-1][0]; - delta_res =dot_list[a][1]-dot_list[a-1][1]; - - for ( cost=0, b=0; b< ktup; b++)cost++; - - /*=(CL->get_dp_cost) ( A, pos, ns[0], l_s[0], dot_list[a][1]+b-1, pos,ns[1], l_s[1], dot_list[a][2]+b-1, CL);*/ - - - - if (delta_diag!=0 || FABS(delta_res)>5) - { - - l=0; - diag[current_diag][1]=best_of_a_b(diag[current_diag][2], diag[current_diag][1], 1); - if ( diag[current_diag][2]<0); - else diag[current_diag][1]= MAX(diag[current_diag][1],diag[current_diag][2]); - diag[current_diag][2]=0; - current_diag=dot_list[a][0]; - } - l++; - diag[current_diag][2]+=cost; - - } - diag[current_diag][1]=best_of_a_b(diag[current_diag][2], diag[current_diag][1], 1); - sort_int (diag+1, 3, 1,0, n_diag-1); - - - vfree (seq1); - vfree (seq2); - vfree (alphabet); - vfree (hasched_seq1); - vfree (hasched_seq2); - vfree (lu_seq1); - vfree (lu_seq2); - free_int (pos, -1); - free_int (dot_list, -1); - return diag; - } - - - - - -int ** evaluate_diagonals_with_clist ( Alignment *A, int *ns, int **l_s, Constraint_list *CL,int maximise,int n_groups, char **group_list, int ktup) - { - - /* - Reads in an alignmnent A, with two groups of sequences marked. - Weight the diagonals with the values read in the constraint list - */ - - int l1, l2,n_diag, s1, s2, r1=0, r2=0; - int a, b, c, d; - int **diag; - int **code; - int **pos; - static int *entry; - - - if ( !entry)entry=vcalloc ( CL->entry_len, CL->el_size); - CL=index_constraint_list (CL); - - l1=strlen (A->seq_al[l_s[0][0]]); - l2=strlen (A->seq_al[l_s[1][0]]); - - n_diag=l1+l2-1; - diag=declare_int ( n_diag+2, 3); - for ( a=0; a<= n_diag; a++)diag[a][0]=a; - - A->S=CL->S; - code=seq2aln_pos (A, ns, l_s); - pos =aln2pos_simple ( A,-1, ns, l_s); - - - for (a=0; aorder[l_s[0][a]][0]; - for (b=0; border[l_s[1][b]][0]; - for (c=CL->start_index[s1][s2], d=0; cend_index[s1][s2];c++, d++) - { - entry=extract_entry ( entry,c, CL); - if (s1==entry[SEQ1]) - { - r1=code [s1][entry[R1]]; - r2=code [s2][entry[R2]]; - } - else if ( s2==entry[SEQ1]) - { - r2=code [s2][entry[R1]]; - r1=code [s1][entry[R2]]; - } - - - diag[(r2-r1+l1)][1]+=(CL->get_dp_cost) ( A, pos, ns[0], l_s[0],r1-1, pos,ns[1], l_s[1], r2-1, CL); - - } - } - } - - - sort_int (diag+1, 2, 1,0, n_diag-1); - - free_int (code,-1); - free_int (pos, -1); - return diag; - } - -int * flag_diagonals (int l1, int l2, int **sorted_diag, float T, int window) - { - int a, b, up, low,current_diag,n_diag; - int * slopes; - int *diag_list; - double mean; - double sd; - int use_z_score=1; - - - n_diag=l1+l2-1; - mean=return_mean_int ( sorted_diag, n_diag+1, 1); - - sd =return_sd_int ( sorted_diag, n_diag+1, 1, (int)mean); - - if ( T==0) - { - use_z_score=1; - T=(((double)sorted_diag[n_diag][1]-mean)/sd)/25; - } - - - diag_list=vcalloc (l1+l2+1, sizeof (int)); - slopes=vcalloc ( n_diag+1, sizeof (int)); - - for ( a=n_diag; a>0; a--) - { - current_diag=sorted_diag[a][0]; - - - if ( !use_z_score && sorted_diag[a][1]>T) - { - up=MAX(1,current_diag-window); - low=MIN(n_diag, current_diag+window); - for ( b=up; b<=low; b++)slopes[b]=1; - } - else if (use_z_score && ((double)sorted_diag[a][1]-mean)/sd>T) - { - up=MAX(1,current_diag-window); - low=MIN(n_diag, current_diag+window); - for ( b=up; b<=low; b++)slopes[b]=1; - } - else break; - } - - for ( a=1, b=0; a<=n_diag; a++) - { - b+=slopes[a]; - } - - slopes[1]=1; - slopes[l1+l2-1]=1; - slopes[l2]=1; - for (a=0; a<= (l1+l2-1); a++) - if ( slopes[a]){diag_list[++diag_list[0]]=a;} - - vfree (slopes); - - return diag_list; - } -int * extract_N_diag (int l1, int l2, int **sorted_diag, int n_chosen_diag, int window) - { - int a, b, up, low,current_diag,n_diag; - int * slopes; - int *diag_list; - - - n_diag=l1+l2-1; - - diag_list=vcalloc (l1+l2+1, sizeof (int)); - slopes=vcalloc ( n_diag+1, sizeof (int)); - - - for ( a=n_diag; a>0 && a>(n_diag-n_chosen_diag); a--) - { - current_diag=sorted_diag[a][0]; - up=MAX(1,current_diag-window); - low=MIN(n_diag, current_diag+window); - - for ( b=up; b<=low; b++)slopes[b]=1; - } - - /*flag bottom right*/ - up=MAX(1,1-window);low=MIN(n_diag,1+window); - for ( a=up; a<=low; a++) slopes[a]=1; - - /*flag top left */ - up=MAX(1,(l1+l2-1)-window);low=MIN(n_diag,(l1+l2-1)+window); - for ( a=up; a<=low; a++) slopes[a]=1; - - - /*flag MAIN DIAG SEQ1*/ - up=MAX(1,l1-window);low=MIN(n_diag,l1+window); - for ( a=up; a<=low; a++) slopes[a]=1; - - /*flag MAIN DIAG SEQ2*/ - up=MAX(1,l2-window);low=MIN(n_diag,l2+window); - for ( a=up; a<=low; a++) slopes[a]=1; - - - for (a=0; a<= (l1+l2-1); a++) - if ( slopes[a]){diag_list[++diag_list[0]]=a;} - - vfree (slopes); - return diag_list; - } - - - - -int cfasta_gotoh_pair_wise (Alignment *A,int*ns, int **l_s,Constraint_list *CL) - { -/*TREATMENT OF THE TERMINAL GAP PENALTIES*/ -/*TG_MODE=0---> gop and gep*/ -/*TG_MODE=1---> --- gep*/ -/*TG_MODE=2---> --- ---*/ - - - int maximise; - -/*VARIABLES FOR THE MULTIPLE SEQUENCE ALIGNMENT*/ - int **tot_diag; - - int *diag; - int ktup; - static int n_groups; - static char **group_list; - int score, new_score; - int n_chosen_diag=20; - int step; - int max_n_chosen_diag; - int l1, l2; - /********Prepare Penalties******/ - - - maximise=CL->maximise; - ktup=CL->ktup; - - /********************************/ - - - - - if ( !group_list) - { - - group_list=make_group_aa (&n_groups, CL->matrix_for_aa_group); - } - - l1=strlen (A->seq_al[l_s[0][0]]); - l2=strlen (A->seq_al[l_s[1][0]]); - - if ( !CL->fasta_step) - { - step=MIN(l1,l2); - step=(int) log ((double)MAX(step, 1)); - step=MAX(step, 20); - } - else - { - step=CL->fasta_step; - } - - - tot_diag=evaluate_diagonals ( A, ns, l_s, CL, maximise,n_groups,group_list, ktup); - - - max_n_chosen_diag=strlen (A->seq_al[l_s[0][0]])+strlen (A->seq_al[l_s[1][0]])-1; - /*max_n_chosen_diag=(int)log10((double)(l1+l2))*10;*/ - - n_chosen_diag+=step; - n_chosen_diag=MIN(n_chosen_diag, max_n_chosen_diag); - - - diag=extract_N_diag (strlen (A->seq_al[l_s[0][0]]),strlen (A->seq_al[l_s[1][0]]), tot_diag, n_chosen_diag, 0); - - - score =make_fasta_gotoh_pair_wise ( A, ns, l_s, CL, diag); - - new_score=0; - vfree ( diag); - - - while (new_score!=score && n_chosen_diag< max_n_chosen_diag ) - { - - - score=new_score; - - ungap_sub_aln ( A, ns[0], l_s[0]); - ungap_sub_aln ( A, ns[1], l_s[1]); - - - n_chosen_diag+=step; - n_chosen_diag=MIN(n_chosen_diag, max_n_chosen_diag); - - - diag =extract_N_diag (strlen (A->seq_al[l_s[0][0]]),strlen (A->seq_al[l_s[1][0]]), tot_diag, n_chosen_diag, 0); - new_score=make_fasta_gotoh_pair_wise ( A, ns, l_s, CL, diag); - - vfree ( diag); - - } - - score=new_score; - free_int (tot_diag, -1); - - return score; - } - -int fasta_gotoh_pair_wise (Alignment *A,int*ns, int **l_s,Constraint_list *CL) - { -/*TREATMENT OF THE TERMINAL GAP PENALTIES*/ -/*TG_MODE=0---> gop and gep*/ -/*TG_MODE=1---> --- gep*/ -/*TG_MODE=2---> --- ---*/ - - - int maximise; - -/*VARIABLES FOR THE MULTIPLE SEQUENCE ALIGNMENT*/ - int **tot_diag; - int *diag; - int ktup; - float diagonal_threshold; - static int n_groups; - static char **group_list; - int score; - /********Prepare Penalties******/ - - - maximise=CL->maximise; - ktup=CL->ktup; - diagonal_threshold=CL->diagonal_threshold; - /********************************/ - - - - if ( !group_list) - { - group_list=make_group_aa (&n_groups, CL->matrix_for_aa_group); - } - - - tot_diag=evaluate_diagonals ( A, ns, l_s, CL, maximise,n_groups,group_list, ktup); - - if ( !CL->fasta_step) - { - diag=flag_diagonals (strlen(A->seq_al[l_s[0][0]]),strlen(A->seq_al[l_s[1][0]]), tot_diag,diagonal_threshold,0); - } - - else - { - - diag=extract_N_diag (strlen (A->seq_al[l_s[0][0]]),strlen (A->seq_al[l_s[1][0]]), tot_diag,CL->fasta_step,0); - - } - score=make_fasta_gotoh_pair_wise ( A, ns, l_s, CL, diag); - - free_int (tot_diag, -1); - vfree (diag); - return score; - } -int very_fast_gotoh_pair_wise (Alignment *A,int*ns, int **l_s,Constraint_list *CL) - { -/*TREATMENT OF THE TERMINAL GAP PENALTIES*/ -/*TG_MODE=0---> gop and gep*/ -/*TG_MODE=1---> --- gep*/ -/*TG_MODE=2---> --- ---*/ - - - int maximise; -/*VARIABLES FOR THE MULTIPLE SEQUENCE ALIGNMENT*/ - int **tot_diag; - int *diag; - int ktup; - static int n_groups; - static char **group_list; - int score; - /********Prepare Penalties******/ - - - maximise=CL->maximise; - ktup=CL->ktup; - /********************************/ - - - if ( !group_list) - { - - group_list=make_group_aa (&n_groups, CL->matrix_for_aa_group); - } - - CL->use_fragments=0; - tot_diag=evaluate_diagonals ( A, ns, l_s, CL, maximise,n_groups,group_list, ktup); - - /*Note: 20 diagonals. 5 shadows on each side: tunned on Hom39, 2/2/04 */ - diag=extract_N_diag (strlen (A->seq_al[l_s[0][0]]),strlen (A->seq_al[l_s[1][0]]), tot_diag,20,5); - score=make_fasta_gotoh_pair_wise ( A, ns, l_s, CL, diag); - free_int (tot_diag, -1); - vfree (diag); - return score; - } -int make_fasta_gotoh_pair_wise (Alignment *A,int*ns, int **l_s,Constraint_list *CL, int *diag) - { -/*TREATMENT OF THE TERMINAL GAP PENALTIES*/ -/*TG_MODE=0---> gop and gep*/ -/*TG_MODE=1---> --- gep*/ - /*TG_MODE=2---> --- ---*/ - - - int TG_MODE, gop, l_gop, gep,l_gep, maximise; - -/*VARIABLES FOR THE MULTIPLE SEQUENCE ALIGNMENT*/ - int a, b,c,k, t; - int l1, l2,eg, ch, sub,score=0, last_i=0, last_j=0, i, delta_i, j, pos_j, ala, alb, LEN, n_diag, match1, match2; - int su, in, de, tr; - - int **C, **D, **I, **trace, **pos0, **LD; - int lenal[2], len; - char *buffer, *char_buf; - char **aln, **al; - - /********Prepare Penalties******/ - gop=CL->gop*SCORE_K; - gep=CL->gep*SCORE_K; - TG_MODE=CL->TG_MODE; - maximise=CL->maximise; - - - /********************************/ - - - n_diag=diag[0]; - - - - l1=lenal[0]=strlen (A->seq_al[l_s[0][0]]); - l2=lenal[1]=strlen (A->seq_al[l_s[1][0]]); - - if ( getenv ("DEBUG_TCOFFEE"))fprintf ( stderr, "\n\tNdiag=%d%% ", (diag[0]*100)/(l1+l2)); - - /*diag: - diag[1..n_diag]--> flaged diagonal in order; - diag[0]=0--> first diagonal; - diag[n_diag+1]=l1+l2-1; - */ - - /*numeration of the diagonals strats from the bottom right [1...l1+l2-1]*/ - /*sequence s1 is vertical and seq s2 is horizontal*/ - /*D contains the best Deletion in S2==>comes from diagonal N+1*/ - /*I contains the best insertion in S2=> comes from diagonal N-1*/ - - - - - - C=declare_int (lenal[0]+lenal[1]+1, n_diag+2); - D=declare_int (lenal[0]+lenal[1]+1, n_diag+2); - LD=declare_int (lenal[0]+lenal[1]+1, n_diag+2); - I=declare_int (lenal[0]+lenal[1]+1, n_diag+2); - trace=declare_int (lenal[0]+lenal[1]+1, n_diag+2); - - - al=declare_char (2,lenal[0]+lenal[1]+lenal[1]+1); - - len= MAX(lenal[0],lenal[1])+1; - buffer=vcalloc ( 2*len, sizeof (char)); - char_buf= vcalloc (2*len, sizeof (char)); - - pos0=aln2pos_simple ( A,-1, ns, l_s); - C[0][0]=0; - - t=(TG_MODE==0)?gop:0; - for ( j=1; j<= n_diag; j++) - { - l_gop=(TG_MODE==0)?gop:0; - l_gep=(TG_MODE==2)?0:gep; - - - - if ( (diag[j]-lenal[0])<0 ) - { - trace[0][j]=UNDEFINED; - continue; - } - C[0][j]=(diag[j]-lenal[0])*l_gep +l_gop; - D[0][j]=(diag[j]-lenal[0])*l_gep +l_gop+gop; - } - D[0][j]=D[0][j-1]+gep; - - - t=(TG_MODE==0)?gop:0; - for ( i=1; i<=lenal[0]; i++) - { - l_gop=(TG_MODE==0)?gop:0; - l_gep=(TG_MODE==2)?0:gep; - - C[i][0]=C[i][n_diag+1]=t=t+l_gep; - I[i][0]=D[i][n_diag+1]=t+ gop; - - for ( j=1; j<=n_diag; j++) - { - C[i][j]=C[i][0]; - D[i][j]=I[i][j]=I[i][0]; - } - - for (eg=0, j=1; j<=n_diag; j++) - { - - pos_j=diag[j]-lenal[0]+i; - if (pos_j<=0 || pos_j>l2 ) - { - trace[i][j]=UNDEFINED; - continue; - } - sub=(CL->get_dp_cost) ( A, pos0, ns[0], l_s[0], i-1, pos0, ns[1], l_s[1],pos_j-1, CL ); - - /*1 identify the best insertion in S2:*/ - l_gop=(i==lenal[0])?((TG_MODE==0)?gop:0):gop; - l_gep=(i==lenal[0])?((TG_MODE==2)?0:gep):gep; - len=(j==1)?0:(diag[j]-diag[j-1]); - if ( a_better_than_b(I[i][j-1], C[i][j-1]+l_gop, maximise))eg++; - else eg=1; - I[i][j]=best_of_a_b (I[i][j-1], C[i][j-1]+l_gop, maximise)+len*l_gep; - - /*2 Identify the best deletion in S2*/ - l_gop=(pos_j==lenal[1])?((TG_MODE==0)?gop:0):gop; - l_gep=(pos_j==lenal[1])?((TG_MODE==2)?0:gep):gep; - - len=(j==n_diag)?0:(diag[j+1]-diag[j]); - delta_i=((i-len)>0)?(i-len):0; - - if ( a_better_than_b(D[delta_i][j+1],C[delta_i][j+1]+l_gop, maximise)){LD[i][j]=LD[delta_i][j+1]+1;} - else {LD[i][j]=1;} - D[i][j]=best_of_a_b (D[delta_i][j+1],C[delta_i][j+1]+l_gop, maximise)+len*l_gep; - - - /*Identify the best way*/ - /* - score=C[i][j]=best_int ( 3, maximise, &fop, I[i][j], C[i-1][j]+sub, D[i][j]); - fop-=1; - if ( fop<0)trace[i][j]=fop*eg; - else if ( fop>0 ) {trace[i][j]=fop*LD[i][j];} - else if ( fop==0) trace[i][j]=0; - */ - - su=C[i-1][j]+sub; - in=I[i][j]; - de=D[i][j]; - - /*HERE ("%d %d %d", su, in, de);*/ - if (su>=in && su>=de) - { - score=su; - tr=0; - } - else if (in>=de) - { - score=in; - tr=-eg; - } - else - { - score=de; - tr=LD[i][j]; - } - trace[i][j]=tr; - C[i][j]=score; - - - last_i=i; - last_j=j; - } - } - - - /* - [0][Positive] - ^ ^ - | / - | / - | / - | / - |/ - [Neg]<-------[*] - */ - - - i=last_i; - j=last_j; - - - - ala=alb=0; - match1=match2=0; - while (!(match1==l1 && match2==l2)) - { - - - if ( match1==l1) - { - len=l2-match2; - for ( a=0; a< len; a++) - { - al[0][ala++]=0; - al[1][alb++]=1; - match2++; - } - k=0; - break; - - /*k=-(j-1);*/ - - } - else if ( match2==l2) - { - len=l1-match1; - for ( a=0; a< len; a++) - { - al[0][ala++]=1; - al[1][alb++]=0; - match1++; - } - k=0; - break; - /*k= n_diag-j;*/ - } - else - { - k=trace[i][j]; - } - - - if ( k==0) - { - if ( match2==l2 || match1==l1); - else - { - - al[0][ala++]=1; - al[1][alb++]=1; - i--; - match1++; - match2++; - } - } - else if ( k>0) - { - - len=diag[j+k]-diag[j]; - for ( a=0; adeclared_len<=LEN)A=realloc_aln2 ( A,A->max_n_seq, 2*LEN); - aln=A->seq_al; - - for ( c=0; c< 2; c++) - { - for ( a=0; a< ns[c]; a++) - { - ch=0; - for ( b=0; b< LEN; b++) - { - if (al[c][b]==1) - char_buf[b]=aln[l_s[c][a]][ch++]; - else - char_buf[b]='-'; - } - char_buf[b]='\0'; - sprintf (aln[l_s[c][a]],"%s", char_buf); - } - } - - - A->len_aln=LEN; - A->nseq=ns[0]+ns[1]; - - free_int (pos0, -1); - free_int (C, -1); - free_int (D, -1); - free_int (I, -1); - free_int (trace, -1); - free_int (LD, -1); - free_char ( al, -1); - vfree(buffer); - vfree(char_buf); - - - return score; - } - -int hasch_seq(char *seq, int **hs, int **lu,int ktup,char *alp) - { - static int a[10]; - - int i,j,l,limit,code,flag; - char residue; - - int alp_lu[10000]; - int alp_size; - - alp_size=alp[0]; - alp++; - - - - for ( i=0; i< alp_size; i++) - { - alp_lu[(int)alp[i]]=i; - } - - - - l=strlen (seq); - limit = (int) pow((double)(alp_size+1),(double)ktup); - hs[0]=vcalloc ( l+1,sizeof (int)); - lu[0]=vcalloc ( limit+1, sizeof(int)); - - - if ( l==0)myexit(EXIT_FAILURE); - - for (i=1;i<=ktup;i++) - a[i] = (int) pow((double)(alp_size+1),(double)(i-1)); - - - for(i=1;i<=(l-ktup+1);++i) - { - code=0; - flag=FALSE; - for(j=1;j<=ktup;++j) - { - if (is_gap(seq[i+j-2])){flag=TRUE;break;} - else residue=alp_lu[(int)seq[i+j-2]]; - code+=residue*a[j]; - } - - if ( flag)continue; - ++code; - - if (lu[0][code])hs[0][i]=lu[0][code]; - lu[0][code]=i; - } - return 0; - } - - - -/*********************************************************************/ -/* */ -/* KTUP_DP */ -/* */ -/* */ -/*********************************************************************/ - -/**************Hasch DAta Handling*******************************************************/ - -struct Hasch_data * free_ktup_hasch_data (struct Hasch_data *d); -struct Hasch_data * declare_ktup_hasch_data (struct Hasch_entry *e); -struct Hasch_data * allocate_ktup_hasch_data (struct Hasch_data *e, int action); - -struct Hasch_data -{ - int *list; -}; -typedef struct Hasch_data Hasch_data; -struct Hasch_data * free_ktup_hasch_data (struct Hasch_data *d) -{ - return allocate_ktup_hasch_data (d, FREE); -} -struct Hasch_data * declare_ktup_hasch_data (struct Hasch_entry *e) -{ - e->data=allocate_ktup_hasch_data (NULL,DECLARE); - return e->data; -} - -struct Hasch_data * allocate_ktup_hasch_data (struct Hasch_data *e, int action) -{ - static struct Hasch_data **heap; - static int heap_size, free_heap, a; - - if ( action == 100) - { - fprintf ( stderr, "\nHeap size: %d, Free Heap: %d", heap_size, free_heap); - return NULL; - } - else if ( action==DECLARE) - { - if ( free_heap==0) - { - free_heap=100; - heap_size+=free_heap; - heap=vrealloc (heap,heap_size*sizeof (struct Hasch_entry *)); - for ( a=0; alist=vcalloc ( 10, sizeof (int)); - (heap[a])->list[0]=10; - } - } - return heap[--free_heap]; - } - else if ( action==FREE) - { - heap[free_heap++]=e; - e->list[1]=0; - return NULL; - } - return NULL; -} - - -/**************Hasch DAta Handling*******************************************************/ - -int precomputed_pair_wise (Alignment *A,int*ns, int **l_s,Constraint_list *CL) - { - int l1, l2, a, b, c; - int nid=0, npos=0, id; - int r1, r2, s1, s2; - - l1=strlen(A->seq_al[l_s[0][0]]); - l2=strlen(A->seq_al[l_s[1][0]]); - if (l1!=l2) - { - fprintf ( stderr, "\nERROR: improper use of the function precomputed pairwise:[FATAL:%s]", PROGRAM); - crash (""); - } - else if ( l1==0) - { - A->score_aln=A->score=0; - return 0; - } - - for (npos=0, nid=0, a=0; a< ns[0]; a++) - { - s1=l_s[0][a]; - - for (b=0; b< ns[1]; b++) - { - s2=l_s[1][b]; - for ( c=0; cseq_al[s1][c]; - r2=A->seq_al[s2][c]; - if ( is_gap(r1) || is_gap(r2)); - else - { - npos++; - nid+=(r1==r2); - } - } - } - } - id=(npos==0)?0:((nid*100)/npos); - A->score=A->score_aln=id; - return A->score; - } -int ktup_comparison_str ( char *seq1, char *seq2, const int ktup); -int ktup_comparison_hasch ( char *i_seq1, char *i_seq2, const int ktup); -int ktup_pair_wise (Alignment *A,int*ns, int **l_s,Constraint_list *CL) - { - static char **gl; - static int ng; - char *seq1; - char *seq2; - - int min_len=10; - - - - if ( !gl) - gl=make_group_aa (&ng, "vasiliky"); - - - if ( ns[0]>1)seq1=sub_aln2cons_seq_mat (A, ns[0], l_s[0],"blosum62mt"); - else - { - seq1=vcalloc ( strlen (A->seq_al[l_s[0][0]])+1, sizeof (char)); - sprintf ( seq1, "%s",A->seq_al[l_s[0][0]]); - } - if ( ns[1]>1)seq2=sub_aln2cons_seq_mat (A, ns[1], l_s[1],"blosum62mt"); - else - { - seq2=vcalloc ( strlen (A->seq_al[l_s[1][0]])+1, sizeof (char)); - sprintf ( seq2, "%s",A->seq_al[l_s[1][0]]); - } - - if ( strlen (seq1)score=A->score_aln=aln2sim(B, "idmat"); - free_aln (B); - return A->score; - } - else - { - - string_convert (seq1, ng, gl); - string_convert (seq2, ng, gl); - A->score=A->score_aln=ktup_comparison (seq1,seq2, CL->ktup); - } - - vfree (seq1); vfree (seq2); - return A->score; - } -int ktup_comparison( char *seq2, char *seq1, const int ktup) -{ - return ktup_comparison_hasch ( seq2, seq1, ktup); -} -int ktup_comparison_str ( char *seq2, char *seq1, const int ktup) -{ - int a,l1, l2,c1, c2, end, start; - char *s1, *s2; - double score=0; - int max_dist=-1; - - if ( max_dist==-1)max_dist=MAX((strlen (seq1)),(strlen (seq2))); - l1=strlen (seq1)-ktup; - l2=strlen (seq2); - - - for ( a=0; a< l1; a++) - { - c1=seq1[a+ktup];seq1[a+ktup]='\0'; - s1=seq1+a; - - start=((a-max_dist)<0)?0:a-max_dist; - end=((a+max_dist)>=l2)?l2:a+max_dist; - - c2=seq2[end];seq2[end]='\0'; - s2=seq2+start; - - score+=(strstr(s2, s1)!=NULL)?1:0; - - seq1[a+ktup]=c1; - seq2[end]=c2; - } - score/=(l1==0)?1:l1; - score=((log(0.1+score)-log(0.1))/(log(1.1)-log(0.1))); - - return score*100; - -} -int ktup_comparison_hasch ( char *i_seq1, char *i_seq2, const int ktup) -{ - /*Ktup comparison adapted from Rob Edgar, NAR, vol32, No1, 381, 2004*/ - /*1: hasch sequence 1 - 2: Count the number of seq2 ktup found in seq1 - */ - - char c; - int key; - - static HaschT*H1; - static char *pseq; - Hasch_entry *e; - char *s; - int l, ls; - int p, a, max_dist=-1; - double score=0; - - - - if (!strm (i_seq1, pseq)) - { - if (H1) - { - hdestroy (H1, declare_ktup_hasch_data, free_ktup_hasch_data); - string2key (NULL, NULL); - } - H1=hasch_sequence ( i_seq1, ktup); - vfree (pseq);pseq=vcalloc ( strlen (i_seq1)+1, sizeof (char)); - sprintf ( pseq, "%s", i_seq1); - } - - ls=l=strlen (i_seq2); - s=i_seq2; - p=0; - while (ls>ktup) - { - c=s[ktup];s[ktup]='\0'; - key=string2key (s, NULL); - e=hsearch (H1,key,FIND, declare_ktup_hasch_data, free_ktup_hasch_data); - - if ( e==NULL); - else if ( max_dist==-1)score++; - else - { - for ( a=1; a<=(e->data)->list[1]; a++) - if (FABS((p-(e->data)->list[a]))<=max_dist) - {score++; break;} - } - s[ktup]=c;s++;p++;ls--; - } - score/=(l-ktup); - score=(log(0.1+score)-log(0.1))/(log(1.1)-log(0.1)); - - if ( score>100) score=100; - return (int)(score*100); -} - -HaschT* hasch_sequence ( char *seq1, int ktup) -{ - char c; - int key, offset=0, ls; - HaschT *H; - Hasch_entry *e; - - H=hcreate ( strlen (seq1), declare_ktup_hasch_data, free_ktup_hasch_data); - ls=strlen (seq1); - while (ls>=(ktup)) - { - c=seq1[ktup];seq1[ktup]='\0'; - key=string2key (seq1, NULL); - e=hsearch (H,key,FIND, declare_ktup_hasch_data, free_ktup_hasch_data); - - if (e==NULL) - { - e=hsearch (H,key,ADD,declare_ktup_hasch_data,free_ktup_hasch_data); - (e->data)->list[++(e->data)->list[1]+1]=offset; - } - else - { - if ((e->data)->list[0]==((e->data)->list[1]+2)){(e->data)->list[0]+=10;(e->data)->list=vrealloc ((e->data)->list,(e->data)->list[0]*sizeof (int));} - (e->data)->list[++(e->data)->list[1]+1]=offset; - } - seq1[ktup]=c;seq1++;ls--; - offset++; - } - return H; -} - - - -char *dayhoff_translate (char *seq1) -{ -int l, a, c; -l=strlen (seq1); - for ( a=0; a< l; a++) - { - c=tolower(seq1[a]); - if ( strchr ("agpst", c))seq1[a]='a'; - else if (strchr ("denq", c))seq1[a]='d'; - else if (strchr ("fwy", c))seq1[a]='f'; - else if (strchr ("hkr", c))seq1[a]='h'; - else if (strchr ("ilmv", c))seq1[a]='i'; - } -return seq1; -} - -int ** evaluate_diagonals_with_ktup ( Alignment *A, int *ns, int **l_s, Constraint_list *CL,int maximise,int n_groups, char **group_list, int ktup) -{ - /*Ktup comparison as in Rob Edgar, NAR, vol32, No1, 381, 2004*/ - char character; - int key,ls; - HaschT*H1, *H2; - Hasch_entry *e1, *e2; - char *s, *sb, *seq1, *seq2; - int l1, l2; - int score=0; - int **diag,n_diag, ktup1, ktup2,a,b,c,d, **pos; - int n_dots=0; - - pos=aln2pos_simple ( A,-1, ns, l_s); - - seq1=aln2cons_maj (A, ns[0], l_s[0], n_groups, group_list); - seq2=aln2cons_maj (A, ns[1], l_s[1], n_groups, group_list); - l1=strlen (seq1); - l2=strlen (seq2); - n_diag=l1+l2-1; - - - diag=declare_int (n_diag+2, 3); - for ( a=0; a=(ktup)) - { - character=s[ktup];s[ktup]='\0'; - key=string2key (s, NULL); - e1=hsearch (H1,key,FIND,declare_ktup_hasch_data, free_ktup_hasch_data); - e2=hsearch (H2,key,FIND,declare_ktup_hasch_data, free_ktup_hasch_data); - if ( !e2 || !e1); - else - { - - for (b=2; b<(e1->data)->list[1]+2; b++) - for (c=2; c<(e2->data)->list[1]+2; c++) - { - - ktup1=(e1->data)->list[b]; - ktup2=(e2->data)->list[c]; - diag[(ktup2-ktup1)+l1][2]++; - for (score=0, d=0; dget_dp_cost) ( A, pos, ns[0], l_s[0], ktup1+d, pos,ns[1], l_s[1], ktup2+d, CL); - diag[(ktup2-ktup1)+l1][1]+=score; - n_dots++; - } - (e1->data)->list[1]=(e2->data)->list[1]=0; - } - s[ktup]=character;s++;ls--; - } - - sort_int (diag+1, 2, 1,0,n_diag-1); - - hdestroy (H1,declare_ktup_hasch_data, free_ktup_hasch_data); hdestroy (H2,declare_ktup_hasch_data, free_ktup_hasch_data); - vfree (seq1); vfree (seq2);vfree (sb);free_int (pos, -1); - return diag; -} - /*********************************************************************/ -/* */ -/* OLD FUNCTIONS */ -/* */ -/* */ -/*********************************************************************/ -int ** evaluate_diagonals_with_ktup_1 ( Alignment *A, int *ns, int **l_s, Constraint_list *CL,int maximise,int n_groups, char **group_list, int ktup) - { - /* - Reads in an alignmnent A, with two groups of sequences marked. - 1-Turn each group into a conscensus, using the group list identifier. - -if the group list is left empty original symbols are used - 2-hasch the two sequences - 3-score each diagonal, sort the list and return it (diag_list) - - diag_list: - - */ - - char *seq1, *seq2, *alphabet=NULL; - int a,b,l1, l2, n_ktup,pos_ktup1, pos_ktup2, **pos; - int *hasched_seq1, *hasched_seq2,*lu_seq1,*lu_seq2; - int n_diag, **diag, current_diag, n_dots; - static char *buf; - pos=aln2pos_simple ( A,-1, ns, l_s); - - - seq1=aln2cons_seq (A, ns[0], l_s[0], n_groups, group_list); - seq2=aln2cons_seq (A, ns[1], l_s[1], n_groups, group_list); - - - - - alphabet=get_alphabet (seq1,alphabet); - alphabet=get_alphabet (seq2,alphabet); - - l1=strlen ( seq1); - l2=strlen ( seq2); - - n_diag=l1+l2-1; - diag=declare_int ( n_diag+2, 3); - n_ktup=(int)pow ( (double)alphabet[0]+1, (double)ktup); - - - hasch_seq(seq1, &hasched_seq1, &lu_seq1,ktup, alphabet); - hasch_seq(seq2, &hasched_seq2, &lu_seq2,ktup, alphabet); - - - - - /*EVALUATE THE DIAGONALS*/ - for ( a=0; a<= n_diag; a++)diag[a][0]=a; - for ( n_dots=0,a=1; a<= n_ktup; a++) - { - pos_ktup1=lu_seq1[a]; - while (TRUE) - { - if (!pos_ktup1)break; - pos_ktup2=lu_seq2[a]; - while (pos_ktup2) - { - current_diag=(pos_ktup2-pos_ktup1+l1); - for ( b=0; b< ktup; b++) - { - diag[current_diag][1]+=(CL->get_dp_cost) ( A, pos, ns[0], l_s[0], pos_ktup1+b-1, pos,ns[1], l_s[1], pos_ktup2+b-1, CL); - n_dots++; - - } - diag[current_diag][2]++; - pos_ktup2=hasched_seq2[pos_ktup2]; - } - pos_ktup1=hasched_seq1[pos_ktup1]; - } - - } - if ( n_dots==0) - { - if ( !buf) - { - buf=vcalloc ( 30, sizeof (30)); - sprintf ( buf, "abcdefghijklmnopqrstuvwxyz"); - } - vfree ( hasched_seq1); - vfree ( hasched_seq2); - vfree (lu_seq1); - vfree (lu_seq2); - return evaluate_diagonals_with_ktup ( A,ns,l_s, CL,maximise,1,&buf,1); - } - - - sort_int (diag+1, 2, 1,0, n_diag-1); - vfree (seq1); - vfree (seq2); - vfree (alphabet); - vfree ( hasched_seq1); - vfree ( hasched_seq2); - vfree (lu_seq1); - vfree (lu_seq2); - free_int (pos, -1); - return diag; - } -///////////////////////////////////////////////////////////////// - -Constraint_list * hasch2constraint_list (Sequence*S, Constraint_list *CL) -{ - int a,b,c, n; - SeqHasch h,*H=NULL; - int *entry; - int ktup=2; - - - entry=vcalloc ( CL->entry_len, sizeof (int)); - - for (a=0; anseq; a++) - { - H=seq2hasch (a, S->seq[a],ktup,H); - } - - n=1; - while (H[n]) - { - h=H[n]; - - for (a=0; an-2; a+=2) - { - for (b=a+2; bn; b+=2) - { - - if (h->l[a]==h->l[b])continue; - else - { - for (c=0; cl[a]; - entry[SEQ2]=h->l[b]; - entry[R1]=h->l[a+1]+c; - entry[R2]=h->l[b+1]+c; - entry[WE]=100; - add_entry2list (entry,CL); - } - } - } - } - n++; - } - - return CL; -} -SeqHasch *cleanhasch (SeqHasch *H) -{ - int n=1; - SeqHasch *N; - N=vcalloc (2, sizeof (SeqHasch)); - N[0]=H[0]; - - while (H[n]) - { - (H[n])->n=0; - vfree ((H[n])->l); - (H[n])->l=NULL; - n++; - } - vfree (H); - return N; -} -int hasch2sim (SeqHasch *H, int nseq) -{ - int n=1; - - int a,cs, ps, ns; - int id=0, tot=0; - - while (H[n]) - { - for (ps=-1,ns=0,a=0; a<(H[n])->n; a+=2) - { - //HERE ("%d--[%d %d]",n, (H[n])->l[a], (H[n])->l[a+1]); - cs=(H[n])->l[a]; - if (cs!=ps)ns++; - ps=cs; - } - n++; - if (ns==nseq)id++; - tot++; - } - - return (id*MAXID)/tot; -} -SeqHasch * seq2hasch (int i,char *seq, int ktup, SeqHasch *H) -{ - int a,b,l, n=0; - SeqHasch h; - - - if (!H) - { - H=vcalloc (2, sizeof (SeqHasch)); - H[0]=vcalloc (1, sizeof (hseq)); - n=1; - } - else - { - n=0; - while (H[++n]); - } - - l=strlen (seq); - for (a=0; ahl[r]) h->hl[r]=vcalloc (1, sizeof (hseq)); - h=h->hl[r]; - } - if (!h->l) - { - - h->n=2; - h->l=vcalloc (2, sizeof (int)); - H=vrealloc (H,(n+2)*sizeof (SeqHasch)); - H[n]=h; - n++; - } - else - { - h->n+=2; - h->l=vrealloc (h->l, (h->n)*sizeof (int)); - } - - h->l[h->n-2]=i; - h->l[h->n-1]=a; - } - return H; -} - -/*********************************COPYRIGHT NOTICE**********************************/ -/*© Centro de Regulacio Genomica */ -/*and */ -/*Cedric Notredame */ -/*Tue Oct 27 10:12:26 WEST 2009. */ -/*All rights reserved.*/ -/*This file is part of T-COFFEE.*/ -/**/ -/* T-COFFEE is free software; you can redistribute it and/or modify*/ -/* it under the terms of the GNU General Public License as published by*/ -/* the Free Software Foundation; either version 2 of the License, or*/ -/* (at your option) any later version.*/ -/**/ -/* T-COFFEE is distributed in the hope that it will be useful,*/ -/* but WITHOUT ANY WARRANTY; without even the implied warranty of*/ -/* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the*/ -/* GNU General Public License for more details.*/ -/**/ -/* You should have received a copy of the GNU General Public License*/ -/* along with Foobar; if not, write to the Free Software*/ -/* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA*/ -/*............................................... |*/ -/* If you need some more information*/ -/* cedric.notredame@europe.com*/ -/*............................................... |*/ -/**/ -/**/ -/* */ -/*********************************COPYRIGHT NOTICE**********************************/ diff --git a/binaries/src/tcoffee/t_coffee_source/util_dp_fasta_sw.c b/binaries/src/tcoffee/t_coffee_source/util_dp_fasta_sw.c deleted file mode 100644 index bf8507e..0000000 --- a/binaries/src/tcoffee/t_coffee_source/util_dp_fasta_sw.c +++ /dev/null @@ -1,64 +0,0 @@ -#include -#include -#include -#include - - -#include "io_lib_header.h" -#include "util_lib_header.h" -#include "define_header.h" - -#include "dp_lib_header.h" - -int cfasta_gotoh_pair_wise_sw (Alignment *A,int*ns, int **l_s,Constraint_list *CL) - { - fprintf ( stderr, "\ncfasta_gotoh_pair_wise_sw not implemented yet\n"); - myexit (EXIT_FAILURE); - return 0; - - } - -int fasta_gotoh_pair_wise_sw (Alignment *A,int*ns, int **l_s,Constraint_list *CL) - { - fprintf ( stderr, "\nfasta_gotoh_pair_wise_sw not implemented yet\n"); - myexit (EXIT_FAILURE); - return 0; - } - -int make_fasta_gotoh_pair_wise_sw (Alignment *A,int*ns, int **l_s,Constraint_list *CL, int *diag) - { - fprintf ( stderr, "\nmake_fasta_gotoh_pair_wise_sw not implemented yet\n"); - myexit (EXIT_FAILURE); - return 0; - } - - -/*********************************COPYRIGHT NOTICE**********************************/ -/*© Centro de Regulacio Genomica */ -/*and */ -/*Cedric Notredame */ -/*Tue Oct 27 10:12:26 WEST 2009. */ -/*All rights reserved.*/ -/*This file is part of T-COFFEE.*/ -/**/ -/* T-COFFEE is free software; you can redistribute it and/or modify*/ -/* it under the terms of the GNU General Public License as published by*/ -/* the Free Software Foundation; either version 2 of the License, or*/ -/* (at your option) any later version.*/ -/**/ -/* T-COFFEE is distributed in the hope that it will be useful,*/ -/* but WITHOUT ANY WARRANTY; without even the implied warranty of*/ -/* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the*/ -/* GNU General Public License for more details.*/ -/**/ -/* You should have received a copy of the GNU General Public License*/ -/* along with Foobar; if not, write to the Free Software*/ -/* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA*/ -/*............................................... |*/ -/* If you need some more information*/ -/* cedric.notredame@europe.com*/ -/*............................................... |*/ -/**/ -/**/ -/* */ -/*********************************COPYRIGHT NOTICE**********************************/ diff --git a/binaries/src/tcoffee/t_coffee_source/util_dp_generic_fasta_nw.c b/binaries/src/tcoffee/t_coffee_source/util_dp_generic_fasta_nw.c deleted file mode 100644 index 06781d9..0000000 --- a/binaries/src/tcoffee/t_coffee_source/util_dp_generic_fasta_nw.c +++ /dev/null @@ -1,378 +0,0 @@ -#include -#include -#include -#include -#include - - -#include "io_lib_header.h" -#include "util_lib_header.h" -#include "define_header.h" -#include "dp_lib_header.h" - - - -/*********************************************************************************/ -/* */ -/* */ -/* Generic DP */ -/* */ -/* */ -/*********************************************************************************/ - - -Dp_Result * make_fast_generic_dp_pair_wise (Alignment *A, int*ns, int **l_s,Dp_Model *M) - { - - /*SIZE VARIABLES*/ - - int ndiag; - int l0, l1, len_al,len_diag; - static int max_len_al, max_len_diag; - static int mI, mJ; - /*Evaluation*/ - int **pos0; - - - - /*DP VARIABLES*/ - static int *Mat, *LMat, *trace; - int a, i, j,l; - int state, cur_state, prev_state; - int pos_i=0, pos_j=0; - int last_i=0, last_j=0; - int prev_i, prev_j; - int len_i, len_j, len; - int t, e, em; - - int prev_score; - int pc, best_pc; - - int *prev; - int model_index; - /*TRACEBACK*/ - Dp_Result *DPR; - int k=0, next_k; - int new_i, new_j; - - - /*Cleqanning CALL*/ - if ( A==NULL) - { - max_len_al=0; max_len_diag=0;mI=0;mJ=0; - vfree (Mat); vfree(LMat);vfree(trace); - Mat=trace=LMat=NULL; - return NULL; - } - - ndiag=M->diag[0]; - - l0=strlen (A->seq_al[l_s[0][0]]); - l1=strlen (A->seq_al[l_s[1][0]]); - len_al =l0+l1+1; - len_diag=ndiag+4; - - - - if ( (len_al>max_len_al || len_diag>max_len_diag)) - { - - vfree (Mat); - vfree (LMat); - vfree(trace); - max_len_diag=max_len_al=0; - } - - if (max_len_al==0) - { - max_len_al=len_al; - max_len_diag=len_diag; - mI=max_len_al*max_len_diag; - mJ=max_len_diag; - - - Mat =vcalloc ( M->nstate*max_len_al*max_len_diag, sizeof (int)); - LMat =vcalloc ( M->nstate*max_len_al*max_len_diag, sizeof (int)); - trace=vcalloc ( M->nstate*max_len_al*max_len_diag, sizeof (int)); - - } - - prev=vcalloc ( M->nstate, sizeof (int)); - DPR=vcalloc ( 1, sizeof ( Dp_Result)); - DPR->traceback=vcalloc (max_len_al, sizeof (int)); - -/*PREPARE THE EVALUATION*/ - - - pos0=aln2pos_simple ( A,-1, ns, l_s); - -/*INITIALIZATION OF THE DP MATRICES*/ - - for (i=0; i<=l0;i++) - { - for (j=0; j<=ndiag+1;j++) - { - for ( state=0; statenstate; state++) - { - Mat [state*mI+i*mJ+j]=UNDEFINED; - LMat [state*mI+i*mJ+j]=UNDEFINED; - trace [state*mI+i*mJ+j]=M->START; - } - } - } - - M->diag[0]=1; - M->diag[ndiag+1]=M->diag[ndiag]; - - for (i=0; i<=l0; i++) - for ( j=0; j<=ndiag+1; j++) - { - pos_j=M->diag[j]-l0+i; - pos_i=i; - if (!(pos_j==0 || pos_i==0))continue; - if ( pos_j<0 || pos_i<0)continue; - if ( pos_i==0 && pos_j==0) - { - for ( a=0; a< M->nstate; a++) - { - Mat [a*mI+i*mJ+j]=0; - LMat [a*mI+i*mJ+j]=0; - trace[a*mI+i*mJ+j]=M->START; - } - } - else - { - l=MAX(pos_i,pos_j); - for ( state=0; stateSTART; state++) - { - if (pos_j==0 && M->model_properties[state][M->LEN_J])continue; - if (pos_i==0 && M->model_properties[state][M->LEN_I])continue; - - - t=M->model[M->START][state]; - e=((M->model_emission_function)[state][M->START_EMISSION])(A, pos0, ns[0], l_s[0], pos_i-1, pos0, ns[1], l_s[1],pos_j-1,M->CL); - /*e=((M->get_dp_cost_list)[M->model_properties[state][M->START_EMISSION]])(A, pos0, ns[0], l_s[0], pos_i-1, pos0, ns[1], l_s[1],pos_j-1,M->CL);*/ - - Mat [state*mI+i*mJ+j]=t+e*l; - LMat [state*mI+i*mJ+j]=l; - trace [state*mI+i*mJ+j]=M->START; - } - } - } - -/*DYNAMIC PROGRAMMING: Forward Pass*/ - - /*Diagonals: - M->diag[0]=Number of diagonals being considered - M->diag[1]=First diagonal being considered - Diagonals are numbered 1...L0+l1-1 - 1 is the bottom-left diag - */ - - for (i=1; i<=l0;i++) - { - for (j=1; j<=ndiag;j++) - { - pos_j=M->diag[j]-l0+i; - pos_i=i; - - if (pos_j<=0 || pos_j>l1 )continue; - last_i=i; - last_j=j; - - for (cur_state=0; cur_stateSTART; cur_state++) - { - if (M->model_properties[cur_state][M->DELTA_J]) - { - prev_j=j+M->model_properties[cur_state][M->DELTA_J]; - prev_i=i+M->model_properties[cur_state][M->DELTA_I]*FABS((M->diag[j]-M->diag[prev_j])); - - } - else - { - prev_j=j; - prev_i=i+M->model_properties[cur_state][M->DELTA_I]; - } - - - len_i=FABS((i-prev_i)); - len_j=FABS((M->diag[prev_j]-M->diag[j])); - len=MAX(len_i, len_j); - - em=((M->model_emission_function[cur_state][M->EMISSION]))(A, pos0, ns[0], l_s[0], pos_i-1, pos0, ns[1], l_s[1],pos_j-1,M->CL); - /*em=((M->get_dp_cost_list)[M->model_properties[cur_state][M->EMISSION]])(A, pos0, ns[0], l_s[0], pos_i-1, pos0, ns[1], l_s[1],pos_j-1,M->CL);*/ - - for (pc=best_pc=UNDEFINED, model_index=1; model_index<=M->bounded_model[cur_state][0]; model_index++) - { - prev_state=M->bounded_model[cur_state][model_index]; - - if(prev_i<0 || prev_j<0 ||prev_i>l0 || prev_j>ndiag || len==UNDEFINED)prev_score=UNDEFINED; - else prev_score=Mat[prev_state*mI+prev_i*mJ+prev_j]; - t=M->model[prev_state][cur_state]; - e=em; - - if (prev_score==UNDEFINED || len==UNDEFINED)e=UNDEFINED; - else if (len==0|| e==UNDEFINED)e=UNDEFINED; - else e=e*len; - - if (is_defined_int(3,prev_score,e, t)) - { - pc=prev_score+t+e; - } - else pc=UNDEFINED; - - /*Identify the best previous score*/ - if (best_pc==UNDEFINED || (pc>best_pc && pc!=UNDEFINED)) - { - prev[cur_state]=prev_state; - best_pc=pc; - - } - } - - Mat[cur_state*mI+i*mJ+j]=best_pc; - - - - if ( Mat[cur_state*mI+i*mJ+j]==UNDEFINED) - { - LMat[cur_state*mI+i*mJ+j]=UNDEFINED; - trace[cur_state*mI+i*mJ+j]=UNDEFINED; - continue; - } - - else if ( prev[cur_state]==cur_state) - { - LMat [cur_state*mI+i*mJ+j]= LMat [cur_state*mI+prev_i*mJ+prev_j]+len; - trace[cur_state*mI+i*mJ+j]= trace[cur_state*mI+prev_i*mJ+prev_j]; - } - else - { - LMat[cur_state*mI+i*mJ+j]=len; - trace[cur_state*mI+i*mJ+j]=prev[cur_state]; - } - } - } - } - - - i=last_i; - j=last_j; - for (pc=best_pc=UNDEFINED, state=0; stateSTART; state++) - { - t=M->model[state][M->END]; - e=( M->model_emission_function[state][M->TERM_EMISSION])(A, pos0, ns[0], l_s[0], pos_i-1, pos0, ns[1], l_s[1],pos_j-1,M->CL); - - /*e=((M->get_dp_cost_list)[M->model_properties[state][M->TERM_EMISSION]])(A, pos0, ns[0], l_s[0], pos_i-1, pos0, ns[1], l_s[1],pos_j-1,M->CL);*/ - - l=LMat[state*mI+i*mJ+j]; - - - if (!is_defined_int(4,t,e,Mat[state*mI+i*mJ+j],l))Mat[state*mI+i*mJ+j]=UNDEFINED; - else Mat[state*mI+i*mJ+j]+=t+e*(l); - pc=Mat[state*mI+i*mJ+j]; - - - if (best_pc==UNDEFINED || (pc>best_pc && pc!=UNDEFINED)) - { - k=state; - best_pc=pc; - } - } - DPR->score=best_pc; - -/*TRACEBACK*/ - - - e=0; - len=0; - - - while (k!=M->START) - { - next_k=trace[k*mI+i*mJ+j]; - - new_i=i; - new_j=j; - l=LMat[k*mI+i*mJ+j]; - for (a=0; a< l; a++) - { - DPR->traceback[len++]=k; - } - new_i+=M->model_properties[k][M->DELTA_I]*l; - - - if ( M->model_properties[k][M->DELTA_J]) - { - while ( next_k!=M->START && FABS((M->diag[j]-M->diag[new_j]))!=l)new_j+=M->model_properties[k][M->DELTA_J]; - } - - i=new_i; - j=new_j; - k=next_k; - } - DPR->len=len; - DPR->traceback[DPR->len++]=M->START; - invert_list_int (DPR->traceback,DPR->len); - DPR->traceback[DPR->len]=M->END; - - vfree (prev); - free_int (pos0, -1); - return DPR; - - - } - - -Constraint_list* free_dp_model (Dp_Model *D) - { - Constraint_list *CL; - - if ( !D)return NULL; - CL=D->CL; - vfree (D->diag); - free_int (D->model, -1); - free_int (D->model_properties, -1); - free_int (D->bounded_model, -1); - - vfree (D); - return CL; - } - -Dp_Result * free_dp_result (Dp_Result *D ) - { - if (!D) return NULL; - vfree ( D->traceback); - vfree (D); - return NULL; - } - -/*********************************COPYRIGHT NOTICE**********************************/ -/*© Centro de Regulacio Genomica */ -/*and */ -/*Cedric Notredame */ -/*Tue Oct 27 10:12:26 WEST 2009. */ -/*All rights reserved.*/ -/*This file is part of T-COFFEE.*/ -/**/ -/* T-COFFEE is free software; you can redistribute it and/or modify*/ -/* it under the terms of the GNU General Public License as published by*/ -/* the Free Software Foundation; either version 2 of the License, or*/ -/* (at your option) any later version.*/ -/**/ -/* T-COFFEE is distributed in the hope that it will be useful,*/ -/* but WITHOUT ANY WARRANTY; without even the implied warranty of*/ -/* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the*/ -/* GNU General Public License for more details.*/ -/**/ -/* You should have received a copy of the GNU General Public License*/ -/* along with Foobar; if not, write to the Free Software*/ -/* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA*/ -/*............................................... |*/ -/* If you need some more information*/ -/* cedric.notredame@europe.com*/ -/*............................................... |*/ -/**/ -/**/ -/* */ -/*********************************COPYRIGHT NOTICE**********************************/ diff --git a/binaries/src/tcoffee/t_coffee_source/util_dp_gotoh_nw.c b/binaries/src/tcoffee/t_coffee_source/util_dp_gotoh_nw.c deleted file mode 100644 index e56a574..0000000 --- a/binaries/src/tcoffee/t_coffee_source/util_dp_gotoh_nw.c +++ /dev/null @@ -1,3059 +0,0 @@ -#include -#include -#include -#include -#include - -#include "io_lib_header.h" -#include "util_lib_header.h" -#include "define_header.h" -#include "dp_lib_header.h" - - -int cl2pair_list_ecl ( Alignment *A, int *ns, int **ls, Constraint_list *CL, int ***list_in, int *n_in); - - - -/*******************************************************************************/ -/* idscore_pairseq: measure the % id without delivering thze aln*/ -/* */ -/* makes DP between the the ns[0] sequences and the ns[1] */ -/* */ -/* for MODE, see the function get_dp_cost */ -/*******************************************************************************/ -int idscore_pairseq (char *s1, char *s2, int gop, int gep, int **m, char *comp_mode) -{ - int **I, **D, **M, *P; - int i, j,l1, l2, l,score, id, igop,match; - - - l1=strlen (s1); l2=strlen (s2); - lower_string (s1); lower_string (s2); - - I=declare_int (6,l2+1);D=declare_int (6,l2+1);M=declare_int (6,l2+1); - for (j=0; j<=l2; j++) - { - D[0][j]=gep*j;M[0][j]=2*gep*j;D[4][j]=0; - } - - for (i=1; i<=l1; i++) - { - - I[1][0]=i*gep; - M[1][0]=2*i*gep; - - for (j=1; j<=l2; j++) - { - score=m[s1[i-1]-'a'][s2[j-1]-'a']; - id=(s1[i-1]==s2[j-1])?1:0; - - igop=(i==l1 || j==l2)?0:gop; - - if ((D[0][j]+gep)>(M[0][j]+igop+gep)) {D[1][j]=D[0][j]+gep; D[3][j]=D[2][j]; D[5][j]=D[4][j];} - else {D[1][j]=M[0][j]+igop+gep; D[3][j]=M[2][j]; D[5][j]=M[4][j];} - - if ( (I[1][j-1]+gep)>(M[1][j-1]+igop+gep)){I[1][j]=I[1][j-1]+gep; I[3][j]=I[3][j-1]; I[5][j]=I[5][j-1];} - else {I[1][j]=M[1][j-1]+igop+gep; I[3][j]=M[3][j-1]; I[5][j]=M[5][j-1];} - - match=M[0][j-1]+score; - if (I[1][j]>match && I[1][j]>D[1][j]) {M[1][j]=I[1][j] ; M[3][j]=I[3][j]; M[5][j]=I[5][j];} - else if (D[1][j]>match) {M[1][j]=D[1][j] ; M[3][j]=D[3][j]; M[5][j]=D[5][j];} - else {M[1][j]=match ; M[3][j]=M[2][j-1]+id; M[5][j]=M[4][j-1]+1;} - } - P=I[0]; I[0]=I[1]; I[1]=P; - P=I[2]; I[2]=I[3]; I[3]=P; - P=I[4]; I[4]=I[5]; I[5]=P; - - P=D[0]; D[0]=D[1]; D[1]=P; - P=D[2]; D[2]=D[3]; D[3]=P; - P=D[4]; D[4]=D[5]; D[5]=P; - - P=M[0]; M[0]=M[1]; M[1]=P; - P=M[2]; M[2]=M[3]; M[3]=P; - P=M[4]; M[4]=M[5]; M[5]=P; - } - - - - - if ( strstr (comp_mode, "sim2")) - { - l=MIN(l1,l2); - score=(l==0)?0:(M[2][l2]*100)/l; - } - else if ( strstr (comp_mode, "sim3")) - { - l=MAX(l1,l2); - score=(l==0)?0:(M[2][l2]*100)/l; - } - else if ( strstr (comp_mode, "cov")) - { - l=MAX(l1,l2); - score=(l==0)?0:((M[4][l2]*100)/l); - } - else - { - //default: simple sim - l=M[4][l2]; - score=(l==0)?0:(M[2][l2]*100)/l; - } - - free_int (I, -1); - free_int (D, -1); - free_int (M, -1); - - return score; -} - -int test_pair_wise (Alignment *A, int *ns, int **l_s, Constraint_list *CL) -{ - int a,l0, l1, n; - char buf[VERY_LONG_STRING]; - char *gap, *seq; - - l0=strlen (A->seq_al[l_s[0][0]]); - l1=strlen (A->seq_al[l_s[1][0]]); - - n=(l0<5)?l0/2:5; - gap=generate_null(l1-n); - for (a=0;aseq_al[l_s[0][a]]; - sprintf (buf, "%s%s",seq, gap); - sprintf (seq, "%s", buf); - } - vfree (gap); - gap=generate_null(l0-n); - - for (a=0;aseq_al[l_s[1][a]]; - sprintf (buf, "%s%s",seq, gap); - sprintf (seq, "%s", buf); - } - vfree(gap); - - - A->len_aln=strlen (A->seq_al[l_s[0][0]]); - A->score=A->score_aln=100; - return 100; -} - -int idscore_pair_wise (Alignment *A,int*ns, int **l_s,Constraint_list *CL) -{ - - A->score_aln=A->score=idscore_pairseq (A->seq_al[l_s[0][0]], A->seq_al[l_s[1][0]], CL->gop, CL->gep,CL->M, "sim3"); - return A->score_aln; -} -int dp_max (int *trace, int n, ...); -int dp_max (int *trace, int n, ...) -{ - va_list ap; - int a, v, t, best_v=0; - - va_start (ap, n); - for (a=0; a< n; a++) - { - t=va_arg (ap, int); - v=va_arg (ap, int); - - if (a==0) - { - best_v=v; - trace[0]=t; - } - else - { - if (v>best_v) - { - best_v=v; - trace[0]=t; - } - } - } - - return best_v; -} -int is_tied (int *trace, int n, ...); -int is_tied(int *trace, int n, ...) -{ - va_list ap; - int a, v, t, best_v=0; - int nties=0; - - va_start (ap, n); - for (a=0; a< n; a++) - { - t=va_arg (ap, int); - v=va_arg (ap, int); - - if (a==0) - { - best_v=v; - trace[0]=t; - } - else - { - if (v>best_v) - { - best_v=v; - trace[0]=t; - } - } - } - va_end(ap); - va_start (ap,n); - for (a=0; aseq_al[l_s[0][0]]); - l2=strlen (A->seq_al[l_s[1][0]]); - - n=1; - M1=n++;D1=n++;I1=n++;M2=n++; - t=declare_arrayN(3, sizeof (int),n, l1+1, l2+1); - m=declare_arrayN(3, sizeof (int),n, l1+1, l2+1); - - - gop=CL->gop*SCORE_K; - gep=CL->gep*SCORE_K; - tgop=gop; - unmatch=gep; - - pos0=aln2pos_simple ( A,-1, ns, l_s); - - - for (j=1; j<=l2; j++) - { - m[D1][0][j]=gep*j; - - m[M1][0][j]=2*gep*j; - m[M2][0][j]=4*gep*j; - } - - - for (i=1; i<=l1; i++) - { - m[I1][i][0]=i*gep; - m[M2][i][0]=4*i*gep; - m[M1][i][0]=2*i*gep; - - for ( j=1; j<=l2; j++) - { - rgop=(i==l1 || j==1)?0:gop; - rgop=gop; - sub=(CL->get_dp_cost) (A, pos0, ns[0], l_s[0], i-1, pos0, ns[1], l_s[1],j-1,CL); - m[M1][i][j]=dp_max (&trace,4,M1,m[M1][i-1][j-1],I1, m[I1][i-1][j-1],D1,m[D1][i-1][j-1],M2,m[M2][i-1][j-1])+sub; - t[M1][i][j]=trace; - - m[D1][i][j]=dp_max (&trace,3, M1,m[M1][i][j-1]+rgop,D1, m[D1][i][j-1]+gep, M2, m[M2][i][j-1]); - t[D1][i][j]=trace; - - m[I1][i][j]=dp_max (&trace,3, M1,m[M1][i-1][j]+rgop, I1, m[I1][i-1][j]+gep, M2, m[M2][i-1][j]); - t[I1][i][j]=trace; - - m[M2][i][j]=dp_max (&trace,4,M1,m[M1][i-1][j-1]+tgop,I1, m[I1][i-1][j-1]+tgop,D1,m[D1][i-1][j-1]+tgop,M2,m[M2][i-1][j-1])+unmatch; - t[M2][i][j]=trace; - - } - - } - score=dp_max (&trace,4, M1,m[M1][l1][l2],D1,m[D1][l1][l2],I1, m[I1][l1][l2],M2,m[M2][l1][l2]); - LEN=0;i=l1;j=l2; - al=declare_char (2, l1+l2+1); - - - trace=t[trace][i][j]; - while (!(i==0 &&j==0)) - { - - ntrace=t[trace][i][j]; - if (i==0) - { - al[0][LEN]=0; - al[1][LEN]=1; - j--; - LEN++; - } - else if ( j==0) - { - al[0][LEN]=1; - al[1][LEN]=0; - i--; - LEN++; - } - else if ( trace==M1) - { - al[0][LEN]=1; - al[1][LEN]=1; - i--; j--; - LEN++; - } - else if ( trace==M2) - { - al[0][LEN]=1; - al[1][LEN]=0; - LEN++; - - al[0][LEN]=0; - al[1][LEN]=1; - LEN++; - - i--; j--; - - } - else if ( trace==D1) - { - al[0][LEN]=0; - al[1][LEN]=1; - j--; - LEN++; - } - else if ( trace == I1) - { - al[0][LEN]=1; - al[1][LEN]=0; - i--; - LEN++; - } - trace=ntrace; - - } - - invert_list_char ( al[0], LEN); - invert_list_char ( al[1], LEN); - if ( A->declared_len<=LEN)A=realloc_aln2 ( A,A->max_n_seq, 2*LEN); - - aln=A->seq_al; - char_buf= vcalloc (LEN+1, sizeof (char)); - for ( c=0; c< 2; c++) - { - for ( a=0; a< ns[c]; a++) - { - int ch=0; - for ( b=0; b< LEN; b++) - { - if (al[c][b]==1) - char_buf[b]=aln[l_s[c][a]][ch++]; - else - char_buf[b]='-'; - } - char_buf[b]='\0'; - sprintf (aln[l_s[c][a]],"%s", char_buf); - } - } - - - A->len_aln=LEN; - A->nseq=ns[0]+ns[1]; - free_arrayN((void *)m, 3); - free_arrayN((void *)t, 3); - vfree (char_buf); - free_char (al, -1); - return score; -} - -int ** aln2local_penalties (Alignment *A, int n, int *ls, Constraint_list *CL, int **lg); -int ** aln2local_penalties (Alignment *A, int n, int *ls, Constraint_list *CL, int **lg) -{ - //adapted from gap_count in MAFFT V 5.5 - int p,s,l, c1, c2; - int gep,gop; - int open=3, close=4, gap=5; - - gop=CL->gop*SCORE_K; - gep=CL->gep*SCORE_K; - - l=strlen (A->seq_al[ls[0]]); - - if (!lg) - { - lg=declare_int (6, l); - } - - if ( read_array_size_new (lg[0])seq_al[ls[s]][p]; - - if (c1!='-' && c2=='-')lg[open][p]++; - if (c1=='-' && c2!='-')lg[close][p]++; - if ( c1=='-')lg[gap][p]++; - c1=c2; - } - } - - for (p=0; p gives low quality results - lg[GEP][p]=gep;//(1-((float)lg[gap][p]/(float)n))*gep; - lg[open][p]=lg[close][p]=lg[gap][p]=0; - - } - - return lg; -} -int free_gotoh_pair_wise_lgp() -{ - return gotoh_pair_wise_lgp (NULL, NULL, NULL, NULL); -} -int gotoh_pair_wise_lgp ( Alignment *A, int *ns, int **l_s, Constraint_list *CL) -{ - int i,j, li, lj, n, sub, trace,ntrace, a, b, c, score; - int I, J; - int M1, I1, D1, LEN; - char **al, *char_buf, **aln; - int **pos0, **pos; - Alignment *Aln; - - int gop[2], gcp[2], gep[2]; - static int ***gpl, ***t, ***m; - static int max_li, max_lj; - - - - //gotoh_pair_wise ( A, ns, l_s,CL); - //ungap_sub_aln (A, ns[0], l_s[0]); - //ungap_sub_aln (A, ns[1], l_s[1]); - - if (!A) - { - free_arrayN((void**)gpl, 3); - free_arrayN((void**)t, 3); - free_arrayN((void**)m, 3); - max_li=max_lj=0; - return 0; - } - - I=0;J=1; - - - li=strlen (A->seq_al[l_s[I][0]]); - lj=strlen (A->seq_al[l_s[J][0]]); - - if ( !gpl)gpl=vcalloc ( 2, sizeof (int**)); - gpl[I]=aln2local_penalties (A,ns[I], l_s[I], CL,gpl[I]); - gpl[J]=aln2local_penalties (A,ns[J], l_s[J], CL,gpl[J]); - - - n=1; - M1=n++;D1=n++;I1=n++; - - if ( li>max_li ||lj>max_lj ) - { - free_arrayN((void**)t, 3); - free_arrayN((void**)m, 3); - - - max_li=li; - max_lj=lj; - t=declare_arrayN(3, sizeof (int),n, max_li+1, max_lj+1); - m=declare_arrayN(3, sizeof (int),n, max_li+1, max_lj+1); - - } - pos0=aln2pos_simple ( A,-1, ns, l_s); - - //Compatibility with Macro - Aln=A; - pos=pos0; - - for (j=1; j<=lj; j++) - { - gep[J]=gpl[J][GEP][j-1]; - m[D1][0][j]=gep[J]*j; - m[I1][0][j]=m[D1][0][j]-1; - m[M1][0][j]=m[D1][0][j]-1; - } - - //D1: gap in sequence I - //I1: gap in sequence J - - - for (i=1; i<=li; i++) - { - gep[I]=gpl[I][GEP][i-1]; - gop[I]=gpl[I][GOP][i-1]; - gcp[I]=gpl[I][GCP][i-1]; - - m[I1][i][0]=i*gep[I]; - m[D1][i][0]= m[I1][i][0]-1; - m[M1][i][0]= m[I1][i][0]-1; - - - - gop[I]=(i==1 || i==li )?0:gop[I]; - gcp[I]=(i==1 || i==li )?0:gcp[I]; - - - for ( j=1; j<=lj; j++) - { - - gep[J]=gpl[J][GEP][j-1]; - gop[J]=gpl[J][GOP][j-1]; - gcp[J]=gpl[J][GCP][j-1]; - - //gep[J]=gep[I]=(gep[J]+gep[I])/2; - //gop[J]=gop[I]=(gop[J]+gop[I])/2; - //gcp[J]=gcp[I]=(gcp[J]+gcp[I])/2; - - - gop[J]=(j==1 || j==lj )?0:gop[J]; - gcp[J]=(j==1 || j==lj )?0:gcp[J]; - - - //sub=(CL->get_dp_cost) (A, pos0, ns[0], l_s[0], i-1, pos0, ns[1], l_s[1],j-1,CL); - sub=TC_SCORE((i-1), (j-1)); - - m[M1][i][j]=dp_max (&trace,3,M1,m[M1][i-1][j-1],I1, m[I1][i-1][j-1]+gcp[I],D1,m[D1][i-1][j-1]+gcp[J])+sub; - t[M1][i][j]=trace; - - - m[D1][i][j]=dp_max (&trace,2, M1,m[M1][i][j-1]+gop[J]+gep[J],D1, m[D1][i][j-1]+gep[J]); - t[D1][i][j]=trace; - - - m[I1][i][j]=dp_max (&trace,2, M1,m[M1][i-1][j]+gop[I]+gep[I],I1, m[I1][i-1][j]+gep[I]); - t[I1][i][j]=trace; - - } - - } - score=dp_max (&trace,3, M1,m[M1][li][lj],D1,m[D1][li][lj],I1, m[I1][li][lj]); - - LEN=0;i=li;j=lj; - al=declare_char (2, li+lj); - - - trace=t[trace][i][j]; - while (!(i==0 &&j==0)) - { - - ntrace=t[trace][i][j]; - - - if (i==0) - { - al[0][LEN]=0; - al[1][LEN]=1; - j--; - LEN++; - } - else if ( j==0) - { - al[0][LEN]=1; - al[1][LEN]=0; - i--; - LEN++; - } - else if ( trace==M1) - { - al[0][LEN]=1; - al[1][LEN]=1; - i--; j--; - LEN++; - } - else if ( trace==D1) - { - al[0][LEN]=0; - al[1][LEN]=1; - j--; - LEN++; - } - else if ( trace == I1) - { - al[0][LEN]=1; - al[1][LEN]=0; - i--; - LEN++; - } - trace=ntrace; - - } - - invert_list_char ( al[0], LEN); - invert_list_char ( al[1], LEN); - if ( A->declared_len<=LEN)A=realloc_aln ( A,2*LEN+1); - - aln=A->seq_al; - char_buf= vcalloc (LEN+1, sizeof (char)); - for ( c=0; c< 2; c++) - { - for ( a=0; a< ns[c]; a++) - { - int ch=0; - for ( b=0; b< LEN; b++) - { - if (al[c][b]==1) - char_buf[b]=aln[l_s[c][a]][ch++]; - else - char_buf[b]='-'; - } - char_buf[b]='\0'; - sprintf (aln[l_s[c][a]],"%s", char_buf); - } - } - - - A->len_aln=LEN; - A->nseq=ns[0]+ns[1]; - vfree (char_buf); - free_char (al, -1); - free_int (pos0, -1); - return score; -} -/*******************************************************************************/ -/* GLOCAL 2 */ -/* */ -/* makes DP between the the ns[0] sequences and the ns[1] */ -/* */ -/* for MODE, see the function get_dp_cost */ -/*******************************************************************************/ -int glocal2_pair_wise (Alignment *IN,int*ns, int **ls,Constraint_list *CL) -{ - int a, b, s=0; - Alignment *A, *R,*L; - char *seq, *buf; - - buf=vcalloc (1000, sizeof (char)); - seq=vcalloc (1000, sizeof (char)); - - A=copy_aln (IN,NULL); - L=copy_aln (IN,NULL); - R=copy_aln (IN,NULL); - - gotoh_pair_wise_sw (A, ns, ls, CL); - - HERE ("1"); - for (a=0; a<2; a++) - { - for (b=0; bseq_al[s]); - - seq[A->order[s][2]]='\0'; - sprintf (L->seq_al[s], "%s", seq); - sprintf (R->seq_al[s], "%s", seq+A->order[s][3]+1); - } - } - HERE ("2"); - print_sub_aln (A, ns, ls); - gotoh_pair_wise(L, ns, ls, CL); - print_sub_aln (L, ns, ls); - gotoh_pair_wise(R, ns, ls, CL); - print_sub_aln (R, ns, ls); - - IN=realloc_aln (IN, A->len_aln+L->len_aln+R->len_aln+1); - for (a=0; a<2; a++) - { - for (b=0; bseq_al[s], "%s%s%s",L->seq_al[s], A->seq_al[s], R->seq_al[s]); - } - } - IN->len_aln=strlen (IN->seq_al[s]); - - print_sub_aln (IN, ns, ls); - vfree (seq); vfree (buf); - free_aln (A); free_aln (L);free_aln (R); - return IN->score_aln; -} - - -int gotoh_pair_wise (Alignment *A,int*ns, int **l_s,Constraint_list *CL) - { -/*******************************************************************************/ -/* NEEDLEMAN AND WUNSCH (GOTOH) */ -/* */ -/* makes DP between the the ns[0] sequences and the ns[1] */ -/* */ -/* for MODE, see the function get_dp_cost */ -/*******************************************************************************/ - - -/*TREATMENT OF THE TERMINAL GAP PENALTIES*/ -/*TG_MODE=0---> gop and gep*/ -/*TG_MODE=1---> --- gep*/ -/*TG_MODE=2---> --- ---*/ - - - int TG_MODE; - int l_gop, l_gep; - int gop, gep; - int maximise; -/*VARIANLES FOR THE MULTIPLE SEQUENCE ALIGNMENT*/ - int a, b, i, j; - - int *cc; - int *dd,*ddg; - int e, eg; - - int lenal[2], len; - int t, c=0,s, ch; - int sub; - int fop; - int score=0; - int **pos0; - static char **al; - char **aln; - int ala, alb,LEN; - char *buffer; - char *char_buf; -/*trace back variables */ - FILE *long_trace=NULL; - TRACE_TYPE *buf_trace=NULL; - static TRACE_TYPE **trace; - TRACE_TYPE k; - TRACE_TYPE *tr; - int long_trace_flag=0; - int dim; -/********Prepare penalties*******/ - gop=CL->gop*SCORE_K; - gep=CL->gep*SCORE_K; - TG_MODE=CL->TG_MODE; - maximise=CL->maximise; - - -/********************************/ -/*CLEAN UP AFTER USE*/ - if ( A==NULL) - { - free_int (trace,-1); - trace=NULL; - free_char (al,-1); - al=NULL; - return 0; - } - -/*DO MEMORY ALLOCATION FOR DP*/ - - lenal[0]=strlen (A->seq_al[l_s[0][0]]); - lenal[1]=strlen (A->seq_al[l_s[1][0]]); - len= MAX(lenal[0],lenal[1])+1; - buf_trace=vcalloc ( len, sizeof (TRACE_TYPE)); - buffer=vcalloc ( 2*len, sizeof (char)); - al=declare_char (2, 2*len); - - char_buf= vcalloc (2*len, sizeof (char)); - - - dd = vcalloc (len, sizeof (int)); - - - cc = vcalloc (len, sizeof (int)); - ddg=vcalloc (len, sizeof (int)); - - - - if ( len>=MAX_LEN_FOR_DP) - { - long_trace_flag=1; - long_trace=vtmpfile(); - } - else - { - - dim=(trace==NULL)?0:read_size_int ( trace,sizeof (int*)); - trace =realloc_int ( trace,dim,dim,MAX(0,len-dim), MAX(0,len-dim)); - } - -/*END OF MEMORY ALLOCATION*/ - - - /* - 0(s) +(dd) - \ | - \ | - \ | - \ | - \ | - \ | - \| - -(e)----O - */ - - pos0=aln2pos_simple ( A,-1, ns, l_s); - - - cc[0]=0; - tr=(long_trace_flag)?buf_trace:trace[0]; - tr[0]=(TRACE_TYPE)1; - for ( j=1; j<=lenal[1]; j++)tr[j]=(TRACE_TYPE)-1; - if (long_trace_flag)fwrite (buf_trace, sizeof ( TRACE_TYPE),lenal[1]+1, long_trace); - - - t=(TG_MODE==0)?gop:0; - - - for (cc[0]=0,j=1; j<=lenal[1]; j++) - { - - l_gop=(TG_MODE==0)?gop:0; - l_gep=(TG_MODE==2)?0:gep; - - cc[j]=t=t+l_gep; - dd[j]= t+ gop; - } - - t=(TG_MODE==0)?gop:0; - - for (i=1; i<=lenal[0];i++) - { - tr=(long_trace_flag)?buf_trace:trace[i]; - s=cc[0]; - - l_gop=(TG_MODE==0)?gop:0; - l_gep=(TG_MODE==2)?0:gep; - - - - cc[0]=c=t=t+l_gep; - e=t+ gop; - tr[0]=(TRACE_TYPE)1; - - - - for (eg=0,j=1; j<=lenal[1];j++) - { - - sub=(CL->get_dp_cost) (A, pos0, ns[0], l_s[0], i-1, pos0, ns[1], l_s[1],j-1,CL); - - /*get the best Insertion*/ - l_gop=(i==lenal[0] || i==1 )?((TG_MODE==0)?gop:0):gop; - l_gep=(i==lenal[0] || i==1)?((TG_MODE==2)?0:gep):gep; - - - if ( a_better_than_b ( e,c+l_gop, maximise))eg++; - else eg=1; - e=best_of_a_b (e, c+l_gop, maximise)+l_gep; - - /*Get the best deletion*/ - l_gop=(j==lenal[1] || j==1)?((TG_MODE==0)?gop:0):gop; - l_gep=(j==lenal[1] || j==1)?((TG_MODE==2)?0:gep):gep; - - - if ( a_better_than_b ( dd[j], cc[j]+l_gop, maximise))ddg[j]++; - else ddg[j]=1; - dd[j]=best_of_a_b( dd[j], cc[j]+l_gop,maximise)+l_gep; - - - - c=best_int(3,maximise,&fop, e, s+sub,dd[j]); - /*Chose Substitution for tie breaking*/ - if ( fop==0 && (s+sub)==e)fop=1; - else if ( fop==2 && (s+sub)==dd[j])fop=1; - /*Chose Deletion for tie breaking*/ - else if ( fop==2 && e==dd[j])fop=1; - - fop-=1; - s=cc[j]; - cc[j]=c; - - - if ( fop<0) - {tr[j]=(TRACE_TYPE)fop*eg; - } - else if ( fop>0) - {tr[j]=(TRACE_TYPE)fop*ddg[j]; - } - else if (fop==0) - {tr[j]=(TRACE_TYPE)0; - } - fop= -2; - } - if (long_trace_flag) - { - fwrite ( buf_trace, sizeof (TRACE_TYPE), lenal[1]+1, long_trace); - } - } - - score=c; - - i=lenal[0]; - j=lenal[1]; - ala=alb=0; - - - while (i>=0 && j>=0 && ((i+j)!=0)) - { - if ( i==0) - k=-1; - else if ( j==0) - k=1; - else if ( j==0 && i==0) - k=1; - else - { - if (long_trace_flag) - { - fseek ( long_trace, sizeof (TRACE_TYPE)*((lenal[1]+1)*(i)+j),SEEK_SET); - fread ( &k, sizeof (TRACE_TYPE), 1, long_trace); - } - else - { - - k=trace[i][j]; - } - } - - - if (k==0) - { - - al[0][ala++]=1; - al[1][alb++]=1; - i--; - j--; - } - else if (k>0) - { - - for ( a=0; a< k; a++) - { - al[0][ala++]=1; - al[1][alb++]=0; - i--; - } - } - else if (k<0) - { - - for ( a=0; a>k; a--) - { - al[0][ala++]=0; - al[1][alb++]=1; - j--; - } - } - } - - LEN=ala; - c=LEN-1; - - - - invert_list_char ( al[0], LEN); - invert_list_char ( al[1], LEN); - if ( A->declared_len<=LEN)A=realloc_aln2 ( A,A->max_n_seq, 2*LEN); - aln=A->seq_al; - - for ( c=0; c< 2; c++) - { - for ( a=0; a< ns[c]; a++) - { - ch=0; - for ( b=0; b< LEN; b++) - { - if (al[c][b]==1) - char_buf[b]=aln[l_s[c][a]][ch++]; - else - char_buf[b]='-'; - } - char_buf[b]='\0'; - sprintf (aln[l_s[c][a]],"%s", char_buf); - } - } - - - A->len_aln=LEN; - A->nseq=ns[0]+ns[1]; - - - vfree ( cc); - vfree (dd); - vfree (ddg); - vfree (buffer); - vfree (char_buf); - vfree (buf_trace); - free_char ( al, -1); - free_int (pos0, -1); - if ( long_trace_flag)fclose (long_trace); - - - - return score; - } - - -int get_transition_cost (Alignment *A, int **posi, int ni, int *li, int i, int **posj, int nj, int *lj, int j,Constraint_list *CL); -int gotoh_pair_wise_lgp_sticky ( Alignment *A, int *ns, int **l_s, Constraint_list *CL) -{ - int i,j, li, lj, n, sub, trace,ntrace, a, b, c, score; - int I, J; - int M1, I1, D1, LEN; - char **al, *char_buf, **aln; - int **pos0; - - int gop[2], gcp[2], gep[2]; - static int ***gpl, ***t, ***m; - static int max_li, max_lj; - - - - //gotoh_pair_wise ( A, ns, l_s,CL); - //ungap_sub_aln (A, ns[0], l_s[0]); - //ungap_sub_aln (A, ns[1], l_s[1]); - - I=0;J=1; - - - li=strlen (A->seq_al[l_s[I][0]]); - lj=strlen (A->seq_al[l_s[J][0]]); - - if ( !gpl)gpl=vcalloc ( 2, sizeof (int**)); - gpl[I]=aln2local_penalties (A,ns[I], l_s[I], CL,gpl[I]); - gpl[J]=aln2local_penalties (A,ns[J], l_s[J], CL,gpl[J]); - - - n=1; - M1=n++;D1=n++;I1=n++; - - if ( li>max_li ||lj>max_lj ) - { - free_arrayN((void**)t, 3); - free_arrayN((void**)m, 3); - - - max_li=li; - max_lj=lj; - t=declare_arrayN(3, sizeof (int),n, max_li+1, max_lj+1); - m=declare_arrayN(3, sizeof (int),n, max_li+1, max_lj+1); - - } - pos0=aln2pos_simple ( A,-1, ns, l_s); - - - for (j=1; j<=lj; j++) - { - gep[J]=gpl[J][GEP][j-1]; - m[D1][0][j]=gep[J]*j; - m[I1][0][j]=m[D1][0][j]-1; - m[M1][0][j]=m[D1][0][j]-1; - } - - //D1: gap in sequence I - //I1: gap in sequence J - - - for (i=1; i<=li; i++) - { - gep[I]=gpl[I][GEP][i-1]; - gop[I]=gpl[I][GOP][i-1]; - gcp[I]=gpl[I][GCP][i-1]; - - m[I1][i][0]=i*gep[I]; - m[D1][i][0]= m[I1][i][0]-1; - m[M1][i][0]= m[I1][i][0]-1; - - - - gop[I]=(i==1 || i==li )?0:gop[I]; - gcp[I]=(i==1 || i==li )?0:gcp[I]; - - - for ( j=1; j<=lj; j++) - { - int transition; - - gep[J]=gpl[J][GEP][j-1]; - gop[J]=gpl[J][GOP][j-1]; - gcp[J]=gpl[J][GCP][j-1]; - - //gep[J]=gep[I]=(gep[J]+gep[I])/2; - //gop[J]=gop[I]=(gop[J]+gop[I])/2; - //gcp[J]=gcp[I]=(gcp[J]+gcp[I])/2; - - - gop[J]=(j==1 || j==lj )?0:gop[J]; - gcp[J]=(j==1 || j==lj )?0:gcp[J]; - - - sub=(CL->get_dp_cost) (A, pos0, ns[0], l_s[0], i-1, pos0, ns[1], l_s[1],j-1,CL); - transition=get_transition_cost (A, pos0, ns[0], l_s[0], i-1, pos0, ns[1], l_s[1],j-1,CL); - - m[M1][i][j]=dp_max (&trace,3,M1,m[M1][i-1][j-1]+transition,I1, m[I1][i-1][j-1]+gcp[I],D1,m[D1][i-1][j-1]+gcp[J])+sub; - t[M1][i][j]=trace; - - - m[D1][i][j]=dp_max (&trace,2, M1,m[M1][i][j-1]+gop[J]+gep[J],D1, m[D1][i][j-1]+gep[J]); - t[D1][i][j]=trace; - - - m[I1][i][j]=dp_max (&trace,2, M1,m[M1][i-1][j]+gop[I]+gep[I],I1, m[I1][i-1][j]+gep[I]); - t[I1][i][j]=trace; - - } - - } - score=dp_max (&trace,3, M1,m[M1][li][lj],D1,m[D1][li][lj],I1, m[I1][li][lj]); - - LEN=0;i=li;j=lj; - al=declare_char (2, li+lj); - - - trace=t[trace][i][j]; - while (!(i==0 &&j==0)) - { - - ntrace=t[trace][i][j]; - - - if (i==0) - { - al[0][LEN]=0; - al[1][LEN]=1; - j--; - LEN++; - } - else if ( j==0) - { - al[0][LEN]=1; - al[1][LEN]=0; - i--; - LEN++; - } - else if ( trace==M1) - { - al[0][LEN]=1; - al[1][LEN]=1; - i--; j--; - LEN++; - } - else if ( trace==D1) - { - al[0][LEN]=0; - al[1][LEN]=1; - j--; - LEN++; - } - else if ( trace == I1) - { - al[0][LEN]=1; - al[1][LEN]=0; - i--; - LEN++; - } - trace=ntrace; - - } - - invert_list_char ( al[0], LEN); - invert_list_char ( al[1], LEN); - if ( A->declared_len<=LEN)A=realloc_aln ( A,2*LEN+1); - - aln=A->seq_al; - char_buf= vcalloc (LEN+1, sizeof (char)); - for ( c=0; c< 2; c++) - { - for ( a=0; a< ns[c]; a++) - { - int ch=0; - for ( b=0; b< LEN; b++) - { - if (al[c][b]==1) - char_buf[b]=aln[l_s[c][a]][ch++]; - else - char_buf[b]='-'; - } - char_buf[b]='\0'; - sprintf (aln[l_s[c][a]],"%s", char_buf); - } - } - - - A->len_aln=LEN; - A->nseq=ns[0]+ns[1]; - vfree (char_buf); - free_char (al, -1); - free_int (pos0, -1); - return score; -} -int get_transition_cost (Alignment *A, int **posi, int ni, int *li, int i, int **posj, int nj, int *lj, int j,Constraint_list *CL) -{ - /*counts the number of identical transitions between position i-1, i and j-1..j*/ - float t=0; - int a,s; - Sequence *S; - - if (i==0 || j==0)return 0; - - for (a=0; aseq[li[a]][i-1]==S->seq[li[a]][i-1])t++; - } - - for (a=0; aseq[li[a]][j-1]==S->seq[li[a]][j-1])t++; - } - - t=(t*10)/(float)(ni+nj); - return t; -} -/*******************************************************************************/ -/* idscore_pairseq: measure the % id without delivering thze aln*/ -/* */ -/* makes DP between the the ns[0] sequences and the ns[1] */ -/* */ -/* for MODE, see the function get_dp_cost */ -/*******************************************************************************/ -int cl2pair_list ( Alignment *A, int *ns, int **ls, Constraint_list *CL, int ***list_in, int *n_in, int mode, int ndiag); -int cl2pair_list_ref ( Alignment *A, int *ns, int **ls, Constraint_list *CL, int ***list_in, int *n_in); -int cl2pair_list_ecf ( Alignment *A, int *ns, int **ls, Constraint_list *CL, int ***list_in, int *n_in); -int cl2pair_list_diag ( Alignment *A, int *ns, int **ls, Constraint_list *CL, int ***list_in, int *n_in, int add); -int cl2list_borders (Alignment *A, int *ns, int **ls, Constraint_list *CL, int ***list_in, int *n_in); -int cl2diag_cap (Alignment *A, int *ns, int **ls, Constraint_list *CL, int ***list_in, int *n_in); //add one element at the end of each segment so that they can be joined -int** cl2sorted_diagonals ( Alignment *A, int *ns, int **ls, Constraint_list *CL); -int** cl2sorted_diagonals_mat ( Alignment *A, int *ns, int **ls, Constraint_list *CL); -int** cl2sorted_diagonals_cs ( Alignment *A, int *ns, int **ls, Constraint_list *CL); -int list2nodup_list (Alignment *A, int *ns, int **ls, Constraint_list *CL, int ***list_in, int *n_in); -int fill_matrix ( Alignment *A, int *ns, int **ls, Constraint_list *CL, int ***list_in, int *n_in); -int cl2pair_list ( Alignment *A, int *ns, int **ls, Constraint_list *CL, int ***list_in, int *n_in, int mode, int ndiag) -{ - int v; - if (!A) - { - free_int (list_in[0], -1); list_in[0]=NULL; - n_in[0]=0; - } - - cl2list_borders(A, ns, ls, CL, list_in, n_in); - - if ( mode==0) - v=cl2pair_list_ref (A, ns, ls, CL, list_in, n_in); - else if (mode==1) - v=cl2pair_list_ecl (A, ns, ls, CL, list_in, n_in); - else if (mode==2) - v=cl2pair_list_diag (A, ns, ls, CL, list_in, n_in,ndiag); //add diagonals - - cl2diag_cap (A, ns, ls, CL, list_in, n_in); - //fill_matrix (A, ns, ls, CL, list_in, n_in);//Fill matrix with 0s - sort_list_int (list_in[0],7, 1, 0, n_in[0]-1); - list2nodup_list (A, ns, ls, CL, list_in, n_in); - return v; - -} -int fill_matrix( Alignment *A, int *ns, int **ls, Constraint_list *CL, int ***list, int *n) -{ - int a, b, l1, l2, n2=0; - int score; - int **pos; - int max_n; - if (!A) return 0; - pos=aln2pos_simple ( A,-1, ns, ls); - l1=strlen (A->seq_al[ls[0][0]]); - l2=strlen (A->seq_al[ls[1][0]]); - - max_n=read_array_size (list[0], sizeof (int)); - - for (a=0; a<=l1; a++) - for (b=0; b<=l2; b++) - { - score=0; - score=(a==0 || b==0)?0:slow_get_dp_cost (A, pos, ns[0], ls[0],a-1, pos, ns[1], ls[1], b-1, CL); - if ( score>0 && a!=0 && b!=0 && a!=l1 && b!=l2) - { - if (n[0]==max_n){max_n+=1000;list[0]=vrealloc (list[0], max_n*sizeof (int*));} - if (!list[0][n[0]])list[0][n[0]]=vcalloc (7, sizeof (int)); - list[0][n[0]][0]=a; - list[0][n[0]][1]=b; - list[0][n[0]][3]=(l1-a)+b; - list[0][n[0]][2]=score; - if ( a!=0 && b!=0 && a!=l1 && b!=l2) - { - n2++; - } - n[0]++; - } - - } - - return n[0]; - } -int list2nodup_list ( Alignment *A, int *ns, int **ls, Constraint_list *CL, int ***list_in, int *n_in) -{ - int **list; - int n, a, b, c; - - list=list_in[0]; - n=n_in[0]; - - if ( !A)return 0; - - for (b=a=1; aL)return cl2sorted_diagonals_cs ( A, ns, ls, CL); - else return cl2sorted_diagonals_mat ( A, ns, ls, CL); -} - -static int kword; -static char **warray; -int cmp_word ( const int**a, const int**b); -int ** seq2index_list ( Sequence *S, int k); -int** cl2sorted_diagonals_mat ( Alignment *A, int *ns, int **ls, Constraint_list *CL) -{ - - int a,b,c,d, comp, k, l1, l2, ndiag; - int **diag; - - - static char **alp, alps=5; - char *buf1, *buf2; - - if (!A)return NULL; - if ( !alp) - alp=make_group_aa_upgma ("blosum62mt",alps); - - - k=2; - l1=strlen (A->seq_al[ls[0][0]]); - buf1=vcalloc ( l1+1, sizeof (char)); - l2=strlen (A->seq_al[ls[1][0]]); - buf2=vcalloc ( l2+1, sizeof (char)); - - ndiag=l1+l2; - diag=declare_int (ndiag+3,2); - for (a=0; a<=ndiag; a++) diag[a][0]=a; - vfree (diag[ndiag+1]); - diag[ndiag+1]=NULL; - - for ( a=0; aseq_al[ls[0][a]]); - lower_string (buf1); - string_convert (buf1, alps, alp); - for (b=0; bseq_al[ls[1][b]]); - lower_string (buf2); - string_convert (buf2, alps, alp); - for (c=0; cnseq; a++)ml+=strlen (S->seq[a]); - list=declare_int (ml+1, 2); - - for (n=0,a=0; anseq; a++) - { - l=strlen (S->seq[a])-k; - for ( b=0; bseq; - kword=k; - qsort (list, n, sizeof (long**), (int(*)(const void*,const void*))(cmp_word)); - - cw=NULL; - e=s=0; - nm=0; - for (a=0; a<=n; a++) - { - int s1, s2, r1, r2; - if (!cw ||a==n|| strncmp (warray[list[a][0]]+list[a][1],cw, k)!=0) - { - if (a=max){max+=1000; mlist=vrealloc (mlist, max*sizeof (int*));} - mlist[nm]=vcalloc (4, sizeof (int)); - mlist[nm][0]=s1; - mlist[nm][1]=s2; - mlist[nm][2]=r1; - mlist[nm][3]=r2; - nm++; - } - s=a; - } - } - } - - if (nm>=max){max+=1000;mlist=vrealloc (mlist, max*sizeof (int));} - sort_list_int ( mlist,4,1, 0, nm-1); - return mlist; -} -int cmp_word ( const int**a, const int**b) -{ - int c; - - c=strncmp (warray[a[0][0]]+a[0][1], warray[b[0][0]]+b[0][1], kword); - - - if (c) return c; - else - { - for (c=0; c<2; c++) - { - if ( a[0][c]>b[0][c])return 1; - else if (a[0][c]seq_al[ls[0][0]]); - l2=strlen (A->seq_al[ls[1][0]]); - - - - CL=index_res_constraint_list (CL, CL->weight_field); - ndiag=l1+l2; - diag=declare_int (ndiag+3, 2); - - for (a=1; a<=ndiag; a++)diag[a][0]=a; - - for (p1=1; p1<=l1; p1++) - { - for (p2=1; p2<=l2; p2++) - { - for (a=0; aS)->nseq, sizeof (int*)); - for (a=0; aseq_al[ls[1][a]]); - - l1=strlen (A->seq_al[ls[0][0]]); - l2=strlen (A->seq_al[ls[1][0]]); - sl2=vcalloc ((CL->S)->nseq, sizeof (int)); - - for (a=0;aweight_field); - ndiag=l1+l2; - diag=declare_int (ndiag+3, 2); - - for (a=1; a<=ndiag; a++)diag[a][0]=a; - for (p1=0; p1<=l1; p1++) - { - for (si=0;p1>0 && si0 && aresidue_index[s][r][0];a+=3) - { - - t_s=CL->residue_index[s][r][a]; - t_r=CL->residue_index[s][r][a+1]; - - if (sl2[t_s]) - { - p2=inv_pos[t_s][t_r]; - diag_i=(l1-p1)+p2; - diag[diag_i][1]+=(CL->get_dp_cost) (A, pos, ns[0], ls[0], p1-1, pos, ns[1], ls[1],p2-1,CL); - } - } - } - } - max_len=MAX(l1, l2); - for (a=1; aseq_al[ls[0][0]]); - l2=strlen (A->seq_al[ls[1][0]]); -// pos=aln2pos_simple ( A,-1, ns, ls); - - for (p1=0; p1<=l1; p1++) - { - if (p1==0 || p1==l1) - { - for (p2=0; p2<=l2; p2++) - { - if (n==maxlen){maxlen+=1000;list=vrealloc (list,maxlen*sizeof (int*));} - if (!list[n])list[n]=vcalloc (7, sizeof (int)); - list[n][0]=p1; - list[n][1]=p2; - list[n][3]=(l1-(p1))+(p2); - //list[n][2]=(p1==0||p2==0)?0:(CL->get_dp_cost) (A, pos, ns[0], ls[0], p1-1, pos, ns[1], ls[1],p2-1,CL);; - list[n][2]=(CL->gep)*SCORE_K*p2; - n++; - } - } - else - { - for (a=0; a<2; a++) - { - p2=(a==0)?0:l2; - if (n==maxlen){maxlen+=1000;list=vrealloc (list,maxlen*sizeof (int*));} - if (!list[n])list[n]=vcalloc (7, sizeof (int)); - list[n][0]=p1; - list[n][1]=p2; - list[n][3]=(l1-(p1))+(p2); - //list[n][2]=(p1==0||p2==0)?0:(CL->get_dp_cost) (A, pos, ns[0], ls[0], p1-1, pos, ns[1], ls[1],p2-1,CL);; - list[n][2]=(CL->gep)*SCORE_K*p1; - n++; - } - } - } -// free_int (pos, -1); - list_in[0]=list; - n_in[0]=n; - return read_array_size (list, sizeof (int*)); -} - -int cl2diag_cap (Alignment *A, int *ns, int **ls, Constraint_list *CL, int ***list_in, int *n_in) -{ - int **list; - int n, in, a, b, al1, al2; - int max_n; - int cap=0; - - if (!A) return 0; - - al1=strlen (A->seq_al[ls[0][0]]); - al2=strlen (A->seq_al[ls[1][0]]); - - list=list_in[0]; - n=n_in[0]; - max_n=read_array_size (list, sizeof (int*)); - - - - - for (a=0; a< n; a++) - { - b=list[a][3]; - list[a][3]=list[a][0]; - list[a][0]=b; - - } - sort_list_int (list, 4, 1, 0, n-1); - for (a=0; a< n; a++) - { - b=list[a][3]; - list[a][3]=list[a][0]; - list[a][0]=b; - } - - - in=n; - - for (a=0; aL)return cl2pair_list_diag_cl (A, ns, ls, CL, list_in, n_in, add); - else return cl2pair_list_diag_mat (A, ns, ls, CL, list_in, n_in, add); -} -int cl2pair_list_diag_mat ( Alignment *A, int *ns, int **ls, Constraint_list *CL, int ***list_in, int *n_in, int add ) -{ - int p1, p2, n,d; - int a, l1, l2; - int set=0; - static int **pos; - int max_n; - static int **diag; - int **list; - - if (A==NULL) - { - free_int (pos, -1);pos=NULL; - free_int (diag, -1);diag=NULL; - //free_int (list_in[0], -1); list_in[0]=NULL; - return 0; - } - - if ( !pos) - { - pos=aln2pos_simple ( A,-1, ns, ls); - diag=cl2sorted_diagonals (A,ns,ls,CL); - } - - list=list_in[0]; - n=n_in[0]; - max_n=read_array_size (list, sizeof (int**)); - - - - l1=strlen (A->seq_al[ls[0][0]]); - l2=strlen (A->seq_al[ls[1][0]]); - - d=0; - if ( add) - { - while ( diag[d] && diag[d][1]==-1)d++; - add+=d; - } - else - { - d=0; - while (diag[add++]); - } - - HERE ("Add %d diagonals, starts %d N=%d", add, d, n); - - for (d=0; dget_dp_cost) (A, pos, ns[0], ls[0], p1-1, pos, ns[1], ls[1],p2-1,CL); - n++; - } - } - } - HERE ("Addition Finished n=%d", n); - if (!set) return 0; - sort_list_int (list,4, 1, 0, n-1); - - list_in[0]=list; - n_in[0]=n; - HERE ("\nN=%d r=%.3f [l1=%d l2=%d]", n, (float)n/(float)(l1*l2), l1, l2); - return max_n; -} - -int cl2pair_list_diag_cl ( Alignment *A, int *ns, int **ls, Constraint_list *CL, int ***list_in, int *n_in, int add ) -{ - int p1, p2,n,d; - int l1, l2; - int score, set=0; - static int **pos; - int max_n; - static int **diag; - int **list; - - if (A==NULL) - { - free_int (pos, -1);pos=NULL; - free_int (diag, -1);diag=NULL; - //free_int (list_in[0], -1); list_in[0]=NULL; - return 0; - } - - if ( !pos) - { - pos=aln2pos_simple ( A,-1, ns, ls); - diag=cl2sorted_diagonals (A,ns,ls,CL); - } - - list=list_in[0]; - n=n_in[0]; - max_n=read_array_size (list, sizeof (int**)); - - - - l1=strlen (A->seq_al[ls[0][0]]); - l2=strlen (A->seq_al[ls[1][0]]); - if ( add==0)add=l1+l2; - d=0; - while ( diag[d] && diag[d][1]==-1)d++; - HERE ("Add %d diagonals, starts %d N=%d", add, d, n); - add+=d; - for (d; dget_dp_cost) (A, pos, ns[0], ls[0], p1-1, pos, ns[1], ls[1],p2-1,CL))!=0) - { - - if (n==max_n){max_n+=1000;list=vrealloc (list, max_n*sizeof (int*));} - if (!list[n])list[n]=vcalloc (7, sizeof (int)); - - list[n][0]=p1; - list[n][1]=p2; - list[n][3]=(l1-(p1))+(p2); - list[n][2]=score; - n++; - } - } - } - HERE ("Addition Finished n=%d", n); - if (!set) return 0; - sort_list_int (list,4, 1, 0, n-1); - - list_in[0]=list; - n_in[0]=n; - HERE ("\nN=%d r=%.3f [l1=%d l2=%d]", n, (float)n/(float)(l1*l2), l1, l2); - return max_n; -} - -int cl2pair_list_ecl_norm ( Alignment *A, int *ns, int **ls, Constraint_list *CL, int ***list_in, int *n_in); -int cl2pair_list_ecl_raw ( Alignment *A, int *ns, int **ls, Constraint_list *CL, int ***list_in, int *n_in); -int cl2pair_list_ecl_pc ( Alignment *A, int *ns, int **ls, Constraint_list *CL, int ***list_in, int *n_in); -int cl2pair_list_ecl_noext_raw ( Alignment *A, int *ns, int **ls, Constraint_list *CL, int ***list_in, int *n_in); -int cl2pair_list_ecl_rna2 ( Alignment *A, int *ns, int **ls, Constraint_list *CL, int ***list_in, int *n_in); - -int cl2pair_list_ecl_rawquad ( Alignment *A, int *ns, int **ls, Constraint_list *CL, int ***list_in, int *n_in); -int cl2pair_list_ecl ( Alignment *A, int *ns, int **ls, Constraint_list *CL, int ***list_in, int *n_in) -{ - int mode=5; - - if ( mode==1)return cl2pair_list_ecl_norm (A, ns, ls, CL, list_in, n_in); - else if ( mode==2)return cl2pair_list_ecl_raw (A, ns, ls, CL, list_in, n_in); - else if ( mode==3)return cl2pair_list_ecl_rawquad (A, ns, ls, CL, list_in, n_in); - else if ( mode==4)return cl2pair_list_ecl_noext_raw (A, ns, ls, CL, list_in, n_in); - else if ( mode==5)return cl2pair_list_ecl_pc (A, ns, ls, CL, list_in, n_in); -} -int cl2pair_list_ecl_noext_raw ( Alignment *A, int *ns, int **ls, Constraint_list *CL, int ***list_in, int *n_in) -{ - int p1, p2, si, s, r, t_s2, t_r2, t_w2, n,n2; - int a, b, l1, l2; - int score; - int **pos; - int **list; - int max_n; - - - - int nused; - int *used_list, **used; - int *sl2, **inv_pos; - - int filter=10; - - - - if ( !A) return 0; - list=list_in[0]; - n=n_in[0]; - max_n=read_array_size (list, sizeof (int*)); - - - n2=0; - pos=aln2pos_simple ( A,-1, ns, ls); - inv_pos=vcalloc ((CL->S)->nseq, sizeof (int*)); - for (a=0; aseq_al[ls[1][a]]); - - l1=strlen (A->seq_al[ls[0][0]]); - l2=strlen (A->seq_al[ls[1][0]]); - sl2=vcalloc ((CL->S)->nseq, sizeof (int)); - - for (a=0;aweight_field); - - used=declare_int (l2+1,2); - used_list=vcalloc (l2+1, sizeof (int)); - nused=0; - - for (p1=0; p1<=l1; p1++) - { - for (nused=0,si=0;p1>0 && si0 && aresidue_index[s][r][0];a+=3) - { - t_s2=CL->residue_index[s][r][a];t_r2=CL->residue_index[s][r][a+1];t_w2=CL->residue_index[s][r][a+2]; - if (sl2[t_s2]) - { - p2=inv_pos[t_s2][t_r2]; - score=t_w2; - if (!used[p2][1] && score>0) - { - used_list[nused++]=p2; - } - used[p2][0]+=score; - used[p2][1]++; - } - } - } - for (a=0; afilter && p1!=0 && p2!=0 && p1!=l1 && p2!=l2) - { - if (!list[n])list[n]=vcalloc (7, sizeof (int)); - - list[n][0]=p1; - list[n][1]=p2; - list[n][3]=(l1-(p1))+(p2); - list[n][2]=score; - n++; - } - } - } - - - vfree (used); - vfree (used_list); - free_int (inv_pos, -1); - free_int (pos, -1); - vfree (sl2); - - n_in[0]=n; - list_in[0]=list; - return 1; -} - -int cl2pair_list_ecl_raw ( Alignment *A, int *ns, int **ls, Constraint_list *CL, int ***list_in, int *n_in) -{ - int p1, p2, si, s, r, t_s, t_r,t_w, t_s2, t_r2, t_w2, n,tot; - int a, b, l1, l2; - int **pos,**list; - int max_n; - - - int set, raw_max,nscore, score, nused; - int *used_list, **used; - int *sl2, **inv_pos; - - int filter1=0, filter2=0, max=0; - int **nr; - long tot_score=0, avg; - int new_n=0; - - if ( !A) return 0; - list=list_in[0]; - n=n_in[0]; - max_n=read_array_size (list, sizeof (int*)); - - - pos=aln2pos_simple ( A,-1, ns, ls); - inv_pos=vcalloc ((CL->S)->nseq, sizeof (int*)); - for (a=0; aseq_al[ls[1][a]]); - - l1=strlen (A->seq_al[ls[0][0]]); - l2=strlen (A->seq_al[ls[1][0]]); - sl2=vcalloc ((CL->S)->nseq, sizeof (int)); - nr=declare_int (2, MAX(l1,l2)+1); - - for (a=0; aseq_al[ls[0][b]][a]))nr[0][a+1]++; - for (a=0; aseq_al[ls[1][b]][a]))nr[1][a+1]++; - - for (a=0;aweight_field); - - used=declare_int (l2+1,2); - used_list=vcalloc (l2+1, sizeof (int)); - nused=0; - - for (raw_max=0,p1=0; p1<=l1; p1++) - { - for (tot=0,nused=0,si=0;p1>0 && si0 && aresidue_index[s][r][0];a+=3) - { - t_s=CL->residue_index[s][r][a];t_r=CL->residue_index[s][r][a+1];t_w=CL->residue_index[s][r][a+2]; - for (b=0; bresidue_index[t_s][t_r][0];) - { - if (b==0){t_s2=t_s;t_r2=t_r;t_w2=t_w;b++;} - else - { - t_s2=CL->residue_index[t_s][t_r][b];t_r2=CL->residue_index[t_s][t_r][b+1];t_w2=CL->residue_index[t_s][t_r][b+2];b+=3; - } - - if (sl2[t_s2]) - { - p2=inv_pos[t_s2][t_r2]; - score=MIN(t_w,t_w2); - if (score0) - { - used_list[nused++]=p2; - } - tot+=score; - used[p2][0]+=score; - used[p2][1]++; - } - } - } - } - //set the threshold to 1/2 of the best normalised score - - for (filter2=0,set=0,a=0; a=filter2 && p1!=0 && p2!=0 && p1!=l1 && p2!=l2) - { - if (!list[n])list[n]=vcalloc (7, sizeof (int)); - - list[n][0]=p1; - list[n][1]=p2; - list[n][3]=(l1-(p1))+(p2); - list[n][2]=score; - n++; - tot_score+=score; - new_n++; - } - } - } - avg=tot_score/new_n; - - //CL->gop=-1*avg*3;CL->gep=0; - HERE ("FILTER: %d->%d [THR=%d]", max, n-n_in[0], filter2); - - vfree (used); - vfree (used_list); - free_int (inv_pos, -1); - free_int (pos, -1); - vfree (sl2); - - n_in[0]=n; - list_in[0]=list; - return 1; -} - - -/** - * Calculates scores for diagonal segments. - * - * \param Alignment The sequences. - * \param ns Number of sequences in each group - * \param ls sequences in in groups (ls[0][x] sequences in group 1, ls[1][x] squences in group 2). - * \param CL the constraint list - * \param list_in the diagonals - * \param n_in number of sequences? - */ -int cl2pair_list_ecl_pc ( Alignment *A, int *ns, int **ls, Constraint_list *CL, int ***list_in, int *n_in) -{ - int p1, p2, si, s, r, t_s, t_r,t_w, t_s2, t_r2, t_w2, n; - int a, b, l1, l2; - int **pos,**list; - int max_n; - - int nused; - int *used_list; - int *sl2, **inv_pos; - - int **nr; - - - float nscore, score, tot, filter, avg=0, new=0; - float **used; - - if ( !A) return 0; - list=list_in[0]; - n=n_in[0]; - max_n=read_array_size (list, sizeof (int*)); - - pos=aln2pos_simple ( A,-1, ns, ls); - inv_pos=vcalloc ((CL->S)->nseq, sizeof (int*)); - for (a=0; aseq_al[ls[1][a]]); - - l1=strlen (A->seq_al[ls[0][0]]); - l2=strlen (A->seq_al[ls[1][0]]); - sl2=vcalloc ((CL->S)->nseq, sizeof (int)); - nr=declare_int (2, MAX(l1,l2)+1); - - for (a=0; aseq_al[ls[0][b]][a]))nr[0][a+1]++; - for (a=0; aseq_al[ls[1][b]][a]))nr[1][a+1]++; - - for (a=0;aweight_field); - - used=declare_float (l2+1,2); - used_list=vcalloc (l2+1, sizeof (int)); - nused=0; - - for (p1=0; p1<=l1; p1++) - { - - for (tot=0,nused=0,si=0;p1>0 && si0 && aresidue_index[s][r][0];a+=3) - { - t_s=CL->residue_index[s][r][a];t_r=CL->residue_index[s][r][a+1];t_w=CL->residue_index[s][r][a+2]; - for (b=0; bresidue_index[t_s][t_r][0];) - { - if (b==0){t_s2=t_s;t_r2=t_r;t_w2=t_w;b++;} - else - { - t_s2=CL->residue_index[t_s][t_r][b];t_r2=CL->residue_index[t_s][t_r][b+1];t_w2=CL->residue_index[t_s][t_r][b+2];b+=3; - } - - if (sl2[t_s2]) - { - p2=inv_pos[t_s2][t_r2]; - //score=((float)t_w/(float)NORM_F)*((float)t_w2/(float)NORM_F); - score=MIN(((float)t_w/(float)NORM_F),((float)t_w2/(float)NORM_F)); - - if (!used[p2][1] && score>0) - { - used_list[nused++]=p2; - } - - tot+=score; - used[p2][0]+=score; - used[p2][1]++; - } - } - } - } - //FILTER: Keep in the graph the edges where (p1->p2/(Sum (P1->x))>0.01 - filter=0.01; - - for (a=0; afilter && p1!=0 && p2!=0 && p1!=l1 && p2!=l2) - { - if (!list[n]) - list[n]=vcalloc (7, sizeof (int)); - - list[n][0]=p1; - list[n][1]=p2; - list[n][3]=(l1-(p1))+(p2); - score/=(float)((CL->S)->nseq*nr[0][p1]*nr[1][p2]); - list[n][2]=(int)((float)score*(float)NORM_F); - avg+=(int)((float)score*(float)NORM_F); - new++; - n++; - } - } - } - free_float (used, -1); - vfree (used_list); - free_int (inv_pos, -1); - free_int (pos, -1); - vfree (sl2); - free_int (nr, -1); - - n_in[0]=n; - list_in[0]=list; - if (new)avg/=new; - return avg; -} - - -int cl2pair_list_ecl_rawquad ( Alignment *A, int *ns, int **ls, Constraint_list *CL, int ***list_in, int *n_in) -{ - int p1, p2, si, s, r, t_s, t_r,t_w, t_s2, t_r2, t_w2,t_s3, t_r3, t_w3, n,n2; - int a, b, c,l1, l2; - int score; - int **pos; - int **list; - int max_n; - - int tn; - - int nused; - int *used_list, **used; - int *sl2, **inv_pos; - - int filter=0; - int nseq2; - if ( !A) return 0; - list=list_in[0]; - n=n_in[0]; - max_n=read_array_size (list, sizeof (int*)); - - - n2=0; - pos=aln2pos_simple ( A,-1, ns, ls); - inv_pos=vcalloc ((CL->S)->nseq, sizeof (int*)); - for (a=0; aseq_al[ls[1][a]]); - - l1=strlen (A->seq_al[ls[0][0]]); - l2=strlen (A->seq_al[ls[1][0]]); - sl2=vcalloc ((CL->S)->nseq, sizeof (int)); - - for (a=0;aweight_field); - - used=declare_int (l2+1,2); - used_list=vcalloc (l2+1, sizeof (int)); - nused=0; - nseq2=(CL->S)->nseq*(CL->S)->nseq; - - for (p1=0; p1<=l1; p1++) - { - for (nused=0,si=0;p1>0 && si0 && aresidue_index[s][r][0];a+=3) - { - t_s=CL->residue_index[s][r][a];t_r=CL->residue_index[s][r][a+1];t_w=CL->residue_index[s][r][a+2]; - for (b=0; bresidue_index[t_s][t_r][0];) - { - if (b==0){t_s2=t_s;t_r2=t_r;t_w2=t_w;b++;} - else - { - t_s2=CL->residue_index[t_s][t_r][b];t_r2=CL->residue_index[t_s][t_r][b+1];t_w2=CL->residue_index[t_s][t_r][b+2];b+=3; - } - if (sl2[t_s2]) - { - for (c=0; cresidue_index[t_s2][t_r2][0];) - { - if (c==0){t_s3=t_s2;t_r3=t_r2;t_w3=t_w2;c++;} - else - { - t_s3=CL->residue_index[t_s2][t_r2][c];t_r3=CL->residue_index[t_s2][t_r2][c+1];t_w3=CL->residue_index[t_s2][t_r2][c+2];c+=3; - } - - if (sl2[t_s3]) - { - p2=inv_pos[t_s3][t_r3]; - score=MIN(t_w,t_w2); - score=MIN(score,t_w3); - if (!used[p2][1] && score>0) - { - used_list[nused++]=p2; - } - used[p2][0]+=score; - used[p2][1]++; - } - } - } - } - } - } - for (a=0; afilter && p1!=0 && p2!=0 && p1!=l1 && p2!=l2) - { - if (!list[n])list[n]=vcalloc (7, sizeof (int)); - - list[n][0]=p1; - list[n][1]=p2; - list[n][3]=(l1-(p1))+(p2); - list[n][2]=score; - n++; - } - } - } - - - vfree (used); - vfree (used_list); - free_int (inv_pos, -1); - free_int (pos, -1); - vfree (sl2); - - n_in[0]=n; - list_in[0]=list; - return 1; -} -int cl2pair_list_ecl_norm ( Alignment *A, int *ns, int **ls, Constraint_list *CL, int ***list_in, int *n_in) -{ - int p1, p2, si, s, r, t_s, t_r, n,n2; - int a, b, l1, l2; - int score; - int **pos; - int **list; - int max_n; - - - - int nused; - int *used_list, *used; - int *sl2, **inv_pos; - - int filter=0; - - if ( !A) return 0; - list=list_in[0]; - n=n_in[0]; - max_n=read_array_size (list, sizeof (int*)); - - - n2=0; - pos=aln2pos_simple ( A,-1, ns, ls); - inv_pos=vcalloc ((CL->S)->nseq, sizeof (int*)); - for (a=0; aseq_al[ls[1][a]]); - - l1=strlen (A->seq_al[ls[0][0]]); - l2=strlen (A->seq_al[ls[1][0]]); - sl2=vcalloc ((CL->S)->nseq, sizeof (int)); - - for (a=0;aweight_field); - - used=vcalloc (l2+1, sizeof (int)); - used_list=vcalloc (l2+1, sizeof (int)); - nused=0; - - - - for (p1=0; p1<=l1; p1++) - { - for (si=0;p1>0 && si0 && aresidue_index[s][r][0];a+=3) - { - - t_s=CL->residue_index[s][r][a]; - t_r=CL->residue_index[s][r][a+1]; - - for (b=0; bresidue_index[t_s][t_r][0];) - { - int t_s2, t_r2; - if (b==0){t_s2=t_s;t_r2=t_r;b++;} - else { t_s2=CL->residue_index[t_s][t_r][b];t_r2=CL->residue_index[t_s][t_r][b+1];b+=3;} - - if (sl2[t_s2]) - { - p2=inv_pos[t_s2][t_r2]; - if (!used[p2]){used[p2]=1;used_list[nused++]=p2;} - else used[p2]++; - } - } - - } - } - if (p1==0 || p1==l1) - { - for (nused=0,p2=0; p2<=l2; p2++)used_list[nused++]=p2; - } - else - { - if (!used[0])used_list[nused++]=0; - if (!used[l2])used_list[nused++]=l2; - } - for (a=0; aget_dp_cost) (A, pos, ns[0], ls[0], p1-1, pos, ns[1], ls[1],p2-1,CL); - if (score>filter && p1!=0 && p2!=0 && p1!=l1 && p2!=l2) - { - if (n==max_n){max_n+=1000;list=vrealloc (list, max_n*sizeof (int*));} - if (!list[n])list[n]=vcalloc (7, sizeof (int)); - list[n][0]=p1; - list[n][1]=p2; - list[n][3]=(l1-(p1))+(p2); - list[n][2]=score; - n++; - if (p1!=0 && p2!=0 && p1!=l1 && p2!=l2)n2++; - } - used[p2]=0; - } - - nused=0; - } - - vfree (used); - vfree (used_list); - free_int (inv_pos, -1); - free_int (pos, -1); - vfree (sl2); - n_in[0]=n; - list_in[0]=list; - - - return 1; -} - - - - -int cl2pair_list_ref( Alignment *A, int *ns, int **ls, Constraint_list *CL, int ***list, int *n) -{ - int a, b, l1, l2, n2=0; - int score; - int **pos; - int max_n; - if (!A) return 0; - pos=aln2pos_simple ( A,-1, ns, ls); - l1=strlen (A->seq_al[ls[0][0]]); - l2=strlen (A->seq_al[ls[1][0]]); - - max_n=read_array_size (list[0], sizeof (int)); - - for (a=0; a<=l1; a++) - for (b=0; b<=l2; b++) - { - score=(a==0 || b==0)?0:slow_get_dp_cost_pc(A, pos, ns[0], ls[0],a-1, pos, ns[1], ls[1], b-1, CL); - - - if ( score>0 && a!=0 && b!=0 && a!=l1 && b!=l2) - { - if (n[0]==max_n){max_n+=1000;list[0]=vrealloc (list[0], max_n*sizeof (int*));} - if (!list[0][n[0]])list[0][n[0]]=vcalloc (7, sizeof (int)); - list[0][n[0]][0]=a; - list[0][n[0]][1]=b; - list[0][n[0]][3]=(l1-a)+b; - list[0][n[0]][2]=score; - if ( a!=0 && b!=0 && a!=l1 && b!=l2) - { - n2++; - } - n[0]++; - } - - } - - return n[0]; - } - -int list2linked_pair_wise ( Alignment *A, int *ns, int **l_s, Constraint_list *CL, int **list, int n); -int two_pass_linked_pair_wise ( Alignment *A, int *ns, int **l_s, Constraint_list *CL) -{ - int n=0, **list=NULL; - int nscore; - int mode=2; - int id; - - cl2pair_list (A,ns, l_s, CL, &list, &n, mode, 10); - nscore=list2linked_pair_wise (A, ns, l_s, CL, list, n); - id=sub_aln2sim (A, ns, l_s, "idmat_sim"); - - if (id>50)return nscore; - ungap_sub_aln ( A, ns[0], l_s[0]); - ungap_sub_aln ( A, ns[1], l_s[1]); - cl2pair_list (A,ns, l_s, CL, &list, &n,mode,0); - nscore=list2linked_pair_wise (A, ns, l_s, CL, list, n); - cl2pair_list (NULL,ns, l_s, CL, &list, &n, mode, 0); - return nscore; -} -int clinked_pair_wise ( Alignment *A, int *ns, int **l_s, Constraint_list *CL) -{ - int n=0, **list=NULL; - int nscore, pscore=0; - int mode=2; - - int add=0; - cl2pair_list (A,ns, l_s, CL, &list, &n, mode, 1000); - nscore=list2linked_pair_wise (A, ns, l_s, CL, list, n); - HERE ("***********First: %d", nscore); - if (add) - { - while (nscore>pscore) - { - pscore=nscore; - ungap_sub_aln ( A, ns[0], l_s[0]); - ungap_sub_aln ( A, ns[1], l_s[1]); - cl2pair_list (A,ns, l_s, CL, &list, &n, mode, 10); - nscore=list2linked_pair_wise (A, ns, l_s, CL, list, n); - HERE ("****************New: %d", nscore); - } - } - cl2pair_list (NULL,ns, l_s, CL, &list, &n, mode, 0); - return nscore; -} -int linked_pair_wise ( Alignment *A, int *nsi, int **lsi, Constraint_list *CL) -{ - int n=0; - static int **list=NULL; - int score, a; - int *ns, **ls; - int mode=1;//1:ecl, 0:ref - - ns=vcalloc (2, sizeof (int)); - ns[0]=nsi[1]; ns[1]=nsi[0]; - - ls=declare_int (2, ns[0]+ns[1]); - for (a=0; aseq_al[l_s[0][0]]); - l2=strlen (A->seq_al[l_s[1][0]]); - al=declare_char (2,l1+l2+1); - - - //Penalties: max score is NORM_F - //Penalties must be negative - igop=CL->gop; - gep=igep=CL->gep; - - if (n>max_size) - { - max_size=n; - - vfree (MI);vfree (MJ); vfree (MM); - free_int (slist, -1); - - slist=declare_int (n,3); - - MI=vcalloc (5*n, sizeof (long)); - MJ=vcalloc (5*n, sizeof (long)); - MM=vcalloc (5*n, sizeof (long)); - - } - else - { - for (a=0; a=(LIN(MM,pi,0)+gop))?'i':'m'; - - - LIN(MJ,a,0)=MAX(LIN(MJ,pj,0),(LIN(MM,pj,0)+gop))+delta_j*gep; - LIN(MJ,a,1)=pj; - LIN(MJ,a,2)=0; - LIN(MJ,a,3)=delta_j; - - LIN(MJ,a,4)=(LIN(MJ,pj,0)>=LIN(MM,pj,0)+gop)?'j':'m'; - - - if (a>1 && (ls=list[a][0]-list[ij][0])==(list[a][1]-list[ij][1])) - { - LIN(MM,a,0)=MAX3(LIN(MM,ij,0),LIN(MI,ij,0),LIN(MJ,ij,0))+list[a][2]-(ls*CL->nomatch); - - LIN(MM,a,1)=ij; - LIN(MM,a,2)=ls; - LIN(MM,a,3)=ls; - if ( LIN(MM,ij,0)>=LIN(MI,ij,0) && LIN(MM,ij,0)>=LIN(MJ,ij,0))LIN(MM,a,4)='m'; - else if ( LIN(MI,ij,0) >= LIN(MJ,ij,0))LIN(MM,a,4)='i'; - else LIN(MM,a,4)='j'; - - } - else - { - LIN(MM,a,0)=UNDEFINED; - LIN(MM,a,1)=-1; - } - } - - a=start_trace; - if (LIN(MM,a,0)>=LIN(MI,a,0) && LIN(MM,a,0) >=LIN(MJ,a,0))MT2=MM; - else if ( LIN(MI,a,0)>=LIN(MJ,a,0))MT2=MI; - else MT2=MJ; - - score=MAX3(LIN(MM,a,0), LIN(MI,a,0), LIN(MJ,a,0)); - - i=l1; - j=l2; - - - while (!(i==0 &&j==0)) - { - int next_a; - l=MAX(LIN(MT2,a,2),LIN(MT2,a,3)); - // HERE ("%c from %c %d %d SCORE=%d [%d %d] [%2d %2d]", T2[a][5],T2[a][4], T2[a][2], T2[a][3], T2[a][0], gop, gep, i, j); - if (i==0) - { - while ( j>0) - { - al[0][LEN]=0; - al[1][LEN]=1; - j--; LEN++; - } - } - else if (j==0) - { - while ( i>0) - { - al[0][LEN]=1; - al[1][LEN]=0; - i--; LEN++; - } - } - - else if (l==0) {HERE ("L=0 i=%d j=%d",l, i, j);exit (0);} - else - { - for (b=0; bdeclared_len<=LEN)A=realloc_aln ( A,2*LEN+1); - aln=A->seq_al; - char_buf= vcalloc (LEN+1, sizeof (char)); - - for ( c=0; c< 2; c++) - { - for ( a=0; a< ns[c]; a++) - { - int ch=0; - for ( b=0; b< LEN; b++) - { - if (al[c][b]==1) - char_buf[b]=aln[l_s[c][a]][ch++]; - else - char_buf[b]='-'; - } - char_buf[b]='\0'; - sprintf (aln[l_s[c][a]],"%s", char_buf); - } - } - - A->len_aln=LEN; - A->nseq=ns[0]+ns[1]; - - vfree (char_buf); - free_char (al, -1); - - return score; -} -int ** aln2local_penalties4link (Alignment *A, int n, int *ls, Constraint_list *CL, int **lg); -int ** aln2local_penalties4link (Alignment *A, int n, int *ls, Constraint_list *CL, int **lg) -{ - //adapted from gap_count in MAFFT V 5.5 - int p,s,l, c1, c2; - int gep,gop; - int open=3, close=4, gap=5; - - gop=CL->gop; - gep=CL->gep; - - l=strlen (A->seq_al[ls[0]]); - - if (!lg) - { - lg=declare_int (6, l); - } - - if ( read_array_size_new (lg[0])seq_al[ls[s]][p]; - - if (c1!='-' && c2=='-')lg[open][p]++; - if (c1=='-' && c2!='-')lg[close][p]++; - if ( c1=='-')lg[gap][p]++; - c1=c2; - } - } - - for (p=0; p gives low quality results - lg[GEP][p]=gep;//(1-((float)lg[gap][p]/(float)n))*gep; - lg[open][p]=lg[close][p]=lg[gap][p]=0; - - } - - return lg; -} - -/*********************************COPYRIGHT NOTICE**********************************/ -/*© Centro de Regulacio Genomica */ -/*and */ -/*Cedric Notredame */ -/*Tue Oct 27 10:12:26 WEST 2009. */ -/*All rights reserved.*/ -/*This file is part of T-COFFEE.*/ -/**/ -/* T-COFFEE is free software; you can redistribute it and/or modify*/ -/* it under the terms of the GNU General Public License as published by*/ -/* the Free Software Foundation; either version 2 of the License, or*/ -/* (at your option) any later version.*/ -/**/ -/* T-COFFEE is distributed in the hope that it will be useful,*/ -/* but WITHOUT ANY WARRANTY; without even the implied warranty of*/ -/* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the*/ -/* GNU General Public License for more details.*/ -/**/ -/* You should have received a copy of the GNU General Public License*/ -/* along with Foobar; if not, write to the Free Software*/ -/* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA*/ -/*............................................... |*/ -/* If you need some more information*/ -/* cedric.notredame@europe.com*/ -/*............................................... |*/ -/**/ -/**/ -/* */ -/*********************************COPYRIGHT NOTICE**********************************/ diff --git a/binaries/src/tcoffee/t_coffee_source/util_dp_gotoh_sw.c b/binaries/src/tcoffee/t_coffee_source/util_dp_gotoh_sw.c deleted file mode 100644 index bd081c1..0000000 --- a/binaries/src/tcoffee/t_coffee_source/util_dp_gotoh_sw.c +++ /dev/null @@ -1,585 +0,0 @@ -#include -#include -#include -#include -#include - -#include "io_lib_header.h" -#include "util_lib_header.h" -#include "define_header.h" - -#include "dp_lib_header.h" - -int gotoh_pair_wise_lalign ( Alignment *A, int*ns, int **l_s,Constraint_list *CL) - { - Alignment *BUF=NULL; - Alignment *EA=NULL; - - int a; - BUF=copy_aln (A, BUF); - - - for ( a=0; alalign_n_top; a++) - { - free_aln (A); - - A=copy_aln (BUF, A); - - A->score_aln=gotoh_pair_wise_sw (A, ns, l_s, CL); - EA=fast_coffee_evaluate_output (A, CL); - - output_format_aln (CL->out_aln_format[0],A,EA,"stdout"); - CL=undefine_sw_aln ( A, CL); - } - exit (1); - return 0; - } -Constraint_list * undefine_sw_aln ( Alignment *A, Constraint_list *CL) - { - int a, b, l; - int **pos; - int r1, rs1; - int r2, rs2; - - - - pos=aln2pos_simple ( A,A->nseq); - - for ( l=0; l< A->len_aln; l++) - for ( a=0; a< A->nseq-1; a++) - { - rs1=A->order[a][0]; - r1 =pos[a][l]; - - if ( r1<=0)continue; - for ( b=a+1; b< A->nseq;b++) - { - rs2=A->order[b][0]; - r2 =pos[b][l]; - if ( r2<=0)continue; - - CL=undefine_sw_pair ( CL, rs1, r1, rs2, r2); - } - } - free_int (pos, -1); - return CL; - } -Constraint_list * undefine_sw_pair ( Constraint_list *CL, int s1, int r1, int s2, int r2) - { - int a, b; - - if ( !CL->forbiden_pair_list) - { - - CL->forbiden_pair_list=vcalloc ( (CL->S)->nseq, sizeof (int ***)); - for ( a=0; a< ((CL->S)->nseq); a++) - { - CL->forbiden_pair_list[a]=vcalloc ( (CL->S)->nseq, sizeof (int **)); - for ( b=0; b< ((CL->S)->nseq); b++) - CL->forbiden_pair_list[a][b]=vcalloc ( (CL->S)->len[a]+1, sizeof (int *)); - - } - } - if ( CL->forbiden_pair_list[s1][s2][r1]==NULL)CL->forbiden_pair_list[s1][s2][r1]=vcalloc ( (CL->S)->len[s2]+1, sizeof (int)); - CL->forbiden_pair_list[s1][s2][r1][r2]=1; - - if ( CL->forbiden_pair_list[s2][s1][r2]==NULL)CL->forbiden_pair_list[s2][s1][r2]=vcalloc ( (CL->S)->len[s1]+1, sizeof (int)); - CL->forbiden_pair_list[s2][s1][r2][r1]=1; - - return CL; - } - -int sw_pair_is_defined ( Constraint_list *CL, int s1, int r1, int s2, int r2) - { - int d; - - d=(r1-r2); - d=(d<0)?-d:d; - - - if ( s1==s2 && d<(CL->sw_min_dist)) return UNDEFINED; - else if ( ! CL->forbiden_pair_list) return 1; - else if ( CL->forbiden_pair_list[s1][s2][r1]==NULL)return 1; - else if ( CL->forbiden_pair_list[s1][s2][r1][r2]==1)return UNDEFINED; - else if ( CL->forbiden_pair_list[s1][s2][r1][r2]==0)return 1; - - else - { - crash ("ERROR in function: sw_pair_is_defined\n"); - return UNDEFINED; - } - - } - - -int gotoh_pair_wise_sw (Alignment *A, int*ns, int **l_s,Constraint_list *CL) - { -/*******************************************************************************/ -/* SMITH AND WATERMAN */ -/* */ -/* makes DP between the the ns[0] sequences and the ns[1] */ -/* */ -/* for MODE, see the function get_dp_cost */ -/*******************************************************************************/ - int a, b, d, i, j; - int last_i, last_j; - int t; - int *cc; - int *dd, *ddg; - int lenal[2], len; - - int c,s, e,eg, ch,g,h, maximise; - int sub; - - int fop; - static int **pos0; - - char **al=NULL; - char **aln=NULL; - - - int ala, alb,LEN; - char *buffer; - char *char_buf; - -/*trace back variables */ - int best_i; - int best_j; - int best_score; - - - FILE *long_trace=NULL; - TRACE_TYPE *buf_trace=NULL; - static TRACE_TYPE **trace; - TRACE_TYPE k; - TRACE_TYPE *tr; - int long_trace_flag=0; - int dim; -/********Prepare penalties*******/ - if (CL->moca) - { - g=(CL->gop+(CL->moca)->moca_scale)*SCORE_K; - h=(CL->gep+(CL->moca)->moca_scale)*SCORE_K; - } - else - { - g=(CL->gop-CL->nomatch)*SCORE_K; - h=(CL->gep-CL->nomatch)*SCORE_K; - } - fprintf ( stderr, "\n%d %d", g, h); - maximise=CL->maximise; -/********************************/ -/*CLEAN UP AFTER USE*/ - if ( A==NULL) - { - free_int (trace,-1); - trace=NULL; - if ( al)free_char (al,-1); - al=NULL; - return 0; - } -/*DO MEMORY ALLOCATION FOR SW DP*/ - - lenal[0]=strlen (A->seq_al[l_s[0][0]]); - lenal[1]=strlen (A->seq_al[l_s[1][0]]); - len= (( lenal[0]>lenal[1])?lenal[0]:lenal[1])+1; - buf_trace=vcalloc ( len, sizeof (TRACE_TYPE)); - buffer=vcalloc ( 2*len, sizeof (char)); - al=declare_char (2, 2*len); - - char_buf= vcalloc (2*len, sizeof (char)); - dd= vcalloc (len, sizeof (int)); - cc= vcalloc (len, sizeof (int)); - ddg=vcalloc (len, sizeof (int)); - - - if ( len>=MAX_LEN_FOR_DP) - { - long_trace_flag=1; - long_trace=vtmpfile(); - } - else - { - dim=(trace==NULL)?0:read_size_int ( trace,sizeof (int*)); - trace =realloc_int ( trace,dim,dim,len-dim, len-dim); - } - -/*END OF MEMORY ALLOCATION*/ - - - /* - 0(s) +(dd) - \ | - \ | - \ | - \ | - \ | - \ | - \| - -(e)----O - */ - - - pos0=aln2pos_simple ( A,-1, ns, l_s); - - - - cc[0]=0; - - best_score=0; - best_i=0; - best_j=0; - - tr=(long_trace_flag)?buf_trace:trace[0]; - tr[0]=(TRACE_TYPE)UNDEFINED; - - t=g; - for ( j=1; j<=lenal[1]; j++) - { - cc[j]=t=t+h; - dd[j]=t+g; - tr[j]=(TRACE_TYPE)UNDEFINED; - } - if (long_trace_flag)fwrite (buf_trace, sizeof ( TRACE_TYPE),lenal[1]+1, long_trace); - - - t=g; - for (i=1; i<=lenal[0];i++) - { - tr=(long_trace_flag)?buf_trace:trace[i]; - s=cc[0]; - cc[0]=c=t=t+h; - e=t+g; - tr[0]=(TRACE_TYPE)UNDEFINED; - - for (eg=0,j=1; j<=lenal[1];j++) - { - - sub=(CL->get_dp_cost) (A, pos0, ns[0], l_s[0], i-1, pos0, ns[1], l_s[1],j-1,CL); - - /*get the best Insertion*/ - if ( a_better_than_b ( e, c+g, maximise)) - eg++; - else - eg=1; - e=best_of_a_b (e, c+g, maximise)+h; - - /*Get the best deletion*/ - if ( a_better_than_b ( dd[j], cc[j]+g, maximise)) - ddg[j]++; - else - ddg[j]=1; - dd[j]=best_of_a_b( dd[j], cc[j]+g, maximise)+h; - - /*Chose Substitution for tie breaking*/ - if ( sub!=UNDEFINED)c=best_int(4,maximise,&fop, e, (s+sub), dd[j],0); - else - { - c=0; - fop=3; - dd[j]=e=0; - eg=ddg[j]=0; - } - - if ( c>best_score) - { - best_i=i; - best_j=j; - best_score=c; - } - fop-=1; - s=cc[j]; - cc[j]=c; - - - if ( fop==-1) - {tr[j]=(TRACE_TYPE)fop*eg; - } - else if ( fop==1) - {tr[j]=(TRACE_TYPE)fop*ddg[j]; - } - else if (fop==0) - {tr[j]=(TRACE_TYPE)0; - } - else if ( fop==2) - { - tr[j]=(TRACE_TYPE)UNDEFINED; - } - - fop= -2; - } - if (long_trace_flag) - { - fwrite ( buf_trace, sizeof (TRACE_TYPE), lenal[1]+1, long_trace); - } - } - - - - - if (best_i==0 ||best_j==0 ) - { - vfree (buf_trace); - vfree (buffer); - free_char ( al,-1); - vfree ( char_buf); - vfree ( dd); - vfree ( cc); - vfree ( ddg); - free_int (pos0, -1); - A->len_aln=0; - aln=A->seq_al; - - for ( c=0; c< 2; c++) - { - for ( a=0; a< ns[c]; a++) - { - aln[l_s[c][a]][0]='\0'; - } - } - if ( long_trace_flag)fclose ( long_trace); - - return UNDEFINED; - } - else - { - i=last_i=best_i; - j=last_j=best_j; - } - ala=alb=0; - - - while (i>0 && j>0) - { - if ( i==0 || j==0)k=UNDEFINED; - /* k=-1; - else if ( j==0) - k=1; - else if ( j==0 && i==0) - k=1;*/ - else - { - if (long_trace_flag) - { - fseek ( long_trace, sizeof (TRACE_TYPE)*((lenal[1]+1)*(i)+j),SEEK_SET); - fread ( &k, sizeof (TRACE_TYPE), 1, long_trace); - } - else - { - - k=trace[i][j]; - } - } - - if ( k==UNDEFINED){i=j=0;} - if (k==0) - { - - al[0][ala++]=1; - al[1][alb++]=1; - - - - i--; - j--; - last_i=i; - last_j=j; - - } - else if (k==(TRACE_TYPE)UNDEFINED) - { - i=0; - j=0; - - } - else if (k>0) - { - - for ( a=0; a< k; a++) - { - al[0][ala++]=1; - al[1][alb++]=0; - i--; - } - last_i=i; - last_j=j; - } - else if (k<0) - { - - for ( a=0; a>k; a--) - { - al[0][ala++]=0; - al[1][alb++]=1; - j--; - } - last_i=i; - last_j=j; - } - } - - LEN=ala; - c=LEN-1; - - - - invert_list_char ( al[0], LEN); - invert_list_char ( al[1], LEN); - - if ( A->declared_len<=LEN)realloc_alignment ( A, 2*LEN); - - - aln=A->seq_al; - - for ( c=0; c<2; c++) - for ( a=0; aorder[l_s[c][a]][2]=(c==0)?last_i:last_j; - A->order[l_s[c][a]][3]=(c==1)?best_i:best_j; - - e=(c==0)?last_i:last_j; - for ( d=0; dorder[l_s[c][a]][1]+=1-is_gap(aln[l_s[c][a]][d]); - } - } - - - for ( c=0; c< 2; c++) - { - for ( a=0; a< ns[c]; a++) - { - aln[l_s[c][a]]+=(c==0)?last_i:last_j; - ch=0; - for ( b=0; b< LEN; b++) - { - - if (al[c][b]==1) - char_buf[b]=aln[l_s[c][a]][ch++]; - else - char_buf[b]='-'; - } - char_buf[b]='\0'; - aln[l_s[c][a]]-=(c==0)?last_i:last_j; - sprintf (aln[l_s[c][a]],"%s", char_buf); - } - } - - - A->len_aln=LEN; - - free_int (pos0, -1); - vfree ( cc); - vfree (dd); - vfree (ddg); - vfree (buffer); - vfree (char_buf); - vfree (buf_trace); - if ( long_trace_flag)fclose (long_trace); - - - return best_score; - } - - -/*******************************************************************************/ -/* AUTOMATIC GEP+SCALE PENALTY FOR SW */ -/* */ -/* */ -/* */ -/* */ -/*******************************************************************************/ - -Alignment * add_seq2aln (Constraint_list *CL, Alignment *IN,Sequence *S) - { - int *n_groups; - int **group_list; - int a; - static int series=0; - - - - - int ste; /*sequence to extract, last one if they are packed*/ - - - - - - if (CL->packed_seq_lu){ste=S->nseq-1;} - else{ste=0;} - - if ( IN==NULL) - { - IN=realloc_aln2(IN, 1, strlen (S->seq[ste])+1); - IN->S=S; - IN->nseq=1; - - - - sprintf ( IN->seq_al[0], "%s", S->seq[ste]); - sprintf (IN->name[0], "%s_%d_1", S->name[ste],series); - IN->order[0][0]=ste; - IN->order[0][1]=0; - - IN->len_aln=strlen ( IN->seq_al[0]); - series++; - - } - else - { - - IN=realloc_aln2 ( IN, IN->nseq+1,MAX(strlen ( S->seq[ste])+1, IN->len_aln+1)); - n_groups=vcalloc ( 2, sizeof (int)); - group_list=declare_int (2,IN->nseq+1); - - n_groups[0]=IN->nseq; - for ( a=0; anseq; a++)group_list[0][a]=a; - - n_groups[1]=1; - group_list[1][0]=IN->nseq; - sprintf (IN->name[IN->nseq], "%s_%d_%d",S->name[ste],series,IN->nseq+1); - sprintf (IN->seq_al[IN->nseq], "%s",S->seq[ste]); - IN->order[IN->nseq][0]=ste; - IN->order[IN->nseq][1]=0; - IN->nseq++; - - - pair_wise ( IN, n_groups, group_list,CL); - - } - - return IN; - - } - - -/*********************************COPYRIGHT NOTICE**********************************/ -/*© Centro de Regulacio Genomica */ -/*and */ -/*Cedric Notredame */ -/*Tue Oct 27 10:12:26 WEST 2009. */ -/*All rights reserved.*/ -/*This file is part of T-COFFEE.*/ -/**/ -/* T-COFFEE is free software; you can redistribute it and/or modify*/ -/* it under the terms of the GNU General Public License as published by*/ -/* the Free Software Foundation; either version 2 of the License, or*/ -/* (at your option) any later version.*/ -/**/ -/* T-COFFEE is distributed in the hope that it will be useful,*/ -/* but WITHOUT ANY WARRANTY; without even the implied warranty of*/ -/* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the*/ -/* GNU General Public License for more details.*/ -/**/ -/* You should have received a copy of the GNU General Public License*/ -/* along with Foobar; if not, write to the Free Software*/ -/* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA*/ -/*............................................... |*/ -/* If you need some more information*/ -/* cedric.notredame@europe.com*/ -/*............................................... |*/ -/**/ -/**/ -/* */ -/*********************************COPYRIGHT NOTICE**********************************/ diff --git a/binaries/src/tcoffee/t_coffee_source/util_dp_mm_nw.c b/binaries/src/tcoffee/t_coffee_source/util_dp_mm_nw.c deleted file mode 100644 index 96da739..0000000 --- a/binaries/src/tcoffee/t_coffee_source/util_dp_mm_nw.c +++ /dev/null @@ -1,397 +0,0 @@ -#include -#include -#include -#include -#include - -#include "io_lib_header.h" -#include "util_lib_header.h" -#include "define_header.h" -#include "dp_lib_header.h" - - -/*******************************************************************************/ -/* myers and Miller */ -/* */ -/* makes DP between the the ns[0] sequences and the ns[1] */ -/* */ -/* for MODE, see the function get_dp_cost */ -/*******************************************************************************/ - -#define gap(k) ((k) <= 0 ? 0 : g+h*(k)) /* k-symbol indel cost */ -static int *sapp; /* Current script append ptr */ -static int last; /* Last script op appended */ - /* Append "Delete k" op */ -#define DEL(k) \ -{ if (last < 0) \ - last = sapp[-1] -= (k); \ - else \ - last = *sapp++ = -(k); \ -} - /* Append "Insert k" op */ -#define INS(k) \ -{ if (last < 0) \ - { sapp[-1] = (k); *sapp++ = last; } \ - else \ - last = *sapp++ = (k); \ -} - -#define REP { last = *sapp++ = 0; } /* Append "Replace" op */ - -int myers_miller_pair_wise (Alignment *A,int *ns, int **l_s,Constraint_list *CL ) - { - int **pos; - int a,b, i, j, l,l1, l2, len; - int *S; - char ** char_buf; - int score; - - - /********Prepare Penalties******/ - - /********************************/ - - - pos=aln2pos_simple ( A,-1, ns, l_s); - - - l1=strlen (A->seq_al[l_s[0][0]]); - l2=strlen (A->seq_al[l_s[1][0]]); - S=vcalloc (l1+l2+1, sizeof (int)); - last=0; - sapp=S; - - - score=diff (A,ns, l_s, 0, l1, 0, l2, 0, 0, CL, pos); - diff (NULL,ns, l_s, 0, l1, 0, l2, 0, 0, CL, pos); - - i=0;j=0;sapp=S; len=0; - while (!(i==l1 && j==l2)) - { - if (*sapp==0){i++; j++;len++;} - else if ( *sapp<0){i-=*sapp;len-=*sapp;} - else if ( *sapp>0){j+=*sapp;len+=*sapp;} - sapp++; - } - - - - A=realloc_aln2 ( A,A->max_n_seq,len+1); - char_buf=declare_char (A->max_n_seq,len+1); - - i=0;j=0;sapp=S; len=0; - while (!(i==l1 && j==l2)) - { - - if (*sapp==0) - { - for (b=0; b< ns[0]; b++) - char_buf[l_s[0][b]][len]=A->seq_al[l_s[0][b]][i]; - for (b=0; b< ns[1]; b++) - char_buf[l_s[1][b]][len]=A->seq_al[l_s[1][b]][j]; - i++; j++;len++; - } - else if ( *sapp>0) - { - l=*sapp; - for ( a=0; aseq_al[l_s[1][b]][j]; - } - } - else if ( *sapp<0) - { - l=-*sapp; - for ( a=0; aseq_al[l_s[0][b]][i];; - for (b=0; b< ns[1]; b++) - char_buf[l_s[1][b]][len]='-'; - } - } - - sapp++; - } - - - A->len_aln=len; - A->nseq=ns[0]+ns[1]; - - for ( a=0; a< ns[0]; a++){char_buf[l_s[0][a]][len]='\0'; sprintf ( A->seq_al[l_s[0][a]], "%s", char_buf[l_s[0][a]]);} - for ( a=0; a< ns[1]; a++){char_buf[l_s[1][a]][len]='\0'; sprintf ( A->seq_al[l_s[1][a]], "%s", char_buf[l_s[1][a]]);} - - - vfree (S); - free_char ( char_buf, -1); - l1=strlen (A->seq_al[l_s[0][0]]); - l2=strlen (A->seq_al[l_s[1][0]]); - if ( l1!=l2) exit(1); - - free_int (pos, -1); - return score; - } - - -int diff (Alignment *A, int *ns, int **l_s, int s1, int M,int s2, int N , int tb, int te, Constraint_list *CL, int **pos) - { - static int *CC; - static int *DD; - /* Forward cost-only vectors */ - static int *RR; - static int *SS; - /* Reverse cost-only vectors */ - int midi, midj, type; /* Midpoint, type, and cost */ - int midc; - -/*TREATMENT OF THE TERMINAL GAP PENALTIES*/ -/*TG_MODE=0---> gop and gep*/ -/*TG_MODE=1---> --- gep*/ -/*TG_MODE=2---> --- ---*/ - - - - - - - - if ( !CC) - { - int L; - L=M+N+1; - - CC=vcalloc (L, sizeof (int)); - DD=vcalloc (L, sizeof (int)); - RR=vcalloc (L, sizeof (int)); - SS=vcalloc (L, sizeof (int)); - } - - if ( A==NULL) - { - vfree(CC); - vfree(DD); - vfree(RR); - vfree(SS); - CC=DD=RR=SS=NULL; - return 0; - } - - { - int i, j; - int c, e, d, s; - int t, g,h,m; - - - - g=CL->gop*SCORE_K; - h=CL->gep*SCORE_K; - m=g+h; - - if (N <= 0){if (M > 0) DEL(M);return gap(M);} - if (M <= 1) - { - - if (M <= 0) - {INS(N); - return gap(N); - } - - - if (tb > te) tb = te; - midc = (tb+h) + gap(N); - midj = 0; - - for (j = 1; j <= N; j++) - { - - c = gap(j-1) +(CL->get_dp_cost) (A, pos, ns[0], l_s[0],s1, pos, ns[1], l_s[1],j-1+s2,CL)+ gap(N-j); - - if (c > midc) - { midc = c; - midj = j; - } - } - if (midj == 0) - {DEL(1) INS(N)} - else - {if (midj > 1) INS(midj-1); - REP; - if (midj < N) INS(N-midj); - } - - return midc; - } -/* Divide: Find optimum midpoint (midi,midj) of cost midc */ - - - midi = M/2; /* Forward phase: */ - CC[0] = 0; /* Compute C(M/2,k) & D(M/2,k) for all k */ - t = tb; - for (j = 1; j <= N; j++) - { CC[j] = t = t+h; - DD[j] = t+g; - } - t = tb; - for (i = 1; i <= midi; i++) - { - s = CC[0]; - CC[0] = c = t = t+h; - e = t+g; - - for (j = 1; j <= N; j++) - { - - - if ((c = c + m) > (e = e + h)) e = c; - if ((c = CC[j] + m) > (d = DD[j] + h)) d = c; - - c = s + (CL->get_dp_cost) (A, pos, ns[0], l_s[0],i-1+s1, pos, ns[1], l_s[1],j-1+s2,CL); - - if (e > c) c = e; - if (d > c) c = d; - - s = CC[j]; - CC[j] = c; - DD[j] = d; - } - } - DD[0] = CC[0]; - - RR[N] = 0; /* Reverse phase: */ - t = te; - - - for (j = N-1; j >= 0; j--) - { RR[j] = t = t+h; - SS[j] = t+g; - } - t = te; - for (i = M-1; i >= midi; i--) - { s = RR[N]; - RR[N] = c = t = t+h; - e = t+g; - for (j = N-1; j >= 0; j--) - { - if ((c = c + m) > (e = e + h)) e = c; - if ((c = RR[j] + m) > (d = SS[j] + h)) d = c; - - c = s + (CL->get_dp_cost) (A, pos, ns[0], l_s[0],i+s1, pos, ns[1], l_s[1],j+s2,CL); - - if (e > c) c = e; - if (d > c) c = d; - s = RR[j]; - RR[j] = c; - SS[j] = d; - - } - } - SS[N] = RR[N]; - midc = CC[0]+RR[0]; /* Find optimal midpoint */ - midj = 0; - type = 1; - for (j = 0; j <= N; j++) - if ((c = CC[j] + RR[j]) >= midc) - if (c > midc || (CC[j] != DD[j] && RR[j] == SS[j])) - { - midc = c; - midj = j; - } - for (j = N; j >= 0; j--) - if ((c = DD[j] + SS[j] - g) > midc) - {midc = c; - midj = j; - type = 2; - } - } -/* Conquer: recursively around midpoint */ - - if (type == 1) - { - - diff (A,ns, l_s, s1,midi, s2, midj, tb, CL->gop*SCORE_K, CL, pos); - diff (A,ns, l_s, s1+midi,M-midi, s2+midj, N-midj, CL->gop*SCORE_K,te, CL, pos); - } - else - { - diff (A,ns, l_s, s1,midi-1, s2, midj, tb,0, CL, pos); - DEL(2); - diff (A,ns, l_s, s1+midi+1, M-midi-1,s2+midj, N-midj,0,te, CL, pos); - } - return midc; - } - - - - - - - - - - - - - - - -/*********************************COPYRIGHT NOTICE**********************************/ -/*© Centre National de la Recherche Scientifique (CNRS) */ -/*and */ -/*Cedric Notredame */ -/*Wed Sep 21 19:11:38 2005. */ -/*All rights reserved.*/ -/*This file is part of T-COFFEE.*/ -/**/ -/* T-COFFEE is free software; you can redistribute it and/or modify*/ -/* it under the terms of the GNU General Public License as published by*/ -/* the Free Software Foundation; either version 2 of the License, or*/ -/* (at your option) any later version.*/ -/**/ -/* T-COFFEE is distributed in the hope that it will be useful,*/ -/* but WITHOUT ANY WARRANTY; without even the implied warranty of*/ -/* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the*/ -/* GNU General Public License for more details.*/ -/**/ -/* You should have received a copy of the GNU General Public License*/ -/* along with Foobar; if not, write to the Free Software*/ -/* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA*/ -/*............................................... |*/ -/* If you need some more information*/ -/* cedric.notredame@europe.com*/ -/*............................................... |*/ -/**/ -/**/ -/* */ -/*********************************COPYRIGHT NOTICE**********************************/ -/*********************************COPYRIGHT NOTICE**********************************/ -/*© Centro de Regulacio Genomica */ -/*and */ -/*Cedric Notredame */ -/*Tue Oct 27 10:12:26 WEST 2009. */ -/*All rights reserved.*/ -/*This file is part of T-COFFEE.*/ -/**/ -/* T-COFFEE is free software; you can redistribute it and/or modify*/ -/* it under the terms of the GNU General Public License as published by*/ -/* the Free Software Foundation; either version 2 of the License, or*/ -/* (at your option) any later version.*/ -/**/ -/* T-COFFEE is distributed in the hope that it will be useful,*/ -/* but WITHOUT ANY WARRANTY; without even the implied warranty of*/ -/* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the*/ -/* GNU General Public License for more details.*/ -/**/ -/* You should have received a copy of the GNU General Public License*/ -/* along with Foobar; if not, write to the Free Software*/ -/* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA*/ -/*............................................... |*/ -/* If you need some more information*/ -/* cedric.notredame@europe.com*/ -/*............................................... |*/ -/**/ -/**/ -/* */ -/*********************************COPYRIGHT NOTICE**********************************/ diff --git a/binaries/src/tcoffee/t_coffee_source/util_dp_sim.c b/binaries/src/tcoffee/t_coffee_source/util_dp_sim.c deleted file mode 100644 index 2a20b50..0000000 --- a/binaries/src/tcoffee/t_coffee_source/util_dp_sim.c +++ /dev/null @@ -1,1163 +0,0 @@ -#include -#include -#include -#include - -#include "io_lib_header.h" -#include "util_lib_header.h" -#include "define_header.h" -#include "dp_lib_header.h" -/* extern char name0[], name1[]; */ -/* extern int match, mismh; */ - - - -static Constraint_list *CL; -static int * ns; -static int **l_s; -static Alignment *Aln; -static int **pos; -static int *seqc0, *seqc1; -static int min0,min1,max0,max1,mins; - -static void* sim_vcalloc( size_t nobj, size_t size); -static void sim_free_all (); -static int sim_reset_static_variable (); -static int big_pass(int *A,int *B,int M,int N,int K, int nseq) ; -static int locate(int *A,int *B,int nseq); -static int small_pass(int *A,int *B,int count,int nseq); -static int no_cross (); -static int diff_sim( int *A,int *B,int M,int N,int tb,int te); -int calcons(int *aa0,int n0,int *aa1,int n1,int *res,int *nc,int *nident, Alignment *A, int *ns, int **l_s, Constraint_list *CL); - - - -#define SIM_GAP -1 -#define min(x,y) ((x)<=(y) ? (x) : (y)) -//#define TC_SCORE_SIM(x,y) TC_SCORE (x,y) - -static int q, r; /* gap penalties */ -static int qr; /* qr = q + r */ - - - -typedef struct ONE { int COL ; struct ONE *NEXT ;} pair, *pairptr; -pairptr *row, z, z1; /* for saving used aligned pairs */ - - -#define PAIRNULL (pairptr)NULL -static int tt; - -typedef struct SIM_NODE - { int SIM_SCORE; - int SIM_STARI; - int SIM_STARJ; - int SIM_ENDI; - int SIM_ENDJ; - int SIM_TOP; - int SIM_BOT; - int SIM_LEFT; - int SIM_RIGHT; } vertex, -#ifdef FAR_PTR - far *vertexptr; -#else - *vertexptr; -#endif - -vertexptr *LIST; /* an array for saving k best scores */ -vertexptr low = 0; /* lowest score node in LIST */ -vertexptr most = 0; /* latestly accessed node in LIST */ -static int numnode; /* the number of nodes in LIST */ - -static int *CC, *DD; /* saving matrix scores */ -static int *RR, *SS, *EE, *FF; /* saving start-points */ -static int *HH, *WW; /* saving matrix scores */ -static int *II, *JJ, *XX, *YY; /* saving start-points */ -static int m1, mm, n1, nn; /* boundaries of recomputed area */ -static int rl, cl; /* left and top boundaries */ -static int lmin; /* minimum score in LIST */ -static int flag; /* indicate if recomputation necessary*/ - -/* DIAG() assigns value to x if (ii,jj) is never used before */ -#define DIAG(ii, jj, x, value) \ -{ for ( tt = 1, z = row[(ii)]; z != PAIRNULL; z = z->NEXT ) \ - if ( z->COL == (jj) ) \ - { tt = 0; break; } \ - if ( tt ) \ - x = ( value ); \ -} - -/* replace (ss1, xx1, yy1) by (ss2, xx2, yy2) if the latter is large */ -#define ORDER(ss1, xx1, yy1, ss2, xx2, yy2) \ -{ if ( ss1 < ss2 ) \ - { ss1 = ss2; xx1 = xx2; yy1 = yy2; } \ - else \ - if ( ss1 == ss2 ) \ - { if ( xx1 < xx2 ) \ - { xx1 = xx2; yy1 = yy2; } \ - else \ - if ( xx1 == xx2 && yy1 < yy2 ) \ - yy1 = yy2; \ - } \ -} - -/* The following definitions are for function diff() */ - -static int zero = 0; /* int type zero */ -#define gap(k) ((k) <= 0 ? 0 : q+r*(k)) /* k-symbol indel score */ - -static int *sapp; /* Current script append ptr */ -static int last; /* Last script op appended */ - -static int I, J; /* current positions of A ,B */ -static int no_mat; /* number of matches */ -static int no_mis; /* number of mismatches */ -static int al_len; /* length of alignment */ - /* Append "Delete k" op */ -#define DEL(k) \ -{ I += k;\ - al_len += k;\ - if (last < 0)\ - last = sapp[-1] -= (k);\ - else\ - last = *sapp++ = -(k);\ -} - /* Append "Insert k" op */ -#define INS(k) \ -{ J += k;\ - al_len += k;\ - if (last < 0)\ - { sapp[-1] = (k); *sapp++ = last; } \ - else\ - last = *sapp++ = (k);\ -} - - /* Append "Replace" op */ -#define REP \ -{ last = *sapp++ = 0;\ - al_len += 1;\ -} - - -/* -int sim_pair_wise_lalign (Alignment *in_A, int *in_ns, int **in_l_s,Constraint_list *in_CL) -{ - if ( in_ns[0]==1 && in_ns[1]==1) - return sim_pair_wise_lalign (in_A, in_ns, in_l_s,in_CL); - else - */ - - - - - -int sim_pair_wise_lalign (Alignment *in_A, int *in_ns, int **in_l_s,Constraint_list *in_CL) -/* SIM(A,B,M,N,K,V,Q,R) reports K best non-intersecting alignments of - the segments of A and B in order of similarity scores, where - V[a][b] is the score of aligning a and b, and -(Q+R*i) is the score - of an i-symbol indel. -*/ -{ - int endi, endj, stari, starj; /* endpoint and startpoint */ - int score; /* the max score in LIST */ - int count; /* maximum size of list */ - int i; - int *S; /* saving operations for diff */ - int nc, nident; /* for display */ - vertexptr cur; /* temporary pointer */ - vertexptr findmax(); /* return the largest score node */ - double percent; - int t1, t2, g1, g2, r1, r2; - int a, b, c, d, e; -/*cedric was here 11/2/99*/ - int CEDRIC_MAX_N_ALN=999; - int CEDRIC_THRESHOLD=50; - int *A, *B; - int M, N, K, maxl; - int nseq; - int R, Q; - Alignment *DA; - - - DA=in_A; - - Aln=copy_aln (in_A, NULL); - - - - l_s=in_l_s; - ns=in_ns; - CL=in_CL; - K=CL->lalign_n_top; - - M=strlen (Aln->seq_al[l_s[0][0]]); - N=strlen (Aln->seq_al[l_s[1][0]]); - maxl=M+N+1; - - pos=aln2pos_simple (Aln,-1, ns, l_s); - - seqc0=(int*)sim_vcalloc (maxl,sizeof (int)); - A=(int*)sim_vcalloc (maxl,sizeof (int)); - for ( a=0; agop, -CL->gop)*SCORE_K; - R=MAX(CL->gep, -CL->gep)*SCORE_K; - - - - if ( K==CEDRIC_MAX_N_ALN)K--; - else if ( K<0) - { - - CEDRIC_THRESHOLD=-K; - K=CEDRIC_MAX_N_ALN; - } - - /* allocate space for all vectors */ - - CC = ( int * ) sim_vcalloc(N+1, sizeof(int)); - DD = ( int * ) sim_vcalloc(N+1, sizeof(int)); - RR = ( int * ) sim_vcalloc(N+1, sizeof(int)); - SS = ( int * ) sim_vcalloc(N+1, sizeof(int)); - EE = ( int * ) sim_vcalloc(N+1, sizeof(int)); - FF = ( int * ) sim_vcalloc(N+1, sizeof(int)); - - HH = ( int * ) sim_vcalloc(M + 1, sizeof(int)); - WW = ( int * ) sim_vcalloc(M + 1, sizeof(int)); - II = ( int * ) sim_vcalloc(M + 1, sizeof(int)); - JJ = ( int * ) sim_vcalloc(M + 1, sizeof(int)); - XX = ( int * ) sim_vcalloc(M + 1, sizeof(int)); - YY = ( int * ) sim_vcalloc(M + 1, sizeof(int)); - S = ( int * ) sim_vcalloc(min(M,N)*5/4+1, sizeof (int)); - row = ( pairptr * ) sim_vcalloc( (M + 1), sizeof(pairptr)); - - - /* set up list for each row */ - if (nseq == 2) for ( i = 1; i <= M; i++ ) row[i]= PAIRNULL; - else { - z = ( pairptr )sim_vcalloc (M,(int)sizeof(pair)); - for ( i = 1; i <= M; i++,z++) { - row[i] = z; - z->COL = i; - z->NEXT = PAIRNULL; - } - } - - - q = Q; - r = R; - qr = q + r; - - LIST = ( vertexptr * ) sim_vcalloc( K, sizeof(vertexptr)); - for ( i = 0; i < K ; i++ ) - LIST[i] = ( vertexptr )sim_vcalloc( 1, sizeof(vertex)); - - - numnode = lmin = 0; - big_pass(A,B,M,N,K,nseq); - - - - /* Report the K best alignments one by one. After each alignment is - output, recompute part of the matrix. First determine the size - of the area to be recomputed, then do the recomputation */ - - - for ( count = K - 1; count >= 0; count-- ) - { if ( numnode == 0 ) - { - - padd_aln (in_A); - /*fatal("The number of alignments computed is too large");*/ - sim_free_all(); - return 1; - } - - cur = findmax(); /* Return a pointer to a node with max score*/ - score = cur->SIM_SCORE; - if ( K==CEDRIC_MAX_N_ALN && scoreSIM_STARI; - starj = ++cur->SIM_STARJ; - endi = cur->SIM_ENDI; - endj = cur->SIM_ENDJ; - m1 = cur->SIM_TOP; - mm = cur->SIM_BOT; - n1 = cur->SIM_LEFT; - nn = cur->SIM_RIGHT; - rl = endi - stari + 1; - cl = endj - starj + 1; - I = stari - 1; - J = starj - 1; - sapp = S; - last = 0; - al_len = 0; - no_mat = 0; - no_mis = 0; - diff_sim(&A[stari]-1, &B[starj]-1,rl,cl,q,q); - - - min0 = stari; - min1 = starj; - max0 = stari+rl-1; - max1 = starj+cl-1; - calcons(A+1,M,B+1,N,S,&nc,&nident, Aln,ns, l_s, CL); - percent = (double)nident*100.0/(double)nc; - - - - /*Min0: index of the last residue before the first in a 1..N+1 numerotation*/ - - - - if (!DA->A)DA->A=copy_aln(Aln, DA->A); - DA->A=realloc_alignment (DA->A,nc+1); - - - DA=DA->A; - DA->A=NULL; - - for ( c=0; c< 2; c++) - { - for ( a=0; a< ns[c]; a++) - { - e=(c==0)?min0:min1; - for ( d=0; dorder[l_s[c][a]][1]+=1-is_gap(Aln->seq_al[l_s[c][a]][d]); - } - } - } - - - for ( t1=min0,t2=min1,a=0; aM)?0:1; - g2=(r2==SIM_GAP || r2>N)?0:1; - t1+=g1; - t2+=g2; - for (b=0; bseq_al[l_s[0][b]][a]=(g1)?Aln->seq_al[l_s[0][b]][A[t1-1]]:'-'; - for (b=0; bseq_al[l_s[1][b]][a]=(g2)?Aln->seq_al[l_s[1][b]][B[t2-1]]:'-'; - } - for (b=0; bseq_al[l_s[0][b]][a]='\0';} - for (b=0; bseq_al[l_s[1][b]][a]='\0';} - - DA->nseq=ns[0]+ns[1]; - DA->len_aln=nc; - DA->score=percent; - DA->score_aln=score; - fflush(stdout); - - - if ( count ) - { flag = 0; - locate(A,B,nseq); - if ( flag ) - small_pass(A,B,count,nseq); - } - } - padd_aln (in_A); - - sim_free_all(); - free_int (pos, -1); - free_aln (Aln); - - - - return 1; - -} -int sim_reset_static_variable () -{ - CC=DD=RR=SS=EE=FF=HH=WW=II=JJ=XX=YY=sapp=NULL; - min0=min1=max0=max1=mins=q=r=qr=tt=numnode=m1=n1=nn=rl=cl=lmin=flag=zero=last=I=J=no_mat=no_mis=al_len=0; - most=low=NULL;/*Very important: cause a bug if not reset*/ - LIST=NULL; /*Very important: cause a bug if not reset*/ - return 0; -} -/* A big pass to compute K best classes */ - - -int big_pass(int *A,int *B,int M,int N,int K, int nseq) -{ register int i, j; /* row and column indices */ - register int c; /* best score at current point */ - register int f; /* best score ending with insertion */ - register int d; /* best score ending with deletion */ - register int p; /* best score at (i-1, j-1) */ - register int ci, cj; /* end-point associated with c */ - register int di, dj; /* end-point associated with d */ - register int fi, fj; /* end-point associated with f */ - register int pi, pj; /* end-point associated with p */ - - int addnode(); /* function for inserting a node */ - - - /* Compute the matrix and save the top K best scores in LIST - CC : the scores of the current row - RR and EE : the starting point that leads to score CC - DD : the scores of the current row, ending with deletion - SS and FF : the starting point that leads to score DD */ - /* Initialize the 0 th row */ - for ( j = 1; j <= N ; j++ ) - { CC[j] = 0; - RR[j] = 0; - EE[j] = j; - DD[j] = - (q); - SS[j] = 0; - FF[j] = j; - } - for ( i = 1; i <= M; i++) - { c = 0; /* Initialize column 0 */ - f = - (q); - ci = fi = i; - if ( nseq == 2 ) - { p = 0; - pi = i - 1; - cj = fj = pj = 0; - } - else - { p = CC[i]; - pi = RR[i]; - pj = EE[i]; - cj = fj = i; - } - for ( j = (nseq == 2 ? 1 : (i+1)) ; j <= N ; j++ ) - { f = f - r; - c = c - qr; - ORDER(f, fi, fj, c, ci, cj) - c = CC[j] - qr; - ci = RR[j]; - cj = EE[j]; - d = DD[j] - r; - di = SS[j]; - dj = FF[j]; - ORDER(d, di, dj, c, ci, cj) - c = 0; - - DIAG(i, j, c, p+TC_SCORE(A[i-1],B[j-1])) /* diagonal */ - - if ( c <= 0 ) - { c = 0; ci = i; cj = j; } - else - { ci = pi; cj = pj; } - ORDER(c, ci, cj, d, di, dj) - ORDER(c, ci, cj, f, fi, fj) - p = CC[j]; - CC[j] = c; - pi = RR[j]; - pj = EE[j]; - RR[j] = ci; - EE[j] = cj; - DD[j] = d; - SS[j] = di; - FF[j] = dj; - if ( c > lmin ) /* add the score into list */ - lmin = addnode(c, ci, cj, i, j, K, lmin); - } - } -return 1; -} - -/* Determine the left and top boundaries of the recomputed area */ - -int locate(int *A,int *B,int nseq) -{ register int i, j; /* row and column indices */ - register int c; /* best score at current point */ - register int f; /* best score ending with insertion */ - register int d; /* best score ending with deletion */ - register int p; /* best score at (i-1, j-1) */ - register int ci, cj; /* end-point associated with c */ - register int di=0, dj=0; /* end-point associated with d */ - register int fi, fj; /* end-point associated with f */ - register int pi, pj; /* end-point associated with p */ - int cflag, rflag; /* for recomputation */ - int addnode(); /* function for inserting a node */ - int limit; /* the bound on j */ - - /* Reverse pass - rows - CC : the scores on the current row - RR and EE : the endpoints that lead to CC - DD : the deletion scores - SS and FF : the endpoints that lead to DD - - columns - HH : the scores on the current columns - II and JJ : the endpoints that lead to HH - WW : the deletion scores - XX and YY : the endpoints that lead to WW - */ - for ( j = nn; j >= n1 ; j-- ) - { CC[j] = 0; - EE[j] = j; - DD[j] = - (q); - FF[j] = j; - if ( nseq == 2 || j > mm ) - RR[j] = SS[j] = mm + 1; - else - RR[j] = SS[j] = j; - } - - for ( i = mm; i >= m1; i-- ) - { c = p = 0; - f = - (q); - ci = fi = i; - pi = i + 1; - cj = fj = pj = nn + 1; - - if ( nseq == 2 || n1 > i ) - limit = n1; - else - limit = i + 1; - for ( j = nn; j >= limit ; j-- ) - { f = f - r; - c = c - qr; - ORDER(f, fi, fj, c, ci, cj) - c = CC[j] - qr; - ci = RR[j]; - cj = EE[j]; - d = DD[j] - r; - di = SS[j]; - dj = FF[j]; - ORDER(d, di, dj, c, ci, cj) - c = 0; - DIAG(i, j, c, p+TC_SCORE(A[i-1],B[j-1])) /* diagonal */ - - if ( c <= 0 ) - { c = 0; ci = i; cj = j; } - else - { ci = pi; cj = pj; } - ORDER(c, ci, cj, d, di, dj) - ORDER(c, ci, cj, f, fi, fj) - p = CC[j]; - CC[j] = c; - pi = RR[j]; - pj = EE[j]; - RR[j] = ci; - EE[j] = cj; - DD[j] = d; - SS[j] = di; - FF[j] = dj; - if ( c > lmin ) - flag = 1; - } - if ( nseq == 2 || i < n1 ) - { HH[i] = CC[n1]; - II[i] = RR[n1]; - JJ[i] = EE[n1]; - WW[i] = DD[n1]; - XX[i] = SS[n1]; - YY[i] = FF[n1]; - } - } - - for ( rl = m1, cl = n1; ; ) - { for ( rflag = cflag = 1; ( rflag && m1 > 1 ) || ( cflag && n1 > 1 ) ; ) - { if ( rflag && m1 > 1 ) /* Compute one row */ - { rflag = 0; - m1--; - c = p = 0; - f = - (q); - ci = fi = m1; - pi = m1 + 1; - cj = fj = pj = nn + 1; - - for ( j = nn; j >= n1 ; j-- ) - { f = f - r; - c = c - qr; - ORDER(f, fi, fj, c, ci, cj) - c = CC[j] - qr; - ci = RR[j]; - cj = EE[j]; - d = DD[j] - r; - di = SS[j]; - dj = FF[j]; - ORDER(d, di, dj, c, ci, cj) - c = 0; - DIAG(m1, j, c, TC_SCORE(A[m1-1],B[j-1])) /* diagonal */ - - if ( c <= 0 ) - { c = 0; ci = m1; cj = j; } - else - { ci = pi; cj = pj; } - ORDER(c, ci, cj, d, di, dj) - ORDER(c, ci, cj, f, fi, fj) - p = CC[j]; - CC[j] = c; - pi = RR[j]; - pj = EE[j]; - RR[j] = ci; - EE[j] = cj; - DD[j] = d; - SS[j] = di; - FF[j] = dj; - if ( c > lmin ) - flag = 1; - if ( ! rflag && ( (ci > rl && cj > cl) || (di > rl && dj > cl) - || (fi > rl && fj > cl) ) ) - rflag = 1; - } - HH[m1] = CC[n1]; - II[m1] = RR[n1]; - JJ[m1] = EE[n1]; - WW[m1] = DD[n1]; - XX[m1] = SS[n1]; - YY[m1] = FF[n1]; - if ( ! cflag && ( (ci > rl && cj > cl) || (di > rl && dj > cl) - || (fi > rl && fj > cl )) ) - cflag = 1; - } - - if ( nseq == 1 && n1 == (m1 + 1) && ! rflag ) - cflag = 0; - if ( cflag && n1 > 1 ) /* Compute one column */ - { cflag = 0; - n1--; - c = 0; - f = - (q); - cj = fj = n1; - if ( nseq == 2 || mm < n1 ) - { p = 0; - ci = fi = pi = mm + 1; - pj = n1 + 1; - limit = mm; - } - else - { p = HH[n1]; - pi = II[n1]; - pj = JJ[n1]; - ci = fi = n1; - limit = n1 - 1; - } - for ( i = limit; i >= m1 ; i-- ) - { f = f - r; - c = c - qr; - ORDER(f, fi, fj, c, ci, cj) - c = HH[i] - qr; - ci = II[i]; - cj = JJ[i]; - d = WW[i] - r; - di = XX[i]; - dj = YY[i]; - ORDER(d, di, dj, c, ci, cj) - c = 0; - DIAG(i, n1, c, p+TC_SCORE(A[i-1], B[n1-1])) - - - - if ( c <= 0 ) - { c = 0; ci = i; cj = n1; } - else - { ci = pi; cj = pj; } - ORDER(c, ci, cj, d, di, dj) - ORDER(c, ci, cj, f, fi, fj) - p = HH[i]; - HH[i] = c; - pi = II[i]; - pj = JJ[i]; - II[i] = ci; - JJ[i] = cj; - WW[i] = d; - XX[i] = di; - YY[i] = dj; - if ( c > lmin ) - flag = 1; - if ( ! cflag && ( (ci > rl && cj > cl) || (di > rl && dj > cl) - || (fi > rl && fj > cl )) ) - cflag = 1; - } - CC[n1] = HH[m1]; - RR[n1] = II[m1]; - EE[n1] = JJ[m1]; - DD[n1] = WW[m1]; - SS[n1] = XX[m1]; - FF[n1] = YY[m1]; - if ( ! rflag && ( (ci > rl && cj > cl) || (di > rl && dj > cl) - || (fi > rl && fj > cl) ) ) - rflag = 1; - } - } - if ( (m1 == 1 && n1 == 1) || no_cross() ) - break; - } - m1--; - n1--; -return 1; -} - -/* recompute the area on forward pass */ -int small_pass(int *A,int *B,int count,int nseq) -{ register int i, j; /* row and column indices */ - register int c; /* best score at current point */ - register int f; /* best score ending with insertion */ - register int d; /* best score ending with deletion */ - register int p; /* best score at (i-1, j-1) */ - register int ci, cj; /* end-point associated with c */ - register int di, dj; /* end-point associated with d */ - register int fi, fj; /* end-point associated with f */ - register int pi, pj; /* end-point associated with p */ - int addnode(); /* function for inserting a node */ - int limit; /* lower bound on j */ - - for ( j = n1 + 1; j <= nn ; j++ ) - { CC[j] = 0; - RR[j] = m1; - EE[j] = j; - DD[j] = - (q); - SS[j] = m1; - FF[j] = j; - } - for ( i = m1 + 1; i <= mm; i++) - { c = 0; /* Initialize column 0 */ - f = - (q); - ci = fi = i; - - if ( nseq == 2 || i <= n1 ) - { p = 0; - pi = i - 1; - cj = fj = pj = n1; - limit = n1 + 1; - } - else - { p = CC[i]; - pi = RR[i]; - pj = EE[i]; - cj = fj = i; - limit = i + 1; - } - for ( j = limit ; j <= nn ; j++ ) - { f = f - r; - c = c - qr; - ORDER(f, fi, fj, c, ci, cj) - c = CC[j] - qr; - ci = RR[j]; - cj = EE[j]; - d = DD[j] - r; - di = SS[j]; - dj = FF[j]; - ORDER(d, di, dj, c, ci, cj) - c = 0; - DIAG(i, j, c, p+TC_SCORE(A[i-1], B[j-1])) /* diagonal */ - //checked - - if ( c <= 0 ) - { c = 0; ci = i; cj = j; } - else - { ci = pi; cj = pj; } - ORDER(c, ci, cj, d, di, dj) - ORDER(c, ci, cj, f, fi, fj) - p = CC[j]; - CC[j] = c; - pi = RR[j]; - pj = EE[j]; - RR[j] = ci; - EE[j] = cj; - DD[j] = d; - SS[j] = di; - FF[j] = dj; - if ( c > lmin ) /* add the score into list */ - lmin = addnode(c, ci, cj, i, j, count, lmin); - } - } -return 1; -} - -/* Add a new node into list. */ - -int addnode(c, ci, cj, i, j, K, cost) int c, ci, cj, i, j, K, cost; -{ int found; /* 1 if the node is in LIST */ - register int d; - - found = 0; - if ( most != 0 && most->SIM_STARI == ci && most->SIM_STARJ == cj ) - found = 1; - else - for ( d = 0; d < numnode ; d++ ) - { most = LIST[d]; - if ( most->SIM_STARI == ci && most->SIM_STARJ == cj ) - { found = 1; - break; - } - } - if ( found ) - { if ( most->SIM_SCORE < c ) - { most->SIM_SCORE = c; - most->SIM_ENDI = i; - most->SIM_ENDJ = j; - } - if ( most->SIM_TOP > i ) most->SIM_TOP = i; - if ( most->SIM_BOT < i ) most->SIM_BOT = i; - if ( most->SIM_LEFT > j ) most->SIM_LEFT = j; - if ( most->SIM_RIGHT < j ) most->SIM_RIGHT = j; - } - else - { if ( numnode == K ) /* list full */ - most = low; - else - most = LIST[numnode++]; - most->SIM_SCORE = c; - most->SIM_STARI = ci; - most->SIM_STARJ = cj; - most->SIM_ENDI = i; - most->SIM_ENDJ = j; - most->SIM_TOP = most->SIM_BOT = i; - most->SIM_LEFT = most->SIM_RIGHT = j; - } - if ( numnode == K ) - { if ( low == most || ! low ) - { for ( low = LIST[0], d = 1; d < numnode ; d++ ) - if ( LIST[d]->SIM_SCORE < low->SIM_SCORE ) - low = LIST[d]; - } - return ( low->SIM_SCORE ) ; - } - else - return cost; -} - -/* Find and remove the largest score in list */ - -vertexptr findmax() -{ vertexptr cur; - register int i, j; - - for ( j = 0, i = 1; i < numnode ; i++ ) - if ( LIST[i]->SIM_SCORE > LIST[j]->SIM_SCORE ) - j = i; - cur = LIST[j]; - if ( j != --numnode ) - { LIST[j] = LIST[numnode]; - LIST[numnode] = cur; - } - most = LIST[0]; - if ( low == cur ) low = LIST[0]; - return ( cur ); -} - -/* return 1 if no node in LIST share vertices with the area */ - -int no_cross() -{ vertexptr cur; - register int i; - - for ( i = 0; i < numnode; i++ ) - { cur = LIST[i]; - if ( cur->SIM_STARI <= mm && cur->SIM_STARJ <= nn && cur->SIM_BOT >= m1-1 && - cur->SIM_RIGHT >= n1-1 && ( cur->SIM_STARI < rl || cur->SIM_STARJ < cl )) - { if ( cur->SIM_STARI < rl ) rl = cur->SIM_STARI; - if ( cur->SIM_STARJ < cl ) cl = cur->SIM_STARJ; - flag = 1; - break; - } - } - if ( i == numnode ) - return 1; - else - return 0; -} - -/* diff(A,B,M,N,tb,te) returns the score of an optimum conversion between - A[1..M] and B[1..N] that begins(ends) with a delete if tb(te) is zero - and appends such a conversion to the current script. */ - -int diff_sim( int *A,int *B,int M,int N,int tb,int te) - -{ int midi, midj, type; /* Midpoint, type, and cost */ - int midc; - - { - register int i, j; - register int c, e, d, s; - int t; - - - /* Boundary cases: M <= 1 or N == 0 */ - - if (N <= 0) - { if (M > 0) DEL(M) - return - gap(M); - } - if (M <= 1) - { if (M <= 0) - { INS(N); - return - gap(N); - } - if (tb > te) tb = te; - midc = - (tb + r + gap(N) ); - midj = 0; - - for (j = 1; j <= N; j++) - { for ( tt = 1, z = row[I+1]; z != PAIRNULL; z = z->NEXT ) - if ( z->COL == j+J ) - { tt = 0; break; } - if ( tt ) - { c = TC_SCORE (A[0],B[j-1]) - ( gap(j-1) + gap(N-j) ); - //checked - - if (c > midc) - { midc = c; - midj = j; - } - } - } - if (midj == 0) - { INS(N) DEL(1) } - else - { if (midj > 1) INS(midj-1) - REP - if ( A[1] == B[midj] ) - no_mat += 1; - else - no_mis += 1; - /* mark (A[I],B[J]) as used: put J into list row[I] */ - I++; J++; - - - z = ( pairptr )sim_vcalloc(1,sizeof(pair)); - z->COL = J; - z->NEXT = row[I]; - row[I] = z; - if (midj < N) INS(N-midj) - } - return midc; - } - - /* Divide: Find optimum midpoint (midi,midj) of cost midc */ - - midi = M/2; /* Forward phase: */ - CC[0] = 0; /* Compute C(M/2,k) & D(M/2,k) for all k */ - t = -q; - for (j = 1; j <= N; j++) - { CC[j] = t = t-r; - DD[j] = t-q; - } - t = -tb; - for (i = 1; i <= midi; i++) - { s = CC[0]; - CC[0] = c = t = t-r; - e = t-q; - - for (j = 1; j <= N; j++) - { if ((c = c - qr) > (e = e - r)) e = c; - if ((c = CC[j] - qr) > (d = DD[j] - r)) d = c; - DIAG(i+I, j+J, c, s+TC_SCORE(A[i-1], B[j-1])) - //checked - - if (c < d) c = d; - if (c < e) c = e; - s = CC[j]; - CC[j] = c; - DD[j] = d; - } - } - DD[0] = CC[0]; - - RR[N] = 0; /* Reverse phase: */ - t = -q; /* Compute R(M/2,k) & S(M/2,k) for all k */ - for (j = N-1; j >= 0; j--) - { RR[j] = t = t-r; - SS[j] = t-q; - } - t = -te; - for (i = M-1; i >= midi; i--) - { s = RR[N]; - RR[N] = c = t = t-r; - e = t-q; - - for (j = N-1; j >= 0; j--) - { if ((c = c - qr) > (e = e - r)) e = c; - if ((c = RR[j] - qr) > (d = SS[j] - r)) d = c; - DIAG(i+1+I, j+1+J, c, s+TC_SCORE (A[i],B[j])) /*not -1 on purpose*/ - - if (c < d) c = d; - if (c < e) c = e; - s = RR[j]; - RR[j] = c; - SS[j] = d; - } - } - SS[N] = RR[N]; - - midc = CC[0]+RR[0]; /* Find optimal midpoint */ - midj = 0; - type = 1; - for (j = 0; j <= N; j++) - if ((c = CC[j] + RR[j]) >= midc) - if (c > midc || (CC[j] != DD[j] && RR[j] == SS[j])) - { midc = c; - midj = j; - } - for (j = N; j >= 0; j--) - if ((c = DD[j] + SS[j] + q) > midc) - { midc = c; - midj = j; - type = 2; - } - } - - /* Conquer: recursively around midpoint */ - - if (type == 1) - { diff_sim(A,B,midi,midj,tb,q); - diff_sim(A+midi,B+midj,M-midi,N-midj,q,te); - } - else - { diff_sim(A,B,midi-1,midj,tb,zero); - DEL(2); - diff_sim(A+midi+1,B+midj,M-midi-1,N-midj,zero,te); - } - return midc; -} - - - - -int calcons(int *aa0,int n0,int *aa1,int n1,int *res,int *nc,int *nident, Alignment *A, int *ns, int **l_s, Constraint_list *CL) -{ - int i0, i1; - int op, nid, lenc, nd; - int *sp0, *sp1; - int *rp; - int a, b, id_col, tot_col, r0, r1; - - min0--; min1--; - - sp0 = seqc0+mins; - sp1 = seqc1+mins; - rp = res; - lenc = nid = op = 0; - i0 = min0; - i1 = min1; - - while (i0 < max0 || i1 < max1) { - if (op == 0 && *rp == 0) { - op = *rp++; - *sp0 = aa0[i0++]; - *sp1 = aa1[i1++]; - - - for (id_col=tot_col=0,a=0; a< ns[0]; a++) - for ( b=0; b< ns[1]; b++) - { - r0=Aln->seq_al[l_s[0][a]][*sp0-1]; - r1=Aln->seq_al[l_s[1][a]][*sp1-1]; - - if ( !is_gap(r0) && r1==r0)id_col++; - if ( !is_gap(r0) && !is_gap(r1))tot_col++; - } - nid+=(tot_col)?(id_col/tot_col):0; - lenc++; - sp0++; sp1++; - } - else { - if (op==0) op = *rp++; - if (op>0) { - *sp0++ = SIM_GAP; - *sp1++ = aa1[i1++]; - op--; - lenc++; - } - else { - *sp0++ = aa0[i0++]; - *sp1++ = SIM_GAP; - op++; - lenc++; - } - } - } - - *nident = nid; - *nc = lenc; - - nd = 0; - return mins+lenc+nd; -} - -/*Memory management */ -struct Mem - { - void *p; - struct Mem *next; - }; - -typedef struct Mem Mem; - -Mem *first_mem; -Mem *last_mem; - -void *sim_vcalloc ( size_t nobj, size_t size) -{ - void *p; - Mem *new_mem; - - p=vcalloc (nobj, size); - - - new_mem=vcalloc (1, sizeof (Mem)); - if ( last_mem==NULL)first_mem=last_mem=new_mem; - else - { - last_mem->next=new_mem; - last_mem=new_mem; - } - last_mem->p=p; - return p; -} - -void sim_free_all() -{ - Mem *p1, *p2; - p1=first_mem; - - - while (p1) - { - p2=p1->next; - vfree(p1->p); - vfree(p1); - p1=p2; - } - first_mem=last_mem=NULL; - sim_reset_static_variable(); -} - -/*********************************COPYRIGHT NOTICE**********************************/ -/*© Centro de Regulacio Genomica */ -/*and */ -/*Cedric Notredame */ -/*Tue Oct 27 10:12:26 WEST 2009. */ -/*All rights reserved.*/ -/*This file is part of T-COFFEE.*/ -/**/ -/* T-COFFEE is free software; you can redistribute it and/or modify*/ -/* it under the terms of the GNU General Public License as published by*/ -/* the Free Software Foundation; either version 2 of the License, or*/ -/* (at your option) any later version.*/ -/**/ -/* T-COFFEE is distributed in the hope that it will be useful,*/ -/* but WITHOUT ANY WARRANTY; without even the implied warranty of*/ -/* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the*/ -/* GNU General Public License for more details.*/ -/**/ -/* You should have received a copy of the GNU General Public License*/ -/* along with Foobar; if not, write to the Free Software*/ -/* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA*/ -/*............................................... |*/ -/* If you need some more information*/ -/* cedric.notredame@europe.com*/ -/*............................................... |*/ -/**/ -/**/ -/* */ -/*********************************COPYRIGHT NOTICE**********************************/ diff --git a/binaries/src/tcoffee/t_coffee_source/util_dp_ssec_pwaln.c b/binaries/src/tcoffee/t_coffee_source/util_dp_ssec_pwaln.c deleted file mode 100644 index ef8c0b0..0000000 --- a/binaries/src/tcoffee/t_coffee_source/util_dp_ssec_pwaln.c +++ /dev/null @@ -1,476 +0,0 @@ -#include -#include -#include -#include -#include - -#include "io_lib_header.h" -#include "util_lib_header.h" -#include "define_header.h" -#include "dp_lib_header.h" - -int ssec_pwaln_maln (Alignment *A, int *ns, int **ls, Constraint_list *CL) - { - - static Dp_Model *M=NULL; - Dp_Result *R=NULL; - int a, ndiag; - int Sa,Sb,St, Da, Db, Dt, Ia, Ib, It; - int ala, alb, s,b; - - a=0; - Sa=a++;Da=a++;Ia=a++; - Sb=a++;Db=a++;Ib=a++; - St=a++;Dt=a++;It=a++; - - if ( strm (CL->matrices_list[0], "analyse")) - { - for ( a=0; a< CL->n_matrices; a++) - { - - rescale_two_mat(CL->matrices_list[1],CL->matrices_list[2],1000, 100, AA_ALPHABET); - exit (0); - } - } - - - - /*2 Prepare the Model*/ - M=initialize_sseq_model(2,2,CL); - ndiag=strlen (A->seq_al[0])+strlen (A->seq_al[1])-1; - M->diag=vcalloc (ndiag+1, sizeof (int)); - M->diag[0]=ndiag-1; - for ( a=1; a<=M->diag[0]; a++)M->diag[a]=a; - - /*3 Prepare Sequence Presentation*/ - R=make_fast_generic_dp_pair_wise(A, ns, ls, M); - - - ala=alb=0; - - A=realloc_aln2(A,A->nseq, R->len+1); - for (b=1; blen;b++) - { - if (R->traceback[b]==Sa || R->traceback[b]==Sb ||R->traceback[b]==St ) - { - for (s=0; sseq_al[ls[0][s]][b-1]=(CL->S)->seq[A->order[ls[0][s]][0]][ala]; - ala++; - for (s=0; sseq_al[ls[1][s]][b-1]=(CL->S)->seq[A->order[ls[1][s]][0]][alb]; - alb++; - } - else if ( R->traceback[b]==Da || R->traceback[b]==Db ||R->traceback[b]==Dt ) - { - for (s=0; sseq_al[ls[0][s]][b-1]=(CL->S)->seq[A->order[ls[0][s]][0]][ala]; - ala++; - for (s=0; sseq_al[ls[1][s]][b-1]='-'; - } - else if ( R->traceback[b]==Ia || R->traceback[b]==Ib ||R->traceback[b]==It ) - { - for (s=0; sseq_al[ls[0][s]][b-1]='-'; - - for (s=0; sseq_al[ls[1][s]][b-1]=(CL->S)->seq[A->order[ls[1][s]][0]][alb]; - alb++; - } - } - for (s=0; sseq_al[ls[0][s]][b-1]='\0'; - for (s=0; sseq_al[ls[1][s]][b-1]='\0'; - - A->len_aln=strlen (A->seq_al[ls[0][0]]); - R->Dp_model=M; - A->Dp_result=R; - return A->score; - } - -Dp_Model * initialize_sseq_model(int left_tg_mode, int right_tg_mode, Constraint_list *CL) - { - - Dp_Model *M; - int a, b, c,d; - int Sa,Sb,St, Da, Db, Dt, Ia, Ib, It; - int tgop=CL->gep*3; - - - - - M=vcalloc ( 1, sizeof (Dp_Model)); - - M->nstate=9; - M->START=M->nstate++; - M->END =M->nstate++; - - M->model_comments=declare_char (M->nstate+1, 100); - M->bounded_model=declare_int (M->nstate+1, M->nstate+1); - M->model=declare_int (M->nstate+1, M->nstate+1); - for ( a=0; a<=M->nstate; a++) - for ( b=0; b<= M->nstate; b++) - M->model[a][b]=UNDEFINED; - - - M->model_properties=declare_int ( M->nstate, 10); - - a=0; - M->TYPE=a++;M->LEN_I=a++; M->LEN_J=a++; M->DELTA_I=a++;M->DELTA_J=a++;M->EMISSION=a++;M->TERM_EMISSION=a++;M->START_EMISSION=a++; - M->CODING0=a++;M->DELETION=a++; - M->model_properties=declare_int ( M->nstate, 10); - - a=0; - M->EMISSION=a++;M->TERM_EMISSION=a++;M->START_EMISSION=a++; - M->model_emission_function=vcalloc(M->nstate, sizeof (int (**)(Alignment*, int **, int, int*, int, int **, int, int*, int, struct Constraint_list *))); - for ( a=0; a< M->nstate; a++) - M->model_emission_function[a]=vcalloc(3, sizeof (int (*)(Alignment*, int **, int, int*, int, int **, int, int*, int, struct Constraint_list *))); - - - - a=0; - - Sa=a++;Da=a++;Ia=a++; - Sb=a++;Db=a++;Ib=a++; - St=a++;Dt=a++;It=a++; - - - sprintf ( M->model_comments[M->START], "START"); - sprintf ( M->model_comments[M->END], "END"); - - /*ALPHA*/ - /*Substitution in Alpha*/ - if (CL->matrices_list[0][0])sprintf ( M->model_comments[Sa], "Substitution %s", CL->matrices_list[0]); - M->model_properties[Sa][M->TYPE]=Sa; - M->model_properties[Sa][M->LEN_I]=1; - M->model_properties[Sa][M->LEN_J]=1; - M->model_properties[Sa][M->DELTA_I]=-1; - M->model_properties[Sa][M->DELTA_J]= 0; - - M->model_emission_function[Sa][M->EMISSION] =get_alpha_sub_cost; - M->model_emission_function[Sa][M->START_EMISSION]=get_ssec_no_cost; - M->model_emission_function[Sa][M->TERM_EMISSION] =get_ssec_no_cost; - - /*Deletions*/ - if (CL->matrices_list[0][0])sprintf ( M->model_comments[Da], "Deletion %s", CL->matrices_list[0]); - M->model_properties[Da][M->TYPE]=Da; - M->model_properties[Da][M->LEN_I]=1; - M->model_properties[Da][M->LEN_J]=0; - M->model_properties[Da][M->DELTA_I]=-1; - M->model_properties[Da][M->DELTA_J]=+1; - - - M->model_emission_function[Da][M->EMISSION] =get_alpha_gep_cost; - M->model_emission_function[Da][M->START_EMISSION]=get_alpha_start_gep_cost; - M->model_emission_function[Da][M->TERM_EMISSION] =get_alpha_term_gep_cost; - - - /*Insertion*/ - if (CL->matrices_list[0][0])sprintf ( M->model_comments[Ia], "Insertion %s", CL->matrices_list[0]); - M->model_properties[Ia][M->TYPE]=Ia; - M->model_properties[Ia][M->LEN_I]=0; - M->model_properties[Ia][M->LEN_J]=1; - M->model_properties[Ia][M->DELTA_I]=0; - M->model_properties[Ia][M->DELTA_J]=-1; - - M->model_emission_function[Ia][M->EMISSION] =get_alpha_gep_cost; - M->model_emission_function[Ia][M->START_EMISSION]=get_alpha_start_gep_cost; - M->model_emission_function[Ia][M->TERM_EMISSION] =get_alpha_term_gep_cost; - -/*BETA*/ - /*Substitution in Beta*/ - if (CL->matrices_list[1][0])sprintf ( M->model_comments[Sb], "Substitution %s", CL->matrices_list[1]); - M->model_properties[Sb][M->TYPE]=Sb; - M->model_properties[Sb][M->LEN_I]=1; - M->model_properties[Sb][M->LEN_J]=1; - M->model_properties[Sb][M->DELTA_I]=-1; - M->model_properties[Sb][M->DELTA_J]= 0; - - M->model_emission_function[Sb][M->EMISSION] =get_beta_sub_cost; - M->model_emission_function[Sb][M->START_EMISSION]=get_ssec_no_cost; - M->model_emission_function[Sb][M->TERM_EMISSION] =get_ssec_no_cost; - - - /*Deletions*/ - if (CL->matrices_list[1][0])sprintf ( M->model_comments[Db], "Deletion %s", CL->matrices_list[1]); - M->model_properties[Db][M->TYPE]=Db; - M->model_properties[Db][M->LEN_I]=1; - M->model_properties[Db][M->LEN_J]=0; - M->model_properties[Db][M->DELTA_I]=-1; - M->model_properties[Db][M->DELTA_J]=+1; - - M->model_emission_function[Db][M->EMISSION] =get_beta_gep_cost; - M->model_emission_function[Db][M->START_EMISSION]=get_beta_start_gep_cost; - M->model_emission_function[Db][M->TERM_EMISSION] =get_beta_term_gep_cost; - - - /*Insertion*/ - - if (CL->matrices_list[1][0])sprintf ( M->model_comments[Ib], "Insertion %s", CL->matrices_list[1]); - M->model_properties[Ib][M->TYPE]=Ib; - M->model_properties[Ib][M->LEN_I]=0; - M->model_properties[Ib][M->LEN_J]=1; - M->model_properties[Ib][M->DELTA_I]=0; - M->model_properties[Ib][M->DELTA_J]=-1; - - - - M->model_emission_function[Ib][M->EMISSION] =get_beta_gep_cost; - M->model_emission_function[Ib][M->START_EMISSION]=get_beta_start_gep_cost; - M->model_emission_function[Ib][M->TERM_EMISSION] =get_beta_term_gep_cost; - - /*TURNS*/ - /*Substitution in Turn*/ - if (CL->matrices_list[2][0])sprintf ( M->model_comments[St], "Substitution %s", CL->matrices_list[2]); - M->model_properties[St][M->TYPE]=St; - M->model_properties[St][M->LEN_I]=1; - M->model_properties[St][M->LEN_J]=1; - M->model_properties[St][M->DELTA_I]=-1; - M->model_properties[St][M->DELTA_J]= 0; - - M->model_emission_function[St][M->EMISSION] =get_turn_sub_cost; - M->model_emission_function[St][M->START_EMISSION]=get_ssec_no_cost; - M->model_emission_function[St][M->TERM_EMISSION] =get_ssec_no_cost; - - - /*Deletions*/ - if (CL->matrices_list[2][0])sprintf ( M->model_comments[Dt], "Deletion %s", CL->matrices_list[2]); - M->model_properties[Dt][M->TYPE]=Dt; - M->model_properties[Dt][M->LEN_I]=1; - M->model_properties[Dt][M->LEN_J]=0; - M->model_properties[Dt][M->DELTA_I]=-1; - M->model_properties[Dt][M->DELTA_J]=+1; - - M->model_emission_function[Dt][M->EMISSION] =get_turn_gep_cost; - M->model_emission_function[Dt][M->START_EMISSION]=get_turn_start_gep_cost; - M->model_emission_function[Dt][M->TERM_EMISSION] =get_turn_term_gep_cost; - /*Insertion*/ - if (CL->matrices_list[2][0])sprintf ( M->model_comments[It], "Insertion %s", CL->matrices_list[2]); - M->model_properties[It][M->TYPE]=It; - M->model_properties[It][M->LEN_I]=0; - M->model_properties[It][M->LEN_J]=1; - M->model_properties[It][M->DELTA_I]=0; - M->model_properties[It][M->DELTA_J]=-1; - - M->model_emission_function[It][M->EMISSION] =get_turn_gep_cost; - M->model_emission_function[It][M->START_EMISSION]=get_turn_start_gep_cost; - M->model_emission_function[It][M->TERM_EMISSION] =get_turn_term_gep_cost; - - -/*Transitions*/ - - M->model[M->START][Sa]=ALLOWED; - M->model[M->START][Sb]=ALLOWED; - M->model[M->START][St]=ALLOWED; - M->model[M->START][Db]=M->model[M->START][Ib]=(CL->TG_MODE==0)?CL->gop*SCORE_K:0; - M->model[M->START][Da]=M->model[M->START][Ia]=(CL->TG_MODE==0)?CL->gop*SCORE_K:0; - M->model[M->START][Dt]=M->model[M->START][It]=(CL->TG_MODE==0)?CL->gop*SCORE_K:0; - - - M->model[Sa][M->END]=ALLOWED; - M->model[Sb][M->END]=ALLOWED; - M->model[St][M->END]=ALLOWED; - M->model[Ia][M->END]=M->model[Da][M->END]=(CL->TG_MODE==0)?0:CL->gop*SCORE_K*(-1); - M->model[Ib][M->END]=M->model[Db][M->END]=(CL->TG_MODE==0)?0:CL->gop*SCORE_K*(-1); - M->model[It][M->END]=M->model[Dt][M->END]=(CL->TG_MODE==0)?0:CL->gop*SCORE_K*(-1); - - for ( a=0; a< M->nstate; a++)M->model[a][a]=ALLOWED; - - M->model[Sa][Ia]=M->model[Sa][Da]=CL->gop*SCORE_K; - M->model[Sa][Ib]=M->model[Sa][Db]=CL->gop*SCORE_K+tgop*SCORE_K; - M->model[Sa][It]=M->model[Sa][Dt]=CL->gop*SCORE_K+tgop*SCORE_K; - M->model[Sa][Sb]=M->model[Sa][St]=tgop*SCORE_K; - - M->model[Sb][Ib]=M->model[Sb][Db]=CL->gop*SCORE_K; - M->model[Sb][Ia]=M->model[Sb][Da]=CL->gop*SCORE_K+tgop*SCORE_K; - M->model[Sb][It]=M->model[Sb][Dt]=CL->gop*SCORE_K+tgop*SCORE_K; - M->model[Sb][Sa]=M->model[Sb][St]=tgop*SCORE_K; - - M->model[St][It]=M->model[St][Dt]=CL->gop*SCORE_K; - M->model[St][Ia]=M->model[St][Da]=CL->gop*SCORE_K+tgop*SCORE_K; - M->model[St][Ib]=M->model[St][Db]=CL->gop*SCORE_K+tgop*SCORE_K; - M->model[St][Sa]=M->model[St][Sb]=tgop*SCORE_K; - - M->model[Ia][Sa]=M->model[Da][Sa]=ALLOWED; - M->model[Ia][Sb]=M->model[Da][Sb]=tgop*SCORE_K; - M->model[Ia][St]=M->model[Da][St]=tgop*SCORE_K; - - M->model[Ib][Sa]=M->model[Db][Sa]=tgop*SCORE_K; - M->model[Ib][Sb]=M->model[Db][Sb]=ALLOWED; - M->model[Ib][St]=M->model[Db][St]=tgop*SCORE_K; - - M->model[It][Sa]=M->model[Dt][Sa]=tgop*SCORE_K; - M->model[It][Sb]=M->model[Dt][Sb]=tgop*SCORE_K; - M->model[It][St]=M->model[Dt][St]=ALLOWED; - - - - /*Prune the model*/ - - for (c=0,a=0, d=0; a< M->START; a++) - for ( b=0; bSTART; b++, d++) - { - if (M->model[a][b]!=UNDEFINED) - { - M->bounded_model[b][1+M->bounded_model[b][0]++]=a; - c++; - } - } - M->CL=CL; - - return M; - } - - - - - -int get_alpha_sub_cost (Alignment *A, int**pos1, int ns1, int*list1, int col1, int**pos2, int ns2, int*list2, int col2, Constraint_list *CL) -{ - static int **mat; - int s1, r1, s2, r2; - float score; - - - if (!mat && CL->matrices_list[0][0])mat=read_matrice (CL->matrices_list[0]); - else if ( !CL->matrices_list[0][0])return UNDEFINED; - - - - - s1=A->order[list1[0]][0]; - r1=pos1[list1[0]][col1]; - s2=A->order[list2[0]][0]; - r2=pos1[list2[0]][col2]; - - if ( r1<0 || r2<0)return 0; - - score=mat[(CL->S)->seq[s1][r1-1]-'A'][(CL->S)->seq[s2][r2-1]-'A']*SCORE_K; - return (int)score; - -} -int get_beta_sub_cost (Alignment *A, int**pos1, int ns1, int*list1, int col1, int**pos2, int ns2, int*list2, int col2, Constraint_list *CL) -{ - static int **mat; - int s1, r1, s2, r2; - float score; - - if (!mat && CL->matrices_list[1][0])mat=read_matrice (CL->matrices_list[1]); - else if ( !CL->matrices_list[1][0])return UNDEFINED; - - - - s1=A->order[list1[0]][0]; - r1=pos1[list1[0]][col1]; - s2=A->order[list2[0]][0]; - r2=pos1[list2[0]][col2]; - if ( r1<0 || r2<0)return 0; - - score=mat[(CL->S)->seq[s1][r1-1]-'A'][(CL->S)->seq[s2][r2-1]-'A']*SCORE_K; - return (int)score; - -} -int get_turn_sub_cost (Alignment *A, int**pos1, int ns1, int*list1, int col1, int**pos2, int ns2, int*list2, int col2, Constraint_list *CL) -{ - static int **mat; - int s1, r1, s2, r2; - float score; - - - - if (!mat && CL->matrices_list[2][0])mat=read_matrice (CL->matrices_list[2]); - else if ( !CL->matrices_list[2][0])return UNDEFINED; - - - s1=A->order[list1[0]][0]; - r1=pos1[list1[0]][col1]; - s2=A->order[list2[0]][0]; - r2=pos1[list2[0]][col2]; - - - if ( r1<0 || r2<0)return 0; - score=mat[(CL->S)->seq[s1][r1-1]-'A'][(CL->S)->seq[s2][r2-1]-'A']*SCORE_K; - return (int)score; - -} - -int get_turn_gep_cost (Alignment *A, int**pos1, int ns1, int*list1, int col1, int**pos2, int ns2, int*list2, int col2, Constraint_list *CL) -{ - return (CL->gep) *SCORE_K; -} -int get_turn_start_gep_cost (Alignment *A, int**pos1, int ns1, int*list1, int col1, int**pos2, int ns2, int*list2, int col2, Constraint_list *CL) -{ - return ((CL->TG_MODE)==2)?0:get_turn_gep_cost(A,pos1, ns1, list1, col1, pos2, ns2, list2, col2, CL); -} -int get_turn_term_gep_cost (Alignment *A, int**pos1, int ns1, int*list1, int col1, int**pos2, int ns2, int*list2, int col2, Constraint_list *CL) -{ - return ((CL->TG_MODE)==2)?-get_turn_gep_cost(A,pos1, ns1, list1, col1, pos2, ns2, list2, col2, CL):0; -} - - -int get_alpha_gep_cost (Alignment *A, int**pos1, int ns1, int*list1, int col1, int**pos2, int ns2, int*list2, int col2, Constraint_list *CL) -{ - return (CL->gep)*SCORE_K; -} -int get_alpha_start_gep_cost (Alignment *A, int**pos1, int ns1, int*list1, int col1, int**pos2, int ns2, int*list2, int col2, Constraint_list *CL) -{ - return ((CL->TG_MODE)==2)?0:get_alpha_gep_cost(A,pos1, ns1, list1, col1, pos2, ns2, list2, col2, CL); -} -int get_alpha_term_gep_cost (Alignment *A, int**pos1, int ns1, int*list1, int col1, int**pos2, int ns2, int*list2, int col2, Constraint_list *CL) -{ - return ((CL->TG_MODE)==2)?-get_alpha_gep_cost(A,pos1, ns1, list1, col1, pos2, ns2, list2, col2, CL):0; -} - - -int get_beta_gep_cost (Alignment *A, int**pos1, int ns1, int*list1, int col1, int**pos2, int ns2, int*list2, int col2, Constraint_list *CL) -{ - return (CL->gep)*SCORE_K; -} -int get_beta_start_gep_cost (Alignment *A, int**pos1, int ns1, int*list1, int col1, int**pos2, int ns2, int*list2, int col2, Constraint_list *CL) -{ - return ((CL->TG_MODE)==2)?0:get_beta_gep_cost(A,pos1, ns1, list1, col1, pos2, ns2, list2, col2, CL); -} -int get_beta_term_gep_cost (Alignment *A, int**pos1, int ns1, int*list1, int col1, int**pos2, int ns2, int*list2, int col2, Constraint_list *CL) -{ - return ((CL->TG_MODE)==2)?-get_beta_gep_cost(A,pos1, ns1, list1, col1, pos2, ns2, list2, col2, CL):0; -} - - -int get_ssec_no_cost (Alignment *A, int**pos1, int ns1, int*list1, int col1, int**pos2, int ns2, int*list2, int col2, Constraint_list *CL) -{ - return 0; -} - - - - -/*********************************COPYRIGHT NOTICE**********************************/ -/*© Centro de Regulacio Genomica */ -/*and */ -/*Cedric Notredame */ -/*Tue Oct 27 10:12:26 WEST 2009. */ -/*All rights reserved.*/ -/*This file is part of T-COFFEE.*/ -/**/ -/* T-COFFEE is free software; you can redistribute it and/or modify*/ -/* it under the terms of the GNU General Public License as published by*/ -/* the Free Software Foundation; either version 2 of the License, or*/ -/* (at your option) any later version.*/ -/**/ -/* T-COFFEE is distributed in the hope that it will be useful,*/ -/* but WITHOUT ANY WARRANTY; without even the implied warranty of*/ -/* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the*/ -/* GNU General Public License for more details.*/ -/**/ -/* You should have received a copy of the GNU General Public License*/ -/* along with Foobar; if not, write to the Free Software*/ -/* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA*/ -/*............................................... |*/ -/* If you need some more information*/ -/* cedric.notredame@europe.com*/ -/*............................................... |*/ -/**/ -/**/ -/* */ -/*********************************COPYRIGHT NOTICE**********************************/ diff --git a/binaries/src/tcoffee/t_coffee_source/util_dp_suboptimal_nw.c b/binaries/src/tcoffee/t_coffee_source/util_dp_suboptimal_nw.c deleted file mode 100644 index acb4b79..0000000 --- a/binaries/src/tcoffee/t_coffee_source/util_dp_suboptimal_nw.c +++ /dev/null @@ -1,1655 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include "io_lib_header.h" -#include "util_lib_header.h" -#include "define_header.h" - - - - -//Values as provided in Probcons V1.1 -static float EXP_UNDERFLOW_THRESHOLD = -4.60f; -static float LOG_UNDERFLOW_THRESHOLD = 7.50f; -//static float LOG_ZERO = -FLT_MAX; -static float LOG_ZERO=-200000004008175468544.000000; -static float LOG_ONE = 0.0f; - -//Probabilistic Alignment DNA - - - - -static float DNAinitDistrib2Default[] = { 0.9615409374f, 0.0000004538f, 0.0000004538f, 0.0192291681f, 0.0192291681f }; -static float DNAgapOpen2Default[] = { 0.0082473317f, 0.0082473317f, 0.0107844425f, 0.0107844425f }; -static float DNAgapExtend2Default[] = { 0.3210460842f, 0.3210460842f, 0.3298229277f, 0.3298229277f }; - -static char DNAalphabetDefault[] = "ACGUTN"; -static float DNAemitSingleDefault[6] = {0.2270790040f, 0.2422080040f, 0.2839320004f, 0.2464679927f, 0.2464679927f, 0.0003124650f}; - -static float DNAemitPairsDefault[6][6] = { - { 0.1487240046f, 0.0184142999f, 0.0361397006f, 0.0238473993f, 0.0238473993f, 0.0000375308f }, - { 0.0184142999f, 0.1583919972f, 0.0275536999f, 0.0389291011f, 0.0389291011f, 0.0000815823f }, - { 0.0361397006f, 0.0275536999f, 0.1979320049f, 0.0244289003f, 0.0244289003f, 0.0000824765f }, - { 0.0238473993f, 0.0389291011f, 0.0244289003f, 0.1557479948f, 0.1557479948f, 0.0000743985f }, - { 0.0238473993f, 0.0389291011f, 0.0244289003f, 0.1557479948f, 0.1557479948f, 0.0000743985f }, - { 0.0000375308f, 0.0000815823f, 0.0000824765f, 0.0000743985f, 0.0000743985f, 0.0000263252f } -}; - -//Probabilistic ALignment Blosum62mt - - - - -static float initDistrib2Default[] = { 0.6814756989f, 8.615339902e-05f, 8.615339902e-05f, 0.1591759622f, 0.1591759622 }; -static float gapOpen2Default[] = { 0.0119511066f, 0.0119511066f, 0.008008334786f, 0.008008334786 }; -static float gapExtend2Default[] = { 0.3965826333f, 0.3965826333f, 0.8988758326f, 0.8988758326 }; - -static char alphabetDefault[] = "ARNDCQEGHILKMFPSTWYV"; -static float emitSingleDefault[20] = { - 0.07831005f, 0.05246024f, 0.04433257f, 0.05130349f, 0.02189704f, - 0.03585766f, 0.05615771f, 0.07783433f, 0.02601093f, 0.06511648f, - 0.09716489f, 0.05877077f, 0.02438117f, 0.04463228f, 0.03940142f, - 0.05849916f, 0.05115306f, 0.01203523f, 0.03124726f, 0.07343426f -}; - -static float emitPairsDefault[20][20] = { - {0.02373072f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f}, - {0.00244502f, 0.01775118f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f}, - {0.00210228f, 0.00207782f, 0.01281864f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f}, - {0.00223549f, 0.00161657f, 0.00353540f, 0.01911178f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f}, - {0.00145515f, 0.00044701f, 0.00042479f, 0.00036798f, 0.01013470f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f}, - {0.00219102f, 0.00253532f, 0.00158223f, 0.00176784f, 0.00032102f, 0.00756604f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f}, - {0.00332218f, 0.00268865f, 0.00224738f, 0.00496800f, 0.00037956f, 0.00345128f, 0.01676565f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f}, - {0.00597898f, 0.00194865f, 0.00288882f, 0.00235249f, 0.00071206f, 0.00142432f, 0.00214860f, 0.04062876f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f}, - {0.00114353f, 0.00132105f, 0.00141205f, 0.00097077f, 0.00026421f, 0.00113901f, 0.00131767f, 0.00103704f, 0.00867996f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f}, - {0.00318853f, 0.00138145f, 0.00104273f, 0.00105355f, 0.00094040f, 0.00100883f, 0.00124207f, 0.00142520f, 0.00059716f, 0.01778263f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f}, - {0.00449576f, 0.00246811f, 0.00160275f, 0.00161966f, 0.00138494f, 0.00180553f, 0.00222063f, 0.00212853f, 0.00111754f, 0.01071834f, 0.03583921f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f}, - {0.00331693f, 0.00595650f, 0.00257310f, 0.00252518f, 0.00046951f, 0.00312308f, 0.00428420f, 0.00259311f, 0.00121376f, 0.00157852f, 0.00259626f, 0.01612228f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f}, - {0.00148878f, 0.00076734f, 0.00063401f, 0.00047808f, 0.00037421f, 0.00075546f, 0.00076105f, 0.00066504f, 0.00042237f, 0.00224097f, 0.00461939f, 0.00096120f, 0.00409522f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f}, - {0.00165004f, 0.00090768f, 0.00084658f, 0.00069041f, 0.00052274f, 0.00059248f, 0.00078814f, 0.00115204f, 0.00072545f, 0.00279948f, 0.00533369f, 0.00087222f, 0.00116111f, 0.01661038f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f}, - {0.00230618f, 0.00106268f, 0.00100282f, 0.00125381f, 0.00034766f, 0.00090111f, 0.00151550f, 0.00155601f, 0.00049078f, 0.00103767f, 0.00157310f, 0.00154836f, 0.00046718f, 0.00060701f, 0.01846071f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f}, - {0.00631752f, 0.00224540f, 0.00301397f, 0.00285226f, 0.00094867f, 0.00191155f, 0.00293898f, 0.00381962f, 0.00116422f, 0.00173565f, 0.00250962f, 0.00312633f, 0.00087787f, 0.00119036f, 0.00180037f, 0.01346609f, 0.0f, 0.0f, 0.0f, 0.0f}, - {0.00389995f, 0.00186053f, 0.00220144f, 0.00180488f, 0.00073798f, 0.00154526f, 0.00216760f, 0.00214841f, 0.00077747f, 0.00248968f, 0.00302273f, 0.00250862f, 0.00093371f, 0.00107595f, 0.00147982f, 0.00487295f, 0.01299436f, 0.0f, 0.0f, 0.0f}, - {0.00039119f, 0.00029139f, 0.00021006f, 0.00016015f, 0.00010666f, 0.00020592f, 0.00023815f, 0.00038786f, 0.00019097f, 0.00039549f, 0.00076736f, 0.00028448f, 0.00016253f, 0.00085751f, 0.00015674f, 0.00026525f, 0.00024961f, 0.00563625f, 0.0f, 0.0f}, - {0.00131840f, 0.00099430f, 0.00074960f, 0.00066005f, 0.00036626f, 0.00070192f, 0.00092548f, 0.00089301f, 0.00131038f, 0.00127857f, 0.00219713f, 0.00100817f, 0.00054105f, 0.00368739f, 0.00047608f, 0.00102648f, 0.00094759f, 0.00069226f, 0.00999315f, 0.0f}, - {0.00533241f, 0.00169359f, 0.00136609f, 0.00127915f, 0.00119152f, 0.00132844f, 0.00178697f, 0.00194579f, 0.00071553f, 0.01117956f, 0.00914460f, 0.00210897f, 0.00197461f, 0.00256159f, 0.00135781f, 0.00241601f, 0.00343452f, 0.00038538f, 0.00148001f, 0.02075171f} -}; - - -static int suboptimal_pair_wise ( Alignment *A, int *ns, int **ls, Constraint_list *CL, int mode); -static int *** forward_so_dp ( Alignment *A, int *ns, int **ls, int **pos,int I, int J, int gop, int gep,int gop2, int gep2,Constraint_list *CL); -static int *** backward_so_dp ( Alignment *A, int *ns, int **ls,int **pos,int I, int J, int gop, int gep,int gop2, int gep2,Constraint_list *CL); -static int *** forward_so_dp_biphasic ( Alignment *A, int *ns, int **ls, int **pos,int I, int J, int gop, int gep,int gop2, int gep2,Constraint_list *CL); -static int *** backward_so_dp_biphasic ( Alignment *A, int *ns, int **ls,int **pos,int I, int J, int gop, int gep,int gop2, int gep2,Constraint_list *CL); -static int *** forward_so_dp_glocal ( Alignment *A, int *ns, int **ls, int **pos,int I, int J, int gop, int gep,int gop2, int gep2,Constraint_list *CL); -static int *** backward_so_dp_glocal ( Alignment *A, int *ns, int **ls,int **pos,int I, int J, int gop, int gep,int gop2, int gep2,Constraint_list *CL); - -static int match=0; -static int ins=1; -static int del=2; -static int umatch=3; -static int ins2=3; -static int del2=4; -float ** get_emitPairs (char *mat, char *alp, float **p, float *s); -int subop1_pair_wise ( Alignment *A, int *ns, int **ls, Constraint_list *CL) -{ - return suboptimal_pair_wise ( A, ns, ls, CL, 1); -} - -int subop2_pair_wise ( Alignment *A, int *ns, int **ls, Constraint_list *CL) -{ - return suboptimal_pair_wise ( A, ns, ls, CL, 3); -} - - - -int suboptimal_pair_wise ( Alignment *A, int *ns, int **ls, Constraint_list *CL, int mode) -{ - int ***F=NULL; - int ***B=NULL; - int **pos0; - int gop, gep,gop2, gep2; - int i, I, j, J, n, s1, s2; - char *seqI, *seqJ; - int id; - int *entry; - float opt, min, score, nscore, thres; - int l1, l2, set; - - - gop=CL->gop*SCORE_K; - gep=CL->gep*SCORE_K; - - /*gop2=CL->gop*10*SCORE_K;*/ - gop2=CL->gop*2*SCORE_K; - gep2=0; - - //Values Adapted from Probcons 1.1 - gop=-132; - gep=-27; - - gop2=-144; - gep2=-3; - - ungap(A->seq_al[ls[0][0]]); - ungap(A->seq_al[ls[1][0]]); - - seqI=A->seq_al[ls[0][0]]; - seqJ=A->seq_al[ls[1][0]]; - - I=strlen (seqI); J=strlen (seqJ); - pos0=aln2pos_simple ( A,-1, ns, ls); - l1=strlen (A->seq_al[ls[0][0]]); - l2=strlen (A->seq_al[ls[1][0]]); - - if ( mode==1) - { - F=forward_so_dp (A, ns, ls, pos0,I, J,gop, gep,gop2, gep2,CL); - B=backward_so_dp (A, ns, ls, pos0,I, J,gop, gep,gop2, gep2, CL); - } - else if ( mode ==2) - { - F=forward_so_dp_glocal (A, ns, ls, pos0,I, J,gop, gep,gop2, gep2,CL); - B=backward_so_dp_glocal (A, ns, ls, pos0,I, J,gop, gep,gop2, gep2, CL); - } - else if ( mode ==3) - { - F=forward_so_dp_biphasic (A, ns, ls, pos0,I, J,gop, gep,gop2, gep2,CL); - B=backward_so_dp_biphasic (A, ns, ls, pos0,I, J,gop, gep,gop2, gep2, CL); - } - if ( MAX5(F[match][l1][l2], F[ins][l1][l2], F[del][l1][l2],F[ins2][l1][l2], F[del2][l1][l2] )!=MAX5( B[match][1][1], B[ins][1][1], B[del][1][1], B[ins2][1][1], B[del2][1][1])) - { - HERE ("ERROR in subop_pair"); - fprintf ( stdout, "\nForward: %d", MAX3(F[match][l1][l2], F[ins][l1][l2], F[del][l1][l2])); - fprintf ( stdout, "\nBackWard: %d \n\n",MAX3( B[match][1][1], B[ins][1][1], B[del][1][1])); - } - - - for (opt=0,min=0, set=0, i=1; i<=I; i++) - for (j=1; j<=J; j++) - { - if ( F[match][i][j]==UNDEFINED)continue; - F[match][i][j]+=B[match][i][j]-(CL->get_dp_cost) (A, pos0, ns[0], ls[0], i-1, pos0, ns[1], ls[1],j-1,CL); - if (set==0) - {set=1; opt=F[match][i][j];min=F[match][i][j];} - opt=MAX(F[match][i][j],opt); - min=MIN(F[match][i][j],min); - } - - - s1=name_is_in_list (A->name[ls[0][0]], (CL->S)->name, (CL->S)->nseq, 100); - s2=name_is_in_list (A->name[ls[1][0]], (CL->S)->name, (CL->S)->nseq, 100); - - id=idscore_pairseq(seqI,seqJ,-12, -1, CL->M, "idmat"); - - entry=vcalloc ( CL->entry_len, CL->el_size); - entry[SEQ1]=s1;entry[SEQ2]=s2; - - thres=opt; - for ( n=0,i=1; i<=I; i++) - { - for (j=1; j<=J; j++) - { - score=F[0][i][j]; - nscore=((score-min))/(opt-min); - - if (score==opt) - { - n++; - entry[R1]=i;entry[R2]=j; - entry[WE]=id; - entry[CONS]=1; - add_entry2list (entry,A->CL); - } - } - } - - vfree (entry); - free_int (pos0, -1); - free_arrayN (F, 3); - free_arrayN (B, 3); - - return A->score_aln; -} -/************************************************************************************************************************/ -/* */ -/* */ -/* GLOCAL */ -/* */ -/* */ -/************************************************************************************************************************/ -int *** forward_so_dp_glocal ( Alignment *A, int *ns, int **ls, int **pos0,int I, int J,int gop, int gep,int gop2, int gep2,Constraint_list *CL) -{ - int i,j; - int c; - int sub; - int ***M; - int match=0, del=1, ins=2; - - M=declare_arrayN (3, sizeof (int), 5, I+1, J+1); - - for ( i=0; i<=I; i++)for (j=0; j<=J; j++)for (c=0; c<5; c++)M[c][i][j]=-999999; - - M[match][0][0]=0; - - for (i=1; i<=I; i++){M[del] [i][0]=i*gep;M[umatch][i][0]=i*gep2+gop2;} - for (j=1; j<=J; j++){M[ins] [0][j]=j*gep;M[umatch][0][j]=j*gep2+gop2;} - - - for (i=1; i<=I; i++) - { - for ( j=1; j<=J; j++) - { - sub=(CL->get_dp_cost) (A, pos0, ns[0], ls[0], i-1, pos0, ns[1], ls[1],j-1,CL); - - M[match][i][j] =MAX4 (M[match][i-1][j-1],M[del][i-1][j-1], M[ins][i-1][j-1],M[umatch][i-1][j-1])+sub; - M[del][i][j] =MAX2 ((M[match][i-1][j]+gop), M[del][i-1][j])+gep; - M[ins][i][j] =MAX2 ((M[match][i][j-1]+gop), M[ins][i][j-1])+gep; - M[umatch][i][j]=MAX6 (M[match][i-1][j-1]+gop2, M[match][i][j-1]+gop2, M[match][i-1][j]+gop2,M[umatch][i-1][j-1], M[umatch][i-1][j], M[umatch][i][j-1])+gep2; - } - } - return M; -} -int *** backward_so_dp_glocal ( Alignment *A, int *ns, int **ls, int **pos0, int I, int J, int gop, int gep,int gop2, int gep2,Constraint_list *CL) -{ - int i,j; - int c; - int sub; - int ***M; - - - - M=declare_arrayN (3, sizeof (int), 5, I+2, J+2); - for ( i=I+1; i>=0; i--)for (j=J+1; j>=0; j--)for (c=0; c<5; c++)M[c][i][j]=-999999; - M[match][I+1][J+1]=0; - - for (i=I; i>0; i--){M[ins] [i][J+1]=i*gep;M[umatch] [i][J+1]=i*gep2+gop2;} - for (j=J; j>0; j--){M[del] [I+1][j]=j*gep;M[umatch] [I+1][j]=j*gep2+gop2;} - - for (i=I; i>0; i--) - { - for ( j=J; j>0; j--) - { - sub=(CL->get_dp_cost) (A, pos0, ns[0], ls[0], i-1, pos0, ns[1], ls[1],j-1,CL); - - M[match ][i][j] =MAX4 ((M[del][i+1][j+1]+gop), (M[ins][i+1][j+1]+gop), M[match][i+1][j+1], M[umatch][i+1][j+1]+gop2)+sub; - M[del ][i][j] =MAX2 (M[match][i+1][j], M[del][i+1][j])+gep; - M[ins ][i][j] =MAX2 (M[match][i][j+1], M[ins][i][j+1])+gep; - M[umatch][i][j] =MAX6 (M[match][i+1][j+1], M[match][i+1][j],M[match][i][j+1], M[umatch][i+1][j+1], M[umatch][i+1][j], M[umatch][i][j+1])+gep2; - - } - } - return M; -} - - - - -/************************************************************************************************************************/ -/* */ -/* */ -/* SIMPLE */ -/* */ -/* */ -/************************************************************************************************************************/ - -int *** forward_so_dp ( Alignment *A, int *ns, int **ls, int **pos0,int I, int J,int gop, int gep,int gop2, int gep2,Constraint_list *CL) -{ - int i,j; - int c; - int sub; - int ***M; - int lgop; - - - - M=declare_arrayN (3, sizeof (int), 5, I+1, J+1); - for ( i=0; i<=I; i++)for (j=0; j<=J; j++)for (c=0; c<3; c++)M[c][i][j]=-999999; - - M[match][0][0]=0; - for (i=1; i<=I; i++){M[del] [i][0]=i*gep;} - for (j=1; j<=J; j++){M[ins] [0][j]=j*gep;} - - - - for (i=1; i<=I; i++) - { - for ( j=1; j<=J; j++) - { - lgop=(i==I || j==J)?0:gop; - sub=(CL->get_dp_cost) (A, pos0, ns[0], ls[0], i-1, pos0, ns[1], ls[1],j-1,CL); - - M[match][i][j]=MAX3 (M[del][i-1][j-1], M[ins][i-1][j-1], M[match][i-1][j-1])+sub; - M[del][i][j] =MAX ((M[match][i-1][j]+lgop),M[del][i-1][j])+gep; - M[ins][i][j] =MAX ((M[match][i][j-1]+lgop), M[ins][i][j-1])+gep; - } - - } - - return M; - } -int *** backward_so_dp ( Alignment *A, int *ns, int **ls, int **pos0, int I, int J, int gop, int gep,int gop2, int gep2,Constraint_list *CL) -{ - int i,j, a, b; - - - int ***M, ***T; - - - for (a=0; a<2; a++) - for (b=0; bseq_al[ls[a][b]]); - invert_string2((CL->S)->seq[A->order[ls[a][b]][0]]); - } - T=forward_so_dp(A,ns,ls,pos0, I, J, gop, gep, gop2, gep2, CL); - for (a=0; a<2; a++) - for (b=0; bseq_al[ls[a][b]]); - invert_string2((CL->S)->seq[A->order[ls[a][b]][0]]); - } - - M=declare_arrayN (3, sizeof (int), 5, I+2, J+2); - - - for (i=0; i<=I; i++) - for (j=0; j<=J; j++) - { - M[match][i+1][j+1]=T[match][I-i][J-j]; - M[ins][i+1][j+1]=T[ins][I-i][J-j]; - M[del][i+1][j+1]=T[del][I-i][J-j]; - } - return M; -} - -/************************************************************************************************************************/ -/* */ -/* */ -/* BI-PHASIC */ -/* */ -/* */ -/************************************************************************************************************************/ -int biphasic_pair_wise ( Alignment *A, int *ns, int **ls, Constraint_list *CL) -{ - int i,j,a,b; - int c; - int sub; - int ***m, ***t; - int M1, D1, D2, I1, I2, LEN; - int I, J; - int n=1; - char **al, **aln, *char_buf; - int gop1, gop2, gep1, gep2; - int **pos0; - int score, trace, ntrace; - M1=n++; D1=n++; D2=n++; I1=n++, I2=n++; - - I=strlen (A->seq_al[ls[0][0]]); - J=strlen (A->seq_al[ls[1][0]]); - m=declare_arrayN (3, sizeof (int),n, I+1, J+1); - t=declare_arrayN (3, sizeof (int),n, I+1, J+1); - pos0=aln2pos_simple ( A,-1, ns, ls); - al=declare_char (2, I+J+1); - for ( i=0; i<=I; i++)for (j=0; j<=J; j++)for (c=0; cgop*SCORE_K*2; - gep1=CL->gep*SCORE_K/2; - - gop2=CL->gop*SCORE_K/2; - gep2=CL->gep*SCORE_K*2; - - m[M1][0][0]=0; - for (i=1; i<=I; i++){m[I1][i][0]=gep1*i;} - for (j=1; j<=J; j++){m[D1][0][j]=gep1*j;} - - for (i=1; i<=I; i++){m[I2] [i][0]=gep2*i;} - for (j=1; j<=J; j++){m[D2] [0][j]=gep2*j;} - - for (i=1; i<=I; i++) - { - for ( j=1; j<=J; j++) - { - sub=(CL->get_dp_cost) (A, pos0, ns[0], ls[0], i-1, pos0, ns[1], ls[1],j-1,CL); - m[M1][i][j]=max_int (&t[M1][i][j],D1,m[D1][i-1][j-1],I1,m[I1][i-1][j-1], M1, m[M1][i-1][j-1],D2,m[D2][i-1][j-1],I2,m[I2][i-1][j-1], -1)+sub; - - m[D1][i][j]=max_int (&t[D1][i][j],M1,(m[M1][i][j-1]+gop1),D1,m[D1][i][j-1], -1)+gep1; - m[I1][i][j]=max_int (&t[I1][i][j],M1,(m[M1][i-1][j]+gop1),I1,m[I1][i-1][j], -1)+gep1; - - m[D2][i][j]=max_int (&t[D2][i][j],M1,(m[M1][i][j-1]+gop2),D2,m[D2][i][j-1], -1)+gep2; - m[I2][i][j]=max_int (&t[I2][i][j],M1,(m[M1][i-1][j]+gop2),I2,m[I2][i-1][j], -1)+gep2; - } - } - - score=max_int (&trace,M1,m[M1][I][J],D1,m[D1][I][J],I1, m[I1][I][J],D2,m[D2][I][J],I2,m[I2][I][J], -1); - LEN=0;i=I;j=J; - - - trace=t[trace][i][j]; - while (!(i==0 &&j==0)) - { - - ntrace=t[trace][i][j]; - if (i==0) - { - al[0][LEN]=0; - al[1][LEN]=1; - j--; - LEN++; - } - else if ( j==0) - { - al[0][LEN]=1; - al[1][LEN]=0; - i--; - LEN++; - } - else if ( trace==M1) - { - al[0][LEN]=1; - al[1][LEN]=1; - i--; j--; - LEN++; - } - - else if ( trace==D1 || trace==D2) - { - al[0][LEN]=0; - al[1][LEN]=1; - j--; - LEN++; - } - else if ( trace == I1 || trace==I2) - { - al[0][LEN]=1; - al[1][LEN]=0; - i--; - LEN++; - } - trace=ntrace; - - } - - invert_list_char ( al[0], LEN); - invert_list_char ( al[1], LEN); - if ( A->declared_len<=LEN)A=realloc_aln2 ( A,A->max_n_seq, 2*LEN); - - aln=A->seq_al; - char_buf= vcalloc (LEN+1, sizeof (char)); - for ( c=0; c< 2; c++) - { - for ( a=0; a< ns[c]; a++) - { - int ch=0; - for ( b=0; b< LEN; b++) - { - if (al[c][b]==1) - char_buf[b]=aln[ls[c][a]][ch++]; - else - char_buf[b]='-'; - } - char_buf[b]='\0'; - sprintf (aln[ls[c][a]],"%s", char_buf); - } - } - - - A->len_aln=LEN; - A->nseq=ns[0]+ns[1]; - free_arrayN((void *)m, 3); - free_arrayN((void *)t, 3); - vfree (char_buf); - free_char (al, -1); - return score; - } -int *** forward_so_dp_biphasic ( Alignment *A, int *ns, int **ls, int **pos0,int I, int J,int gop1, int gep1,int gop2, int gep2,Constraint_list *CL) -{ - int i,j; - int c; - int sub; - int ***M; - int match=0, del=1, ins=2; - int lgop1, lgop2, lgep1, lgep2; - - M=declare_arrayN (3, sizeof (int), 5, I+1, J+1); - - for ( i=0; i<=I; i++)for (j=0; j<=J; j++)for (c=0; c<5; c++)M[c][i][j]=-999999; - - M[match][0][0]=0; - - for (i=1; i<=I; i++){M[del] [i][0]=gep1*i+gop1;} - for (j=1; j<=J; j++){M[ins] [0][j]=gep1*j+gop1;} - - for (i=1; i<=I; i++){M[del2] [i][0]=gep2*i+gop2;} - for (j=1; j<=J; j++){M[ins2] [0][j]=gep2*j+gop2;} - - for (i=1; i<=I; i++) - { - for ( j=1; j<=J; j++) - { - lgop1=(i==I || j==J)?gop1:gop1; - lgop2=(i==I || j==J)?gop2:gop2; - lgep1=gep1; - lgep2=gep2; - - sub=(CL->get_dp_cost) (A, pos0, ns[0], ls[0], i-1, pos0, ns[1], ls[1],j-1,CL); - M[match][i][j]=MAX5 (M[del][i-1][j-1], M[ins][i-1][j-1], M[match][i-1][j-1], M[ins2][i-1][j-1], M[del2][i-1][j-1])+sub; - - M[del ][i][j] =MAX2 ((M[match][i-1][j]+lgop1), M[del ][i-1][j])+lgep1; - M[del2][i][j] =MAX2 ((M[match][i-1][j]+lgop2), M[del2][i-1][j])+lgep2; - - M[ins ][i][j] =MAX2 ((M[match][i][j-1]+lgop1), M[ins ][i][j-1] )+lgep1; - M[ins2][i][j] =MAX2 ((M[match][i][j-1]+lgop2), M[ins2][i][j-1] )+lgep2; - } - } - return M; - } -int *** backward_so_dp_biphasic ( Alignment *A, int *ns, int **ls, int **pos0, int I, int J, int gop, int gep,int gop2, int gep2,Constraint_list *CL) -{ - int i,j, a, b; - - - int ***M, ***T; - - - for (a=0; a<2; a++) - for (b=0; bseq_al[ls[a][b]]); - invert_string2((CL->S)->seq[A->order[ls[a][b]][0]]); - } - T=forward_so_dp_biphasic(A,ns,ls,pos0, I, J, gop, gep, gop2, gep2, CL); - for (a=0; a<2; a++) - for (b=0; bseq_al[ls[a][b]]); - invert_string2((CL->S)->seq[A->order[ls[a][b]][0]]); - } - - M=declare_arrayN (3, sizeof (int), 5, I+2, J+2); - - - for (i=0; i<=I; i++) - for (j=0; j<=J; j++) - { - M[match][i+1][j+1]=T[match][I-i][J-j]; - M[ins][i+1][j+1]=T[ins][I-i][J-j]; - M[del][i+1][j+1]=T[del][I-i][J-j]; - M[ins2][i+1][j+1]=T[ins2][I-i][J-j]; - M[del2][i+1][j+1]=T[del2][I-i][J-j]; - } - free_arrayN(T,3); - return M; -} - - -int get_tot_prob (Alignment *A1,Alignment *A2, int *ns, int **ls, int nstates, float **matchProb, float **insProb, float *TmatchProb, float ***TinsProb, Constraint_list *CL); - -float * forward_proba_pair_wise ( char *seq1, char *seq2, int NumMatrixTypes, int NumInsertStates, float **transMat, float *initialDistribution,float *TmatchProb, float ***TinsProb, float **transProb); -float * backward_proba_pair_wise ( char *seq1, char *seq2, int NumMatrixTypes, int NumInsertStates, float **transMat, float *initialDistribution,float *TmatchProb, float ***TinsProb,float **transProb); -float ComputeTotalProbability (int seq1Length, int seq2Length,int NumMatrixTypes, int NumInsertStates,float *forward, float *backward) ; -int ProbabilisticModel (int NumMatrixTypes, int NumInsertStates,float *initDistribMat,float *emitSingle, float** emitPairs, float *gapOpen, float *gapExtend, float **transMat, float *initialDistribution, float **matchProb, float **insProb, float **transProb); - -Constraint_list *ProbaMatrix2CL (Alignment *A, int *ns, int **ls, int NumMatrixTypes, int NumInsertStates, float *forward, float *backward, float thr, Constraint_list *CL); - - -void free_proba_pair_wise () -{ - proba_pair_wise (NULL, NULL, NULL, NULL); -} -int proba_pair_wise ( Alignment *A, int *ns, int **ls, Constraint_list *CL) -{ - int NumMatrixTypes=5; - int NumInsertStates=2; - static float **transMat, **insProb, **matchProb, *initialDistribution, **transProb, **emitPairs, *emitSingle, ***TinsProb, *TmatchProb; - static int TinsProb_ml, TmatchProb_ml; - int i, j,I, J; - float *F, *B, *P; - float tot; - int l, s1, s2; - float thr=0.01;//ProbCons Default - char *alphabet; - - - - //Free all the memory - if (A==NULL) - { - free_float (transMat, -1);transMat=NULL; - free_float (insProb, -1);insProb=NULL; - free_float (matchProb, -1);matchProb=NULL; - vfree (initialDistribution); initialDistribution=NULL; - free_float (transProb, -1);transProb=NULL; - free_float (emitPairs, -1);emitPairs=NULL; - vfree (emitSingle);emitSingle=NULL; - - - free_arrayN((void***)TinsProb, 3);TinsProb=NULL; - vfree (TmatchProb);TmatchProb=NULL; - TinsProb_ml=0; TmatchProb_ml=0; - - forward_proba_pair_wise (NULL, NULL, 0,0,NULL,NULL,NULL,NULL,NULL); - backward_proba_pair_wise (NULL, NULL, 0,0,NULL,NULL,NULL,NULL,NULL); - ProbaMatrix2CL(NULL, NULL, NULL, 0, 0, NULL, NULL, 0, NULL); - return 0; - } - - if (!transMat && (strm (retrieve_seq_type(), "DNA") ||strm (retrieve_seq_type(), "RNA")) ) - { - static float **p; - static float *s; - if (!p) - { - int l,a,b; - l=strlen (DNAalphabetDefault); - p=declare_float (l,l); - s=vcalloc (l, sizeof (float)); - for (a=0; amethod_matrix, DNAalphabetDefault,p,s); - alphabet=DNAalphabetDefault; - emitPairs=declare_float (256, 256); - emitSingle=vcalloc (256, sizeof (float)); - for (i=0; i<256; i++) - { - emitSingle[i]=1e-5; - for (j=0; j<256; j++) - emitPairs[i][j]=1e-10; - } - l=strlen (alphabet); - - for (i=0; imethod_matrix, alphabetDefault,p,s); - alphabet=alphabetDefault; - emitPairs=declare_float (256, 256); - emitSingle=vcalloc (256, sizeof (float)); - for (i=0; i<256; i++) - { - //emitSingle[i]=1e-5; - emitSingle[i]=1; - for (j=0; j<256; j++) - //emitPairs[i][j]=1e-10; - emitPairs[i][j]=1; - - } - l=strlen (alphabet); - - for (i=0; iseq_al[ls[0][0]]); - J=strlen (A->seq_al[ls[1][0]]); - //TmatchProb=vcalloc ((I+1)*(J+1), sizeof (float)); - //TinsProb=declare_arrayN (3, sizeof (float),2,NumMatrixTypes,MAX(I,J)+1); - - l=(I+1)*(J+1); - if (l>TmatchProb_ml) - { - TmatchProb_ml=l; - if (TmatchProb)TmatchProb=vrealloc(TmatchProb,TmatchProb_ml*sizeof (float)); - else TmatchProb=vcalloc ( l, sizeof (float)); - } - l=MAX(I,J)+1; - if ( l>TinsProb_ml) - { - TinsProb_ml=l; - if (TinsProb)free_arrayN (TinsProb, 3); - TinsProb=declare_arrayN (3, sizeof (float),2,NumMatrixTypes,TinsProb_ml); - } - - get_tot_prob (A,A, ns,ls,NumMatrixTypes, matchProb, insProb,TmatchProb,TinsProb, CL); - - F=forward_proba_pair_wise (A->seq_al[ls[0][0]], A->seq_al[ls[1][0]], NumMatrixTypes,NumInsertStates,transMat, initialDistribution,TmatchProb,TinsProb, transProb); - B=backward_proba_pair_wise (A->seq_al[ls[0][0]], A->seq_al[ls[1][0]], NumMatrixTypes,NumInsertStates,transMat, initialDistribution,TmatchProb,TinsProb, transProb); - A->CL=ProbaMatrix2CL(A,ns, ls,NumMatrixTypes,NumInsertStates, F, B, thr,CL); - - //free_proba_pair_wise(); - return 1; - } - -int get_tot_prob (Alignment *A1,Alignment *A2, int *ns, int **ls, int nstates, float **matchProb, float **insProb, float *TmatchProb, float ***TinsProb, Constraint_list *CL) -{ - int i, j, a, b, c,d, k, n,n1,n2, ij; - char c1, c2; - int I, J; - int ***VA1,***VA2, *observed, index; - //Pre-computation of the pairwise scores in order to use potential profiles - //The profiles are vectorized AND Compressed so that the actual alphabet size (proteins/DNA) does not need to be considered - - - if (ns[0]==1 && ns[1]==1 ) - { - int s1, s2; - int *nns, **nls; - Alignment *NA1, *NA2; - - nns=vcalloc ( 2, sizeof (int)); - nls=vcalloc (2, sizeof (int*)); - - s1=A1->order[ls[0][0]][0]; - s2=A2->order[ls[1][0]][0]; - NA1=seq2R_template_profile (CL->S,s1); - NA2=seq2R_template_profile (CL->S,s2); - - if (NA1 || NA2) - { - if (NA1) - { - nns[0]=NA1->nseq; - nls[0]=vcalloc (NA1->nseq, sizeof (int)); - for (a=0; anseq; a++) - nls[0][a]=a; - } - else - { - NA1=A1; - nns[0]=ns[0]; - nls[0]=vcalloc (ns[0], sizeof (int)); - for (a=0; anseq; - nls[1]=vcalloc (NA2->nseq, sizeof (int)); - for (a=0; anseq; a++) - nls[1][a]=a; - } - else - { - NA2=A2; - nns[1]=ns[1]; - nls[1]=vcalloc (ns[1], sizeof (int)); - for (a=0; aseq_al[ls[0][0]]); - J=strlen (A2->seq_al[ls[1][0]]); - - - - //get Ins for I - for (i=1; i<=I; i++) - { - for (k=0; kseq_al[ls[0][b]][i-1]; - if (c1!='-') - { - TinsProb[0][k][i]+=insProb[c1][k]; - - n++; - } - } - if (n)TinsProb[0][k][i]/=n; - } - } - //Get Ins for J - for (j=1; j<=J; j++) - { - for (k=0; kseq_al[ls[1][b]][j-1]; - if (c2!='-') - { - TinsProb[1][k][j]+=insProb[c2][k]; - - n++; - } - } - if (n)TinsProb[1][k][j]/=n; - } - } - - observed=vcalloc ( 26, sizeof (int)); - VA1=declare_arrayN (3, sizeof (int),2,26,I); - for (i=0; iseq_al[ls[0][b]][i]); - if ( c1=='-')continue; - c1-='a'; - - if (!(in=observed[c1])){in=observed[c1]=++index;} - - VA1[0][in-1][i]=c1; - VA1[1][in-1][i]++; - } - - VA1[0][index][i]=-1; - for (b=0; b<26; b++)observed[b]=0; - } - - VA2=declare_arrayN (3, sizeof (int),2,26,J); - for (i=0; iseq_al[ls[1][b]][i]); - if ( c1=='-')continue; - c1-='a'; - - if (!(in=observed[c1])){in=observed[c1]=++index;} - - VA2[0][in-1][i]=c1; - VA2[1][in-1][i]++; - } - VA2[0][index][i]=-1; - for (b=0; b<26; b++)observed[b]=0; - } - vfree (observed); - - for ( ij=0,i=0; i<=I; i++) - { - for ( j=0; j<=J ; j++, ij++) - { - n=0; - TmatchProb[ij]=0; - if (i==0 || j==0); - else - { - c=0; - while (VA1[0][c][i-1]!=-1) - { - c1=VA1[0][c][i-1]+'a'; - n1=VA1[1][c][i-1]; - d=0; - while (VA2[0][d][j-1]!=-1) - { - c2=VA2[0][d][j-1]+'a'; - n2=VA2[1][d][j-1]; - TmatchProb[ij]+=matchProb[c1][c2]*(double)n1*(double)n2; - n+=n1*n2; - d++; - } - c++; - } - } - if (n)TmatchProb[ij]/=n; - } - } - - free_arrayN ((void **)VA1, 3); - free_arrayN ((void **)VA2, 3); - return 1; -} - - - - -Constraint_list *ProbaMatrix2CL (Alignment *A, int *ns, int **ls, int NumMatrixTypes, int NumInsertStates, float *forward, float *backward, float thr, Constraint_list *CL) -{ - float totalProb; - int ij, i, j,k, I, J, s1, s2; - static int *entry; - static int **list; - static int list_max; - int sim; - int list_size; - int list_n; - int old_n=0; - double v; - static float F=4; //potential number of full suboptimal alignmnents incorporated in the library - static int tot_old, tot_new; - - if (!A) - { - free_int (list, -1);list=NULL; - list_max=0; - - vfree(entry); entry=NULL; - return NULL; - } - - I=strlen (A->seq_al[ls[0][0]]); - J=strlen (A->seq_al[ls[1][0]]); - s1=name_is_in_list (A->name[ls[0][0]], (CL->S)->name, (CL->S)->nseq, 100); - s2=name_is_in_list (A->name[ls[1][0]], (CL->S)->name, (CL->S)->nseq, 100); - - list_size=I*J; - - if ( list_maxthr)//Conservative reduction of the list size to speed up the sorting - { - list[list_n][0]=i; - list[list_n][1]=j; - list[list_n][2]=(int)((float)v*(float)NORM_F); - list_n++; - } - if (v>0.01)old_n++; - } - } - - sort_int_inv (list, 3, 2, 0, list_n-1); - if (!entry)entry=vcalloc ( CL->entry_len, CL->el_size); - list_n=MIN(list_n,(F*MIN(I,J))); - for (i=0; iCL); - } - tot_new+=list_n; - tot_old+=old_n; - // HERE ("LIB_SIZE NEW: %d (new) %d (old) [%.2f]", list_n, old_n, (float)tot_new/(float)tot_old); - return A->CL; -} - -Constraint_list *ProbaMatrix2CL_old (Alignment *A, int *ns, int **ls, int NumMatrixTypes, int NumInsertStates, float *forward, float *backward, float thr, Constraint_list *CL) -{ - float totalProb; - int ij, i, j,k, I, J, s1, s2; - static int *entry; - int lib_size=0; - double v; - - I=strlen (A->seq_al[ls[0][0]]); - J=strlen (A->seq_al[ls[1][0]]); - s1=name_is_in_list (A->name[ls[0][0]], (CL->S)->name, (CL->S)->nseq, 100); - s2=name_is_in_list (A->name[ls[1][0]], (CL->S)->name, (CL->S)->nseq, 100); - - totalProb = ComputeTotalProbability (I,J,NumMatrixTypes, NumInsertStates,forward, backward); - if (!entry)entry=vcalloc ( CL->entry_len, CL->el_size); - ij = 0; - thr=0.01; - - - for (ij=0,i =0; i <= I; i++) - { - for (j =0; j <= J; j++) - { - v= EXP (MIN(LOG_ONE,(forward[ij] + backward[ij] - totalProb))); - if (i && j && v>=thr) - - { - entry[SEQ1]=s1;entry[SEQ2]=s2; - entry[R1]=i;entry[R2]=j; - entry[WE]=(int)((float)v*(float)NORM_F); - entry[CONS]=1; - add_entry2list (entry,A->CL); - lib_size++; - } - ij += NumMatrixTypes; - } - } - HERE ("LIB_SIZE_OLD: %d", lib_size); - return A->CL; -} - -float ComputeTotalProbability (int seq1Length, int seq2Length,int NumMatrixTypes, int NumInsertStates,float *forward, float *backward) -{ - - float totalForwardProb = LOG_ZERO; - float totalBackwardProb = LOG_ZERO; - int k; - - for (k = 0; k < NumMatrixTypes; k++) - { - LOG_PLUS_EQUALS (&totalForwardProb,forward[k + NumMatrixTypes * ((seq1Length+1) * (seq2Length+1) - 1)] + backward[k + NumMatrixTypes * ((seq1Length+1) * (seq2Length+1) - 1)]); - } - - totalBackwardProb =forward[0 + NumMatrixTypes * (1 * (seq2Length+1) + 1)] +backward[0 + NumMatrixTypes * (1 * (seq2Length+1) + 1)]; - - for (k = 0; k < NumInsertStates; k++) - { - LOG_PLUS_EQUALS (&totalBackwardProb,forward[2*k+1 + NumMatrixTypes * (1 * (seq2Length+1) + 0)] +backward[2*k+1 + NumMatrixTypes * (1 * (seq2Length+1) + 0)]); - LOG_PLUS_EQUALS (&totalBackwardProb,forward[2*k+2 + NumMatrixTypes * (0 * (seq2Length+1) + 1)] +backward[2*k+2 + NumMatrixTypes * (0 * (seq2Length+1) + 1)]); - } - return (totalForwardProb + totalBackwardProb) / 2; - } - - -float * backward_proba_pair_wise ( char *seq1, char *seq2, int NumMatrixTypes, int NumInsertStates, float **transMat, float *initialDistribution,float *matchProb, float ***insProb, float **transProb) -{ - static float *backward; - static int max_l; - - - int k, i, j,ij, i1j1, i1j, ij1,a, l, seq1Length, seq2Length, m; - char c1, c2; - char *iter1, *iter2; - - if (!seq1) - { - vfree (backward); - backward=NULL; max_l=0; - return NULL; - } - - iter1=seq1-1; - iter2=seq2-1; - seq1Length=strlen (seq1); - seq2Length=strlen (seq2); - l=(seq1Length+1)*(seq2Length+1)*NumMatrixTypes; - - if (!backward) - { - backward=vcalloc (l, sizeof (float)); - max_l=l; - } - else if (max_l= 0; i--) - { - c1 = (i == seq1Length) ? '~' : (unsigned char) iter1[i+1]; - for (j = seq2Length; j >= 0; j--) - { - c2 = (j == seq2Length) ? '~' : (unsigned char) iter2[j+1]; - - if (i < seq1Length && j < seq2Length) - { - m=((i+1)*(seq2Length+1))+j+1;//The backward and the forward are offset by 1 - float ProbXY = backward[0 + i1j1] + matchProb[m]; - - - for (k = 0; k < NumMatrixTypes; k++) - { - LOG_PLUS_EQUALS (&backward[k + ij], ProbXY + transProb[k][0]); - } - } - if (i < seq1Length) - { - for (k = 0; k < NumInsertStates; k++) - { - LOG_PLUS_EQUALS (&backward[0 + ij], backward[2*k+1 + i1j] + insProb[0][k][i+1] + transProb[0][2*k+1]); - LOG_PLUS_EQUALS (&backward[2*k+1 + ij], backward[2*k+1 + i1j] + insProb[0][k][i+1] + transProb[2*k+1][2*k+1]); - } - } - if (j < seq2Length) - { - for (k = 0; k < NumInsertStates; k++) - { - //+1 because the backward and the forward are offset by 1 - LOG_PLUS_EQUALS (&backward[0 + ij], backward[2*k+2 + ij1] + insProb[1][k][j+1] + transProb[0][2*k+2]); - LOG_PLUS_EQUALS (&backward[2*k+2 + ij], backward[2*k+2 + ij1] + insProb[1][k][j+1] + transProb[2*k+2][2*k+2]); - } - } - - ij -= NumMatrixTypes; - i1j -= NumMatrixTypes; - ij1 -= NumMatrixTypes; - i1j1 -= NumMatrixTypes; - } - } - - return backward; -} -float * forward_proba_pair_wise ( char *seq1, char *seq2, int NumMatrixTypes, int NumInsertStates, float **transMat, float *initialDistribution,float *matchProb, float ***insProb, float **transProb) -{ - static float *forward; - static int max_l; - int k, i, j,ij, i1j1, i1j, ij1, seq1Length, seq2Length, m; - char *iter1, *iter2; - int l,a; - - if (!seq1) - { - vfree (forward); - forward=NULL; max_l=0; - return NULL; - } - iter1=seq1-1; - iter2=seq2-1; - seq1Length=strlen (seq1); - seq2Length=strlen (seq2); - l=(seq1Length+1)*(seq2Length+1)*NumMatrixTypes; - - if (!forward) - { - forward=vcalloc (l, sizeof (float)); - max_l=l; - } - else if (max_l 1 || j > 1) - { - if (i > 0 && j > 0) - { - //Sum over all possible alignments - forward[0 + ij] = forward[0 + i1j1] + transProb[0][0]; - for (k = 1; k < NumMatrixTypes; k++) - { - LOG_PLUS_EQUALS (&forward[0 + ij], forward[k + i1j1] + transProb[k][0]); - } - forward[0 + ij] += matchProb[m]; - } - if ( i > 0) - { - for (k = 0; k < NumInsertStates; k++) - { - forward[2*k+1 + ij] = insProb[0][k][i] + LOG_ADD (forward[0 + i1j] + transProb[0][2*k+1],forward[2*k+1 + i1j] + transProb[2*k+1][2*k+1]); - } - } - if (j > 0) - { - for (k = 0; k < NumInsertStates; k++) - { - forward[2*k+2 + ij] = insProb[1][k][j] +LOG_ADD (forward[0 + ij1] + transProb[0][2*k+2],forward[2*k+2 + ij1] + transProb[2*k+2][2*k+2]); - } - } - } - - ij += NumMatrixTypes; - i1j += NumMatrixTypes; - ij1 += NumMatrixTypes; - i1j1 += NumMatrixTypes; - } - - } - return forward; - } -int ProbabilisticModel (int NumMatrixTypes, int NumInsertStates,float *initDistribMat,float *emitSingle, float **emitPairs, float *gapOpen, float *gapExtend, float **transMat, float *initialDistribution, float **matchProb, float **insProb, float **transProb) -{ - - - // build transition matrix - int i, j; - - - transMat[0][0] = 1; - for (i = 0; i < NumInsertStates; i++) - { - transMat[0][2*i+1] = gapOpen[2*i]; - transMat[0][2*i+2] = gapOpen[2*i+1]; - transMat[0][0] -= (gapOpen[2*i] + gapOpen[2*i+1]); - - transMat[2*i+1][2*i+1] = gapExtend[2*i]; - transMat[2*i+2][2*i+2] = gapExtend[2*i+1]; - transMat[2*i+1][2*i+2] = 0; - transMat[2*i+2][2*i+1] = 0; - transMat[2*i+1][0] = 1 - gapExtend[2*i]; - transMat[2*i+2][0] = 1 - gapExtend[2*i+1]; - } - - - - // create initial and transition probability matrices - for (i = 0; i < NumMatrixTypes; i++){ - initialDistribution[i] = (float)log ((float)initDistribMat[i]); - for (j = 0; j < NumMatrixTypes; j++) - transProb[i][j] = (float)log ((float)transMat[i][j]); - } - - // create insertion and match probability matrices - for (i = 0; i < 256; i++) - { - for (j = 0; j < NumMatrixTypes; j++) - { - insProb[i][j] = (float)log((float)emitSingle[i]); - } - for (j = 0; j < 256; j++) - { - matchProb[i][j] = (float)log((float)emitPairs[i][j]); - } - } - return 1; -} - - -int viterbi_pair_wise ( Alignment *A, int *ns, int **ls, Constraint_list *CL) -{ - char C1,c1, C2,c2; - char *alphabet, *char_buf; - char **al, **aln; - int seq1Length, seq2Length, I, J; - int i, j,ij, i1j1, i1j, ij1, k, a, b,l, LEN, r, c, m, state; - int NumMatrixTypes=5; - int NumInsertStates=2; - int *traceback; - float bestProb; - static float **transMat, **insProb, **matchProb, *initialDistribution, **transProb, **emitPairs, *emitSingle, *TmatchProb, ***TinsProb; - float *viterbi; - - ungap_sub_aln (A, ns[0],ls[0]); - ungap_sub_aln (A, ns[1],ls[1]); - - seq1Length=I=strlen (A->seq_al[ls[0][0]]); - seq2Length=J=strlen (A->seq_al[ls[1][0]]); - - if (!transMat) - { - alphabet=alphabetDefault; - emitPairs=declare_float (256, 256); - emitSingle=vcalloc (256, sizeof (float)); - for (i=0; i<256; i++) - { - emitSingle[i]=1e-5; - for (j=0; j<256; j++) - emitPairs[i][j]=1e-10; - } - l=strlen (alphabet); - - for (i=0; i 0 && j > 0) - { - for (k = 0; k < NumMatrixTypes; k++) - { - float newVal = viterbi[k + i1j1] + transProb[k][0] + TmatchProb[m]; - if (viterbi[0 + ij] < newVal) - { - viterbi[0 + ij] = newVal; - traceback[0 + ij] = k; - } - } - } - if (i > 0) - { - for (k = 0; k < NumInsertStates; k++) - { - float valFromMatch = TinsProb[0][k][i] + viterbi[0 + i1j] + transProb[0][2*k+1]; - float valFromIns = TinsProb[0][k][i] + viterbi[2*k+1 + i1j] + transProb[2*k+1][2*k+1]; - if (valFromMatch >= valFromIns){ - viterbi[2*k+1 + ij] = valFromMatch; - traceback[2*k+1 + ij] = 0; - } - else { - viterbi[2*k+1 + ij] = valFromIns; - traceback[2*k+1 + ij] = 2*k+1; - } - } - } - if (j > 0) - { - for (k = 0; k < NumInsertStates; k++){ - float valFromMatch = TinsProb[1][k][j] + viterbi[0 + ij1] + transProb[0][2*k+2]; - float valFromIns = TinsProb[1][k][j] + viterbi[2*k+2 + ij1] + transProb[2*k+2][2*k+2]; - if (valFromMatch >= valFromIns){ - viterbi[2*k+2 + ij] = valFromMatch; - traceback[2*k+2 + ij] = 0; - } - else - { - viterbi[2*k+2 + ij] = valFromIns; - traceback[2*k+2 + ij] = 2*k+2; - } - } - } - - ij += NumMatrixTypes; - i1j += NumMatrixTypes; - ij1 += NumMatrixTypes; - i1j1 += NumMatrixTypes; - } - } - - // figure out best terminating cell - bestProb = LOG_ZERO; - state = -1; - for (k = 0; k < NumMatrixTypes; k++) - { - float thisProb = viterbi[k + NumMatrixTypes * ((seq1Length+1)*(seq2Length+1) - 1)] + initialDistribution[k]; - if (bestProb < thisProb) - { - bestProb = thisProb; - state = k; - } - } - - - - // compute traceback - al=declare_char(2,seq1Length+seq2Length); - LEN=0; - r = seq1Length, c = seq2Length; - while (r != 0 || c != 0) - { - int newState = traceback[state + NumMatrixTypes * (r * (seq2Length+1) + c)]; - - if (state == 0){ c--; r--; al[0][LEN]=1;al[1][LEN]=1;} - else if (state % 2 == 1) {r--; al[0][LEN]=1;al[1][LEN]=0;} - else { c--; al[0][LEN]=0;al[1][LEN]=1;} - LEN++; - state = newState; - } - - - invert_list_char ( al[0], LEN); - invert_list_char ( al[1], LEN); - if ( A->declared_len<=LEN)A=realloc_aln2 ( A,A->max_n_seq, 2*LEN); - aln=A->seq_al; - char_buf= vcalloc (LEN+1, sizeof (char)); - for ( c=0; c< 2; c++) - { - for ( a=0; a< ns[c]; a++) - { - int ch=0; - for ( b=0; b< LEN; b++) - { - if (al[c][b]==1) - char_buf[b]=aln[ls[c][a]][ch++]; - else - char_buf[b]='-'; - } - char_buf[b]='\0'; - sprintf (aln[ls[c][a]],"%s", char_buf); - } - } - - - A->len_aln=LEN; - A->nseq=ns[0]+ns[1]; - vfree (char_buf); - free_char (al, -1); - - - - - - return (int)(bestProb*(float)1000); -} - -float ** get_emitPairs (char *mat, char *alp, float **p, float *s) - { - static char *rmat; - float k=0, t=0; - int a, b, c, l; - int **M; - - if (!rmat)rmat=vcalloc (100, sizeof (char)); - - if (!mat || !mat[0] || strm (mat, "default"))return p; - else if (strm (rmat, mat))return p; - - sprintf (rmat,"%s", mat); - - M=read_matrice (mat); - l=strlen (alp); - - k=log (2)/2; - for (a=0; a -#include -#include -#include -#include - -#include "io_lib_header.h" -#include "util_lib_header.h" -#include "define_header.h" -#include "dp_lib_header.h" -/*********************************************************************************************/ -/* */ -/* MULTI_THREAD */ -/* */ -/*********************************************************************************************/ -/*********************************COPYRIGHT NOTICE**********************************/ -/*© Centro de Regulacio Genomica */ -/*and */ -/*Cedric Notredame */ -/*Tue Oct 27 10:12:26 WEST 2009. */ -/*All rights reserved.*/ -/*This file is part of T-COFFEE.*/ -/**/ -/* T-COFFEE is free software; you can redistribute it and/or modify*/ -/* it under the terms of the GNU General Public License as published by*/ -/* the Free Software Foundation; either version 2 of the License, or*/ -/* (at your option) any later version.*/ -/**/ -/* T-COFFEE is distributed in the hope that it will be useful,*/ -/* but WITHOUT ANY WARRANTY; without even the implied warranty of*/ -/* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the*/ -/* GNU General Public License for more details.*/ -/**/ -/* You should have received a copy of the GNU General Public License*/ -/* along with Foobar; if not, write to the Free Software*/ -/* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA*/ -/*............................................... |*/ -/* If you need some more information*/ -/* cedric.notredame@europe.com*/ -/*............................................... |*/ -/**/ -/**/ -/* */ -/*********************************COPYRIGHT NOTICE**********************************/ diff --git a/binaries/src/tcoffee/t_coffee_source/util_graph_maln.c b/binaries/src/tcoffee/t_coffee_source/util_graph_maln.c deleted file mode 100644 index d46647b..0000000 --- a/binaries/src/tcoffee/t_coffee_source/util_graph_maln.c +++ /dev/null @@ -1,570 +0,0 @@ -#include -#include -#include -#include -#include - -#include "io_lib_header.h" -#include "util_lib_header.h" -#include "define_header.h" -#include "dp_lib_header.h" -int check_link (CL_node ***G, int S1, int r1, int s2, int r2); -void light_nodes (CL_node *A, int va, CL_node*B, int vb, CL_node*C,int vc, char *s); -void print_graph (CL_node *G, Sequence *S); -Sequence *Seq; -CL_node *Start; -static int cycle; - - - -Alignment * add_constraint2aln ( Alignment *A, int s1, int r1, int s2, int r2) -{ - /*Note sCL_node ***G;1 and r1 must be numbered from 0 to n-1*/ - CL_node ***G; - - - G=aln2graph(A); - G=add_constraint2graph_aln (G,s1,r1, s2, r2); - A=graph2aln (A,G[0][0],aln2seq(A)); - vfree_graph (G[0][0]); - return A; -} -Alignment * graph_aln (Alignment *A, Constraint_list *iCL, Sequence *S) -{ - CL_node ***G; - int a,start; - CLIST_TYPE *entry=NULL; - Constraint_list *CL; - Seq=S; - - CL=duplicate_constraint_list (iCL); - for ( a=0; ane; a++) - { - CL->L[a*CL->entry_len+WE]=CL->evaluate_residue_pair ( iCL, CL->L[a*CL->entry_len+SEQ1],CL->L[a+CL->entry_len+R1] ,CL->L[a*CL->entry_len+SEQ2],CL->L[a*CL->entry_len+R2]); - } - CL=sort_constraint_list_on_n_fields(CL, 0, CL->ne,WE,1); - G=aln2graph (A); - - - Start=G[0][0]; - start=0; - for (a=start; ane; a++) - { - - cycle++; - entry=extract_entry(entry,a, CL); - fprintf ( stderr, "\r\tCompletion: [%5.2f%%]",(float)(a*100)/CL->ne); - G=add_constraint2graph_aln (G, entry[SEQ1], entry[R1]-1, entry[SEQ2],entry[R2]-1); - } - start=0; - for (a=start; ane; a++) - { - - cycle++; - entry=extract_entry(entry,a, CL); - fprintf ( stderr, "\r\tCompletion: [%5.2f%%]",(float)(a*100)/CL->ne); - G=add_constraint2graph_aln (G, entry[SEQ1], entry[R1]-1, entry[SEQ2],entry[R2]-1); - } - - A=graph2aln (A,G[0][0], S); - - return A; - -} - -void print_graph (CL_node *G, Sequence *S) -{ - Alignment *A=NULL; - - if (S==NULL) S=Seq; - A=seq2aln (Seq, A, 1); - A=graph2aln (A,G, Seq); - print_aln (A); -} - -Alignment* graph2aln (Alignment *A, CL_node *G, Sequence *S) -{ - int s, l, a; - CL_node *Gi; - - /*Rewind G*/ - while ( G->p)G=G->p; - while ( G->l)G=G->l; - - l=s=0; - Gi=G; - while (Gi){Gi=Gi->r;l++;} - Gi=G; - while (Gi){Gi=Gi->c;s++;} - - A=realloc_alignment (A, l+1); - - - l=0; - while (G) - { - Gi=G; - s=0; - while ( G!=NULL) - { - - if ( G->res==-1)A->seq_al[s][l]='-'; - else if ( G->res==-2)A->seq_al[s][l]='*'; - else if ( G->res==-3)A->seq_al[s][l]='#'; - else if (G->res>=0)A->seq_al[s][l]=S->seq[G->seq][G->res]; - - G=G->c; - s++; - } - G=Gi->r; - l++; - } - - - - for ( a=0;aseq_al[a][l]='\0'; - A->len_aln=strlen (A->seq_al[0]); - A->nseq=s; - - - return A; -} -CL_node ***aln2graph (Alignment *A) -{ - int a=0, b; - static CL_node ***galn; - CL_node *N, *iN, *pN; - int res; - - if ( !galn) - { - galn=calloc ( A->nseq, sizeof (CL_node**)); - for ( a=0; a< A->nseq; a++) - { - galn[a]=calloc (A->len_aln, sizeof (CL_node*)); - } - } - pN=iN=NULL; - N=declare_cl_nodes(-1, a); - - - for ( a=0; anseq; a++) - { - iN=N; - for (res=A->order[a][1], b=0; blen_aln; b++) - { - if (blen_aln-1) - { - N->r=declare_cl_nodes(-1, a); - (N->r)->l=N; - } - if ( pN) - { - N->p=pN; - pN->c=N; - } - - N->seq=A->order[a][0]; - N->res=(is_gap(A->seq_al[a][b]))?-1:res++; - - if (N->res!=-1)galn[a][res-1]=N; - if ( pN) - { - N->p=pN; - pN->c=N; - pN=pN->r; - } - N=N->r; - } - if ( anseq-1)iN->c=declare_cl_nodes(-1, a); - pN=iN; - N=iN->c; - } - - return galn; -} -CL_node ***add_constraint2graph_aln (CL_node ***G, int s1, int r1, int s2, int r2) -{ - CL_node *S, *E, *B; - int d; - - - S=G[s1][r1]; - E=G[s2][r2]; - - - d=get_node_distance (S,E); - if (d<0){B=S;S=E;E=B;} - d=(d<0)?-d:d; - - - insert_gap_columns (E,d); - shift_segment( S,d+1,d); - - return G; - -} - - -CL_node *shift_segment ( CL_node *S, int segL, int shiftL) -{ - int a; - CL_node *E, *G; - - - if ( !shiftL)return S; - - /*find segment coordinates*/ - for (E=S, a=1; a< segL; a++)E=E->r; - - /*Shift the gaps*/ - - G=swap_gap_in_graph (S, E); - for (a=1; a< shiftL; a++)swap_gap_in_graph (S, E); - - while (G!=E)G=remove_graph_gap_column (G); - remove_graph_gap_column (E); - - return G; -} - -int is_graph_gap_column(CL_node *S) -{ - while (S->p)S=S->p; - - while (S) - { - if (S->res>=0)return 0; - S=S->c; - } - return 1; -} -CL_node * remove_graph_gap_column (CL_node *S) -{ - CL_node *R,*L, *P, *RV; - - RV=S->r; - while (S->p) - { - - S=S->p; - } - - if ( !is_graph_gap_column (S))return RV; - - - - while (S) - { - - R=S->r; - L=S->l; - P=S->p; - - if (L)L->r=S->r; - if (R)R->l=S->l; - - P=S; - S=S->c; - vfree_cl_node (P); - } - return RV; -} - -CL_node * swap_gap_in_graph ( CL_node*S, CL_node *E) -{ - /*Moves gap AFTER End to BEFORE Start - SxxxE- - -xxxxx - straightens the links in between - */ - CL_node *G, *N, *iE, *iS, *SP, *SC, *SL; - - - /*Preserve the E/S values*/ - iE=E; - iS=S; - - - /*prepare the parent/child links first*/ - - SP=S->p; - SC=S->c; - SL=S->l; - - while ( S!=E->r) - { - N=S->r; - - S->p=N->p; - if (N->p)(S->p)->c=S; - - S->c=N->c; - if (N->c)(S->c)->p=S; - - S=S->r; - } - - E=iE; - S=iS; - - /*Remove the gap*/ - G=E->r; - if ( G->res>=0)fprintf ( stderr, "\nERROR: NOT a GAP"); - - E->r=G->r; - if (E->r)(E->r)->l=E; - - /*insert the gap*/ - - G->r=S; - S->l=G; - - G->l=SL; - if (SL)SL->r=G; - - - G->p=SP; - if (SP)SP->c=G; - - G->c=SC; - if (SC)SC->p=G; - - return G; - -} - -CL_node * declare_cl_nodes ( int len, int seq) -{ - static CL_node **N; - CL_node *IN; - static int Nlen; - int a; - - if (len==-1) - { - IN=calloc ( 1, sizeof (CL_node)); - IN->res=-1; - return IN; - } - - - - if ( len>Nlen) - { - free (N); - N=calloc (len, sizeof (CL_node*)); - } - - if ( len==0)return NULL; - - for (a=0; ares=-1; - (N[a])->seq=seq; - if (a!=0)(N[a])->l=N[a-1]; - if (a!=len-1)(N[a])->r=N[a+1]; - } - - (N[0])->l=N[len-1]; - (N[len-1])->r=N[0]; - - return N[0]; -} - -CL_node *insert_gap_columns (CL_node *S, int d) -{ - CL_node *Gs,*Ge, *pGs, *Gi, *Si; - int a; - - if ( d==0)return S; - - pGs=Gi=NULL; - Si=S; - while (S->p!=NULL)S=S->p; - - while (S!=NULL) - { - Gs=declare_cl_nodes(d, S->seq); - Ge=Gs->l; - - Ge->r=S->r; - if (Ge->r)(Ge->r)->l=Ge; - - Gs->l=S; - S->r=Gs; - - if (pGs) - { - Gi=Gs; - for (a=0; a< d; a++) - { - Gs->p=pGs; - pGs->c=Gs; - Gs=Gs->r; - pGs=pGs->r; - } - pGs=Gi; - } - else - { - pGs=Gs; - } - S=S->c; - } - return Si; -} - -int get_node_distance ( CL_node *S, CL_node *E) -{ - int distance=0; - CL_node *iS,*B; - int swap=1; - - /*project the two points onto one sequence*/ - if (S->seq>E->seq){B=S;S=E;E=B;swap*=-1;} - while (S->seq!=E->seq)S=S->c; - - /*Walk from E to S */ - iS=S; - while ( iS->res<0 && iS->r!=NULL){iS=iS->r;} - if (iS->res<0 || iS->res>E->res){B=S; S=E; E=B;swap*=-1;} - - while ( S!=E) - { - S=S->r; - distance+=swap; - } - return distance; -} - - - - - - - - - -int check_graph ( CL_node *S, char *string) -{ - CL_node *iS; - static int n; - int lr; - - if ( S==NULL)S=Start; - fprintf ( stderr, "\n\tGRAPH Check %s #%d\n",string, ++n); - while ( S->p!=NULL)S=S->p; - while ( S->l!=NULL)S=S->l; - while ( S) - { - iS=S; - lr=-1; - while (iS) - { - if (iS->l && (iS->l)->seq!=iS->seq){fprintf ( stderr, "\n\t\tSEq pb");myexit(EXIT_FAILURE);} - if (iS->free==1){fprintf ( stderr, "\n\t\tFree Node read");myexit(EXIT_FAILURE);} - if (iS->res>0) - { - if (lr!=-1 && iS->res-lr!=1){fprintf ( stderr, "\n\t\tERROR: lost residues");myexit (EXIT_FAILURE);} - lr=iS->res; - } - if ( iS->r && (iS->r)->l!=iS){fprintf ( stderr, "\n\t\tERROR: left != right: [%d %d][%d %d]", iS->seq, iS->res, (iS->l)->seq, (iS->r)->res);myexit (EXIT_FAILURE);} - if ( iS->p && (iS->p)->c!=iS){fprintf ( stderr, "\n\t\tERROR: parent != child: [%d %d][%d %d]", iS->seq, iS->res, (iS->p)->seq, (iS->p)->res);myexit (EXIT_FAILURE);} - if ( iS->c && (iS->c)->p!=iS){fprintf ( stderr, "\n\t\tERROR: parent != child: [%d %d][%d %d]", iS->seq, iS->res, (iS->c)->seq, (iS->c)->res);myexit (EXIT_FAILURE);} - iS=iS->r; - } - S=S->c; - } - return 1; -} - -CL_node * vfree_graph ( CL_node *S) -{ - CL_node *Si; - - while ( S->p!=NULL)S=S->p; - while ( S->l!=NULL)S=S->l; - - while ( S) - { - Si=S->c; - while ( S) - { - - S=S->r; - if (S)vfree_cl_node (S->l); - } - S=Si; - } - return S; - -} -CL_node *vfree_cl_node ( CL_node *N) -{ - if ( N->free==1)crash("freeing free block"); - N->free=1; - free (N); - return N; -} - - -void light_nodes (CL_node *A, int va, CL_node*B, int vb, CL_node*C,int vc, char *string ) -{ - int ta=0, tb=0, tc=0; - - fprintf ( stderr, "\nCycle %d\n LIGHT NODE: %s", cycle,string); - if ( A){ta=A->res; A->res=va;fprintf ( stderr, "\nA: seq %d res %d", A->seq, A->res);} - if ( B){tb=B->res; B->res=vb;fprintf ( stderr, "\nB: seq %d res %d", B->seq, B->res);} - if ( C){tc=C->res; C->res=vc;fprintf ( stderr, "\nC: seq %d res %d", C->seq, C->res);} - print_graph (A, 0); - if ( A){A->res=ta;} - if ( B){B->res=tb;} - if ( C){C->res=tc;} -} -int check_link (CL_node ***G, int s1, int r1, int s2, int r2) -{ - CL_node *S; - CL_node *E; - - S=G[s1][r1]; - E=G[s2][r2]; - while ( S->p)S=S->p; - while ( S) - { - S=S->c; - if ( S==E)return 1; - } - return 0; -} -/*********************************COPYRIGHT NOTICE**********************************/ -/*© Centro de Regulacio Genomica */ -/*and */ -/*Cedric Notredame */ -/*Tue Oct 27 10:12:26 WEST 2009. */ -/*All rights reserved.*/ -/*This file is part of T-COFFEE.*/ -/**/ -/* T-COFFEE is free software; you can redistribute it and/or modify*/ -/* it under the terms of the GNU General Public License as published by*/ -/* the Free Software Foundation; either version 2 of the License, or*/ -/* (at your option) any later version.*/ -/**/ -/* T-COFFEE is distributed in the hope that it will be useful,*/ -/* but WITHOUT ANY WARRANTY; without even the implied warranty of*/ -/* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the*/ -/* GNU General Public License for more details.*/ -/**/ -/* You should have received a copy of the GNU General Public License*/ -/* along with Foobar; if not, write to the Free Software*/ -/* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA*/ -/*............................................... |*/ -/* If you need some more information*/ -/* cedric.notredame@europe.com*/ -/*............................................... |*/ -/**/ -/**/ -/* */ -/*********************************COPYRIGHT NOTICE**********************************/ diff --git a/binaries/src/tcoffee/t_coffee_source/util_job_handling.c b/binaries/src/tcoffee/t_coffee_source/util_job_handling.c deleted file mode 100644 index 8f730d2..0000000 --- a/binaries/src/tcoffee/t_coffee_source/util_job_handling.c +++ /dev/null @@ -1,319 +0,0 @@ -#include -#include -#include -#include -#include - -#include "io_lib_header.h" -#include "util_lib_header.h" -#include "define_header.h" -#include "dp_lib_header.h" - - - -Job_TC* print_lib_job ( Job_TC *job,char *string, ...) -{ - va_list ap; - char **list; - char **value; - char **name; - int a, np, n; - char bname[LONG_STRING]; - char bval[LONG_STRING]; - - list=string2list2(string, " ="); - n=atoi (list[0]); - - - name =vcalloc ( (n-1)/2, sizeof (char*)); - value=vcalloc ( (n-1)/2, sizeof (char*)); - - - va_start (ap, string); - for (a=1, np=0; apl=vcalloc (100, sizeof (char*));job->pl[job->np++]=(void*)job->pl; - job->jobid=njobs++; - } - - for ( a=0; a< n; a++) - { - int string=0; - if ( strstr(name[a], "control") && !job->control){job->control=vcalloc ( 1, sizeof (Job_control_TC));job->pl[job->np++]=(void*)job->control;} - else if ( strstr(name[a], "io") && !job->io){job->io=vcalloc ( 1, sizeof (Job_io_TC));job->pl[job->np++]=(void*)job->io;} - else if ( strstr(name[a], "param") && !job->param){job->param=vcalloc ( 1, sizeof (Job_param_TC));job->pl[job->np++]=(void*)job->param;} - - if ( strm (name[a], "control")) {job->control=(struct Job_control_TC*)atoi(value[a]);string=0;} - else if ( strm (name[a], "control->submitF")) {(job->control) ->submitF=(struct Job_TC *(*)(struct Job_TC *))atol(value[a]);string=0;} - else if ( strm (name[a], "control->retrieveF")) {(job->control) ->retrieveF=(struct Job_TC *(*)(struct Job_TC *))atol(value[a]);string=0;} - else if ( strm (name[a], "control->mode")) {(job->control)->mode=value[a];string=1;} - - else if ( strm (name[a], "param")) {job->param=(struct Job_param_TC*)atol(value[a]);string=0;} - else if ( strm (name[a], "param->method")) {job->pl[job->np++]=((job->param)->method)=value[a];string=1;} - else if ( strm (name[a], "param->TCM")) {(job->param)->TCM= (TC_method *) atol(value[a]) ;string=0;} - else if ( strm (name[a], "param->aln_c")) {job->pl[job->np++]=(job->param)->aln_c=value[a] ;string=1;} - else if ( strm (name[a], "param->seq_c")) {job->pl[job->np++]=(job->param)->seq_c=value[a] ;string=1;} - - - else if ( strm (name[a], "io")) {job->io=(struct Job_io_TC*)atol(value[a]);string=0;} - else if ( strm (name[a], "io->out")) {job->pl[job->np++]=(job->io)->out=value[a] ;string=1;} - else if ( strm (name[a], "io->in" )) {job->pl[job->np++]=(job->io)->in =value[a] ;string=1;} - else if ( strm (name[a], "io->CL")) {(job->io)->CL=(Constraint_list*)atol (value[a]); string=0;} - else - { - fprintf ( stderr, "ERROR: print_lib_job2: %s is unknown [FATAL:%s]", name[a], PROGRAM); - myexit (EXIT_FAILURE); - } - if ( string==0) vfree ( value[a]); - } - vfree ( value); - free_arrayN ((void **)name, 2); - return job; -} - - - -/*Stack Manipulation*/ -Job_TC *queue_cat (Job_TC *P, Job_TC *C) -{ - if ( !P && !C) return NULL; - else if (!P || P->jobid==-1) - { - vfree (P); - C->p=NULL; - return C; - } - else - { - P->c=C; - if (C)C->p=P; - return queue2last(P); - } - return NULL; -} -Job_TC *free_queue (Job_TC *job) -{ - return NULL; - if (!job) return job; - else - { - job=queue2last(job); - while ( job) - { - job=free_job (job); - } - return job; - } -} -Job_TC *free_job (Job_TC *job) - { - int a; - Job_TC *p; - - if ( !job ) return job; - else - { - for ( a=job->np-1; a>=0; a--) - vfree ( job->pl[a]); - p=job->p; - job->p=job->c=NULL; - vfree (job); - return p; - } - return NULL; - } -Job_TC * queue2heap (Job_TC*job) -{ - - while (job && job->p) - job=job->p; - return job; -} -Job_TC * queue2last (Job_TC*job) -{ - - while (job && job->c) - { - job=job->c; - } - return job; -} - -int queue2n (Job_TC*job) -{ - int n=0; - - - job=queue2last (job); - while (job && job->p) - { - n++; - job=job->p; - - } - return n; -} - -Job_TC * descend_queue (Job_TC*job) -{ - - if (!job ||!job->c)return job; - else - { - (job->c)->p=job; - job=job->c; - } - return job; -} - -Job_TC* delete_job (Job_TC *job) -{ - Job_TC *p, *c; - - p=job->p; - c=job->c; - free_job (job); - - return queue_cat (p, c); -} - -Job_TC*** split_job_list (Job_TC *job, int ns) -{ - int a,u,n,nj,split; - Job_TC*** jl; - Job_TC *ljob; - //retun a pointer to ns splits for joblist - - - if (ns==0)return NULL; - job=queue2heap(job); - jl=vcalloc(ns+1, sizeof (Job_TC**)); - jl[0]=vcalloc (2, sizeof (Job_TC*)); - - nj=queue2n(job); - - if (nj==0)return NULL; - else split=(nj/ns)+1; - - - n=a=u=0; - jl[a][0]=job; - while (job) - { - ljob=job; - if (n==split && ac; - } - return jl; -} - - - - -/*Job Control*/ -Job_TC* submit_job ( Job_TC *job) -{ - - if (!(job->control)->mode ||!(job->control)->mode[0] || 1==1) - { - return (job->control)->submitF (job); - } - else - { - fprintf ( stderr, "\n%s is an unkown mode for posting jobs [FATAL:%s]",(job->control)->mode, PROGRAM); - myexit (EXIT_FAILURE); - return NULL; - } - -} - -Job_TC* retrieve_job ( Job_TC *job) -{ - if (!(job->control)->mode ||!(job->control)->mode[0] || 1==1) - { - return (job->control)->retrieveF (job); - } - else - { - fprintf ( stderr, "\n%s is an unkown mode for posting jobs [FATAL:%s]",(job->control)->mode, PROGRAM); - myexit (EXIT_FAILURE); - return NULL; - } -} -/*********************************COPYRIGHT NOTICE**********************************/ -/*© Centro de Regulacio Genomica */ -/*and */ -/*Cedric Notredame */ -/*Tue Oct 27 10:12:26 WEST 2009. */ -/*All rights reserved.*/ -/*This file is part of T-COFFEE.*/ -/**/ -/* T-COFFEE is free software; you can redistribute it and/or modify*/ -/* it under the terms of the GNU General Public License as published by*/ -/* the Free Software Foundation; either version 2 of the License, or*/ -/* (at your option) any later version.*/ -/**/ -/* T-COFFEE is distributed in the hope that it will be useful,*/ -/* but WITHOUT ANY WARRANTY; without even the implied warranty of*/ -/* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the*/ -/* GNU General Public License for more details.*/ -/**/ -/* You should have received a copy of the GNU General Public License*/ -/* along with Foobar; if not, write to the Free Software*/ -/* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA*/ -/*............................................... |*/ -/* If you need some more information*/ -/* cedric.notredame@europe.com*/ -/*............................................... |*/ -/**/ -/**/ -/* */ -/*********************************COPYRIGHT NOTICE**********************************/ diff --git a/binaries/src/tcoffee/t_coffee_source/util_lib_header.h b/binaries/src/tcoffee/t_coffee_source/util_lib_header.h deleted file mode 100644 index 9be955b..0000000 --- a/binaries/src/tcoffee/t_coffee_source/util_lib_header.h +++ /dev/null @@ -1,2677 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include -typedef struct - { - char *name; - char *path; - char *suffix; - char *full; - } -Fname; - -struct Tmpname - { - char *name; - struct Tmpname *next; - }; -/*********************************************************************/ -/* */ -/* DICHOTOMY */ -/* */ -/* */ -/*********************************************************************/ -double dichotomy (double value, double target_value, double middle, double *bottom,double *top); -/*********************************************************************/ -/* */ -/* QSORT */ -/* */ -/* */ -/*********************************************************************/ - -void qsort(void *base, size_t nmemb, size_t size, int (*compar)(const void *, const void *)); -/*int memcmp ( const void *a, const void * b, size_t size); -void * memcpy ( void *a, void * b, size_t size); -*/ -/*********************************************************************/ -/* */ -/* HEAPSORT */ -/* */ -/* */ -/*********************************************************************/ -FILE *hsort_file ( FILE *fp ,int n,int len, size_t size,int first_comp_field, int n_comp_fields,int (*compare)(const void *, const void*,int,int,size_t),void * (*copy)(void *,void*,size_t)); -void ** hsort_array ( void **ra,int n,int len, size_t size, int first_comp_field, int n_comp_fields,int (*compare)(const void *, const void*,int,int,size_t),void * (*copy)(void *,void*,size_t)); -/**********************************************************************/ -/* */ -/* HSORT WRAPPERS */ -/* */ -/* */ -/**********************************************************************/ -void **hsort_list_array ( void **L, int len, size_t size, int entry_len,int first_comp_field, int n_comp_fields); -FILE *hsort_list_file ( FILE *fp, int len, size_t size, int entry_len,int first_comp_field, int n_comp_fields); -int hsort_cmp ( const void *a, const void *b, int first, int clen, size_t size); -void *hsort_cpy(void*to, void *from, size_t size); - -void test_hsort_list_array(); - -/*********************************************************************/ -/* */ -/* CEDRIC BSEARCH */ -/* */ -/* */ -/*********************************************************************/ -void * bsearch_file ( const void *key,int *p,int comp_first,int comp_len, FILE *fp ,int len, int entry_len,size_t el_size, int (*compare)(const void *, const void*,int, int, size_t)); -void * bsearch_array( const void *key,int *p,int comp_first,int comp_len,void**list,int len, int entry_len,size_t el_size, int (*compare)(const void *, const void*,int, int, size_t)); - -/*********************************************************************/ -/* */ -/* MY B_SEARCH_FILE FUNCTIONS */ -/* */ -/* */ -/*********************************************************************/ -void **search_in_list_file ( void *key,int *p, int comp_len,FILE *fp, int len, size_t size, int entry_len); -void **search_in_list_array ( void *key,int *p, int comp_len,void **L , int len, size_t size, int entry_len); - -/*********************************************************************/ -/* */ -/* SORT/COMPARE/SEARCH FUNCTIONS */ -/* */ -/* */ -/*********************************************************************/ - -int **search_in_list_int ( int *key, int k_len, int **list, int ne); -void sort_float ( float **V,int N_F, int F, int left, int right); -int cmp_float ( const float **a, const float **b); -void sort_int_1D ( int *L, int n); -char** sort_string_array (char **V, int n); - -void sort_int ( int **V,int N_F, int F, int left, int right); -void sort_list_int ( int **V,int N_F, int F, int left, int right); -void sort_list_int2 ( int **V,int *list,int N_F, int left, int right); -void sort_int_inv ( int **V,int N_F, int F, int left, int right); -void sort_list_int_inv ( int **V,int N_F, int F, int left, int right); -int cmp_int ( const int**a, const int**b); -int cmp_list_int (const int**a, const int**b); -int cmp_list_int2 (const int**a, const int**b); - -int name_is_in_list ( char *name, char **name_list, int n_name, int len); -char * check_list_for_dup ( char **list, int ne); -FILE *get_number_list_in_file ( FILE *fp, int *list, int *n, int *max_len); - - -/*********************************************************************/ -/* */ -/* QUANTILE */ -/* */ -/* */ -/*********************************************************************/ -int quantile ( int argc, char *argv[]); - -int quantile_rank (int **list,int field, int n, float p); -/*********************************************************************/ -/* */ -/* DUPLICATION */ -/* */ -/* */ -/*********************************************************************/ -short * set_short ( short *, int n,...); -char * set_char ( char *, int n,...); -int * set_int ( int *, int n,...); -float * set_float ( float *, int n,...); -double * set_double( double*, int n,...); - -short * ga_memcpy_short ( short *array1, short *array2, int n); -int * ga_memcpy_int ( int *array1, int *array2, int n); -float * ga_memcpy_float ( float *array1, float *array2, int n); -double * ga_memcpy_double( double *array1, double *array2, int n); - -short ** duplicate_short ( short **array , int len, int field); -int ** duplicate_int ( int **array , int len, int field); -char ** duplicate_char ( char **array , int len, int field); -char * duplicate_string ( char *string); -float ** duplicate_float ( float **array , int len, int field); -double ** duplicate_double( double **array , int len, int field); - -short ** copy_short ( short **array1, short **array2, int len, int number_field); -char ** copy_char ( char **array1, char **array2, int len, int number_field); -int ** copy_int ( int **array1, int **array2, int len, int number_field); -float ** copy_float ( float **array1, float **array2, int len, int number_field); -double ** copy_double( double **array1, double **array2, int len, int number_field); - -/*********************************************************************/ -/* */ -/* CONCATENATION */ -/* */ -/* */ -/*********************************************************************/ -Alignment ** cat_aln_list ( Alignment **list_to_cat,int first, int end, Alignment **rec_list); - -/*********************************************************************/ -/* */ -/* NUMBER ARRAY ANALYSE */ -/* */ -/* */ -/*********************************************************************/ -FILE * output_array_int (int **array, int len, int nf ,FILE *fp); -short return_max_short (short ** array, int len_array, int field); -char return_max_char( char ** array, int len_array, int field); -int return_max_int( int ** array, int len_array, int field); -float return_max_float( float ** array, int len_array, int field); -double return_max_double( double** array, int len_array, int field); - -short return_min_short (short ** array, int len_array, int field); -char return_min_char( char ** array, int len_array, int field); -int return_min_int( int ** array, int len_array, int field); -float return_min_float( float ** array, int len_array, int field); -double return_min_double( double** array, int len_array, int field); - -short return_max_coor_short (short ** array, int len_array, int field, int *coor); -char return_max_coor_char( char ** array, int len_array, int field, int *coor); -int return_max_coor_int( int ** array, int len_array, int field, int *coor); -float return_max_coor_float( float ** array, int len_array, int field, int *coor); -double return_max_coor_double( double** array, int len_array, int field, int *coor); - -short return_min_coor_short (short ** array, int len_array, int field, int *coor); -char return_min_coor_char( char ** array, int len_array, int field, int *coor); -int return_min_coor_int( int ** array, int len_array, int field, int *coor); -float return_min_coor_float( float ** array, int len_array, int field, int *coor); -double return_min_coor_double( double** array, int len_array, int field, int *coor); - -short return_2Dmax_short (short ** array, int start, int len_array, int first_field, int number_field); -char return_2Dmax_char( char ** array, int start, int len_array, int first_field, int number_field); -int return_2Dmax_int( int ** array, int start, int len_array, int first_field, int number_field); -float return_2Dmax_float( float ** array, int start, int len_array, int first_field, int number_field); -double return_2Dmax_double( double** array, int start, int len_array, int first_field, int number_field); - -short return_2Dmin_short (short ** array, int start, int len_array, int first_field, int number_field); -char return_2Dmin_char( char ** array, int start, int len_array, int first_field, int number_field); -int return_2Dmin_int( int ** array, int start, int len_array, int first_field, int number_field); -float return_2Dmin_float( float ** array, int start, int len_array, int first_field, int number_field); -double return_2Dmin_double( double** array, int start, int len_array, int first_field, int number_field); - -short return_2Dmax_coor_short ( short ** array,int start1, int end1, int start2, int end2, int *i, int *j ); -char return_2Dmax_coor_char( char ** array, int start1, int end1, int start2, int end2, int *i, int *j); -int return_2Dmax_coor_int( int ** array, int start1, int end1, int start2, int end2, int *i, int *j); -float return_2Dmax_coor_float( float ** array, int start1, int end1, int start2, int end2, int *i, int *j); -double return_2Dmax_coor_double( double** array, int start1, int end1, int start2, int end2, int *i, int *j); - -short return_2Dmin_coor_short ( short ** array, int start1, int end1, int start2, int end2, int *i, int *j); -char return_2Dmin_coor_char( char ** array, int start1, int end1, int start2, int end2, int *i, int *j); -int return_2Dmin_coor_int( int ** array, int start1, int end1, int start2, int end2, int *i, int *j); -float return_2Dmin_coor_float( float ** array, int start1, int end1, int start2, int end2, int *i, int *j); -double return_2Dmin_coor_double( double** array, int start1, int end1, int start2, int end2, int *i, int *j); - -double return_wmean_short ( short ** array, int len, int wfield, int field); -double return_wmean_char ( char ** array, int len, int wfield, int field); -double return_wmean_int ( int ** array, int len, int wfield, int field); -double return_wmean_float ( float ** array, int len, int wfield, int field); -double return_wmean_double( double** array, int len, int wfield, int field); - -double return_mean_short ( short ** array, int len, int field); -double return_mean_char ( char ** array, int len, int field); -double return_mean_int ( int ** array, int len, int field); -double return_mean_float ( float ** array, int len, int field); -double return_mean_double ( double** array, int len, int field); - -short return_sum_short ( short ** array, int len, int field); -char return_sum_char ( char ** array, int len, int field); -int return_sum_int ( int ** array, int len, int field); -float return_sum_float ( float ** array, int len, int field); -double return_sum_double( double** array, int len, int field); - -short return_sd_short ( short ** array, int len, int field, short mean); -char return_sd_char ( char ** array, int len, int field, char mean); -int return_sd_int ( int ** array, int len, int field, int mean); -float return_sd_float ( float ** array, int len, int field, float mean); -double return_sd_double( double** array, int len, int field, double mean); - -double return_z_score ( double x, double sum, double sum2, double n); -double* return_r (double **list, int n); -short* invert_list_short ( short * array, int len ); -char* invert_list_char ( char * array, int len ); -int* invert_list_int ( int * array, int len ); -float* invert_list_float ( float * array, int len ); -double* invert_list_double( double* array, int len ); - -void swap_short ( short * array, short * array2,int len ); -void swap_char ( char * array, char * array2,int len ); -void swap_int ( int * array, int * array2,int len ); -void swap_float ( float * array, float * array2,int len ); -void swap_double( double* array, double* array2,int len ); - -short return_max_short_hor (short ** array, int len_array, int field); -char return_max_char_hor (char ** array, int len_array, int field); -int return_max_int_hor (int ** array, int len_array, int field); -float return_max_float_hor (float ** array, int len_array, int field); -double return_max_double_hor(double ** array, int len_array, int field); - -short return_min_short_hor ( short ** array, int len_array, int field); -char return_min_char_hor ( char ** array, int len_array, int field); -int return_min_int_hor ( int ** array, int len_array, int field); -float return_min_float_hor ( float ** array, int len_array, int field); -double return_min_double_hor( double** array, int len_array, int field); - -short best_short (int n, ...); -int best_int (int n, ...); -char best_char (int n, ...); -float best_float (int n, ...); -double best_double(int n, ...); - -int is_defined_short (int n, ...); -int is_defined_int (int n, ...); -int is_defined_char (int n, ...); -int is_defined_float (int n, ...); -int is_defined_double(int n, ...); - - -int max_int (int*i, ...); - -int return_maxlen ( char ** array, int number); -int return_minlen ( char ** array, int number); - -float return_mean_diff_float ( float **array, int len, int field,float mean); - - -void inverse_int ( int**array, int len, int field, int max, int min); -void inverse_float ( float**array, int len, int field, int max, int min); -void inverse_2D_float ( float **array, int start, int len, int start_field, int number_field, float max,float min); - - - - -void **recycle (void **A, int l, int cycle); - -/*********************************************************************/ -/* */ -/* SHELL INTERFACES */ -/* */ -/* */ -/*********************************************************************/ -char* getenv4debug ( const char *var); -char* get_env_variable ( const char *var, int mode); -void setenv_func ( char *string_name, char *string_value); -void get_pwd ( char *name); -char *pg2path (char *pg); -int pg_is_installed ( char *pg); -/*********************************************************************/ -/* */ -/* MISC */ -/* */ -/*********************************************************************/ -char *num2plot (int value, int max, int line_len); -int perl_strstr ( char *string, char *pattern); -float grep_function ( char *pattern, char *file); -void crash_if ( int val, char *s); -void crash ( char *s); -int ** make_recursive_combination_table ( int tot_n_param, int *n_param, int *nc, int**table, int field); -/*********************************************************************/ -/* */ -/* STRING PROCESSING */ -/* */ -/* */ -/*********************************************************************/ -char *strnrchr ( char *s,char x, int n); -int intlen (int n); -char * update_string (char *string1, char *string2); -char* strcatf (char *string1,char *string2, ...); -char *vcat (char *v1, char *v2); - -int strget_param ( char *string, char *param_name, char *param_value, char *format, ...); -char * lstrstr ( char *in, char *token); -char * vstrstr ( char *in, char *token); -int strscanf (char *in, char *token, char *format, ...); -int match_motif ( char *string, char **motif); - -char *after_strstr (char *string, char *token); - -char ** push_string (char *val, char **stack, int *nval, int mode); -int vsrand (int val); -int *randomize_list (int *list, int len, int ncycle); -int vstrcmp (const char *s1, const char *s2); -int vstrncmp (const char *s1, const char *s2, int n); -FILE *print_array_char (FILE *out, char **array, int n, char *sep); - -char *extract_suffixe ( char *array); -char * path2filename ( char *array); -char *filename2path (char *name); -Fname* parse_fname ( char *array); - -void string_array_convert ( char **array, int n_strings, int ns, char **sl); -void string_convert( char *string, int ns, char **sl); -int convert ( char c, int ns, char **sl); -int convert2 ( char c, char *list); - -void string_array_upper ( char **string, int n); -void string_array_lower ( char **string, int n); -char *upper_string ( char *string); -char *lower_string ( char *string); -char * substitute_double ( char *string, char *token); -char * substitute ( char *string, char *token, char *replacement); -char * substitute_char ( char *string, char token, char replacement); -char * substituteN ( char *string, char *token, char *replacement, int N); -char * tild_substitute ( char *string, char *token, char *replacement); - - -char ** clean_string ( int n, char **string); - -int str_overlap ( char *string1, char *string2, char x); -int get_string_line ( int start, int n_lines, char *in, char *out); -FILE * output_string_wrap ( int wrap,char *string, FILE *fp); -char * extract_char ( char * array, int first, int len); -int check_cl4t_coffee (int argv, char **argc); - -char** break_list ( char **argv, int *argc, char *separators); -char** merge_list ( char **argv, int *argc); -int *name_array2index_array ( char **list1, int n1, char **list2, int n2); -char ** get_list_of_tokens ( char *string, char *separators, int *n_tokens); -char **ungap_array(char ** array, int n); -void ungap ( char *seq); -int seq2len (char *seq, char *pset, char *nset); -int seq2res_len (char *seq); -void remove_charset ( char *seq, char *set); -char *remove_charset_from_file (char *fname, char *set); -char *mark_internal_gaps(char *seq, char symbol); - -char *list2string (char **list, int n); -char *list2string2 (char **list, int n, char* sep); - -char ** string2list (char *string); -char ** string2list2(char *string, char *separators); -int * string2num_list( char *string); -int * string2num_list2( char *string, char *separators); -char **char_array2number ( char ** array, int n); -char *char2number ( char * array); -long atop(char *); -char *invert_string (char *string); -char *invert_string2 (char *string); -char *string2inverted_string (char *string); -/* Analyse and Compare Strings*/ -int isblanc ( char *buf); -/*int islower (char c); -int isupper (char c); -*/ -void splice_out ( char *seq, char x); -char* splice_out_seg ( char *seq,int pos, int len); - -int is_number ( char *buf); -int is_alpha_line ( char *buf); -int is_alnum_line ( char *buf); -int case_insensitive_strcmp ( char *string1, char *string2); -int get_string_sim ( char *string1, char *string2, char *ignore); - -int is_gap ( char x); -int is_gop (int p, char *s); - -int is_aa ( char x); -int is_dna ( char x); -int is_rna ( char x); - - - -char * get_alphabet ( char *seq, char *alphabet); -int is_in_set ( char r, char *list); -int array_is_in_set (char *array, char *set); -char * generate_void ( int x); -char * generate_null ( int x); -char * generate_string ( int x, char y); - - -char * translate_string (char *string, char *in, char*out); -int get_longest_string (char **array,int n, int *len, int *index); -int get_shortest_string (char **array,int n, int *len, int *index); -/*EDIT STRING*/ -char **pad_string_array ( char **array, int n, int len, char pad); -char * crop_string (char *string, int start, int end); -int get_distance2char ( char *x, char *list); - -/*********************************************************************/ -/* */ -/* TIME FUNCTIONS */ -/* */ -/* */ -/*********************************************************************/ -FILE *print_program_information (FILE *fp, char *comment); -FILE* print_cpu_usage (FILE *fp, char *comment); -void print_exit_success_message (); -void print_exit_failure_message (); - -int get_time (); -int get_ctime (); -int reset_time(); -int increase_ref_time(int increase); -/*********************************************************************/ -/* */ -/* SYSTEM CALLS */ -/* */ -/* */ -/*********************************************************************/ -pid_t **declare_pidtable (); -pid_t set_pid (pid_t p); -pid_t vfork(); -pid_t vwait (pid_t *p); -int vwait_npid (int submited, int max, int min); -int kill_child_pid(); - -int safe_system (const char * commande); -pid_t vwaitpid (pid_t p, int *status, int options); - -int evaluate_sys_call_io ( char *out_file, char *com, char *fonc); -void HERE (char *string, ...); -void printf_exit (int exit_code, FILE *fp, char *string, ...); -int printf_file ( char *file, char *mode, char *string, ...); -int printf_fork ( FILE *fp,char *string,...); -int printf_system (char *string, ...); -int printf_system_direct (char *string, ...); -int my_system_cl (int argc, char *argv[]); -int my_system ( char *command); -int unpack_perl_script (char *name, char ***unpacked, int n); -void unpack_all_perl_script (char *script); -/*********************************************************************/ -/* */ -/* IO FUNCTIONS */ -/* */ -/* */ -/*********************************************************************/ -FILE * print_command_line (FILE *fp ); -int getpid_ref(); -char ** standard_initialisation ( char **in_argv, int *in_argc); -char ** standard_initialisation_start ( char **in_argv, int *in_argc); -char ** standard_initialisation_end ( char **in_argv, int *in_argc); -/* -by default : dir_4_tcoffee: $HOME/.t_coffee -tmp: dir_4_tcoffee/tmp OR TMP_4_TCOFFEE -cache: idem -methods: idem -mcoffee: idem -*/ -int get_nproc (); -char *get_os(); -char *get_plugins_4_tcoffee (char *mode); -char *get_home_4_tcoffee(); -char *get_dir_4_tcoffee(); -char *get_tmp_4_tcoffee(); -char *get_cache_4_tcoffee(); -char *get_methods_4_tcoffee(); -char *get_mcoffee_4_tcoffee(); - - - -void myexit (int signal); -FILE *fatal_exit ( FILE *fp, int exit_signal, char *string, ...); -int set_warning_mode ( int mode); -FILE *add_warning (FILE *fp, char *string, ...); -void output_warning_list(); - -int count_n_res_in_array (char *array, int len); -int count_n_gap_in_array (char *array, int len); -int count_n_symbol_in_array ( char *array, char *array_list, int len); -char* count_strings_in_file ( char *in, char *out); -char** count_strings ( char **array, int len); -int ** count_int_strings ( int **array, int len, int s); - -int get_first_non_white_char (char *name); -int count_n_char_x_in_file(char *name, char x); -int count_n_char_in_file(char *name); -int count_n_line_in_file(char *name); -int measure_longest_line_in_file ( char *name ); -int file_cat ( char *fname1, char *fname2); -FILE* display_file_content (FILE *output, char *name); -int cat_file (char *file1, char *file2); -char ***file2list (char *name, char *sep); -char ** file2lines (char *name); -char * file2string (char *name); -char *string2file ( char *s, char *f, char *m); -char *chomp (char *name); -int get_cl_param (int argc, char **argv, FILE **fp,char *para_name, int *set_flag, char *type, int optional, int max_n_val,char *usage, ...); -char ** get_parameter ( char *para_name, int *np, char *fname); - -char *get_t_coffee_environement (char *file); -char *set_path_4_plugins (char *); -int add_package2_tcoffee_env (char *package); - - -char *Proxy(int input_mode, int set_mode); -char *proxy_message (); -char *input_proxy (); -char *get_proxy(); -char *get_proxy_from_env(); -int set_proxy (char *proxy); - -char *input_name (); -char *Email4cl(int input_mode, int set_mode); -char *Email(int input_mode, int set_mode); -char *input_email (); -char *get_email_from_env (); -char *get_email (); -int set_email (char *email); -int cputenv (char*, ...); -char *file_putenv (char *file); -int check_dir_getenv ( char *string); - -char* set_string_variable (char *var, char* v); -char* get_string_variable (char *var); -char* unset_string_variable (char *var); -char* store_string_variable (char *var, char * v, int mode); - -int int_variable_isset (char *var); -int set_int_variable (char *var, int v); -int get_int_variable (char *var); -int unset_int_variable (char *var); -int store_int_variable (char *var, int v, int mode); - -void check_vtmpnam (); -int flag_file2remove_is_on (); -void set_file2remove_off(); -void set_file2remove_on(); -char *set_file2remove_extension(char *extension, int mode); -char * add2file2remove_list ( char *name); - - - -FILE * vtmpfile(); -void initiate_vtmpnam (char *s); -char * vtmpnam ( char *s); -char * tmpnam_2 (char *s); -char * vremove ( char *s); -char * vremove2 ( char *s); -void error_exit (); -void clean_exit(); -void main_exit (); -int log_function (char *fname); - -void clean_function ( ); -void sig_clean_function ( int x); -char * prepare_cache ( const char *mode); -char * get_cache_dir(); -void update_cache (); -void ignore_cache(); - -FILE * vfopen ( char *name, char *mode); -FILE * vfclose (FILE *fp); -int echo ( char *string, char *fname); - -int **get_file_block_pattern (char *fname, int *n_blocks, int max_n_line); - -int token_is_in_file (char *fname, char *token); -FILE * find_token_in_file_nlines ( char *fname, FILE * fp, char *token, int n_line); -FILE * find_token_in_file ( char *fname, FILE * fp, char *token); -char * vfgets (char *buf, FILE *fp); - -FILE * set_fp_after_char ( FILE *fp, char x); -FILE * set_fp_id ( FILE *fp, char *id); -FILE * skip_commentary_line_in_file ( char com, FILE *fp); -char * strip_file_from_comments (char *com, char *in_file); - -int check_for_update ( char *web_address); -int url2file (char *address, char *out); -int wget (char *address, char *out); -int curl (char *address, char *out); - - -int simple_check_internet_connection (char *address); -int check_internet_connection (int mode); -int check_environement_variable_is_set ( char *variable, char *description, int fatal); -int check_program_is_installed ( char *program_name, char *current_path, char *path_variable, char *where2getit, int fatal); -FILE * display_output_filename ( FILE *io, char *type, char *format, char *name, int check_output); -FILE * display_input_filename ( FILE *io, char *type, char *format, char *name, int check_output); -int filename_is_special ( char *fname); -char *check_file_exists ( char *fname); -int my_mkdir ( char *dir); -int file_is_empty(char *fname); -int file_exists (char *path,char *fname); -int isexec (char *fname); -int isdir (char *fname); -int rrmdir (char *fname); -char * ls_l(char *path,char *fname); - -void create_file ( char *name); -void delete_file ( char *fname); -int util_rename ( char* from, char *to); -int util_copy ( char* from, char *to); -FILE * output_completion4halfmat ( FILE *fp,int n, int tot, int n_eports, char *s); -FILE * output_completion ( FILE *fp,int n, int tot, int n_eports, char *s); -void * null_function (int a, ...); -int btoi ( int nc,...); -/*********************************************************************/ -/* */ -/* Geometric FUNCTIONS */ -/* */ -/* */ -/*********************************************************************/ - -float get_geometric_distance ( float ** matrix, int ncoor, int d1, int d2, char *mode); -/*********************************************************************/ -/* */ -/* MATHEMATICAL FUNCTIONS */ -/* */ -/* */ -/*********************************************************************/ -double log_addN ( int N, double *L); -double log_add6 (double a, double b, double c, double d, double e, double f ); -double log_add5 (double a, double b, double c, double d, double e); -double log_add4 (double a, double b, double c, double d); -double log_add3 (double a, double b, double c); -double log_add2 (double a, double b); - -float factorial_log ( int start, int end); -float M_chooses_Nlog ( int m, int N); -double factorial ( int start, int end); -double M_chooses_N ( int m, int N); -float my_int_log(int a); -/*********************************************************************/ -/* */ -/* Fast Log Additions (adapted from Probcons)*/ -/* */ -/* */ -/*********************************************************************/ -double EXP(double x); -float LOOKUP (float x); -void LOG_PLUS_EQUALS (float *x, float y); -float LOG_ADD (float x, float y); -float LOG_ADD3 (float x1, float x2, float x3); -float LOG_ADD4 (float x1, float x2, float x3, float x4); -float LOG_ADD5 (float x1, float x2, float x3, float x4, float x5); -float LOG_ADD6 (float x1, float x2, float x3, float x4, float x5, float x6); -float LOG_ADD7 (float x1, float x2, float x3, float x4, float x5, float x6, float x7); -/////////////////////////////////////////////////////////////////////////////////////////// -// Hash function -//////////////////////////////////////////////////////////////////////////////////////////// -unsigned long hash_file(char* file); //returns the hash value for key -/////////////////////////////////////////////////////////////////////////////////////////// -// Generating lists through recirsive exploration -//////////////////////////////////////////////////////////////////////////////////////////// -int **generate_array_int_list (int len, int min, int max, int step, int *n, char *filename); -char ***generate_array_string_list (int len, char ***alp, int *alp_size, int *n, char *file, int mode); -float rates2sensitivity (int tp, int tn, int fp, int fn, float *sp, float *sn, float *sen2, float *best); -float profile2sensitivity (char *pred, char *ref, float *sp, float *sn, float *sen2, float *b); -float profile2evalue (char *pred, char *ref); -//isexec lib -#include - -unsigned long linrand(unsigned long r); -unsigned long addrand(unsigned long r); -void addrandinit(unsigned long s); - -unsigned long mult(unsigned long p,unsigned long q); - - -struct Job_TC - { - int jobid; - int status; - - struct Job_TC *c; - struct Job_TC *p; - struct Job_io_TC *io; - struct Job_control_TC *control; - - struct Job_param_TC *param; - - /*memory mangement*/ - char **pl; - int np; -}; -typedef struct Job_TC Job_TC; - -struct Job_control_TC - { - - struct Job_TC* (*submitF) (struct Job_TC*); - struct Job_TC* (*retrieveF)(struct Job_TC*); - char *mode; -}; -typedef struct Job_control_TC Job_control_TC; - -struct Job_io_TC - { - char *in; - char *out; - struct Constraint_list *CL; - struct Alignment *A; -}; -typedef struct Job_io_TC Job_io_TC; - -struct Job_param_TC -{ - char *method; - struct TC_method *TCM; - char *temp_c; - char *aln_c; - char *seq_c; - char *aln_mode; -}; -typedef struct Job_param_TC Job_param_TC; - -Job_TC* print_lib_job ( Job_TC *job,char *string, ...); -Job_TC *print_lib_job2 ( Job_TC* job, int n, char **name, char **value); - - -/*Stack Manipulation*/ -Job_TC *free_queue (Job_TC *job); -Job_TC *free_job (Job_TC *job); -Job_TC * queue2heap (Job_TC*job); -Job_TC * queue2last (Job_TC*job); -int queue2n (Job_TC*job); -Job_TC * descend_queue (Job_TC*job); -Job_TC *queue_cat (Job_TC *P, Job_TC *C); -Job_TC *delete_job (Job_TC *job); -/*Job Control*/ -struct Job_TC* submit_job ( Job_TC *job); -struct Job_TC* retrieve_job ( Job_TC *job); -Job_TC*** split_job_list (Job_TC *job, int ns); -struct Dps_result - { - int njobs; - struct Dps_job **dps_job; -}; -typedef struct Dps_result Dps_result; - -struct Dps_job - { - int JobId; - struct Constraint_list *CL; - char *input_file; - char *output_file; -}; -typedef struct Dps_job Dps_job; - -struct Dps_result *seq2list_DPS (struct Constraint_list *CL,char *method, char *aln_command, char *seq_command, char *weight, Dps_result *dps_result); -struct Constraint_list * gather_results_DPS ( Dps_result *DPS, struct Constraint_list *CL); -Dps_result *declare_dps_result ( int naln, Dps_result *dps); -#define SEQ1 0 -#define SEQ2 1 -#define R1 2 -#define R2 3 -#define WE 4 -#define CONS 5 -#define MISC 6 -#define LIST_N_FIELDS 7 -#define CLIST_TYPE int - -/*********************************************************************************************/ -/* */ -/* FUNCTIONS Typedef */ -/* */ -/*********************************************************************************************/ -typedef int (*Profile_cost_func) (int*, int *,struct Constraint_list *); -typedef int (*Col_cost_func)(Alignment*, int **, int, int*, int, int **, int, int*, int, struct Constraint_list *); -typedef int (*Pair_cost_func)(struct Constraint_list *, int, int, int, int); -typedef int (*Pwfunc) (Alignment *, int*, int **,struct Constraint_list *); - -/*********************************************************************************************/ -/* */ -/* STRUCTURES FOR PDB ANALYSIS */ -/* */ -/*********************************************************************************************/ -typedef struct - { - char *use_seqan; -} -TC_param; -typedef struct - { - char blast_server[FILENAMELEN+1]; - char db[FILENAMELEN+1]; - int min_cov; - int min_id; - int max_id; -} -Blast_param; - -typedef struct - { - int n_excluded_nb; - - float similarity_threshold; - float rmsd_threshold; - float md_threshold; - int distance_on_request; - char *comparison_io; - int print_rapdb; - float maximum_distance;/*Diameter of the bubble used to identify the Calpha Neighborhood*/ - int N_ca; /*Number of Calpha to be looked at on both side*/ - float max_delta ; /*Maximum value for delta to be positive*/ - char *local_mode; - int scale; /*Value substracted to the pdb score in the bubble mode*/ - int n_extra_param; - char **extra_param; - char *evaluate_mode; - char *color_mode; - float filter; - int filter_aln; - int irmsd_graph; - int nirmsd_graph; - - - } -Pdb_param; - -typedef struct - { - int num; - int res_num;/*Residue number from 1 to N*/ - char res[4]; - char type[4]; - float x; - float y; - float z; - } -Atom; - -typedef struct - { - - Atom*CA; - Atom *C; - Atom *N; - Atom *CB; - } -Amino_acid; - - -typedef struct - { - /*Distances used for the Neighbour mode*/ - int **nb; /*Neighbors of each Ca ( sorted by distance) given as atoms*/ - /*nb[x][0] contains the number of neighbor atoms*/ - float **d_nb; /* contains the distances between atom y=nb[x][5] and Ca x*/ - /* !!!d_nb[x][0] is empty, the array starts at +1 to folow nb*/ - int max_nb; /* Largest neigborhood*/ -} -Struct_nb; - -typedef struct - { - - int len; /*Number of Calpha Carbons*/ - int n_atom; /*Number of atoms*/ - char *name; /*Name of the sequence*/ - char *seq; /*Sequence ( Complete)*/ - Atom **structure; /*Atoms*/ - Atom **ca; /*List of pointers to the Calpha Atoms from 0 to N-1*/ - Amino_acid **peptide_chain;/*List of pointers to the Calpha Atoms from 0 to N-1*/ - - - Struct_nb *Chain; - Struct_nb *Bubble; - Struct_nb *Transversal; - - float ** ca_dist; - Pdb_param *pdb_param; -} - -Ca_trace; -/*********************************************************************************************/ -/* */ -/* MOCA: Data structure for domains and alignments */ -/* */ -/*********************************************************************************************/ -struct Moca -{ - /*Normalisation factor: value by which each constraint weight is decreased*/ - int moca_scale; - /*Functions used for domain extraction:*/ - /*Function for evaluating the score of a domain: returns 0 if not acceptable, value if OK*/ - int (*evaluate_domain)(Alignment*,struct Constraint_list *); - int moca_threshold; - - /*Function for hiding previously used residues*/ - int ** (*cache_cl_with_domain)(Alignment*, struct Constraint_list *); - int **forbiden_residues; /*List of residues already used for domain construction*/ - - - /*Function for trunkating the result into a non-overlapping alignment*/ - Alignment* (*make_nol_aln)(Alignment*, struct Constraint_list *); - - /*Parameters Coordinates of the first motif to extract*/ - int moca_start; - int moca_len; - int moca_interactive; - -}; -typedef struct Moca Moca; -/*********************************************************************************************/ -/* */ -/* CONSTRAINT LISTS */ -/* */ -/*********************************************************************************************/ -struct Distance_matrix -{ - char mode[100]; - char sim_mode[100]; - char nseq; - int **similarity_matrix; /*Pairwise ID levels: 1-10000*/ - int **score_similarity_matrix; /*Pairwise ID levels: 1-10000*/ - int **distance_matrix; /*Pairwise ID levels: 1-10000*/ -}; -typedef struct Distance_matrix Distance_matrix; -struct Constraint_list - { - /*In Case of Modif, synchronize with: - util_declare/declare_constraint_list - util_declare/cache_dp_value4constraint_list - util_declare/duplicate_constraint_list - util_declare/free_constraint_list - */ - - //Generic parameters - TC_param *TC; - - int copy_mode; - struct Constraint_list *pCL; - Sequence *S; /*Total sequences*/ - Sequence *STRUC_LIST; /*Name of the sequences with a Structure*/ - char align_pdb_param_file[FILENAMELEN+1]; - char align_pdb_hasch_mode[FILENAMELEN+1]; - - - Weights *W; /*Sequence Weights*/ - Distance_matrix *DM; /*Accurate Distance Matrix*/ - Distance_matrix *ktupDM; /*Fast Distance Matrix*/ - Fname *RunName; - - int *translation; - char ** out_aln_format; - int n_out_aln_format; - - - /*Packing Sequence: To use with domain analysis*/ - int **packed_seq_lu; - - /*DATA*/ - FILE *fp; /*File used for i/o if disk being used*/ - int *L; /*Array used for storing Lib if mem being used*/ - int **M; /*substitution matrix*/ - char rna_lib[FILENAMELEN+1]; /*name of a file containing the RNA libraries*/ - - /*List Information*/ - int ne; /*Number of elements in the list*/ - char list_name[1000]; /*Name of the list*/ - int entry_len; /*Size of an entry in el_size*/ - size_t el_size; /*Size of each elements in an entry in bytes*/ - - /*Normalisation information*/ - int normalise; - int max_ext_value; - int max_value; - int overweight; - int filter_lib; - - /*Pair wise alignment method*/ - int pw_parameters_set; - int gop; - int gep; - int f_gop; - int f_gep; - int nm_gop; - int nm_gep; - - int nomatch; - - int TG_MODE; - int F_TG_MODE; - - char dp_mode[FILENAMELEN+1]; - int reverse_seq; - int maximise; - char matrix_for_aa_group[FILENAMELEN+1]; - char method_matrix[FILENAMELEN+1]; - float diagonal_threshold; - int ktup; - int use_fragments; - int fasta_step; - int lalign_n_top; - int sw_min_dist; - char **matrices_list; - int n_matrices; - char tree_mode[FILENAMELEN+1]; - - char distance_matrix_mode[FILENAMELEN+1]; - char distance_matrix_sim_mode[FILENAMELEN+1]; - - Alignment *tree_aln; - - /*Functions used for dynamic programming and Evaluation*/ - int no_overaln; - /*1 Function for evaluating the cost of a column*/ - Col_cost_func get_dp_cost; - Profile_cost_func profile_mode; - char profile_comparison [FILENAMELEN+1]; - - /*2 Function for evaluating the cost of a pair of residues*/ - Pair_cost_func evaluate_residue_pair; - /*3 Function for making dynamic programming*/ - Pwfunc pair_wise; - - /* - int (*get_dp_cost)(Alignment*, int **, int, int*, int, int **, int, int*, int, struct Constraint_list *); - int (*evaluate_residue_pair)(struct Constraint_list *, int, int, int, int); - int (*pair_wise)(Alignment *, int*, int **,struct Constraint_list *); - */ - - int weight_field; - int max_n_pair; /*maximum number of pairs when aligning two profiles*/ - - /*Extend a sequence against itself*/ - - /*Threading parameters*/ - Blast_param *Prot_Blast; - Blast_param *Pdb_Blast; - Blast_param *DNA_Blast; - /*Split parameters*/ - int split; - int split_nseq_thres; - int split_score_thres; - /*Check Structural Status*/ - int check_pdb_status; - /*log*/ - char method_log[1000]; - char evaluate_mode[1000]; - char method_evaluate_mode[100]; -/*Parameters for domain extraction*/ - Moca *moca; -/*Functions for hiding forbiden pairs of residues*/ - int ****forbiden_pair_list; /* pair_list[S1][S2][L1][L2]=1 ->forbiden*/ - /* pair_list[S1][S2][L1][L2]=0 ->allowed*/ - /* pair_list[S1][S2][L1]=NULL ->all pairs S1L1, S2 allowed */ - /* S-> sequences, 0..N */ - /* L-> residues , 1..L-1 */ - -/*extention properties: copy*/ - int *seq_for_quadruplet; - int nseq_for_quadruplet; - -/*extention properties: Do Not copy*/ - int extend_jit; /*Extend only on request*/ - int extend_threshold; /*Do not extend pairs below the Theshold*/ - int do_self; /*Extend a sequence against itself*/ - char extend_clean_mode[100]; - char extend_compact_mode[100]; - - -/*Lookup table parameteres*/ -/*!!!!!do not copy in duplication*/ - /*Residue Index contains residue_index[nseq][seq_len][0]->number of links*/ - /*[seq][res][x ]->target seq (0->N-1)*/ - /*[seq][res][x+1]->traget res (1->len*/ - /*[seq][res][x+2]->target weight */ - /*It is automatically recomputed when L residue_indexed is set to 0*/ - int residue_indexed; - int ***residue_index; - int residue_field; - - /*Index of the pairs of sequences within L*/ - int seq_indexed; - int **start_index; - int **end_index; - int max_L_len; - int chunk; - - - - /*PDB STRUCTURE ALIGNMENTS*/ - Ca_trace ** T; /*This structure contains the PDB trace for sequences with a known Struc T[Nseq]*/ - - /*MISC*/ - int cpu; - FILE *local_stderr; - char multi_thread[100]; - char lib_list[FILENAMELEN+1]; -}; - -typedef struct Constraint_list Constraint_list; - -struct TC_method -{ - - char executable[FILENAMELEN+1]; - char executable2[FILENAMELEN+1]; - char in_flag[FILENAMELEN+1]; - char in_flag2[FILENAMELEN+1]; - char out_flag[FILENAMELEN+1]; - char aln_mode[FILENAMELEN+1]; - char out_mode[FILENAMELEN+1]; - char seq_type[FILENAMELEN+1]; - char weight[FILENAMELEN+1]; - char matrix[FILENAMELEN+1]; - int gop; - int gep; - int minid; - int maxid; - char param[1000]; - char param1[1000]; - char param2[1000]; - - Constraint_list *PW_CL; -}; -typedef struct TC_method TC_method; - -/*********************************************************************/ -/* */ -/* PRODUCE IN LIST */ -/* */ -/* */ -/*********************************************************************/ -Constraint_list *produce_list ( Constraint_list *CL, Sequence *S, char * method,char *weight,char *mem_mode); -Job_TC* method2job_list ( char *method, Sequence *S,char *weight, char *lib_list, Distance_matrix *DM, Constraint_list *CL); - -Job_TC *job_list2multi_thread_job_list (Job_TC* ojob, char *mt, Constraint_list *CL); -Job_TC *retrieve_lib_job ( Job_TC *job); -Job_TC *submit_lib_job ( Job_TC *job); -int add_method_output2method_log (char *l, char *command,Alignment *A, Constraint_list *CL, char *iofile); - -int check_seq_type (TC_method *M, char *slist,Sequence *S); -int check_profile_seq_type (Sequence *S, int i, char t); -char **method_list2method4dna_list ( char **list, int n); -int is_in_pre_set_method_list (char *fname); -char *** display_method_names (char *mode, FILE *fp); - -char *method_name2method_file (char *method); -char *make_aln_command(TC_method *m, char *seq, char *aln); -struct TC_method* method_file2TC_method ( char *fname); -char *method_file_tag2value (char *method, char *tag); -int TC_method2method_file( struct TC_method*, char *fname ); -/*********************************************************************/ -/* */ -/* WRITE IN LIST */ -/* */ -/* */ -/*********************************************************************/ -int dump_constraint_list (Constraint_list *CL); -int vread_clist ( Constraint_list *CL, int a, int b ); -int vwrite_clist ( Constraint_list *CL, int a, int b, CLIST_TYPE x); -Constraint_list *index_constraint_list ( Constraint_list *CL); -Constraint_list *index_res_constraint_list ( Constraint_list *CL, int field); -Constraint_list * progressive_index_res_constraint_list ( Alignment *A, int *ns, int **ls, Constraint_list *CL); -char ** reindex_constraint_list (char **profile, int np,char **list, int *inL, Sequence *S); -/*********************************************************************/ -/* */ -/* ENTRY MANIPULATION */ -/* */ -/* */ -/*********************************************************************/ -Constraint_list * add_list_entry2list (Constraint_list *CL, int n_para, ...); -Constraint_list * evaluate_constraint_list_reference ( Constraint_list *CL); -Constraint_list *add_entry2list ( CLIST_TYPE *entry, Constraint_list *CL); -Constraint_list *insert_entry2list ( CLIST_TYPE *entry, int pos,Constraint_list *CL); -CLIST_TYPE* extract_entry(CLIST_TYPE * entry, int pos, Constraint_list *CL); -/*********************************************************************/ -/* */ -/* LIST EXTENTION */ -/* */ -/* */ -/*********************************************************************/ -Constraint_list *extend_list_pair (Constraint_list *CLin,char *store_mode, int s1, int s2); -Constraint_list *extend_list (Constraint_list *CLin, char *store_mode,char *clean_mode, char *compact_mode,int do_self, Sequence *SUBSET); -void get_bounds (Constraint_list *CL, int s1, int s2, int *start, int *end); -int ** fill_pos_matrix (Constraint_list *CL, int beg, int end, int slen, int **pos, int *len, int mirrored); - -/*********************************************************************/ -/* */ -/* SEARCH IN LIST (ARRAY AND FILE) */ -/* */ -/* */ -/*********************************************************************/ -FILE * compare_list (FILE *OUT, Constraint_list *CL1,Constraint_list *CL2); -//CLIST_TYPE *search_in_list_constraint(int *key, int k_len, int **L, int ne, int ***start_index, int ***end_index); -CLIST_TYPE *main_search_in_list_constraint ( int *key,int *p,int k_len,Constraint_list *CL); -Constraint_list *sort_constraint_list_inv (Constraint_list *CL, int start, int len); -Constraint_list *invert_constraint_list (Constraint_list *CL, int start,int len); -Constraint_list * sort_constraint_list (Constraint_list *CL, int start, int len); -Constraint_list * sort_constraint_list_on_n_fields (Constraint_list *CL, int start, int len, int first_field, int n_fields); - -/*********************************************************************/ -/* */ -/* INPUT/OUTPUT */ -/* */ -/* */ -/*********************************************************************/ -Constraint_list* read_n_constraint_list(char **fname,int n_list, char *in_mode,char *mem_mode,char *weight_mode,char *type, FILE *local_stderr, Constraint_list *CL, char *seq_source); -Constraint_list* read_constraint_list(Constraint_list *CL,char *fname,char *in_mode,char *mem_mode,char *weight_mode); -Constraint_list * read_constraint_list_raw_file(Constraint_list *CL, char *fname); - -int read_cpu_in_n_list(char **fname, int n); -int read_seq_in_list ( char *fname, int *nseq, char ***sequences, char ***seq_name); - -Sequence * read_seq_in_n_list(char **fname, int n, char *type, char *SeqMode); - -int read_cpu_in_list ( char *fname); -int ** read_list ( char *fname, int **list,int *ne, int *nseq, int *cpu, char ***sequences, char ***seq_name); - -char * expand_constraint_list_file ( char *file); -Constraint_list * read_constraint_list_file(Constraint_list *CL, char *fname); -Constraint_list * fast_read_constraint_list_file(Constraint_list *CL, char *fname); - -/*********************************************************************/ -/* */ -/* EXTENDED LIST OUTPUT */ -/* */ -/* */ -/*********************************************************************/ -FILE * save_extended_constraint_list ( Constraint_list *CL, char *mode, FILE *fp) ; -FILE * save_extended_constraint_list_pair ( Constraint_list *CL, char *mode, char* seq1, char * seq2,FILE *fp); - -/*********************************************************************/ -/* */ -/* LIST OUTPUT */ -/* */ -/* */ -/*********************************************************************/ -int constraint_list2raw_file ( Constraint_list *CL, char *fname, char *fmode); -FILE * save_raw_constraint_list ( FILE *fp,Constraint_list *CL, int start,int len, int *translation); -FILE * save_constraint_list ( Constraint_list *CL,int start, int len, char *fname, FILE *fp,char *mode,Sequence *S); -FILE * save_sub_list_header ( FILE *OUT, int n, char **name, Constraint_list *CL); -FILE * save_list_header ( FILE *OUT,Constraint_list *CL); -FILE * save_list_footer (FILE *OUT,Constraint_list *CL); -FILE * save_constraint_list_ascii ( FILE *OUT,Constraint_list *CL, int start,int len, int *translation); -FILE * save_constraint_list_bin ( FILE *OUT,Constraint_list *CL, int start,int len, int *translation); - -/*********************************************************************/ -/* */ -/* LIST CONVERTION */ -/* */ -/* */ -/*********************************************************************/ -Constraint_list * shrink_constraint_list (Constraint_list *CL); -Constraint_list * relax_constraint_list (Constraint_list *CL); -Constraint_list * filter_constraint_list (Constraint_list *CL, int field, int T); -int constraint_list_is_connected ( Constraint_list *CL); - -int *seqpair2weight (int s1, int s2, Alignment *A,Constraint_list *CL, char *weight_mode, int *weight); -Constraint_list *aln_file2constraint_list (char *alname, Constraint_list *CL,char *weight_mode); -Constraint_list *aln2constraint_list (Alignment *A, Constraint_list *CL,char *weight_mode); - -double **list2mat (Constraint_list *CL,int s1,int s2, double *min, double *max); -Constraint_list * constraint_list2bin_file(Constraint_list *clist); -FILE * bin_file2constraint_list ( Constraint_list *CL, FILE *fp, char *name); - -int **list2residue_total_weight ( Constraint_list *CL); -int **list2residue_total_extended_weight ( Constraint_list *CL); -int **list2residue_partial_extended_weight ( Constraint_list *CL); -/*******************************************************************************************/ -/* */ -/* */ -/* clean functions */ -/* */ -/* */ -/* */ -/*******************************************************************************************/ -Constraint_list *clean ( char *clean_mode,Constraint_list *C,int start, int len); -Constraint_list * clean_shadow ( Constraint_list *CL, int start, int len); - -/*********************************************************************/ -/* */ -/* LIST FUNCTIONS */ -/* */ -/* */ -/*********************************************************************/ -Constraint_list *merge_constraint_list ( Constraint_list *SL, Constraint_list *ML, char *mode); -CLIST_TYPE return_max_constraint_list ( Constraint_list *CL, int field); -Constraint_list *modify_weight( Constraint_list *CL,int start, int end, char *modify_mode); -Constraint_list *compact_list (Constraint_list *CL, int start, int len, char *compact_mode); -Constraint_list *rescale_list_simple (Constraint_list *CL,int start, int len,int new_min, int new_max); -Constraint_list *rescale_list (Constraint_list *CL,int start, int len,int max1, int max2); -Constraint_list* filter_list (Constraint_list *CL, int start, int len,int T); -Constraint_list *undefine_list (Constraint_list *CL); -int ** seq2defined_residues ( Sequence *S, Constraint_list *CL); -int ** aln2defined_residues ( Alignment *A, Constraint_list *CL); -/*********************************************************************/ -/* DEBUG */ -/* */ -/*********************************************************************/ -void print_CL_mem(Constraint_list *CL, char *function); -int constraint_list_is_sorted ( Constraint_list *CL); -void check_seq_pair_in_list(Constraint_list *CL,int seq1, int seq2); -/******************************************************************/ -/* NEW METHODS */ -/* */ -/* */ -/******************************************************************/ - -Constraint_list * align_coding_nucleotides (char *seq, char *method, char *weight, char *mem_mode, Constraint_list *CL); -/*********************************************************************************************/ -/* */ -/* FUNCTION FOR PRUNING THE LIST */ -/* */ -/*********************************************************************************************/ -char * list2prune_list (Sequence *S, int **sm); -/*********************************************************************************************/ -/* */ -/* FUNCTION FOR WEIGHTING THE LIST */ -/* */ -/*********************************************************************************************/ -Constraint_list *weight_constraint_list(Constraint_list * CL, char *seq_weight); -Weights* compute_t_coffee_weight(Constraint_list * CL); -Constraint_list *re_weight_constraint_list(Constraint_list * CL,Weights *W); - - -Distance_matrix *cl2distance_matrix (Constraint_list *CL, Alignment *A, char *mode, char *sim_mode, int print); -Distance_matrix *seq2distance_matrix (Constraint_list *CL, Alignment *A, char *mode, char *sim_mode, int print); - -/*********************************************************************************************/ -/* */ -/* MULTI_THREAD */ -/* */ -/*********************************************************************************************/ -int run_multi_thread_file (char *fname, char *config); -/*********************************************************************/ -/* */ -/* RNA FUNCTIONS */ -/* */ -/* */ -/*********************************************************************/ -char * seq2rna_lib ( Sequence *S, char *name); -Constraint_list *read_rna_lib ( Sequence *S, char *fname); -Constraint_list *rna_lib_extension ( Constraint_list *CL, Constraint_list *R); -char *** produce_method_file ( char *method); -typedef struct - { - int p1; - int p2; - int p3; - int p4; - int t; - int f; - char mode[20];//lower, unalign - char model[20];//fsa1 fsa2 -} -OveralnP; - -//RNA - -int ** alifold_list2cov_list (Alignment *A, int **list); -int ** update_RNAfold_list (Alignment *A, int **pos, int s, int **l); -int ** vienna2list ( char *seq); -Alignment *compare_RNA_fold ( Alignment *A, Alignment *B); - -Alignment *alifold2analyze (Alignment *A, Alignment *ST, char *mode); -Alignment *alifold2cov_aln (Alignment *A, int **l, int ug); -Alignment *alifold2cov_stat (Alignment *A, int **l, int ug); -Alignment *alifold2cov_list (Alignment *A, int **l, int ug); -Alignment *alifold2cov_cache (Alignment *inA, int **l, int ug); - - -Alignment *add_alifold2aln (Alignment *A, Alignment *ST); -Alignment *aln2alifold(Alignment *A); - -//end -Alignment * aln2bootstrap (Alignment *A, int n); -Alignment * aln2sample (Alignment *A, int n); -Alignment * aln2random_aln (Alignment *A, char *mode); -Alignment *aln2scale (Alignment *A, char *offset); -Alignment* aln2case_aln (Alignment *A, char *upper, char *lower); -Alignment*aln2gap_cache (Alignment *A, int val); -Alignment *score_aln2score_ascii_aln (Alignment *A, Alignment *C); -int **aln2resindex ( Alignment *A, Alignment *B, FILE *fp); -int **index_seq_res ( Sequence *S1, Sequence *S2, int **name_index); -int **index_seq_name ( Sequence *S1, Sequence *S2); -int *get_name_index (char **l1, int n1, char **l2, int n2); - -int* get_res_index (char *seq1, char *seq2); -int * pos2list (int * pos, int len, int *nl); - int *list2pos (int *list, int nl, int len); - - -int change_residue_coordinate ( char *in_seq1, char *in_seq2, int v); - -int ** minimise_repeat_coor (int **coor, int nseq, Sequence *S); -int ** get_nol_seq( Constraint_list *CL,int **coor, int nseq, Sequence *S); - - -int compare_pos_column( int **pos1,int p1, int **pos2,int p2, int nseq); - - - -char * seq2alphabet (Sequence *S); -char *aln2alphabet (Alignment *A); -char *array2alphabet (char **array, int n, char *forbiden); - -//TM Predictions -char* alnpos2hmmtop_pred (Alignment *A, Alignment *Pred, int pos, int mode); -Alignment * aln2hmmtop_pred (Alignment *A); -char * seq2tmstruc ( char *seq); - -char * set_blast_default_values(); -char * seq2pdb ( Sequence *S); -Alignment * seq2blast ( Sequence *S); - -Sequence * seq2unique_name_seq ( Sequence *S); -Alignment * aln2unique_name_aln ( Alignment *S); -int name_list2unique_name_list (int n, char **name); -Sequence *seq2clean_seq ( Sequence *S, char *alp);//remove all alp characters from seq - - -int ** seq2aln_pos (Alignment *A, int *n, int **ls); -Alignment *padd_aln ( Alignment *A); -char **padd_string ( char **string, int n,char pad); - -Alignment *local_maln2global_maln (char *seq, Alignment *A); - -Alignment * seq2profile (Sequence *S, int index); - -Sequence *remove_empty_sequence (Sequence *S); -Alignment * aln2profile (Alignment * A); -Alignment * aln2collapsed_aln (Alignment * A, int n, char **string); -Alignment* aln2sub_aln_file (Alignment *A, int n, char **string); -Alignment* aln2sub_seq (Alignment *A, int n, char **string); - -int ** aln2inv_pos (Alignment *A); -int * seq2inv_pos ( char *seq); -int ** aln2pos_simple (Alignment *A, int n_nseq, ...); -int ** aln2pos_simple_2 (Alignment *A); -Alignment ** split_seq_in_aln_list ( Alignment **aln, Sequence *S, int l_seq, char **seq_list); - -Sequence * fill_sequence_struc ( int nseq, char **sequences, char **seq_name); - -int seq_list2in_file ( TC_method *M, Sequence *S, char *list, char *file); -int seq_list2fasta_file( Sequence *S, char *list, char *file); -Structure * seq2struc ( Sequence *S, Structure *ST); -Alignment *strings2aln (int nseq,...); - -Alignment * seq2aln ( Sequence *S, Alignment *A,int rm_gap); -Alignment *seq_coor2aln ( Sequence *S, Alignment *A, int **coor, int nseq); - -Alignment *stack_aln (Alignment *A, Alignment *B); -Alignment *chseqIaln(char *name, int seq_n, int start,int len,Sequence *S, int seqIaln, Alignment *A); - - -char *dna_aln2cons_seq ( Alignment *A); -char *aln2cons_seq ( Alignment *A, int ns, int *ls, int n_groups, char **group_list); -char *aln2cons_maj ( Alignment *A, int ns, int *ls, int n_groups, char **group_list); -Alignment *aln2conservation ( Alignment *A, int threshold,char *seq); - -char *sub_aln2cons_seq_mat ( Alignment *A,int ns, int *ls, char *mat_name); -char *aln2cons_seq_mat ( Alignment*A, char *mat_name); -Alignment *aln2short_aln( Alignment *A, char *list, char *new, int spacer); -Sequence *keep_residues_in_seq ( Sequence *S,char *list, char replacement); -Alignment *keep_residues_in_aln ( Alignment *A,char *list, char replacement); -Alignment *filter_keep_residues_in_aln ( Alignment *A,Alignment *ST, int use_cons, int value, char *list, char replacement); - -Alignment *aln_convert (Alignment *A, Alignment *ST, int use_cons, int value,int n, ...); -Alignment *aln2number (Alignment *A); -Alignment * filter_aln ( Alignment *A, Alignment *ST, int value); -Alignment * filter_aln_lower_upper ( Alignment *A, Alignment *ST,int use_cons, int value); -Alignment * filter_aln_upper_lower ( Alignment *A, Alignment *ST, int use_cons,int value); -Alignment * filter_aln_switchcase ( Alignment *A, Alignment *ST, int use_cons, int value); - -Alignment * STseq2STaln ( Alignment *A, Alignment *ST); -Alignment * merge_annotation ( Alignment *A, Alignment *ST, char *seq); -Alignment * filter_aln_convert ( Alignment *A, Alignment *ST, int use_cons,int value, int n_symbol,char** symbol_list); -int aln2ngap (Alignment *A); - -int * count_in_aln ( Alignment *A, Alignment *ST, int value, int n_symbol,char **symbol_list, int *table); -void count_misc (Alignment*A, Alignment *B); - -Alignment * trim_aln_with_seq ( Alignment *S, Alignment *P); -Alignment * add_align_seq2aln ( Alignment *A, char *seq, char *seq_name); -Sequence * aln2seq ( Alignment *A); -Sequence * aln2seq_main ( Alignment *A, int mode); -Alignment * thread_profile_files2aln (Alignment *A, char *template_file, Fname *F); -Alignment * expand_aln (Alignment *A); -Alignment * aln2expanded_aln (Alignment *A); -Alignment * expand_number_aln (Alignment *A,Alignment *EA); -Alignment * remove_gap_column ( Alignment *A, char *mode); -Alignment* ungap_sub_aln ( Alignment *A, int nseq, int *ls); -Sequence * ungap_seq ( Sequence *A); -Alignment * insert_gap_col (Alignment *A, int p, int l); -Alignment * unalign_residues (Alignment *A, int i1, int i2); -Alignment * unalign_aln (Alignment *A, Alignment *C, int t); -Alignment * unalign_aln_pos (Alignment *A, int s, int p, int l); - -Alignment *degap_aln (Alignment *A); - -Alignment * ungap_aln_n ( Alignment *A, int n); -Alignment * ungap_aln ( Alignment *A); -void compress_aln ( Alignment *A); -Alignment* condense_aln (Alignment *A); - -Alignment * probabilistic_rm_aa ( Alignment *A, int pos, int len); -Alignment * aln_gap2random_aa(Alignment *A); -Alignment * make_random_aln(Alignment *A,int nseq, int len, char *alphabet); -Alignment * add_random_sequence2aln( Alignment *A, char *alphabet); - -int ** trim_aln_borders ( char **seq1, char **seq2, int nseq); -Sequence * trim_aln_seq ( Alignment *A, Alignment *B); -Sequence * trim_aln_seq_name ( Alignment *A, Alignment *B); -Sequence *get_defined_residues( Alignment *A); - - -Alignment *thread_defined_residues_on_aln ( Alignment *A, Sequence *S1); -Sequence *seq2number (Sequence *S); -Sequence * merge_seq ( Sequence *IN, Sequence *OUT); -char * seq_name2coor ( char *s, int *start, int *end, char sep); -Alignment *seq_name2removed_seq_name(Sequence *S, Alignment *NA, float **diff); -int seq_name2index (char *name, Sequence *S); - -Sequence *extract_one_seq(char *n,int start, int end, Alignment *S,int keep_name); -Sequence * extract_sub_seq( Sequence *COOR, Sequence *S); - - -Sequence * add_prf2seq (char *alnfile, Sequence *S); -int prf_in_seq ( Sequence *S); -Sequence * add_sequence ( Sequence *IN, Sequence *OUT, int i); -Sequence * trim_seq ( Sequence *A, Sequence *B); -Sequence * reorder_seq ( Sequence *A, char **name, int nseq); -Sequence * reorder_seq_2 ( Sequence *A, int **name,int field, int nseq); - -char * concatenate_seq ( Sequence *S, char *conc, int *order); -Sequence * swap_header ( Sequence *S, Sequence *H); - -Alignment *aln2jacknife (Alignment *A, int nseq, int len); -char ** name2random_subset (char **in_name, int n_in, int n_out); -Alignment * aln2random_order ( Alignment *A); -Alignment * aln2scramble_seq ( Alignment *A); - -Alignment * reorder_aln ( Alignment *A, char **name, int nseq); - -char ** rm_name_tag (char **name, int nseq, char *tag); - -/******************************************************************************/ -/* TEMPLATE MANAGEMENENT */ -/******************************************************************************/ -char * string_contains_template_tag (char *string); -Sequence * seq2template_type(Sequence *Seq); - -Sequence * vremove_seq_template_files (Sequence *S); -Sequence * display_seq_template_files (Sequence *S); -Sequence * handle_seq_template_file (Sequence *S, char *mode); -int handle_X_template_files ( X_template *T, char *mode); - - -Sequence * seq2template_seq ( Sequence *S, char *template_file, Fname *F); -char * seq2template_file (Sequence *S, char *file); -int seq2template_file2 (Sequence *S, char *file, char *mode); - -Sequence * profile_seq2template_seq ( Sequence *S, char *template_file, Fname *F); -int seq2n_X_template ( Sequence *S, char *type); - -struct X_template *fill_X_template (char *name, char *p, char *type); -FILE * display_seq_template (Sequence *S, FILE *io); -char *template_type2type_name (char *type); -char *template_type2short_type_name (char *type); - - -FILE * display_sequence_templates ( Sequence *S, int i, FILE *io); -FILE * display_X_template (struct X_template *X, FILE *io); - -struct X_template* free_X_template ( struct X_template *X); - -struct X_template *fill_P_template (char *name, char *p, Sequence *S); -struct X_template *fill_F_template (char *name, char *p, Sequence *S); -struct X_template *fill_S_template ( char *name,char *p, Sequence *S); -struct X_template *fill_R_template (char *name, char *p, Sequence *S); -struct X_template *fill_G_template (char *name, char *p, Sequence *S); -struct X_template *fill_T_template (char *name, char *p, Sequence *S); -struct X_template *fill_E_template (char *name, char *p, Sequence *S); -struct X_template *fill_U_template (char *name, char *p, Sequence *S); - -char *seq2T_value ( Sequence *S, int i, char *param_name, char *template_type); -char *profile2P_template_file (Sequence *S, int n); -Alignment * seq2R_template_profile (Sequence *S, int n); -char *seq2P_pdb_id (Sequence *S, int n); -char * seq2P_template_file (Sequence *S, int n); -char * seq2T_template_string (Sequence *S, int n); -char * seq2E_template_string (Sequence *S, int n); -int * seq2U_template (Sequence *S, int n); - -struct X_template * seq_has_template ( Sequence *S, int n, char *type); - -/******************************************************************************/ -/* ALIGNMENT MANIPULATION */ -/******************************************************************************/ - -char *aln_column2string (Alignment *A, int p); -Alignment * fix_aln_seq ( Alignment *A, Sequence *S); -Alignment * rotate_aln ( Alignment *A, char *name); -Alignment * invert_aln ( Alignment *A); -char * complement_string (char *s); -Alignment * complement_aln ( Alignment *A); -Alignment * extract_nol_local_aln( Alignment *A, int start, int max_end); -Alignment * aln2block (Alignment *A, int start, int end, Alignment *B); -Alignment * alnpos2block (Alignment *A, int*pos, Alignment *B); - -Alignment * extract_aln ( Alignment *A, int start, int end); -Alignment * extract_aln2 ( Alignment *A, int start, int end, char *seq_name); -Alignment * extract_aln3 ( Alignment *A, char *filename); -Alignment * alnpos_list2block (Alignment *A, int n, char **in_list); - -Alignment * trunkate_local_aln ( Alignment *A); -int get_nol_aln_border ( Alignment *A, int start, int direction); -Alignment ** trim_local_aln ( Alignment *A, int **List, int ne, int **residue_list, Sequence *S); - -Alignment * aln_cat ( Alignment *A, Alignment *B); -Alignment * concatenate_aln ( Alignment *A, Alignment *B, char *sep); -char * extract_defined_seq ( char *in, int in_of, int in_start, int *aa_def, int dir, int *out_start, char *out_seq); -int verify_aln ( Alignment *A, Sequence *S, char * error); -Alignment * remove_end (Alignment *A); - -Alignment *adjust_est_aln ( Alignment *PW, Alignment *M, int s); -Alignment * rename_seq_in_aln (Alignment *A, char ***list); -Sequence * rename_seq_in_seq (Sequence *A, char ***list); -/********************************************************************/ -/* */ -/* FLOAT SIMILARITIES */ -/* */ -/* */ -/* */ -/********************************************************************/ -float get_seq_fsim ( char *string1, char *string2, char *ignore, char *similarity_groups, int **matrix, int mode); -float get_seq_fsim2 ( char *string1, char *string2, char *ignore, char *in_mode); -float ** get_fsim_aln_array ( Alignment *A, char *mode); -/********************************************************************/ -/* */ -/* ALIGNMENT ANALYSES */ -/* */ -/* */ -/* */ -/********************************************************************/ -int **sim_array2dist_array ( int **p, int max); -int **dist_array2sim_array ( int **p, int max); -int **normalize_array (int **p, int max, int norm); - -int aln2most_similar_sequence ( Alignment *A, char *mode); -int aln2coverage ( Alignment *A, int ref_seq); - -double aln2entropy (Alignment *A, int *in_ls, int in_ns, float gap_threshold); -int sub_aln2sim ( Alignment *A, int *ns, int **ls, char *mode); -int sub_aln2max_sim ( Alignment *A, int *ns, int **ls, char *mode); -int aln2sim ( Alignment *A, char *mode); -int seq2idscore_sim ( char *seq1, char *seq2); - -int aln_is_aligned ( Alignment *A); -int* get_cdna_seq_winsim ( int *cache, char *string1, char *string2, char *ignore, char *mode, int *w); -int get_cdna_seq_sim ( int *cache, char *string1, char *string2, char *ignore, char *mode); - -int seq2aln2sim (char *seq1, char *seq2, char *mode_aln, char *mode_id); -int* get_seq_winsim( char *string1, char *string2, char *ignore, char *mode, int *w); -int get_seq_sim ( char *string1, char *string2, char *ignore, char *mode); -int get_seq_sim_2 ( char *string1, char *string2, char *ignore, char **gr, int ng); -int get_seq_sim_3 ( char *string1, char *string2, char *ignore, int **mat); - - -int *** get_winsim_aln_array ( Alignment *A, char *mode, int ***w); -int ** get_sim_master_aln_array ( Alignment *A,int n, char *mode); - -int ** seq2sim_mat (Sequence *S, char *mode); -int ** seq2cov_mat (Sequence *S, char *mode); -int ** seq2comp_mat (Sequence *S, char *mode, char *comp_mode); - -int logid_score (int sim, int len); -int ** fast_aln2sim_mat (Alignment *A, char *mode); -int ** fast_aln2sim_list (Alignment *A, char *mode, int *ns, int **ls); - -int ** aln2sim_mat (Alignment *A, char *mode); -int **aln2cov (Alignment *A); -int ** get_dist_aln_array ( Alignment *A, char *mode); -int ** get_raw_sim_aln_array ( Alignment *A, char *mode); -int ** get_sim_aln_array ( Alignment *A, char *mode); -int generic_get_seq_sim ( char *seq1, char *seq2, int *cache, char *mode); -Alignment * grep_seq (Alignment *S,char *field, char *mode, char *string); -Alignment* modify_seq (Alignment *S,char *field, char *string1, char *string2); - -Sequence * seq2filter (Sequence *S_in, int min, int max); -int ** get_cov_aln_array ( Alignment *A, char *mode); -int ** get_cov_master_aln_array ( Alignment *A,int n, char *mode); - - -int * get_aln_col_weight ( Alignment *A, char *mode); -int analyse_aln_column ( Alignment *B, int col); - -int sub_aln2nseq_prf ( Alignment *A, int ns, int *ls); -int **aln2count_mat (Alignment *A); -int **sub_aln2count_mat2 (Alignment *A, int ns, int *ls); -int **sub_aln2count_mat3 (char **al, int n); -int **aln2count_mat2 (Alignment *A); -char *aln2random_seq (Alignment *A, int noise1, int noise2, int noise3, int gap_noise); - -Alignment * master_trimseq( Alignment *A, Sequence *S,char *mode); -Alignment * trimseq( Alignment *A, Sequence *S, char *mode); -Alignment *simple_trimseq (Alignment *A,Alignment*K, char *mode, char *seq); -Alignment *sim_filter (Alignment *A, char *in_mode, char *seq_list); - -float ** get_weight ( Alignment *A, Sequence *S, char *mode); -float **seq2pwsim ( Alignment *A, Sequence *S, char *mode); -Alignment * trimseq( Alignment *A, Sequence *S,char *mode); -Alignment * tc_trimseq( Alignment *A, Sequence *S,char *mode); -Alignment* seq2subseq3( Alignment *A, Sequence *S,int use_aln, int lower_sim,int upper_sim, int min_nseq, int trim_direction, char *weight_mode, float ***sim_weight, int *seq_list); -Alignment* seq2subseq2( Alignment *A, Sequence *S,int use_aln, int lower_sim,int upper_sim, int max_nseq, int trim_direction, char *weight_mode, float ***weight_table, int *seq_list); -float extreme_seq (int direction, Alignment *A,float **sim_weight,int *seq_list, int *seq_index); - - -Alignment* seq2subseq1( Alignment *A, Sequence *S,int use_aln, int percent,int max_nseq,int max_diff, char *weight_mode); -/********************************************************************/ -/* */ -/* AMINO ACID FUNCTIONS */ -/* */ -/* */ -/* */ -/********************************************************************/ -char** string2alphabet (char *string, int depth, int *falp_size); -int is_in_same_group_aa ( char r1, char r2, int n_group, char **gl, char *mode); -int find_group_aa_distribution (char *col, int nseq,int n_group, char **gl, int *distrib, char *mode ); -char** make_group_aa (int *ngroup, char *mode); -char** make_group_aa_upgma (char *mat, int max_size); - - -char * test_gene2prot (Constraint_list *CL, int s1); -Alignment* gene2prot (Alignment *A); -Alignment * dna_aln2_3frame_cdna_aln(Alignment *A,int *ns,int **l_s); - -int ** get_sim_aln_array_normal_distribution ( Alignment *A, char *mode, int *STD, int *CENTER); -double normal(double x, double mean, double std); -int generic_get_seq_sim_normal_distribution ( char *seq1, char *seq2, int*cache, char *mode, int *STD, int *CENTER); -int get_seq_sim_distribution ( char *string1, char *string2, char *ignore, char *in_mode, int *STD, int *CENTER); - -Alignment *aln2clean_pw_aln (Alignment *A,OveralnP *F); -char **pw_aln2clean_pw_aln (char ** aln,OveralnP *F); -int * pw_aln2clean_aln_weight ( char *seq1, char *seq2, int w, OveralnP *F); - -float* aln2pred ( Alignment *A, Alignment*B, char *mode); -float* analyze_overaln ( Alignment *A, Alignment *B, char *mode, int f,int p1,int p2, int p3,int filter); - - -Alignment * mark_exon_boundaries (Alignment *A, Alignment *E); - -struct orp -{ - char name[100]; - char mode[100]; - int ncomp; - int nseq; - int len; - - Alignment *A; - Alignment *P; - Alignment *S; - - int *pos; - char ***motif; - float sp; - float sn; - float sen2; - float best; - int tp; - int tn; - int fp; - int fn; - - int offset; - float evalue; - struct orp *PR; -}; - -typedef struct orp ORP; - -typedef Alignment * (*filter_func) (Alignment *, Alignment*, int,int, char *); -/************************************************************************************/ -/* ALIGNMENT ANALYZE : SAR */ -/************************************************************************************/ -int display_simple_sar_analyze_pair_col (Alignment *A, Alignment *SAR, char *mode); -int **simple_sar_analyze_pair_col ( Alignment *inA, Alignment *SAR, char *mode); -int ***simple_sar_predict ( Alignment *inA, Alignment *SAR, char *mode); -int display_simple_sar_analyze_col ( Alignment *inA, Alignment *SAR, char *mode); -Alignment *sar_analyze4 (Alignment *A, Alignment *SAR, char *name);/*28/08/06*/ -Alignment *sar_analyze3 (Alignment *A, Alignment *SAR, char *name); -Alignment *sar_analyze2 (Alignment *A, Alignment *SAR, char *name); -Alignment *sar_analyze (Alignment *A, Alignment *SAR, char *name); -int aln2sar_column_list ( Alignment *A, char *filter); -float get_sar_sim (char *seq1, char *seq2); -float get_sar_sim2 (char *seq1, char *seq2); -Alignment *aln2weighted_sar_score ( Alignment *A,Alignment *B, char *weight_file, char *compound); -float seq2weighted_sar_score ( char *seq, int **weight); - -int sarset2subsarset ( Alignment *A, Alignment *S, Alignment **subA, Alignment **subS, Alignment *SUB); -int sar2subsar (Alignment *A, Alignment *S, Alignment **subA, Alignment **subS, char **slist, int nl); -int sar2subsar_file ( Alignment *A, Alignment *S, char *aln, char *sar); - -Alignment *weight2sar (Alignment *A, Alignment *SAR, char *weight_file, int limit); -Alignment * sar2simpred (Alignment *A, Alignment *SAR, char *pos, char *compound, int L,int U ); -Alignment * sar2simpred2 (Alignment *A, Alignment *SAR, char *seqlist, char *posfile, char *compound, int L1 ); - -Alignment *display_sar ( Alignment *A, Alignment *SAR, char *compound); -NT_node sar2tree (Alignment *A, char *mode); -/************************************************************************************/ -/* ALIGNMENT ANALYZE : SAR FOR OR */ -/************************************************************************************/ - -Alignment * or_scan (Alignment *A, Alignment *B, char *param); -Alignment * or_sar (Alignment *A, Alignment *B, char *param, int print); -ORP * or_loo ( Alignment *inA, Alignment *inS, char *mode, int *pos,int print); - - -ORP * combine_n_predictions (ORP **R,Alignment *A, Alignment *B); -ORP* combine_2_predictions ( ORP *IN, ORP *TO,Alignment *A, Alignment *B); -ORP * display_or_summary (ORP *CP, char *mode, FILE *fp, int print); - -Alignment * or_comp_loo ( Alignment *inA, Alignment *inS, char *mode, int *pos,int print); -int * or_comp_pos ( Alignment *inA, Alignment *inS, char *mode,int print); -float or_id_evaluate ( Alignment *A, Alignment *S, char *mode, int *pos, int print); -char* or_id_evaluate2 ( Alignment *A, Alignment *S, char *mode, int *pos, int print, float *score); -float or_loo_evaluate ( Alignment *A, Alignment *S, char *mode, int *pos, int print); -float or_loo_evaluate2 ( Alignment *A, Alignment *S, char *mode, int *pos, int print); - -Alignment * or_test ( Alignment *inA, Alignment *inS, char *mode); -Alignment * or_jack(Alignment *A, Alignment *S, char *param); -Alignment * or_predict(Alignment *A, Alignment *S, char *mode); -Alignment * or_aln2pos_aln (Alignment *A, Alignment *S, char *mode); -ORP* or_self_predict(Alignment *inA, Alignment *inS, char *mode, int *pos, int print); -Alignment * or_sim(Alignment *A, Alignment *S, char *mode); - -Alignment *display_pos (Alignment *A, Alignment *B, int *pos, char *mode); - -float evaluate_prediction (Alignment *R, Alignment *P, char *mode, int print); -ORP* new_evaluate_prediction (ORP *P, char *mode, int print); - -Alignment * aln2prediction (Alignment *A,char ***motif, int *pos); -int * aln2predictive_positions (Alignment *A, Alignment *B, char *mode, int print); -int * aln2predictive_positions_mat (Alignment *A, Alignment *B, char *mode, int print); -int * aln2predictive_positions_scan (Alignment *A, Alignment *B, char *mode, int print); -char *** compounds2motifs (Alignment *A, Alignment *B, int *pos, int depth, char *mode, int print); -char ** compound2motif (Alignment *A, Alignment *B, int *pos, int depth, int c, char *mode, int print); -double pos2sim (Alignment *A, Alignment *B, int *pos); -double sar_aln2r (Alignment *A, Alignment *B, int *pos, int print); -double sar_aln2delta (Alignment *A, Alignment *B, int *pos, int print); -Alignment * jack_sar(Alignment *A, Alignment *S, char *param); -Alignment *set_sar (Alignment *A, Alignment *S, char *param); -char * get_compound_name (int c, char *mode); -Alignment *get_prediction_target (Alignment *A, Alignment *S, char *param); -int * file2pos_list (Alignment *A, char *posfile); -ORP * declare_or_prediction ( int ncomp, int nseq, int len); -void free_orp_list ( ORP**P); -void free_orp ( ORP*P); -double evaluate_sar_score1 ( int len, int n11, int n1a, int n1b); -double evaluate_sar_score2 ( int len, int n11, int n1a, int n1b); - -Sequence * compare_sar_sequence( Sequence *S1, Sequence *S2, int depth); -Constraint_list * mask_list_with_aln (Alignment *A,int start, int len,Constraint_list *CL, int new_value); -Constraint_list* mask_list_with_aln_pair (Alignment *A,int start, int end,Constraint_list *CL,int new_value); -Constraint_list *mask_entry( Constraint_list *CL, int p, int new_value); -Constraint_list *prepare_list_and_seq4sw(Constraint_list *I, int n_seq, char **seq_name); -int ** get_undefined_list (Constraint_list *CL); -int is_never_undefined (Constraint_list *CL,int r); -int* do_analyse_list ( Constraint_list *CL); - - - -void print_list(Constraint_list *CL); -void print_pair (Constraint_list *CL,int p); -int** bin_list (Constraint_list *CL,int field, int Threshold); -void save_full_list (Constraint_list *CL, char*fname); -FILE * output_list ( Constraint_list *CL, FILE *fp); -FILE * output_pair (Constraint_list *CL,int p, FILE *fp); -NT_node ** seq2cw_tree ( Sequence *S, char *file); -NT_node ** make_nj_tree ( Alignment *A,int **distances,int gop, int gep, char **out_seq, char **out_seq_name, int out_nseq, char *tree_file, char *tree_mode); -NT_node ** make_upgma_tree ( Alignment *A,int **distances,int gop, int gep, char **out_seq, char **out_seq_name, int out_nseq, char *tree_file, char *tree_mode); - -NT_node ** int_dist2nj_tree (int **distances, char **out_seq_name, int out_nseq, char *tree_file); -NT_node ** float_dist2nj_tree (float **distances, char **out_seq_name, int out_nseq, char *tree_file); -NT_node ** dist2nj_tree (double **distances, char **out_seq_name, int out_nseq, char *tree_file); - -NT_node ** int_dist2upgma_tree (int **mat, Alignment *A, int nseq, char *fname); -NT_node upgma_merge (int **mat, NT_node *NL, int *used, int *n, int N); - -void nj_tree(char **tree_description, int nseq); -void fast_nj_tree(char **tree_description); -void slow_nj_tree(char **tree_description); - -void print_phylip_tree(char **tree_description, FILE *tree, int bootstrap); -void two_way_split(char **tree_description, FILE *tree, int start_row, int flag, int bootstrap); -void guide_tree(char *fname, double **saga_tmat, char **sag_seq_name, int saga_nseq); - - - -NT_node split2upgma_tree (Split **S, Alignment *A, int nseq, char *fname); -NT_node split_upgma_merge (Alignment *A, Split **S, NT_node *NL, int *used, int *n, int N); -float get_split_dist ( Alignment *A, NT_node L, NT_node R, Split **S) ; - -Alignment * upgma_tree_aln (Alignment*A, int nseq, Constraint_list *CL); -int ** dist_mat2best_split (int **mat, int nseq); -typedef struct Tmpname Tmpname; -struct Memcontrol - { - size_t size; - size_t size_element; - char check[3]; - struct Memcontrol *p; - struct Memcontrol *n; - }; -typedef struct Memcontrol Memcontrol; -void free_pair_wise();//Frees static memory in the pair_wise functions -/************************************************************************/ -/* */ -/* CONSTRAINT_LIST */ -/* */ -/* */ -/************************************************************************/ -Constraint_list *free_constraint_list4lib_computation (Constraint_list *CL); -Constraint_list *duplicate_constraint_list4lib_computation (Constraint_list *CL); -Constraint_list * declare_constraint_list_simple ( Sequence *S); -Constraint_list * declare_constraint_list ( Sequence *S, char *name, int *L, int ne,FILE *fp, int **M); -Constraint_list *cache_dp_value4constraint_list ( char mode[],Constraint_list *CL); -Constraint_list *duplicate_constraint_list_soft (Constraint_list *CL); -Constraint_list *duplicate_constraint_list (Constraint_list *CL); -Constraint_list *copy_constraint_list (Constraint_list *CL, int mode); -Sequence * free_constraint_list (Constraint_list *CL); -Constraint_list * free_constraint_list_full (Constraint_list *CL); -Distance_matrix * free_distance_matrix ( Distance_matrix *DM); -Distance_matrix * duplicate_distance_matrix ( Distance_matrix *DMin); -/************************************************************************/ -/* */ -/* Blast_param Functions */ -/* */ -/* */ -/************************************************************************/ -Blast_param * duplicate_blast_param ( Blast_param*B); -Blast_param * free_blast_param ( Blast_param*B); -/************************************************************************/ -/* */ -/* TC_param Functions */ -/* */ -/* */ -/************************************************************************/ -TC_param * duplicate_TC_param ( TC_param*B); -TC_param * free_TC_param ( TC_param*B); -/************************************************************************/ -/* */ -/* MOCA Functions */ -/* */ -/* */ -/************************************************************************/ -Moca * duplicate_moca ( Moca *m); -Moca * free_moca ( Moca *m); -/************************************************************************/ -/* */ -/* PDB Functions */ -/* */ -/* */ -/************************************************************************/ -Structure * declare_structure ( int n, char **array); -Structure * extend_structure ( Structure *S); -/************************************************************************/ -/* */ -/* Weights Functions */ -/* */ -/* */ -/************************************************************************/ -Weights* declare_weights ( int nseq); -Weights* duplicate_weights (Weights *W); -Weights* free_weights ( Weights* W); - -FILE* print_mem_usage (FILE *fp, char *comment); -void set_max_mem (int m); -int verify_memory (int s); -int my_assert ( void *p, int index); - -void * vmalloc ( size_t size); -void * vcalloc ( size_t nobj, size_t size); -void * vcalloc_nomemset ( size_t nobj, size_t size); -void * sub_vcalloc ( size_t nobj, size_t size, int MODE); - -void * vrealloc ( void *p, size_t size); -void vfree2 ( void **p); -void vfree ( void *p); -void * free_arrayN (void *p, int ndim); -void vfree_all (); -/*********************************************************************/ -/* */ -/* SIZES */ -/* */ -/* */ -/*********************************************************************/ -void write_size_short (int x, short *array, int offset); -void write_size_char (int x, char *array, int offset); -void write_size_int (int x, int *array, int offset); -void write_size_float (int x, float *array, int offset); -void write_size_double(int x, double *array, int offset); - -int read_size_short ( void *array, size_t size ); -int read_size_char ( void *array, size_t size ); -int read_size_int ( void *array, size_t size ); -int read_size_float ( void *array, size_t size ); -int read_size_double( void *array, size_t size ); -int read_array_size_new ( void *array); -int read_array_size ( void *array, size_t size ); -int read_array_new ( void *array); -int is_dynamic_memory ( void *array); - -/*********************************************************************/ -/* */ -/* REALLOCATION */ -/* */ -/* */ -/*********************************************************************/ -void **realloc_arrayN(int ndim,void **main_array,size_t size, ...); -void **realloc_arrayN2 ( int ndim, void ** p, int *A, size_t size); - - -void ** realloc_array (void **array,size_t size, int first, int second, int ext1, int ext2); -short ** realloc_short ( short **array, int first, int second, int ext1, int ext2); -char ** realloc_char ( char **array, int first, int second, int ext1, int ext2); -int ** realloc_int ( int **array, int first, int second, int ext1, int ext2); -float ** realloc_float ( float **array, int first, int second, int ext1, int ext2); -double ** realloc_double ( double **array, int first, int second, int ext1, int ext2); -Alignment ** realloc_aln_array ( Alignment **array, int ext1); -/*The new realloc is recommended*/ -short ** new_realloc_short ( short **array, int ext1, int ext2); -char ** new_realloc_char ( char **array, int ext1, int ext2); -int ** new_realloc_int ( int **array, int ext1, int ext2); -float ** new_realloc_float ( float **array, int ext1, int ext2); -double ** new_realloc_double ( double **array, int ext1, int ext2); - - -void * declare_arrayNnomemset (int ndim, size_t size, ...); -void *declare_arrayN2nomemset ( int ndim, int *A, size_t size); - -void * declare_arrayN (int ndim, size_t size, ...); -void *declare_arrayN2 ( int ndim, int *A, size_t size); - - -void ** declare_array (int first, int second, size_t size); -short ** declare_short ( int first, int second); -char ** declare_char ( int first, int second); -int ** declare_int ( int first, int second); -float ** declare_float ( int first, int second); -double ** declare_double ( int first, int second); - -void ** declare_array_nomemset (int first, int second, size_t size); -short ** declare_short_nomemset ( int first, int second); -char ** declare_char_nomemset ( int first, int second); -int ** declare_int_nomemset ( int first, int second); -float ** declare_float_nomemset ( int first, int second); -double ** declare_double_nomemset ( int first, int second); - - -Alignment ** declare_aln_array ( int first); - -short ** free_short ( short **array, int first); -int ** free_int ( int **array, int first); -char ** free_char ( char **array, int first); -double ** free_double ( double **array, int first); -float ** free_float ( float **array, int first); -Alignment ** free_aln_array ( Alignment **array); - -long aln_stack (Alignment *A, int mode); -Sequence *free_Alignment ( Alignment *A); -Sequence *free_aln ( Alignment *A); -Alignment *declare_Alignment ( Sequence *S); -Alignment *realloc_alignment ( Alignment *A, int new_len); -Alignment *realloc_alignment2 ( Alignment *A, int new_nseq, int new_len); - -Alignment *declare_aln ( Sequence *S); -Alignment *declare_aln2 (int nseq, int len); -Alignment *realloc_aln ( Alignment *A, int new_len); -Alignment *realloc_aln2 ( Alignment *A, int new_nseq, int new_len); -Alignment *update_aln_random_tag (Alignment *A); - -Alignment *copy_aln ( Alignment *A, Alignment *B); -Alignment* extract_sub_aln2 ( Alignment *A, int nseq, char **list); -Alignment* extract_sub_aln ( Alignment *A, int nseq, int *list); -Alignment* shrink_aln ( Alignment *A, int nseq, int *list); - -Profile *copy_profile (Profile *P1); -Profile *declare_profile(char *alphabet, int len); -Profile * free_profile ( Profile *P); - -Sequence * declare_sequence ( int min, int max, int nseq); -Sequence * realloc_sequence (Sequence *OUT, int new_nseq, int max_len); -Sequence * duplicate_sequence (Sequence *S ); -Sequence * add_sequence ( Sequence *IN, Sequence *OUT, int i); -void free_sequence ( Sequence *LS, int nseq); - - - -Fname *declare_fname (); -Fname *free_fname ( Fname *F); -/*********************************************************************************************/ -/* */ -/* STRUCTURES FOR HSEARCH */ -/* */ -/*********************************************************************************************/ -#define FIND 0 -#define ADD 1 -#define REMOVE 2 -#define DECLARE 3 -#define MARK 4 -#define UNMARK 5 -#define FREE 6 -#define FREE_STACK 7 -#define FREE_ALL 8 -#define FREE_MARK 9 -#define INFO 10 - -struct HaschT -{ - int ne; - struct Hasch_entry **p; -}; -typedef struct HaschT HaschT; - -struct Hasch_entry -{ - struct Hasch_entry *n; - struct Hasch_entry *p; - int k; - struct Hasch_data *data; - struct Hasch_data * (*free_data)(struct Hasch_data *); - struct Hasch_data * (*declare_data)(struct Hasch_entry*); - int tag; -}; -typedef struct Hasch_entry Hasch_entry; -struct Char_node -{ - struct Char_node **c; - int key; - -}; -typedef struct Char_node Char_node; - -HaschT * hcreate ( int n_elements,struct Hasch_data * declare_data(struct Hasch_entry *), struct Hasch_data *free_data(struct Hasch_data *) ); -HaschT *hdestroy (HaschT *T,struct Hasch_data * declare_data(struct Hasch_entry *), struct Hasch_data *free_data(struct Hasch_data *) ); -Hasch_entry* hsearch (HaschT *T, int k, int action, struct Hasch_data * declare_data(struct Hasch_entry *), struct Hasch_data *free_data(struct Hasch_data *) ); -Hasch_entry * extract_hasch_entry_from_list (Hasch_entry *e, struct Hasch_data * declare_data(struct Hasch_entry *), struct Hasch_data *free_data(struct Hasch_data *) ); -Hasch_entry * insert_hasch_entry_in_list (Hasch_entry *p, Hasch_entry *e, Hasch_entry *n, struct Hasch_data * declare_data(struct Hasch_entry *), struct Hasch_data *free_data(struct Hasch_data *) ); -Hasch_entry * allocate_hasch_entry (Hasch_entry *e, int action,struct Hasch_data * declare_data(struct Hasch_entry *), struct Hasch_data *free_data(struct Hasch_data *) ); - - - - - -int string2key (char *s, Char_node *n); -Char_node * declare_char_node (int action); -char * process_repeat (char *aln, char *seq, char *pdb); -char * normalize_pdb_file (char *name, char *seq,char *out_file); -Ca_trace * trim_ca_trace (Ca_trace *st, char *seq ); - -Ca_trace * read_ca_trace (char *file, char *seq_field ); -Ca_trace * simple_read_ca_trace (char *file ); -Ca_trace * hasch_ca_trace ( Ca_trace *T); -Ca_trace * hasch_ca_trace_nb ( Ca_trace *T); -Ca_trace * hasch_ca_trace_bubble ( Ca_trace *T); -Ca_trace * hasch_ca_trace_transversal ( Ca_trace *TRACE); - -float get_atomic_distance ( Atom *A, Atom*B); -float ** measure_ca_distances(Ca_trace *T); - -float** print_contacts ( char *file1, char *file2, float T); -char * map_contacts ( char *file1, char *file2, float T); -int * identify_contacts (Ca_trace *ST1,Ca_trace *ST2, float T); -Sequence *seq2contacts ( Sequence *S, float T); -char *string2contacts (char *seq,char *name,char *comment, float T); -char **struc2nb (char *name,char *seq, char *comment, float Threshold, char *atom_list, char *output); -char **struclist2nb (char *name,char *seq, char *comment, float Threshold, char *atom_list, char *output); - -typedef struct -{ Alignment *A; - Alignment *B; - Alignment *sim_A; - Sequence *S; - Structure *ST; -/*PARAMETERS*/ - char ***grep_list; - int n_greps; - - char *sim_aln; - char *alignment1_file; - char *alignment2_file; - - char *io_format; - - int n_structure; - char **struct_file; - char **struct_format; - int *n_symbol; - char ***symbol_list; - -/*LIST VARIABLES*/ - int **code_A; - int **code_B; - int n_elementsA; - int n_elementsB; - - int **end_index; - int **start_index; -/*RESULTS_VARIABLES*/ - int **tot_count; - int **pos_count; - int ***pw_tot_count; - int ***pw_pos_count; - int *glob; - int **pw_glob; -/*IO VARIABLES*/ - int n_categories; - char ***category; - char *category_list; - int *n_sub_categories; - char sep_l; - char sep_r; -/*Sims VARIABLES*/ - float **sim; - float **sim_param; - char *sim_matrix; - - int sim_n_categories; - char ***sim_category; - char *sim_category_list; - int *sim_n_sub_categories; -}Result; - - -#define MAX_N_CATEGORIES 100 -#define MAX_N_STRUC 100 - - - - -int aln_compare (int argc, char *argv[]); -int **analyse_distance ( Alignment *A, int **dis); - -Structure * read_structure (char *fname, char *format, Alignment *A,Alignment *B, Structure *ST, int n_symbols, char **symbol_table); - - -int is_in_struct_category ( int s1, int s2, int r1, int r2, Structure *ST, char **cat, int n_sub_cat); -char * get_structure_residue (int s, int r, Structure *S); -int parse_category_list ( char *category_list, char ***category, int *sub_n_categories); -int struc_matches_pattern ( char *struc, char *pattern); -float **get_aln_compare_sim ( Alignment *A, Structure *S, char **cat, int n_cat, char *matrix); -float **analyse_sim ( Alignment *A, float **dis); - -/*Output*/ -FILE *output_format (char *iof, FILE *fp, Result *R); -FILE *output_pair_wise_sequence_results (FILE *fp, Result *R); -FILE *output_sequence_results (FILE *fp, Result *R); -FILE *output_total_results (FILE *fp, Result *R); -FILE *output_header (FILE *fp, Result *R); -FILE *output_large_header ( FILE *fp, Result *R); - -/*Parameter Checking*/ -int is_a_struc_format (char *format); -void get_separating_char ( char s, char *l, char *r); -void output_informations (); - -int check_configuration4program(); -typedef struct - { - Alignment *A; - Weights *W; - Sequence *S; - int **M; - Structure *RNA_ST; - NT_node T; - Constraint_list *CL; - char format[100]; - char file[100]; - int rm_gap; - -}Sequence_data_struc; - -typedef struct - { - char **symbol_list; - int n_symbol; - char *coor_file; - int rm_gap; - int keep_case; - int keep_name; - int use_consensus; -}Action_data_struc; - -/*Control of alignment sizes*/ -int set_landscape_msa (int len); -int get_msa_line_length (int line, int aln_len); - -int seq_reformat (int argc, char **argv); - -Sequence_data_struc *read_data_structure ( char *in_format, char *in_file,Action_data_struc *RAD); -Alignment * main_read_aln ( char *name, Alignment *A); -Sequence * read_sequences ( char *name); -Sequence * read_alifold ( char *name); -Alignment *alifold2aln ( char *name); -Sequence * main_read_seq ( char *mname); -int output_format_aln ( char *format, Alignment *A, Alignment *EA,char *name); -int main_output ( Sequence_data_struc *D1, Sequence_data_struc *D2, Sequence_data_struc *DST, char *out_format, char *out_file); - -char * identify_seq_format ( char *file); -char * name2type_name ( char *name); -char identify_format (char **fname); -char **identify_list_format ( char **list, int n); - -int type_is_exon_boundaries(char **seq, int n); - -int format_is_oligo ( char *file); -int format_is_msf ( char *file); -int format_is_fasta( char *file); -int format_is_fasta_aln( char *file); -int format_is_fasta_seq( char *file); -int is_pir_name (char *name); -int format_is_pir ( char *file); -int format_is_pir_aln( char *file); -int format_is_pir_seq( char *file); -int pir_name (char *name); -int format_is_conc_aln (char *file); -int format_is_saga ( char *file); -int format_is_swissprot (char *name); - -int is_seq ( char *name); -int is_aln ( char *name); -int has_pdb (char *name); -int is_stockhom_aln ( char *name); -int is_blast_file (char *name); -int is_sap_file (char *name); -int is_pdb_file ( char *name); -int is_simple_pdb_file ( char *name); -char *fix_pdb_file (char *name); - -int is_pdb_name ( char *name); -char* get_pdb_id(char *name); -char* get_pdb_struc(char *name, int start, int end); -char* seq_is_pdb_struc ( Sequence *S, int i); -char* is_pdb_struc ( char *name); /*Returns NULL if not a PDB structure Or a the name of a file containing a PDB structure*/ -int is_matrix (char *name); - -int is_lib (char *name); -int is_lib_01 (char *name); -int is_lib_02 (char *name); -int is_lib_list ( char *name); -int is_single_seq_weight_file (char *fname); -int is_newick (char *name); - -int is_method ( char *file); - -char *format_name2aln_format_name (char *name); -int is_in_format_list ( char *name); -int is_out_format_list ( char *name); -int is_struc_in_format_list ( char *name); -int is_struc_out_format_list ( char *name); -/*******************************************************************************************/ -/* */ -/* */ -/* INPUT MISC */ -/* */ -/***************************************************************************************** */ - -char *** read_rename_file ( char *fname, int mode); -void get_barton_list_tc_seq ( char *in_file); -int process_barton_entry (char *buf, char *name); - -Structure *read_rna_struc_number ( Alignment *A, char *fname); -char ** read_lib_list (char *name, int *n); -/*******************************************************************************************/ -/* */ -/* */ -/* INPUT WEIGHTS */ -/* */ -/***************************************************************************************** */ -Weights* get_amps_sd_scores ( char *fname); -Weights *read_seq_weight (char **name, int nseq, char* seq_weight); -/*******************************************************************************************/ -/* */ -/* */ -/* INPUT SEQUENCES */ -/* */ -/***************************************************************************************** */ -char ***read_group ( char *file); -Sequence* get_pdb_sequence ( char *fname); -Sequence* get_struc_gor ( char *fname); -Sequence* get_dialign_sequence ( char *fname); -Sequence* get_pima_sequence ( char *fname); -Sequence* get_sequence_dali ( char *fname); -Sequence* get_pir_sequence ( char *fname, char *comment_name); -Sequence* perl_reformat2fasta ( char *perl_script, char *file); - -Sequence* get_fasta_tree ( char *fname, char *comment_name); -Sequence* get_fasta_sequence ( char *fname, char *comment_name); -Sequence* get_fasta_sequence_num ( char *fname, char *comment_name); -Sequence* get_fasta_sequence_raw ( char *fname, char *comment_name); -Sequence *get_file_list ( char *fname); -Sequence *get_tree_file_list ( char *fname); - -Sequence* get_gor_sequence ( char *fname, char *comment_name); -Sequence* get_swissprot_sequence ( char *fname, char *comment_name); -int fscanf_seq_name ( FILE *fp, char *sname); - -void read_check ( Alignment *A, char *check_file); -void read_stockholm_aln ( char *fname, Alignment *A); -void read_aln ( char *fname, Alignment *A); -void read_number_aln ( char *fname, Alignment *A); -Alignment *read_blast_aln ( char *fname, Alignment *A); -void read_msf_aln ( char *fname, Alignment *A); -void read_amps_aln ( char *in_file, Alignment *A); -int get_amps_seq_name ( char **name, char* fname); -Alignment *read_gotoh_aln ( char *fname, Alignment *A); - -void undump_msa ( Alignment *A, char *tmp); -void dump_msa ( char *file,Alignment *A, int nseq, int *lseq); -/*******************************************************************************************/ -/* */ -/* */ -/* OUTPUT MATRICES */ -/* */ -/***************************************************************************************** */ -int output_freq_mat ( char *outfile, Alignment *A); -/*******************************************************************************************/ -/* */ -/* */ -/* OUTPUT P-Values */ -/* */ -/***************************************************************************************** */ -float output_maln_pval ( char *outfile, Alignment *A); -/*******************************************************************************************/ -/* */ -/* */ -/* OUTPUT WEIGHTS */ -/* */ -/***************************************************************************************** */ -void output_similarities (char *file, Alignment *A, char *mode); -void output_similarities_pw (char *file, Alignment *A, Alignment *B, char *mode); -Alignment * similarities_file2aln ( char *file); -int** input_similarities (char *file, Alignment *A, char *mode); - -void output_statistics (char *file, Alignment *A, char *mode); -void output_pw_weights4saga ( Weights *W, float **w_list, char *wfile); -int output_seq_weights ( Weights *W, char *wfile); -FILE * display_weights (Weights *W, FILE *fp); -/*******************************************************************************************/ -/* */ -/* */ -/* OUTPUT SEQ */ -/* */ -/***************************************************************************************** */ -char** clean_seq_names (char **names, int n, int mode); -char *clean_seq_name (char *name, int mode); - - -void output_pir_seq1 (char *fname, Alignment*A ); -void output_pir_seq (char *fname, Alignment*A ); -void output_gor_seq (char *fname, Alignment*A ); -void output_mult_fasta_seq (char *fname, Alignment*A, int n ); - -void main_output_fasta_seq ( char *fname, Alignment *A, int header); -void output_fasta_tree ( char *fname, Alignment *A); - -void output_fasta_seq1 (char *fname, Alignment*A ); -char *output_fasta_seqX (char *name, char *mode, Sequence *S, Alignment *A, int i); - -void output_pir_check (char *fname,int nseq, char **A ); -void output_fasta_seq (char *fname, Alignment*A ); -void output_gotoh_seq (char *fname, Alignment*A ); -void output_est_prf (char *fname, Alignment *A); -void output_gor_seq (char *fname, Alignment*A ); -/*******************************************************************************************/ -/* */ -/* */ -/* OUTPUT ALN */ -/* */ -/***************************************************************************************** */ -void output_pir_aln ( char *fname,Alignment*A); -void output_model_aln ( char *fname,Alignment*A ); -char * output_fasta_sub_aln (char *fname, Alignment*A, int ns, int *ls ); -char * output_fasta_sub_aln2 (char *fname, Alignment*A, int *ns, int **ls ); - -void ouput_suchard_aln ( char *fname,Alignment*A); -void output_fasta_aln ( char *fname,Alignment*A); -void output_msf_aln ( char *fname,Alignment*B); -FILE * output_generic_interleaved_aln (FILE *fp, Alignment *B, int line, char gap, char *mode); -void output_stockholm_aln (char *file, Alignment *A, Alignment *ST); -void output_clustal_aln( char *name, Alignment*B); -void output_strict_clustal_aln( char *name, Alignment*B); -void output_generic_clustal_aln( char *name, Alignment*B, char *format); -void output_saga_aln ( char *name, Alignment*B); -void output_phylip_aln ( char *name, Alignment*B); -void output_mocca_aln ( char *name, Alignment*B,Alignment*S); -void output_rnalign (char *out_file, Alignment*A,Sequence *STRUC); -void output_pw_lib_saga_aln (char *lib_name, Alignment *A ); -void output_lib (char *lib_name, Alignment *A ); -void output_compact_aln( char *name, Alignment *B); - -void print_sub_aln ( Alignment *B, int *ns, int **ls); -void print_aln ( Alignment *B); -FILE * output_aln( Alignment *B, FILE *fp); - - -FILE * output_aln_score ( Alignment *B, FILE *fp); -FILE * output_aln_with_res_number ( Alignment *B, FILE *fp); - - -FILE* output_Alignment ( Alignment *B, FILE *fp); -FILE* output_Alignment_without_header ( Alignment *B, FILE *fp); -FILE * output_Alignment_score ( Alignment *B, FILE *fp); -FILE * output_Alignment_with_res_number ( Alignment *B, FILE *fp); -void output_constraints ( char *fname, char *mode, Alignment *A); - -Alignment *input_conc_aln ( char *name, Alignment *A); -void output_conc_aln ( char *name, Alignment *B); -void output_glalign ( char *name, Alignment *B, Alignment *S); -void output_lalign_header( char *name, Alignment *B); -void output_lalign ( char *name, Alignment *B); -void output_lalign_aln ( char *name, Alignment *B); - -/**************************************************************************************************/ -/* */ -/* */ -/* INPUT/OUTPUT MATRICES */ -/* */ -/**************************************************************************************************/ -int is_blast_matrix (char *fname); -int is_pavie_matrix (char *fname); -int is_clustalw_matrix (char *fname); - -int is_distance_matrix_file (char *name); -int is_similarity_matrix_file (char *name); - -void aln2mat (Sequence *S); -void aln2mat_diaa (Sequence *S); -int **seq2latmat ( Sequence *S, char *fname); -int output_mat (int **mat, char *fname, char *alp, int offset); -int ** read_blast_matrix ( char *mat_name); -int output_blast_mat (int **mat, char *fname); -double* mat2cmp (int **mat1, int **mat2); - -void output_pavie_mat (int **mat, char *fname, double gep, char *alp); -int ** read_pavie_matrix ( char *mat_name); - -/****************************************************************************************************/ -/*************************** *************************************/ -/*************************** PROCESSING *************************************/ -/*************************** *************************************/ -/*******************************************************************************************/ -/* */ -/* */ -/* THREADING */ -/***************************************************************************************** */ - - - - -Structure * declare_rna_structure_num (Sequence *SA); - -char *thread_aa_seq_on_dna_seq( char *s); -void thread_seq_struc2aln ( Alignment *A, Sequence *ST); -Alignment *thread_dnaseq_on_prot_aln (Sequence *S, Alignment *A); -void cache_id ( Alignment *A); - - - -int process_est_sequence ( Sequence *S, int *cluster_list); -char * invert_seq ( char *seq); -int get_best_match ( char *seq1, char *seq2); -int** extract_m_diag_streches ( int ** m, int l1, int l2,char *seq1, char *seq2, int *n_mdiag); -int is_strech ( char *AA, char *seq1, char *seq2, int len, int x, int y); - -int search_for_cluster ( int seq, int cluster_number, int *cluster_list, int T, int nseq, int **S); -int * SHC ( int nseq, int **NST, int **ST); -int mutate_sol (int *sol, int nseq); -int evaluate_sol ( int*sol, int nseq, int **ST, int **NST); - - - -char **make_symbols ( char *name, int *n); -Alignment *code_dna_aln (Alignment *A); -char* back_translate_dna_codon ( char aa, int deterministic); -int translate_dna_codon ( char *seq, char stop); -char* mutate_amino_acid ( char aa, char *mode); -Alignment * mutate_aln ( Alignment *A, char *r); - - -Sequence * transform_sequence ( Sequence *S, char *mode); -Alignment *translate_splice_dna_aln (Alignment *A,Alignment *ST ); -Alignment * mutate_cdna_aln ( Alignment *A); - -char * translate_dna_seq_on3frame ( char *dna_seq, char stop, char *prot); -char * translate_dna_seq ( char *dna_seq, int frame, char stop, char *prot); - -char * back_translate_dna_seq ( char *in_seq,char *out_seq, int mode); -Alignment *back_translate_dna_aln (Alignment *A); -Alignment *translate_dna_aln (Alignment *A, int frame); -Alignment *clean_gdna_aln (Alignment *A); -Alignment *clean_cdna_aln (Alignment *A); -Alignment *clean_est (Alignment *A); -/**************************************************************************************************/ -/******************************** ********************************************/ -/******************************** PROCESSING ********************************************/ -/*************** **************** ********************************************/ -void modify_data (Sequence_data_struc *D1, Sequence_data_struc *D2, Sequence_data_struc *DST, char **action_list,int n_actions, Action_data_struc *RAD); - -// -// Name MAnipulation -// - -Alignment *clean_aln (Alignment *A); -Sequence *clean_sequence ( Sequence *S); -char ** translate_names (int n, char **name); -char * translate_name ( char *name); -char *decode_name (char *name, int mode); -FILE * display_sequences_names (Sequence *S, FILE *fp, int check_pdb_status, int print_templates); -Sequence *add_file2file_list (char *name, Sequence *S); -/*********************************COPYRIGHT NOTICE**********************************/ -/*© Centro de Regulacio Genomica */ -/*and */ -/*Cedric Notredame */ -/*Tue Oct 27 10:12:26 WEST 2009. */ -/*All rights reserved.*/ -/*This file is part of T-COFFEE.*/ -/**/ -/* T-COFFEE is free software; you can redistribute it and/or modify*/ -/* it under the terms of the GNU General Public License as published by*/ -/* the Free Software Foundation; either version 2 of the License, or*/ -/* (at your option) any later version.*/ -/**/ -/* T-COFFEE is distributed in the hope that it will be useful,*/ -/* but WITHOUT ANY WARRANTY; without even the implied warranty of*/ -/* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the*/ -/* GNU General Public License for more details.*/ -/**/ -/* You should have received a copy of the GNU General Public License*/ -/* along with Foobar; if not, write to the Free Software*/ -/* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA*/ -/*............................................... |*/ -/* If you need some more information*/ -/* cedric.notredame@europe.com*/ -/*............................................... |*/ -/**/ -/**/ -/* */ -/*********************************COPYRIGHT NOTICE**********************************/ diff --git a/binaries/src/tcoffee/t_coffee_source/util_make_tree.c b/binaries/src/tcoffee/t_coffee_source/util_make_tree.c deleted file mode 100644 index 0ace7b8..0000000 --- a/binaries/src/tcoffee/t_coffee_source/util_make_tree.c +++ /dev/null @@ -1,1472 +0,0 @@ -#include -#include -#include -#include - -#include "io_lib_header.h" -#include "util_lib_header.h" -#include "dp_lib_header.h" -#include "define_header.h" - -static int first_seq, last_seq; - -static int nseqs; -static char **names; /* the seq. names */ - -static double **tmat; -static double *av; -static double *left_branch, *right_branch; -static int *tkill; - - -/*! - * \file util_make_tree.c - * \brief Source code tree algorithms - */ - -NT_node ** seq2cw_tree ( Sequence *S, char *tree) -{ - Alignment *A; - char *aln,command[1000]; - int tot_node; - - - - A=seq2clustalw_aln (S); - aln=vtmpnam (NULL); if (!tree)tree=vtmpnam (NULL); - - output_fasta_aln (aln, A); - - sprintf ( command, "clustalw -infile=%s -newtree=%s -tree %s", aln, tree, TO_NULL_DEVICE); - my_system (command); - return read_tree (tree, &tot_node, S->nseq, S->name); -} - -NT_node ** make_upgma_tree ( Alignment *A,int **distances,int gop, int gep, char **out_seq, char **out_seq_name, int out_nseq, char *tree_file, char *tree_mode) - { - - if ( distances==NULL && A==NULL) - { - fprintf ( stderr, "\nError: make_nj_tree, must provide an alignment or a distance matrix [FATAL:%s]", PROGRAM); - myexit (EXIT_FAILURE); - } - else if ( distances==NULL) - { - - distances=get_dist_aln_array (A, "idmat"); - } - return int_dist2upgma_tree (distances,A, A->nseq,tree_file); - } -NT_node ** make_nj_tree ( Alignment *A,int **distances,int gop, int gep, char **out_seq, char **out_seq_name, int out_nseq, char *tree_file, char *tree_mode) - { - - if ( distances==NULL && A==NULL) - { - fprintf ( stderr, "\nError: make_nj_tree, must provide an alignment or a distance matrix [FATAL:%s]", PROGRAM); - myexit (EXIT_FAILURE); - } - else if ( distances==NULL) - { - distances=get_dist_aln_array (A, "idmat"); - } - return int_dist2nj_tree (distances,out_seq_name, out_nseq,tree_file); - } - -NT_node ** int_dist2nj_tree (int **distances, char **out_seq_name, int out_nseq, char *tree_file) - { - int a, b; - double **d; - NT_node **T; - - d=declare_double( out_nseq, out_nseq); - for ( a=0; a< out_nseq; a++) - for ( b=0; b< out_nseq; b++) - d[a][b]=distances[a][b]; - - T=dist2nj_tree ( d, out_seq_name, out_nseq, tree_file); - - free_double (d, -1); - return T; - } -NT_node ** float_dist2nj_tree (float **distances, char **out_seq_name, int out_nseq, char *tree_file) - { - int a, b; - double **d; - NT_node **T; - - d=declare_double( out_nseq, out_nseq); - for ( a=0; a< out_nseq; a++) - for ( b=0; b< out_nseq; b++) - d[a][b]=distances[a][b]; - T=dist2nj_tree ( d, out_seq_name, out_nseq, tree_file); - free_double (d, -1); - return T; - } -NT_node ** dist2nj_tree (double **distances, char **out_seq_name, int out_nseq, char *tree_file) - { - int a, b; - double **d_dist; - int tot_node=0; - NT_node **T; - - if ( !tree_file)tree_file=vtmpnam(NULL); - d_dist=declare_double( out_nseq+1, out_nseq+1); - - for ( a=0; aMY_EPS) - tmin = total; - mini = ii; - minj = jj; - } - } - - -/*.................compute branch lengths and print the results ........*/ - - - dio = djo = 0.0; - for(i=1; i<=nseqs; ++i) { - dio = dio + tmat[i][mini]; - djo = djo + tmat[i][minj]; - } - - dmin = tmat[mini][minj]; - dio = (dio - dmin) / fnseqs2; - djo = (djo - dmin) / fnseqs2; - bi = (dmin + dio - djo) * 0.5; - bj = dmin - bi; - bi = bi - av[mini]; - bj = bj - av[minj]; - - if( av[mini] > 0.0 ) - typei = 0; - else - typei = 1; - if( av[minj] > 0.0 ) - typej = 0; - else - typej = 1; - - - -/* - set negative branch lengths to zero. Also set any tiny positive - branch lengths to zero. -*/ - if( fabs(bi) < 0.0001) bi = 0.0; - if( fabs(bj) < 0.0001) bj = 0.0; - - - - - left_branch[nc] = bi; - right_branch[nc] = bj; - - for(i=1; i<=nseqs; i++) - tree_description[nc][i] = 0; - - if(typei == 0) { - for(i=nc-1; i>=1; i--) - if(tree_description[i][mini] == 1) { - for(j=1; j<=nseqs; j++) - if(tree_description[i][j] == 1) - tree_description[nc][j] = 1; - break; - } - } - else - tree_description[nc][mini] = 1; - - if(typej == 0) { - for(i=nc-1; i>=1; i--) - if(tree_description[i][minj] == 1) { - for(j=1; j<=nseqs; j++) - if(tree_description[i][j] == 1) - tree_description[nc][j] = 1; - break; - } - } - else - tree_description[nc][minj] = 1; - - -/* - Here is where the -0.00005 branch lengths come from for 3 or more - identical seqs. -*/ -/* if(dmin <= 0.0) dmin = 0.0001; */ - if(dmin <= 0.0) dmin = 0.000001; - av[mini] = dmin * 0.5; - - /*........................Re-initialisation................................*/ - - fnseqs = fnseqs - 1.0; - tkill[minj] = 1; - - for(j=1; j<=nseqs; ++j) - if( tkill[j] != 1 ) { - da = ( tmat[mini][j] + tmat[minj][j] ) * 0.5; - if( (mini - j) < 0 ) - tmat[mini][j] = da; - if( (mini - j) > 0) - tmat[j][mini] = da; - } - - for(j=1; j<=nseqs; ++j) - tmat[minj][j] = tmat[j][minj] = 0.0; - - - - } - /*end main cycle**/ - -/******************************Last Cycle (3 Seqs. left)********************/ - - nude = 1; - - - for(i=1; i<=nseqs; ++i) - if( tkill[i] != 1 ) { - l[nude] = i; - nude = nude + 1; - } - - b1 = (tmat[l[1]][l[2]] + tmat[l[1]][l[3]] - tmat[l[2]][l[3]]) * 0.5; - b2 = tmat[l[1]][l[2]] - b1; - b3 = tmat[l[1]][l[3]] - b1; - - branch[1] = b1 - av[l[1]]; - branch[2] = b2 - av[l[2]]; - branch[3] = b3 - av[l[3]]; - -/* Reset tiny negative and positive branch lengths to zero */ - if( fabs(branch[1]) < 0.0001) branch[1] = 0.0; - if( fabs(branch[2]) < 0.0001) branch[2] = 0.0; - if( fabs(branch[3]) < 0.0001) branch[3] = 0.0; - - left_branch[nseqs-2] = branch[1]; - left_branch[nseqs-1] = branch[2]; - left_branch[nseqs] = branch[3]; - - for(i=1; i<=nseqs; i++) - tree_description[nseqs-2][i] = 0; - - - - for(i=1; i<=3; ++i) { - if( av[l[i]] > 0.0) { - - for(k=nseqs-3; k>=1; k--) - if(tree_description[k][l[i]] == 1) { - for(j=1; j<=nseqs; j++) - if(tree_description[k][j] == 1) - tree_description[nseqs-2][j] = i; - break; - } - } - else { - - tree_description[nseqs-2][l[i]] = i; - } - if(i < 3) { - } - } -} - -void print_phylip_tree(char **tree_description, FILE *tree, int bootstrap) -{ - - fprintf(tree,"(\n"); - - two_way_split(tree_description, tree, nseqs-2,1,bootstrap); - fprintf(tree,":%7.5f,\n",left_branch[nseqs-2]); - two_way_split(tree_description, tree, nseqs-2,2,bootstrap); - fprintf(tree,":%7.5f,\n",left_branch[nseqs-1]); - two_way_split(tree_description, tree, nseqs-2,3,bootstrap); - - fprintf(tree,":%7.5f)",left_branch[nseqs]); - if (bootstrap) fprintf(tree,"TRICHOTOMY"); - fprintf(tree,";\n"); -} - - -void two_way_split -(char **tree_description, FILE *tree, int start_row, int flag, int bootstrap) -{ - int row, new_row, col, test_col=0; - int single_seq; - - if(start_row != nseqs-2) fprintf(tree,"(\n"); - - for(col=1; col<=nseqs; col++) { - if(tree_description[start_row][col] == flag) { - test_col = col; - break; - } - } - - single_seq = TRUE; - for(row=start_row-1; row>=1; row--) - if(tree_description[row][test_col] == 1) { - single_seq = FALSE; - new_row = row; - break; - } - - if(single_seq) { - tree_description[start_row][test_col] = 0; - fprintf(tree,"%s",names[test_col+0-1]); - } - else { - for(col=1; col<=nseqs; col++) { - if((tree_description[start_row][col]==1)&& - (tree_description[new_row][col]==1)) - tree_description[start_row][col] = 0; - } - two_way_split(tree_description, tree, new_row, (int)1, bootstrap); - } - - if(start_row == nseqs-2) { -/* if (bootstrap && (flag==1)) fprintf(tree,"[TRICHOTOMY]"); -*/ - return; - } - - fprintf(tree,":%7.5f,\n",left_branch[start_row]); - - for(col=1; col<=nseqs; col++) - if(tree_description[start_row][col] == flag) { - test_col = col; - break; - } - - single_seq = TRUE; - for(row=start_row-1; row>=1; row--) - if(tree_description[row][test_col] == 1) { - single_seq = FALSE; - new_row = row; - break; - } - - if(single_seq) { - tree_description[start_row][test_col] = 0; - fprintf(tree,"%s",names[test_col+0-1]); - } - else { - for(col=1; col<=nseqs; col++) { - if((tree_description[start_row][col]==1)&& - (tree_description[new_row][col]==1)) - tree_description[start_row][col] = 0; - } - two_way_split(tree_description, tree, new_row, (int)1, bootstrap); - } - - fprintf(tree,":%7.5f)\n",right_branch[start_row]); - - -} - - - - -/**************************************************************************** - * [ Improvement ideas in fast_nj_tree() ] by DDBJ & FUJITSU Limited. - * written by Tadashi Koike - * (takoike@genes.nig.ac.jp) - ******************* - * : Store the value of sum of the score to temporary array, - * and use again and again. - * - * In the main cycle, these are calculated again and again : - * diq = sum of tmat[n][ii] (n:1 to last_seq-first_seq+1), - * djq = sum of tmat[n][jj] (n:1 to last_seq-first_seq+1), - * dio = sum of tmat[n][mini] (n:1 to last_seq-first_seq+1), - * djq = sum of tmat[n][minj] (n:1 to last_seq-first_seq+1) - * // 'last_seq' and 'first_seq' are both constant values // - * and the result of above calculations is always same until - * a best pair of neighbour nodes is joined. - * - * So, we change the logic to calculate the sum[i] (=sum of tmat[n][i] - * (n:1 to last_seq-first_seq+1)) and store it to array, before - * beginning to find a best pair of neighbour nodes, and after that - * we use them again and again. - * - * tmat[i][j] - * 1 2 3 4 5 - * +---+---+---+---+---+ - * 1 | | | | | | - * +---+---+---+---+---+ - * 2 | | | | | | 1) calculate sum of tmat[n][i] - * +---+---+---+---+---+ (n: 1 to last_seq-first_seq+1) - * 3 | | | | | | 2) store that sum value to sum[i] - * +---+---+---+---+---+ - * 4 | | | | | | 3) use sum[i] during finding a best - * +---+---+---+---+---+ pair of neibour nodes. - * 5 | | | | | | - * +---+---+---+---+---+ - * | | | | | - * V V V V V Calculate sum , and store it to sum[i] - * +---+---+---+---+---+ - * sum[i] | | | | | | - * +---+---+---+---+---+ - * - * At this time, we thought that we use upper triangle of the matrix - * because tmat[i][j] is equal to tmat[j][i] and tmat[i][i] is equal - * to zero. Therefore, we prepared sum_rows[i] and sum_cols[i] instead - * of sum[i] for storing the sum value. - * - * tmat[i][j] - * 1 2 3 4 5 sum_cols[i] - * +---+---+---+---+---+ +---+ - * 1 | # | # | # | # | --> | | ... sum of tmat[1][2..5] - * + - +---+---+---+---+ +---+ - * 2 | # | # | # | --> | | ... sum of tmat[2][3..5] - * + - + - +---+---+---+ +---+ - * 3 | # | # | --> | | ... sum of tmat[3][4..5] - * + - + - + - +---+---+ +---+ - * 4 | # | --> | | ... sum of tmat[4][5] - * + - + - + - + - +---+ +---+ - * 5 | --> | | ... zero - * + - + - + - + - + - + +---+ - * | | | | | - * V V V V V Calculate sum , sotre to sum[i] - * +---+---+---+---+---+ - * sum_rows[i] | | | | | | - * +---+---+---+---+---+ - * | | | | | - * | | | | +----- sum of tmat[1..4][5] - * | | | +--------- sum of tmat[1..3][4] - * | | +------------- sum of tmat[1..2][3] - * | +----------------- sum of tmat[1][2] - * +--------------------- zero - * - * And we use (sum_rows[i] + sum_cols[i]) instead of sum[i]. - * - ******************* - * : We manage valid nodes with chain list, instead of - * tkill[i] flag array. - * - * In original logic, invalid(killed?) nodes after nodes-joining - * are managed with tkill[i] flag array (set to 1 when killed). - * By this method, it is conspicuous to try next node but skip it - * at the latter of finding a best pair of neighbor nodes. - * - * So, we thought that we managed valid nodes by using a chain list - * as below: - * - * 1) declare the list structure. - * struct { - * int n; // entry number of node. - * void *prev; // pointer to previous entry. - * void *next; // pointer to next entry. - * } - * 2) construct a valid node list. - * - * +-----+ +-----+ +-----+ +-----+ +-----+ - * NULL<-|prev |<---|prev |<---|prev |<---|prev |<- - - -|prev | - * | 0 | | 1 | | 2 | | 3 | | n | - * | next|--->| next|--->| next|--->| next|- - - ->| next|->NULL - * +-----+ +-----+ +-----+ +-----+ +-----+ - * - * 3) when finding a best pair of neighbor nodes, we use - * this chain list as loop counter. - * - * 4) If an entry was killed by node-joining, this chain list is - * modified to remove that entry. - * - * EX) remove the entry No 2. - * +-----+ +-----+ +-----+ +-----+ - * NULL<-|prev |<---|prev |<--------------|prev |<- - - -|prev | - * | 0 | | 1 | | 3 | | n | - * | next|--->| next|-------------->| next|- - - ->| next|->NULL - * +-----+ +-----+ +-----+ +-----+ - * +-----+ - * NULL<-|prev | - * | 2 | - * | next|->NULL - * +-----+ - * - * By this method, speed is up at the latter of finding a best pair of - * neighbor nodes. - * - ******************* - * : Cut the frequency of division. - * - * At comparison between 'total' and 'tmin' in the main cycle, total is - * divided by (2.0*fnseqs2) before comparison. If N nodes are available, - * that division happen (N*(N-1))/2 order. - * - * We thought that the comparison relation between tmin and total/(2.0*fnseqs2) - * is equal to the comparison relation between (tmin*2.0*fnseqs2) and total. - * Calculation of (tmin*2.0*fnseqs2) is only one time. so we stop dividing - * a total value and multiply tmin and (tmin*2.0*fnseqs2) instead. - * - ******************* - * : some transformation of the equation (to cut operations). - * - * We transform an equation of calculating 'total' in the main cycle. - * - */ - - -void fast_nj_tree(char **tree_description) -{ - register int i; - int l[4],nude,k; - int nc,mini,minj,j,ii,jj; - double fnseqs,fnseqs2=0,sumd; - double diq,djq,dij,dio,djo,da; - double tmin,total,dmin; - double bi,bj,b1,b2,b3,branch[4]; - int typei,typej; /* 0 = node; 1 = OTU */ - - /* IMPROVEMENT 1, STEP 0 : declare variables */ - double *sum_cols, *sum_rows, *join; - - /* IMPROVEMENT 2, STEP 0 : declare variables */ - int loop_limit; - typedef struct _ValidNodeID { - int n; - struct _ValidNodeID *prev; - struct _ValidNodeID *next; - } ValidNodeID; - ValidNodeID *tvalid, *lpi, *lpj, *lpii, *lpjj, *lp_prev, *lp_next; - - /* - * correspondence of the loop counter variables. - * i .. lpi->n, ii .. lpii->n - * j .. lpj->n, jj .. lpjj->n - */ - - fnseqs = (double)last_seq-first_seq+1; - -/*********************** First initialisation ***************************/ - - - if (fnseqs == 2) { - return; - } - - mini = minj = 0; - - left_branch = (double *) ckalloc( (nseqs+2) * sizeof (double) ); - right_branch = (double *) ckalloc( (nseqs+2) * sizeof (double) ); - tkill = (int *) ckalloc( (nseqs+1) * sizeof (int) ); - av = (double *) ckalloc( (nseqs+1) * sizeof (double) ); - - /* IMPROVEMENT 1, STEP 1 : Allocate memory */ - sum_cols = (double *) ckalloc( (nseqs+1) * sizeof (double) ); - sum_rows = (double *) ckalloc( (nseqs+1) * sizeof (double) ); - join = (double *) ckalloc( (nseqs+1) * sizeof (double) ); - - /* IMPROVEMENT 2, STEP 1 : Allocate memory */ - tvalid = (ValidNodeID *) ckalloc( (nseqs+1) * sizeof (ValidNodeID) ); - /* tvalid[0] is special entry in array. it points a header of valid entry list */ - tvalid[0].n = 0; - tvalid[0].prev = NULL; - tvalid[0].next = &tvalid[1]; - - /* IMPROVEMENT 2, STEP 2 : Construct and initialize the entry chain list */ - for(i=1, loop_limit = last_seq-first_seq+1, - lpi=&tvalid[1], lp_prev=&tvalid[0], lp_next=&tvalid[2] ; - i<=loop_limit ; - ++i, ++lpi, ++lp_prev, ++lp_next) - { - tmat[i][i] = av[i] = 0.0; - tkill[i] = 0; - lpi->n = i; - lpi->prev = lp_prev; - lpi->next = lp_next; - - /* IMPROVEMENT 1, STEP 2 : Initialize arrays */ - sum_cols[i] = sum_rows[i] = join[i] = 0.0; - } - tvalid[loop_limit].next = NULL; - - /* - * IMPROVEMENT 1, STEP 3 : Calculate the sum of score value that - * is sequence[i] to others. - */ - sumd = 0.0; - for (lpj=tvalid[0].next ; lpj!=NULL ; lpj = lpj->next) { - double tmp_sum = 0.0; - j = lpj->n; - /* calculate sum_rows[j] */ - for (lpi=tvalid[0].next ; lpi->n < j ; lpi = lpi->next) { - i = lpi->n; - tmp_sum += tmat[i][j]; - /* tmat[j][i] = tmat[i][j]; */ - } - sum_rows[j] = tmp_sum; - - tmp_sum = 0.0; - /* Set lpi to that lpi->n is greater than j */ - if ((lpi != NULL) && (lpi->n == j)) { - lpi = lpi->next; - } - /* calculate sum_cols[j] */ - for( ; lpi!=NULL ; lpi = lpi->next) { - i = lpi->n; - tmp_sum += tmat[j][i]; - /* tmat[i][j] = tmat[j][i]; */ - } - sum_cols[j] = tmp_sum; - } - -/*********************** Enter The Main Cycle ***************************/ - - for(nc=1, loop_limit = (last_seq-first_seq+1-3); nc<=loop_limit; ++nc) { - - sumd = 0.0; - /* IMPROVEMENT 1, STEP 4 : use sum value */ - for(lpj=tvalid[0].next ; lpj!=NULL ; lpj = lpj->next) { - sumd += sum_cols[lpj->n]; - } - - /* IMPROVEMENT 3, STEP 0 : multiply tmin and 2*fnseqs2 */ - fnseqs2 = fnseqs - 2.0; /* Set fnseqs2 at this point. */ - tmin = 99999.0 * 2.0 * fnseqs2; - - -/*.................compute SMATij values and find the smallest one ........*/ - - mini = minj = 0; - - /* jj must starts at least 2 */ - if ((tvalid[0].next != NULL) && (tvalid[0].next->n == 1)) { - lpjj = tvalid[0].next->next; - } else { - lpjj = tvalid[0].next; - } - - for( ; lpjj != NULL; lpjj = lpjj->next) { - jj = lpjj->n; - for(lpii=tvalid[0].next ; lpii->n < jj ; lpii = lpii->next) { - ii = lpii->n; - diq = djq = 0.0; - - /* IMPROVEMENT 1, STEP 4 : use sum value */ - diq = sum_cols[ii] + sum_rows[ii]; - djq = sum_cols[jj] + sum_rows[jj]; - /* - * always ii < jj in this point. Use upper - * triangle of score matrix. - */ - dij = tmat[ii][jj]; - - /* - * IMPROVEMENT 3, STEP 1 : fnseqs2 is - * already calculated. - */ - /* fnseqs2 = fnseqs - 2.0 */ - - /* IMPROVEMENT 4 : transform the equation */ - /*-------------------------------------------------------------------* - * OPTIMIZE of expression 'total = d2r + fnseqs2*dij + dr*2.0' * - * total = d2r + fnseq2*dij + 2.0*dr * - * = d2r + fnseq2*dij + 2(sumd - dij - d2r) * - * = d2r + fnseq2*dij + 2*sumd - 2*dij - 2*d2r * - * = fnseq2*dij + 2*sumd - 2*dij - 2*d2r + d2r * - * = fnseq2*dij + 2*sumd - 2*dij - d2r * - * = fnseq2*dij + 2*sumd - 2*dij - (diq + djq - 2*dij) * - * = fnseq2*dij + 2*sumd - 2*dij - diq - djq + 2*dij * - * = fnseq2*dij + 2*sumd - 2*dij + 2*dij - diq - djq * - * = fnseq2*dij + 2*sumd - diq - djq * - *-------------------------------------------------------------------*/ - total = fnseqs2*dij + 2.0*sumd - diq - djq; - - /* - * IMPROVEMENT 3, STEP 2 : abbrevlate - * the division on comparison between - * total and tmin. - */ - /* total = total / (2.0*fnseqs2); */ - - if(total < tmin) { - tmin = total; - mini = ii; - minj = jj; - } - } - } - - /* MEMO: always ii < jj in avobe loop, so mini < minj */ - -/*.................compute branch lengths and print the results ........*/ - - - dio = djo = 0.0; - - /* IMPROVEMENT 1, STEP 4 : use sum value */ - dio = sum_cols[mini] + sum_rows[mini]; - djo = sum_cols[minj] + sum_rows[minj]; - - dmin = tmat[mini][minj]; - dio = (dio - dmin) / fnseqs2; - djo = (djo - dmin) / fnseqs2; - bi = (dmin + dio - djo) * 0.5; - bj = dmin - bi; - bi = bi - av[mini]; - bj = bj - av[minj]; - - if( av[mini] > 0.0 ) - typei = 0; - else - typei = 1; - if( av[minj] > 0.0 ) - typej = 0; - else - typej = 1; - - -/* - set negative branch lengths to zero. Also set any tiny positive - branch lengths to zero. -*/ if( fabs(bi) < 0.0001) bi = 0.0; - if( fabs(bj) < 0.0001) bj = 0.0; - - - left_branch[nc] = bi; - right_branch[nc] = bj; - - for(i=1; i<=last_seq-first_seq+1; i++) - tree_description[nc][i] = 0; - - if(typei == 0) { - for(i=nc-1; i>=1; i--) - if(tree_description[i][mini] == 1) { - for(j=1; j<=last_seq-first_seq+1; j++) - if(tree_description[i][j] == 1) - tree_description[nc][j] = 1; - break; - } - } - else - tree_description[nc][mini] = 1; - - if(typej == 0) { - for(i=nc-1; i>=1; i--) - if(tree_description[i][minj] == 1) { - for(j=1; j<=last_seq-first_seq+1; j++) - if(tree_description[i][j] == 1) - tree_description[nc][j] = 1; - break; - } - } - else - tree_description[nc][minj] = 1; - - -/* - Here is where the -0.00005 branch lengths come from for 3 or more - identical seqs. -*/ -/* if(dmin <= 0.0) dmin = 0.0001; */ - if(dmin <= 0.0) dmin = 0.000001; - av[mini] = dmin * 0.5; - -/*........................Re-initialisation................................*/ - - fnseqs = fnseqs - 1.0; - tkill[minj] = 1; - - /* IMPROVEMENT 2, STEP 3 : Remove tvalid[minj] from chain list. */ - /* [ Before ] - * +---------+ +---------+ +---------+ - * |prev |<-------|prev |<-------|prev |<--- - * | n | | n(=minj)| | n | - * | next|------->| next|------->| next|---- - * +---------+ +---------+ +---------+ - * - * [ After ] - * +---------+ +---------+ - * |prev |<--------------------------|prev |<--- - * | n | | n | - * | next|-------------------------->| next|---- - * +---------+ +---------+ - * +---------+ - * NULL---|prev | - * | n(=minj)| - * | next|---NULL - * +---------+ - */ - (tvalid[minj].prev)->next = tvalid[minj].next; - if (tvalid[minj].next != NULL) { - (tvalid[minj].next)->prev = tvalid[minj].prev; - } - tvalid[minj].prev = tvalid[minj].next = NULL; - - /* IMPROVEMENT 1, STEP 5 : re-calculate sum values. */ - for(lpj=tvalid[0].next ; lpj != NULL ; lpj = lpj->next) { - double tmp_di = 0.0; - double tmp_dj = 0.0; - j = lpj->n; - - /* - * subtrace a score value related with 'minj' from - * sum arrays . - */ - if (j < minj) { - tmp_dj = tmat[j][minj]; - sum_cols[j] -= tmp_dj; - } else if (j > minj) { - tmp_dj = tmat[minj][j]; - sum_rows[j] -= tmp_dj; - } /* nothing to do when j is equal to minj. */ - - - /* - * subtrace a score value related with 'mini' from - * sum arrays . - */ - if (j < mini) { - tmp_di = tmat[j][mini]; - sum_cols[j] -= tmp_di; - } else if (j > mini) { - tmp_di = tmat[mini][j]; - sum_rows[j] -= tmp_di; - } /* nothing to do when j is equal to mini. */ - - /* - * calculate a score value of the new inner node. - * then, store it temporary to join[] array. - */ - join[j] = (tmp_dj + tmp_di) * 0.5; - } - - /* - * 1) - * Set the score values (stored in join[]) into the matrix, - * row/column position is 'mini'. - * 2) - * Add a score value of the new inner node to sum arrays. - */ - for(lpj=tvalid[0].next ; lpj != NULL; lpj = lpj->next) { - j = lpj->n; - if (j < mini) { - tmat[j][mini] = join[j]; - sum_cols[j] += join[j]; - } else if (j > mini) { - tmat[mini][j] = join[j]; - sum_rows[j] += join[j]; - } /* nothing to do when j is equal to mini. */ - } - - /* Re-calculate sum_rows[mini],sum_cols[mini]. */ - sum_cols[mini] = sum_rows[mini] = 0.0; - - /* calculate sum_rows[mini] */ - da = 0.0; - for(lpj=tvalid[0].next ; lpj->n < mini ; lpj = lpj->next) { - da += join[lpj->n]; - } - sum_rows[mini] = da; - - /* skip if 'lpj->n' is equal to 'mini' */ - if ((lpj != NULL) && (lpj->n == mini)) { - lpj = lpj->next; - } - - /* calculate sum_cols[mini] */ - da = 0.0; - for( ; lpj != NULL; lpj = lpj->next) { - da += join[lpj->n]; - } - sum_cols[mini] = da; - - /* - * Clean up sum_rows[minj], sum_cols[minj] and score matrix - * related with 'minj'. - */ - sum_cols[minj] = sum_rows[minj] = 0.0; - for(j=1; j<=last_seq-first_seq+1; ++j) - tmat[minj][j] = tmat[j][minj] = join[j] = 0.0; - - -/****/ } /*end main cycle**/ - -/******************************Last Cycle (3 Seqs. left)********************/ - - nude = 1; - - for(lpi=tvalid[0].next; lpi != NULL; lpi = lpi->next) { - l[nude] = lpi->n; - ++nude; - } - - b1 = (tmat[l[1]][l[2]] + tmat[l[1]][l[3]] - tmat[l[2]][l[3]]) * 0.5; - b2 = tmat[l[1]][l[2]] - b1; - b3 = tmat[l[1]][l[3]] - b1; - - branch[1] = b1 - av[l[1]]; - branch[2] = b2 - av[l[2]]; - branch[3] = b3 - av[l[3]]; - -/* Reset tiny negative and positive branch lengths to zero */ - if( fabs(branch[1]) < 0.0001) branch[1] = 0.0; - if( fabs(branch[2]) < 0.0001) branch[2] = 0.0; - if( fabs(branch[3]) < 0.0001) branch[3] = 0.0; - - left_branch[last_seq-first_seq+1-2] = branch[1]; - left_branch[last_seq-first_seq+1-1] = branch[2]; - left_branch[last_seq-first_seq+1] = branch[3]; - - for(i=1; i<=last_seq-first_seq+1; i++) - tree_description[last_seq-first_seq+1-2][i] = 0; - - - for(i=1; i<=3; ++i) { - if( av[l[i]] > 0.0) { - - for(k=last_seq-first_seq+1-3; k>=1; k--) - if(tree_description[k][l[i]] == 1) { - for(j=1; j<=last_seq-first_seq+1; j++) - if(tree_description[k][j] == 1) - tree_description[last_seq-first_seq+1-2][j] = i; - break; - } - } - else { - tree_description[last_seq-first_seq+1-2][l[i]] = i; - } - if(i < 3) {; - } - } - ckfree(sum_cols); - ckfree(sum_rows); - ckfree(join); - ckfree(tvalid); -} -////////////////////////////////////////////////////////////////////////////// -// -// UPGMA_aln -////////////////////////////////////////////////////////////////////////////// - -Alignment * upgma_merge_aln_rows (Alignment *A, int *ns, int **ls,int N, int**mat,int *used, int *n, Constraint_list *CL); -int upgma_pair_wise (Alignment *A, int *ls0, int ns0, int *ls2, int ns2, Constraint_list *CL); - - -Alignment * upgma_tree_aln ( Alignment*A, int nseq, Constraint_list *CL) -{ - int a, b,n, *used; - static int **mat; - int **ls; - int *ns; - nseq=(CL->S)->nseq; - mat=declare_int (nseq, nseq); - ls=declare_int (nseq,nseq); - ns=vcalloc (nseq,sizeof (int)); - - for (a=0; a1) - { - upgma_merge_aln_rows (A,ns, ls,nseq, mat, used, &n,CL); - } - print_aln (A); - HERE ("finished"); - free_int ( mat, -1); - free_int (ls, -1); - vfree (ns); - return A; -} - -Alignment * upgma_merge_aln_rows (Alignment *A, int *ns, int **ls,int N, int**mat,int *used, int *n, Constraint_list *CL) -{ - - int a, b, w, best_a, best_b, set; - float best_s; - - for (set=0,a=0; abest_s) - { - best_s=w; - best_a=a; - best_b=b; - set=1; - } - } - } - used[best_b]=1; - - //merge a and b - mat[best_a][best_b]=upgma_pair_wise (A, ls[best_a], ns[best_a], ls[best_b], ns[best_b], CL); - for (a=0; anseq; a++) - { - if (a!=best_a && !used[a]) - mat[best_a][a]=mat[a][best_a]=upgma_pair_wise (A, ls[best_a], ns[best_a], ls[a], ns[a], CL); - } - - HERE ("DONE"); - - n[0]--; - return A; -} -int upgma_pair_wise (Alignment *A, int *ls0, int ns0, int *ls1, int ns1, Constraint_list *CL) -{ - static int **ls; - static int *ns; - static int *fl; - int a, b, n; - - if ( !ls ) - { - ls=vcalloc (2, sizeof (int*)); - ns=vcalloc (2, sizeof (int)); - fl=vcalloc ((CL->S)->nseq, sizeof (int)); - } - ls[0]=ls0; - ls[1]=ls1; - ns[0]=ns0; ns[1]=ns1; - - fprintf ( stderr, "\n"); - for (a=0; anseq; a++) - { - NL[a]=new_declare_tree_node (); - upgma_node_heap (NL[a]); - sprintf (NL[a]->name, "%s", A->name[a]); - NL[a]->isseq=1; - NL[a]->leaf=1; - } - - used=vcalloc ( A->nseq, sizeof (int)); - n=A->nseq; - while (n>1) - { - T=upgma_merge (mat, NL,used, &n, A->nseq); - } - - vfree (used); - vfclose (print_tree (T, "newick", vfopen (fname, "w"))); - upgma_node_heap (NULL); - vfree (NL); - - return read_tree (fname,&tot_node,A->nseq, A->name); -} -NT_node upgma_merge (int **mat, NT_node *NL, int *used, int *n, int N) -{ - NT_node P, LC, RC; - int a, b, w, best_a, best_b, set; - float best_s; - P=new_declare_tree_node(); - upgma_node_heap (P); - - for (set=0,a=0; adist=RC->dist=best_s; - LC->parent=RC->parent=P; - P->left=LC; - P->right=RC; - NL[best_a]=P; - n[0]--; - return P; - -} - - -////////////////////////////////////////////////////////////////////////////// -// -// SPLIT UPGMA -/////////////////////////////////////////////////////////////////////////////// -int upgma_node_heap (NT_node X); -int upgma_node_heap (NT_node X) -{ - static int n; - static NT_node *h; - if ( X==NULL) - { - int a,r; - if (n==0) return 0; - for (a=0; anseq; a++) - { - - NL[a]=new_declare_tree_node (); - NL[a]->lseq2=vcalloc (A->nseq+1, sizeof (int)); - NL[a]->lseq2[a]=1; - sprintf (NL[a]->name, "%s", A->name[a]); - NL[a]->isseq=1; - NL[a]->leaf=1; - NL[a]->dist=1; - upgma_node_heap (NL[a]); - } - used=vcalloc ( A->nseq, sizeof (int)); - n=A->nseq; - while (n>1) - { - T=split_upgma_merge (A,S, NL,used, &n, A->nseq); - } - vfree (used); - fprintf ( stdout, "\n"); - vfclose (print_tree (T, "newick", vfopen (fname, "w"))); - upgma_node_heap (NULL); - return T; -} -NT_node split_upgma_merge (Alignment *A, Split **S, NT_node *NL, int *used, int *n, int N) -{ - NT_node P, LC, RC; - int a, b, w, best_a, best_b, set; - float best_s; - static int **mat; - - if (!mat) - { - mat=declare_int (N, N); - for (a=0; alseq2=vcalloc (N, sizeof (int)); - for (set=0,a=0; adist=1-best_s; - LC->parent=RC->parent=P; - P->left=LC; - P->right=RC; - P->bootstrap=best_s*100; - used[best_b]=1; - - n[0]--; - - for (a=0; anseq; a++) - { - P->lseq2[a]=(LC->lseq2[a] || RC->lseq2[a])?1:0; - } - - for (a=0; anseq+1, sizeof (char)); - } - - - - for ( a=0; anseq; a++) - split[a]=((L->lseq2[a] || R->lseq2[a])?1:0)+'0'; - - n=0; - while (S[n]) - { - float score; - if ( strm (S[n]->split,split)) - { - return score=100-S[n]->score; - } - n++; - } - return 100; -} -/*********************************COPYRIGHT NOTICE**********************************/ -/*© Centro de Regulacio Genomica */ -/*and */ -/*Cedric Notredame */ -/*Tue Oct 27 10:12:26 WEST 2009. */ -/*All rights reserved.*/ -/*This file is part of T-COFFEE.*/ -/**/ -/* T-COFFEE is free software; you can redistribute it and/or modify*/ -/* it under the terms of the GNU General Public License as published by*/ -/* the Free Software Foundation; either version 2 of the License, or*/ -/* (at your option) any later version.*/ -/**/ -/* T-COFFEE is distributed in the hope that it will be useful,*/ -/* but WITHOUT ANY WARRANTY; without even the implied warranty of*/ -/* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the*/ -/* GNU General Public License for more details.*/ -/**/ -/* You should have received a copy of the GNU General Public License*/ -/* along with Foobar; if not, write to the Free Software*/ -/* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA*/ -/*............................................... |*/ -/* If you need some more information*/ -/* cedric.notredame@europe.com*/ -/*............................................... |*/ -/**/ -/**/ -/* */ -/*********************************COPYRIGHT NOTICE**********************************/ diff --git a/binaries/src/tcoffee/tclinkdb.txt b/binaries/src/tcoffee/tclinkdb.txt deleted file mode 100644 index b8a7f55..0000000 --- a/binaries/src/tcoffee/tclinkdb.txt +++ /dev/null @@ -1,290 +0,0 @@ -PG t_coffee 4_TCOFFEE TCOFFEE -PG t_coffee type sequence_multiple_aligner -PG t_coffee ADDRESS http://www.tcoffee.org -PG t_coffee language C -PG t_coffee language2 C -PG t_coffee source http://www.tcoffee.org/Packages/T-COFFEE_distribution.tar.gz -PG t_coffee update_action always -PG t_coffee mode tcoffee,mcoffee,rcoffee,expresso,3dcoffee - - -PG clustalw2 4_TCOFFEE CLUSTALW2 -PG clustalw2 type sequence_multiple_aligner -PG clustalw2 ADDRESS http://www.clustal.org -PG clustalw2 language C++ -PG clustalw2 language2 CXX -PG clustalw2 source http://www.clustal.org/download/2.0.10/clustalw-2.0.10-src.tar.gz -PG clustalw2 mode mcoffee,rcoffee - -PG clustalw 4_TCOFFEE CLUSTALW -PG clustalw type sequence_multiple_aligner -PG clustalw ADDRESS http://www.clustal.org -PG clustalw language C -PG clustalw language2 C -PG clustalw source http://www.clustal.org/download/1.X/ftp-igbmc.u-strasbg.fr/pub/ClustalW/clustalw1.82.UNIX.tar.gz -PG clustalw mode mcoffee,rcoffee - -PG dialign-t 4_TCOFFEE DIALIGNT -PG dialign-t type sequence_multiple_aligner -PG dialign-t ADDRESS http://dialign-tx.gobics.de/ -PG dialign-t DIR /usr/share/dialign-tx/ -PG dialign-t language C -PG dialign-t language2 C -PG dialign-t source http://dialign-tx.gobics.de/DIALIGN-TX_1.0.1.tar.gz -PG dialign-t mode mcoffee -PG dialign-t binary dialign-t - -PG dialign-tx 4_TCOFFEE DIALIGNTX -PG dialign-tx type sequence_multiple_aligner -PG dialign-tx ADDRESS http://dialign-tx.gobics.de/ -PG dialign-tx DIR /usr/share/dialign-tx/ -PG dialign-tx language C -PG dialign-tx language2 C -PG dialign-tx source http://dialign-tx.gobics.de/DIALIGN-TX_1.0.1.tar.gz -PG dialign-tx mode mcoffee -PG dialign-tx binary dialign-tx - -PG poa 4_TCOFFEE POA -PG poa type sequence_multiple_aligner -PG poa ADDRESS http://www.bioinformatics.ucla.edu/poa/ -PG poa language C -PG poa language2 C -PG poa source http://downloads.sourceforge.net/poamsa/poaV2.tar.gz -PG poa DIR /usr/share/ -PG poa FILE1 blosum80.mat -PG poa mode mcoffee -PG poa binary poa - -PG probcons 4_TCOFFEE PROBCONS -PG probcons type sequence_multiple_aligner -PG probcons ADDRESS http://probcons.stanford.edu/ -PG probcons language2 CXX -PG probcons language C++ -PG probcons source http://probcons.stanford.edu/probcons_v1_12.tar.gz -PG probcons mode mcoffee -PG probcons binary probcons - -PG mafft 4_TCOFFEE MAFFT -PG mafft type sequence_multiple_aligner -PG mafft ADDRESS http://align.bmr.kyushu-u.ac.jp/mafft/online/server/ -PG mafft language C -PG mafft language C -PG mafft source http://align.bmr.kyushu-u.ac.jp/mafft/software/mafft-6.603-with-extensions-src.tgz -PG mafft windows http://align.bmr.kyushu-u.ac.jp/mafft/software/mafft-6.603-mingw.tar -PG mafft mode mcoffee,rcoffee -PG mafft binary mafft.tar.gz - -PG muscle 4_TCOFFEE MUSCLE -PG muscle type sequence_multiple_aligner -PG muscle ADDRESS http://www.drive5.com/muscle/ -PG muscle language C++ -PG muscle language2 GPP -PG muscle source http://www.drive5.com/muscle/downloads3.6/muscle3.6_src.tar.gz -PG muscle windows http://www.drive5.com/muscle/downloads3.6/muscle3.6_win32.zip -PG muscle linux http://www.drive5.com/muscle/downloads3.6/muscle3.6_linux_ia32.tar.gz -PG muscle mode mcoffee,rcoffee - - -PG pcma 4_TCOFFEE PCMA -PG pcma type sequence_multiple_aligner -PG pcma ADDRESS ftp://iole.swmed.edu/pub/PCMA/ -PG pcma language C -PG pcma language2 C -PG pcma source ftp://iole.swmed.edu/pub/PCMA/pcma.tar.gz -PG pcma mode mcoffee - -PG kalign 4_TCOFFEE KALIGN -PG kalign type sequence_multiple_aligner -PG kalign ADDRESS http://msa.cgb.ki.se -PG kalign language C -PG kalign language2 C -PG kalign source http://msa.cgb.ki.se/downloads/kalign/current.tar.gz -PG kalign mode mcoffee - - -PG amap 4_TCOFFEE AMAP -PG amap type sequence_multiple_aligner -PG amap ADDRESS http://bio.math.berkeley.edu/amap/ -PG amap language C++ -PG amap language2 CXX -PG amap source http://baboon.math.berkeley.edu/amap/download/amap.2.2.tar.gz -PG amap mode mcoffee - - -PG proda 4_TCOFFEE PRODA -PG proda type sequence_multiple_aligner -PG proda ADDRESS http://proda.stanford.edu -PG proda language C++ -PG proda language2 CXX -PG proda source http://proda.stanford.edu/proda_1_0.tar.gz -PG proda mode mcoffee - -PG prank 4_TCOFFEE PRANK -PG prank type sequence_multiple_aligner -PG prank ADDRESS http://www.ebi.ac.uk/goldman-srv/prank/ -PG prank language C++ -PG prank language2 CXX -PG prank source http://www.ebi.ac.uk/goldman-srv/prank/src/old/prank.src.081202.tgz -PG prank mode mcoffee - -PG sap 4_TCOFFEE SAP -PG sap type structure_pairwise_aligner -PG sap ADDRESS http://mathbio.nimr.mrc.ac.uk/wiki/Software -PG sap language C -PG sap language2 C -PG sap source http://www.tcoffee.org/Packages/sap_distribution_TCC_0.6.tar.gz -PG sap mode expresso,3dcoffee - -PG TMalign 4_TCOFFEE TMALIGN -PG TMalign type structure_pairwise_aligner -PG TMalign ADDRESS http://zhang.bioinformatics.ku.edu/TM-align/TMalign.f -PG TMalign language Fortran -PG TMalign language2 Fortran -PG TMalign source http://zhang.bioinformatics.ku.edu/TM-align/TMalign.f -PG TMalign linux http://zhang.bioinformatics.ku.edu/TM-align/TMalign_32.gz -PG TMalign mode expresso,3dcoffee - - -PG mustang 4_TCOFFEE MUSTANG -PG mustang type structure_pairwise_aligner -PG mustang ADDRESS http://www.cs.mu.oz.au/~arun/mustang -PG mustang language C++ -PG mustang language2 CXX -PG mustang source http://www.cs.mu.oz.au/~arun/mustang/mustang_v.3.tgz -PG mustang mode expresso,3dcoffee - -PG lsqman 4_TCOFFEE LSQMAN -PG lsqman type structure_pairwise_aligner -PG lsqman ADDRESS empty -PG lsqman language empty -PG lsqman language2 empty -PG lsqman source empty -PG lsqman update_action never -PG lsqman mode expresso,3dcoffee - -PG align_pdb 4_TCOFFEE ALIGN_PDB -PG align_pdb type structure_pairwise_aligner -PG align_pdb ADDRESS empty -PG align_pdb language empty -PG align_pdb language2 empty -PG align_pdb source empty -PG align_pdb update_action never -PG align_pdb mode expresso,3dcoffee - - -PG fugueali 4_TCOFFEE FUGUE -PG fugueali type structure_pairwise_aligner -PG fugueali ADDRESS http://www-cryst.bioc.cam.ac.uk/fugue/download.html -PG fugueali language empty -PG fugueali language2 empty -PG fugueali source empty -PG fugueali update_action never -PG fugueali mode expresso,3dcoffee - -PG dalilite.pl 4_TCOFFEE DALILITEc -PG dalilite.pl type structure_pairwise_aligner -PG dalilite.pl ADDRESS built_in -PG dalilite.pl ADDRESS2 http://www.ebi.ac.uk/Tools/webservices/services/dalilite -PG dalilite.pl language Perl -PG dalilite.pl language2 Perl -PG dalilite.pl source empty -PG dalilite.pl update_action never -PG dalilite.pl mode expresso,3dcoffee - -PG probconsRNA 4_TCOFFEE PROBCONSRNA -PG probconsRNA type RNA_multiple_aligner -PG probconsRNA ADDRESS http://probcons.stanford.edu/ -PG probconsRNA language C++ -PG probconsRNA language2 CXX -PG probconsRNA source http://probcons.stanford.edu/probconsRNA.tar.gz -PG probconsRNA mode mcoffee,rcoffee - -PG sfold 4_TCOFFEE CONSAN -PG sfold type RNA_pairwise_aligner -PG sfold ADDRESS http://selab.janelia.org/software/consan/ -PG sfold language empty -PG sfold language2 empty -PG sfold source empty -PG sfold update_action never -PG sfold mode rcoffee - -PG RNAplfold 4_TCOFFEE RNAPLFOLD -PG RNAplfold type RNA_secondarystructure_predictor -PG RNAplfold ADDRESS http://www.tbi.univie.ac.at/~ivo/RNA/ -PG RNAplfold language C -PG RNAplfold language2 C -PG RNAplfold source http://www.tbi.univie.ac.at/~ivo/RNA/ViennaRNA-1.7.2.tar.gz -PG RNAplfold mode rcoffee - -PG hmmtop 4_TCOFFEE HMMTOP -PG hmmtop type protein_secondarystructure_predictor -PG hmmtop ADDRESS www.enzim.hu/hmmtop/ -PG hmmtop language C -PG hmmtop language2 C -PG hmmtop source empty -PG hmmtop update_action never -PG hmmtop mode tcoffee - -PG gorIV 4_TCOFFEE GOR4 -PG gorIV type protein_secondarystructure_predictor -PG gorIV ADDRESS http://mig.jouy.inra.fr/logiciels/gorIV/ -PG gorIV language C -PG gorIV language2 C -PG gorIV source http://mig.jouy.inra.fr/logiciels/gorIV/GOR_IV.tar.gz -PG gorIV update_action never -PG gorIV mode tcoffee - -PG wublast.pl 4_TCOFFEE EBIWUBLASTc -PG wublast.pl type protein_homology_predictor -PG wublast.pl ADDRESS built_in -PG wublast.pl ADDRESS2 http://www.ebi.ac.uk/Tools/webservices/services/wublast -PG wublast.pl language Perl -PG wublast.pl language2 Perl -PG wublast.pl source empty -PG wublast.pl update_action never -PG wublast.pl mode psicoffee,expresso,3dcoffee - -PG blastpgp.pl 4_TCOFFEE EBIBLASTPGPc -PG blastpgp.pl type protein_homology_predictor -PG blastpgp.pl ADDRESS built_in -PG blastpgp.pl ADDRESS2 http://www.ebi.ac.uk/Tools/webservices/services/blastpgp -PG blastpgp.pl language Perl -PG blastpgp.pl language2 Perl -PG blastpgp.pl source empty -PG blastpgp.pl update_action never -PG blastpgp.pl mode psicoffee,expresso,3dcoffee - - -PG blastcl3 4_TCOFFEE NCBIWEBBLAST -PG blastcl3 type protein_homology_predictor -PG blastcl3 ADDRESS ftp://ftp.ncbi.nih.gov/blast/executables/LATEST -PG blastcl3 language C -PG blastcl3 language2 C -PG blastcl3 source empty -PG blastcl3 update_action never -PG blastcl3 mode psicoffee,expresso,3dcoffee - - -PG blastpgp 4_TCOFFEE NCBIBLAST -PG blastpgp type protein_homology_predictor -PG blastpgp ADDRESS ftp://ftp.ncbi.nih.gov/blast/executables/LATEST -PG blastpgp language C -PG blastpgp language2 C -PG blastpgp source empty -PG blastpgp update_action never -PG blastpgp mode psicoffee,expresso,3dcoffee - -PG SOAP::Lite 4_TCOFFEE SOAPLITE -PG SOAP::Lite type library -PG SOAP::Lite ADDRESS http://cpansearch.perl.org/src/MKUTTER/SOAP-Lite-0.710.08/Makefile.PL -PG SOAP::Lite language Perl -PG SOAP::Lite language2 Perl -PG SOAP::Lite source empty -PG SOAP::Lite mode psicoffee,expresso,3dcoffee - - -MODE tcoffee name tcoffee -MODE rcoffee name rcoffee -MODE 3dcoffee name 3dcoffee -MODE mcoffee name mcoffee -MODE expresso name expresso